summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--fs/xfs/xfs_inode.c224
1 files changed, 84 insertions, 140 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b210224..a59eea0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -20,7 +20,6 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
-#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
@@ -61,6 +60,20 @@ STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
+/*
+ * helper function to extract extent size hint from inode
+ */
+xfs_extlen_t
+xfs_get_extsz_hint(
+ struct xfs_inode *ip)
+{
+ if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize)
+ return ip->i_d.di_extsize;
+ if (XFS_IS_REALTIME_INODE(ip))
+ return ip->i_mount->m_sb.sb_rextsize;
+ return 0;
+}
+
#ifdef DEBUG
/*
* Make sure that the extents in the given memory buffer
@@ -137,6 +150,7 @@ xfs_imap_to_bp(
int ni;
xfs_buf_t *bp;
+ buf_flags |= XBF_UNMAPPED;
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
(int)imap->im_len, buf_flags, &bp);
if (error) {
@@ -226,7 +240,7 @@ xfs_inotobp(
if (error)
return error;
- error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags);
+ error = xfs_imap_to_bp(mp, tp, &imap, &bp, 0, imap_flags);
if (error)
return error;
@@ -782,8 +796,7 @@ xfs_iread(
/*
* Get pointers to the on-disk inode and the buffer containing it.
*/
- error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp,
- XBF_LOCK, iget_flags);
+ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, 0, iget_flags);
if (error)
return error;
dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
@@ -1342,7 +1355,7 @@ xfs_iunlink(
* Here we put the head pointer into our next pointer,
* and then we fall through to point the head at us.
*/
- error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
+ error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0);
if (error)
return error;
@@ -1423,7 +1436,7 @@ xfs_iunlink_remove(
* of dealing with the buffer when there is no need to
* change it.
*/
- error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
+ error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0);
if (error) {
xfs_warn(mp, "%s: xfs_itobp() returned error %d.",
__func__, error);
@@ -1484,7 +1497,7 @@ xfs_iunlink_remove(
* Now last_ibp points to the buffer previous to us on
* the unlinked list. Pull us from the list.
*/
- error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
+ error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0);
if (error) {
xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.",
__func__, error);
@@ -1566,8 +1579,7 @@ xfs_ifree_cluster(
* to mark all the active inodes on the buffer stale.
*/
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
- mp->m_bsize * blks_per_cluster,
- XBF_LOCK);
+ mp->m_bsize * blks_per_cluster, 0);
if (!bp)
return ENOMEM;
@@ -1656,14 +1668,13 @@ retry:
iip = ip->i_itemp;
if (!iip || xfs_inode_clean(ip)) {
ASSERT(ip != free_ip);
- ip->i_update_core = 0;
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
continue;
}
- iip->ili_last_fields = iip->ili_format.ilf_fields;
- iip->ili_format.ilf_fields = 0;
+ iip->ili_last_fields = iip->ili_fields;
+ iip->ili_fields = 0;
iip->ili_logged = 1;
xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
&iip->ili_item.li_lsn);
@@ -1738,7 +1749,7 @@ xfs_ifree(
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XBF_LOCK);
+ error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0);
if (error)
return error;
@@ -2177,7 +2188,7 @@ xfs_iflush_fork(
mp = ip->i_mount;
switch (XFS_IFORK_FORMAT(ip, whichfork)) {
case XFS_DINODE_FMT_LOCAL:
- if ((iip->ili_format.ilf_fields & dataflag[whichfork]) &&
+ if ((iip->ili_fields & dataflag[whichfork]) &&
(ifp->if_bytes > 0)) {
ASSERT(ifp->if_u1.if_data != NULL);
ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
@@ -2187,8 +2198,8 @@ xfs_iflush_fork(
case XFS_DINODE_FMT_EXTENTS:
ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
- !(iip->ili_format.ilf_fields & extflag[whichfork]));
- if ((iip->ili_format.ilf_fields & extflag[whichfork]) &&
+ !(iip->ili_fields & extflag[whichfork]));
+ if ((iip->ili_fields & extflag[whichfork]) &&
(ifp->if_bytes > 0)) {
ASSERT(xfs_iext_get_ext(ifp, 0));
ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
@@ -2198,7 +2209,7 @@ xfs_iflush_fork(
break;
case XFS_DINODE_FMT_BTREE:
- if ((iip->ili_format.ilf_fields & brootflag[whichfork]) &&
+ if ((iip->ili_fields & brootflag[whichfork]) &&
(ifp->if_broot_bytes > 0)) {
ASSERT(ifp->if_broot != NULL);
ASSERT(ifp->if_broot_bytes <=
@@ -2211,14 +2222,14 @@ xfs_iflush_fork(
break;
case XFS_DINODE_FMT_DEV:
- if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
+ if (iip->ili_fields & XFS_ILOG_DEV) {
ASSERT(whichfork == XFS_DATA_FORK);
xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
}
break;
case XFS_DINODE_FMT_UUID:
- if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
+ if (iip->ili_fields & XFS_ILOG_UUID) {
ASSERT(whichfork == XFS_DATA_FORK);
memcpy(XFS_DFORK_DPTR(dip),
&ip->i_df.if_u2.if_uuid,
@@ -2348,11 +2359,11 @@ cluster_corrupt_out:
*/
rcu_read_unlock();
/*
- * Clean up the buffer. If it was B_DELWRI, just release it --
+ * Clean up the buffer. If it was delwri, just release it --
* brelse can handle it with no problems. If not, shut down the
* filesystem before releasing the buffer.
*/
- bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp);
+ bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q);
if (bufwasdelwri)
xfs_buf_relse(bp);
@@ -2378,30 +2389,29 @@ cluster_corrupt_out:
/*
* Unlocks the flush lock
*/
- xfs_iflush_abort(iq);
+ xfs_iflush_abort(iq, false);
kmem_free(ilist);
xfs_perag_put(pag);
return XFS_ERROR(EFSCORRUPTED);
}
/*
- * xfs_iflush() will write a modified inode's changes out to the
- * inode's on disk home. The caller must have the inode lock held
- * in at least shared mode and the inode flush completion must be
- * active as well. The inode lock will still be held upon return from
- * the call and the caller is free to unlock it.
- * The inode flush will be completed when the inode reaches the disk.
- * The flags indicate how the inode's buffer should be written out.
+ * Flush dirty inode metadata into the backing buffer.
+ *
+ * The caller must have the inode lock and the inode flush lock held. The
+ * inode lock will still be held upon return to the caller, and the inode
+ * flush lock will be released after the inode has reached the disk.
+ *
+ * The caller must write out the buffer returned in *bpp and release it.
*/
int
xfs_iflush(
- xfs_inode_t *ip,
- uint flags)
+ struct xfs_inode *ip,
+ struct xfs_buf **bpp)
{
- xfs_inode_log_item_t *iip;
- xfs_buf_t *bp;
- xfs_dinode_t *dip;
- xfs_mount_t *mp;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_buf *bp;
+ struct xfs_dinode *dip;
int error;
XFS_STATS_INC(xs_iflush_count);
@@ -2411,25 +2421,8 @@ xfs_iflush(
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
- iip = ip->i_itemp;
- mp = ip->i_mount;
+ *bpp = NULL;
- /*
- * We can't flush the inode until it is unpinned, so wait for it if we
- * are allowed to block. We know no one new can pin it, because we are
- * holding the inode lock shared and you need to hold it exclusively to
- * pin the inode.
- *
- * If we are not allowed to block, force the log out asynchronously so
- * that when we come back the inode will be unpinned. If other inodes
- * in the same cluster are dirty, they will probably write the inode
- * out for us if they occur after the log force completes.
- */
- if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
- xfs_iunpin(ip);
- xfs_ifunlock(ip);
- return EAGAIN;
- }
xfs_iunpin_wait(ip);
/*
@@ -2448,21 +2441,20 @@ xfs_iflush(
/*
* This may have been unpinned because the filesystem is shutting
* down forcibly. If that's the case we must not write this inode
- * to disk, because the log record didn't make it to disk!
+ * to disk, because the log record didn't make it to disk.
+ *
+ * We also have to remove the log item from the AIL in this case,
+ * as we wait for an empty AIL as part of the unmount process.
*/
if (XFS_FORCED_SHUTDOWN(mp)) {
- ip->i_update_core = 0;
- if (iip)
- iip->ili_format.ilf_fields = 0;
- xfs_ifunlock(ip);
- return XFS_ERROR(EIO);
+ error = XFS_ERROR(EIO);
+ goto abort_out;
}
/*
* Get the buffer containing the on-disk inode.
*/
- error = xfs_itobp(mp, NULL, ip, &dip, &bp,
- (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
+ error = xfs_itobp(mp, NULL, ip, &dip, &bp, XBF_TRYLOCK);
if (error || !bp) {
xfs_ifunlock(ip);
return error;
@@ -2490,23 +2482,20 @@ xfs_iflush(
if (error)
goto cluster_corrupt_out;
- if (flags & SYNC_WAIT)
- error = xfs_bwrite(bp);
- else
- xfs_buf_delwri_queue(bp);
-
- xfs_buf_relse(bp);
- return error;
+ *bpp = bp;
+ return 0;
corrupt_out:
xfs_buf_relse(bp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
cluster_corrupt_out:
+ error = XFS_ERROR(EFSCORRUPTED);
+abort_out:
/*
* Unlocks the flush lock
*/
- xfs_iflush_abort(ip);
- return XFS_ERROR(EFSCORRUPTED);
+ xfs_iflush_abort(ip, false);
+ return error;
}
@@ -2533,26 +2522,6 @@ xfs_iflush_int(
/* set *dip = inode's place in the buffer */
dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
- /*
- * Clear i_update_core before copying out the data.
- * This is for coordination with our timestamp updates
- * that don't hold the inode lock. They will always
- * update the timestamps BEFORE setting i_update_core,
- * so if we clear i_update_core after they set it we
- * are guaranteed to see their updates to the timestamps.
- * I believe that this depends on strongly ordered memory
- * semantics, but we have that. We use the SYNCHRONIZE
- * macro to make sure that the compiler does not reorder
- * the i_update_core access below the data copy below.
- */
- ip->i_update_core = 0;
- SYNCHRONIZE();
-
- /*
- * Make sure to get the latest timestamps from the Linux inode.
- */
- xfs_synchronize_times(ip);
-
if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
@@ -2663,36 +2632,33 @@ xfs_iflush_int(
xfs_inobp_check(mp, bp);
/*
- * We've recorded everything logged in the inode, so we'd
- * like to clear the ilf_fields bits so we don't log and
- * flush things unnecessarily. However, we can't stop
- * logging all this information until the data we've copied
- * into the disk buffer is written to disk. If we did we might
- * overwrite the copy of the inode in the log with all the
- * data after re-logging only part of it, and in the face of
- * a crash we wouldn't have all the data we need to recover.
+ * We've recorded everything logged in the inode, so we'd like to clear
+ * the ili_fields bits so we don't log and flush things unnecessarily.
+ * However, we can't stop logging all this information until the data
+ * we've copied into the disk buffer is written to disk. If we did we
+ * might overwrite the copy of the inode in the log with all the data
+ * after re-logging only part of it, and in the face of a crash we
+ * wouldn't have all the data we need to recover.
*
- * What we do is move the bits to the ili_last_fields field.
- * When logging the inode, these bits are moved back to the
- * ilf_fields field. In the xfs_iflush_done() routine we
- * clear ili_last_fields, since we know that the information
- * those bits represent is permanently on disk. As long as
- * the flush completes before the inode is logged again, then
- * both ilf_fields and ili_last_fields will be cleared.
+ * What we do is move the bits to the ili_last_fields field. When
+ * logging the inode, these bits are moved back to the ili_fields field.
+ * In the xfs_iflush_done() routine we clear ili_last_fields, since we
+ * know that the information those bits represent is permanently on
+ * disk. As long as the flush completes before the inode is logged
+ * again, then both ili_fields and ili_last_fields will be cleared.
*
- * We can play with the ilf_fields bits here, because the inode
- * lock must be held exclusively in order to set bits there
- * and the flush lock protects the ili_last_fields bits.
- * Set ili_logged so the flush done
- * routine can tell whether or not to look in the AIL.
- * Also, store the current LSN of the inode so that we can tell
- * whether the item has moved in the AIL from xfs_iflush_done().
- * In order to read the lsn we need the AIL lock, because
- * it is a 64 bit value that cannot be read atomically.
+ * We can play with the ili_fields bits here, because the inode lock
+ * must be held exclusively in order to set bits there and the flush
+ * lock protects the ili_last_fields bits. Set ili_logged so the flush
+ * done routine can tell whether or not to look in the AIL. Also, store
+ * the current LSN of the inode so that we can tell whether the item has
+ * moved in the AIL from xfs_iflush_done(). In order to read the lsn we
+ * need the AIL lock, because it is a 64 bit value that cannot be read
+ * atomically.
*/
- if (iip != NULL && iip->ili_format.ilf_fields != 0) {
- iip->ili_last_fields = iip->ili_format.ilf_fields;
- iip->ili_format.ilf_fields = 0;
+ if (iip != NULL && iip->ili_fields != 0) {
+ iip->ili_last_fields = iip->ili_fields;
+ iip->ili_fields = 0;
iip->ili_logged = 1;
xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
@@ -2711,8 +2677,7 @@ xfs_iflush_int(
} else {
/*
* We're flushing an inode which is not in the AIL and has
- * not been logged but has i_update_core set. For this
- * case we can use a B_DELWRI flush and immediately drop
+ * not been logged. For this case we can immediately drop
* the inode flush lock because we can avoid the whole
* AIL state thing. It's OK to drop the flush lock now,
* because we've already locked the buffer and to do anything
@@ -2732,27 +2697,6 @@ corrupt_out:
return XFS_ERROR(EFSCORRUPTED);
}
-void
-xfs_promote_inode(
- struct xfs_inode *ip)
-{
- struct xfs_buf *bp;
-
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-
- bp = xfs_incore(ip->i_mount->m_ddev_targp, ip->i_imap.im_blkno,
- ip->i_imap.im_len, XBF_TRYLOCK);
- if (!bp)
- return;
-
- if (XFS_BUF_ISDELAYWRITE(bp)) {
- xfs_buf_delwri_promote(bp);
- wake_up_process(ip->i_mount->m_ddev_targp->bt_task);
- }
-
- xfs_buf_relse(bp);
-}
-
/*
* Return a pointer to the extent record at file index idx.
*/