summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/xfs_dir2.c342
-rw-r--r--fs/xfs/xfs_iops.c14
-rw-r--r--fs/xfs/xfs_log_cil.c52
3 files changed, 265 insertions, 143 deletions
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index ce16ef0..fda4625 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -180,16 +180,23 @@ xfs_dir_init(
xfs_inode_t *dp,
xfs_inode_t *pdp)
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int error;
- memset((char *)&args, 0, sizeof(args));
- args.dp = dp;
- args.trans = tp;
ASSERT(S_ISDIR(dp->i_d.di_mode));
- if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)))
+ error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino);
+ if (error)
return error;
- return xfs_dir2_sf_create(&args, pdp->i_ino);
+
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return ENOMEM;
+
+ args->dp = dp;
+ args->trans = tp;
+ error = xfs_dir2_sf_create(args, pdp->i_ino);
+ kmem_free(args);
+ return error;
}
/*
@@ -205,41 +212,56 @@ xfs_dir_createname(
xfs_bmap_free_t *flist, /* bmap's freeblock list */
xfs_extlen_t total) /* bmap's total block count */
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
- if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
+ rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+ if (rval)
return rval;
XFS_STATS_INC(xs_dir_create);
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.inumber = inum;
- args.dp = dp;
- args.firstblock = first;
- args.flist = flist;
- args.total = total;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
- args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
-
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_addname(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_addname(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_addname(&args);
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return ENOMEM;
+
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->inumber = inum;
+ args->dp = dp;
+ args->firstblock = first;
+ args->flist = flist;
+ args->total = total;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+ args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
+
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_addname(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isblock(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_addname(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isleaf(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_addname(args);
else
- rval = xfs_dir2_node_addname(&args);
+ rval = xfs_dir2_node_addname(args);
+
+out_free:
+ kmem_free(args);
return rval;
}
@@ -282,46 +304,66 @@ xfs_dir_lookup(
xfs_ino_t *inum, /* out: inode number */
struct xfs_name *ci_name) /* out: actual name if CI match */
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_lookup);
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.dp = dp;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
- args.op_flags = XFS_DA_OP_OKNOENT;
+ /*
+ * We need to use KM_NOFS here so that lockdep will not throw false
+ * positive deadlock warnings on a non-transactional lookup path. It is
+ * safe to recurse into inode recalim in that case, but lockdep can't
+ * easily be taught about it. Hence KM_NOFS avoids having to add more
+ * lockdep Doing this avoids having to add a bunch of lockdep class
+ * annotations into the reclaim path for the ilock.
+ */
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->dp = dp;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+ args->op_flags = XFS_DA_OP_OKNOENT;
if (ci_name)
- args.op_flags |= XFS_DA_OP_CILOOKUP;
+ args->op_flags |= XFS_DA_OP_CILOOKUP;
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_lookup(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_lookup(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_lookup(&args);
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_lookup(args);
+ goto out_check_rval;
+ }
+
+ rval = xfs_dir2_isblock(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_lookup(args);
+ goto out_check_rval;
+ }
+
+ rval = xfs_dir2_isleaf(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_lookup(args);
else
- rval = xfs_dir2_node_lookup(&args);
+ rval = xfs_dir2_node_lookup(args);
+
+out_check_rval:
if (rval == EEXIST)
rval = 0;
if (!rval) {
- *inum = args.inumber;
+ *inum = args->inumber;
if (ci_name) {
- ci_name->name = args.value;
- ci_name->len = args.valuelen;
+ ci_name->name = args->value;
+ ci_name->len = args->valuelen;
}
}
+out_free:
+ kmem_free(args);
return rval;
}
@@ -338,38 +380,51 @@ xfs_dir_removename(
xfs_bmap_free_t *flist, /* bmap's freeblock list */
xfs_extlen_t total) /* bmap's total block count */
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_remove);
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.inumber = ino;
- args.dp = dp;
- args.firstblock = first;
- args.flist = flist;
- args.total = total;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
-
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_removename(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_removename(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_removename(&args);
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return ENOMEM;
+
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->inumber = ino;
+ args->dp = dp;
+ args->firstblock = first;
+ args->flist = flist;
+ args->total = total;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_removename(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isblock(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_removename(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isleaf(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_removename(args);
else
- rval = xfs_dir2_node_removename(&args);
+ rval = xfs_dir2_node_removename(args);
+out_free:
+ kmem_free(args);
return rval;
}
@@ -386,40 +441,54 @@ xfs_dir_replace(
xfs_bmap_free_t *flist, /* bmap's freeblock list */
xfs_extlen_t total) /* bmap's total block count */
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
- if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
+ rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+ if (rval)
return rval;
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.inumber = inum;
- args.dp = dp;
- args.firstblock = first;
- args.flist = flist;
- args.total = total;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
-
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_replace(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_replace(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_replace(&args);
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return ENOMEM;
+
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->inumber = inum;
+ args->dp = dp;
+ args->firstblock = first;
+ args->flist = flist;
+ args->total = total;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_replace(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isblock(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_replace(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isleaf(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_replace(args);
else
- rval = xfs_dir2_node_replace(&args);
+ rval = xfs_dir2_node_replace(args);
+out_free:
+ kmem_free(args);
return rval;
}
@@ -434,7 +503,7 @@ xfs_dir_canenter(
struct xfs_name *name, /* name of entry to add */
uint resblks)
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
@@ -443,29 +512,42 @@ xfs_dir_canenter(
ASSERT(S_ISDIR(dp->i_d.di_mode));
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.dp = dp;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
- args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return ENOMEM;
+
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->dp = dp;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+ args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
XFS_DA_OP_OKNOENT;
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_addname(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_addname(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_addname(&args);
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_addname(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isblock(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_addname(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isleaf(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_addname(args);
else
- rval = xfs_dir2_node_addname(&args);
+ rval = xfs_dir2_node_addname(args);
+out_free:
+ kmem_free(args);
return rval;
}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 9ddfb81..bb3bb65 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -48,6 +48,18 @@
#include <linux/fiemap.h>
#include <linux/slab.h>
+/*
+ * Directories have different lock order w.r.t. mmap_sem compared to regular
+ * files. This is due to readdir potentially triggering page faults on a user
+ * buffer inside filldir(), and this happens with the ilock on the directory
+ * held. For regular files, the lock order is the other way around - the
+ * mmap_sem is taken during the page fault, and then we lock the ilock to do
+ * block mapping. Hence we need a different class for the directory ilock so
+ * that lockdep can tell them apart.
+ */
+static struct lock_class_key xfs_nondir_ilock_class;
+static struct lock_class_key xfs_dir_ilock_class;
+
static int
xfs_initxattrs(
struct inode *inode,
@@ -1191,6 +1203,7 @@ xfs_setup_inode(
xfs_diflags_to_iflags(inode, ip);
ip->d_ops = ip->i_mount->m_nondir_inode_ops;
+ lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
inode->i_op = &xfs_inode_operations;
@@ -1198,6 +1211,7 @@ xfs_setup_inode(
inode->i_mapping->a_ops = &xfs_address_space_operations;
break;
case S_IFDIR:
+ lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
inode->i_op = &xfs_dir_ci_inode_operations;
else
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index b57a8e0..7e54553 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -499,13 +499,6 @@ xlog_cil_push(
cil->xc_ctx = new_ctx;
/*
- * mirror the new sequence into the cil structure so that we can do
- * unlocked checks against the current sequence in log forces without
- * risking deferencing a freed context pointer.
- */
- cil->xc_current_sequence = new_ctx->sequence;
-
- /*
* The switch is now done, so we can drop the context lock and move out
* of a shared context. We can't just go straight to the commit record,
* though - we need to synchronise with previous and future commits so
@@ -523,8 +516,15 @@ xlog_cil_push(
* Hence we need to add this context to the committing context list so
* that higher sequences will wait for us to write out a commit record
* before they do.
+ *
+ * xfs_log_force_lsn requires us to mirror the new sequence into the cil
+ * structure atomically with the addition of this sequence to the
+ * committing list. This also ensures that we can do unlocked checks
+ * against the current sequence in log forces without risking
+ * deferencing a freed context pointer.
*/
spin_lock(&cil->xc_push_lock);
+ cil->xc_current_sequence = new_ctx->sequence;
list_add(&ctx->committing, &cil->xc_committing);
spin_unlock(&cil->xc_push_lock);
up_write(&cil->xc_ctx_lock);
@@ -662,8 +662,14 @@ xlog_cil_push_background(
}
+/*
+ * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence
+ * number that is passed. When it returns, the work will be queued for
+ * @push_seq, but it won't be completed. The caller is expected to do any
+ * waiting for push_seq to complete if it is required.
+ */
static void
-xlog_cil_push_foreground(
+xlog_cil_push_now(
struct xlog *log,
xfs_lsn_t push_seq)
{
@@ -688,10 +694,8 @@ xlog_cil_push_foreground(
}
cil->xc_push_seq = push_seq;
+ queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
spin_unlock(&cil->xc_push_lock);
-
- /* do the push now */
- xlog_cil_push(log);
}
bool
@@ -795,7 +799,8 @@ xlog_cil_force_lsn(
* xlog_cil_push() handles racing pushes for the same sequence,
* so no need to deal with it here.
*/
- xlog_cil_push_foreground(log, sequence);
+restart:
+ xlog_cil_push_now(log, sequence);
/*
* See if we can find a previous sequence still committing.
@@ -803,7 +808,6 @@ xlog_cil_force_lsn(
* before allowing the force of push_seq to go ahead. Hence block
* on commits for those as well.
*/
-restart:
spin_lock(&cil->xc_push_lock);
list_for_each_entry(ctx, &cil->xc_committing, committing) {
if (ctx->sequence > sequence)
@@ -821,6 +825,28 @@ restart:
/* found it! */
commit_lsn = ctx->commit_lsn;
}
+
+ /*
+ * The call to xlog_cil_push_now() executes the push in the background.
+ * Hence by the time we have got here it our sequence may not have been
+ * pushed yet. This is true if the current sequence still matches the
+ * push sequence after the above wait loop and the CIL still contains
+ * dirty objects.
+ *
+ * When the push occurs, it will empty the CIL and
+ * atomically increment the currect sequence past the push sequence and
+ * move it into the committing list. Of course, if the CIL is clean at
+ * the time of the push, it won't have pushed the CIL at all, so in that
+ * case we should try the push for this sequence again from the start
+ * just in case.
+ */
+
+ if (sequence == cil->xc_current_sequence &&
+ !list_empty(&cil->xc_cil)) {
+ spin_unlock(&cil->xc_push_lock);
+ goto restart;
+ }
+
spin_unlock(&cil->xc_push_lock);
return commit_lsn;
}