summaryrefslogtreecommitdiff
path: root/fs/xfs/libxfs
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2017-01-25 15:49:35 (GMT)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2017-04-08 07:30:30 (GMT)
commit08a2a26816825b2724fa6e2616df716b31e4a582 (patch)
tree64d5730429db67791b16a190a5659cf71989688a /fs/xfs/libxfs
parent9be1c33d4a995d6369b94c7bb6ae0e8d18e7d658 (diff)
downloadlinux-08a2a26816825b2724fa6e2616df716b31e4a582.tar.xz
xfs: use per-AG reservations for the finobt
commit 76d771b4cbe33c581bd6ca2710c120be51172440 upstream. Currently we try to rely on the global reserved block pool for block allocations for the free inode btree, but I have customer reports (fairly complex workload, need to find an easier reproducer) where that is not enough as the AG where we free an inode that requires a new finobt block is entirely full. This causes us to cancel a dirty transaction and thus a file system shutdown. I think the right way to guard against this is to treat the finot the same way as the refcount btree and have a per-AG reservations for the possible worst case size of it, and the patch below implements that. Note that this could increase mount times with large finobt trees. In an ideal world we would have added a field for the number of finobt fields to the AGI, similar to what we did for the refcount blocks. We should do add it next time we rev the AGI or AGF format by adding new fields. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs/xfs/libxfs')
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c47
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c90
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.h3
3 files changed, 131 insertions, 9 deletions
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index 94234bf..33db69b 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -39,6 +39,7 @@
#include "xfs_rmap_btree.h"
#include "xfs_btree.h"
#include "xfs_refcount_btree.h"
+#include "xfs_ialloc_btree.h"
/*
* Per-AG Block Reservations
@@ -210,6 +211,9 @@ __xfs_ag_resv_init(
if (error) {
trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
error, _RET_IP_);
+ xfs_warn(mp,
+"Per-AG reservation for AG %u failed. Filesystem may run out of space.",
+ pag->pag_agno);
return error;
}
@@ -228,6 +232,8 @@ int
xfs_ag_resv_init(
struct xfs_perag *pag)
{
+ struct xfs_mount *mp = pag->pag_mount;
+ xfs_agnumber_t agno = pag->pag_agno;
xfs_extlen_t ask;
xfs_extlen_t used;
int error = 0;
@@ -236,23 +242,45 @@ xfs_ag_resv_init(
if (pag->pag_meta_resv.ar_asked == 0) {
ask = used = 0;
- error = xfs_refcountbt_calc_reserves(pag->pag_mount,
- pag->pag_agno, &ask, &used);
+ error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used);
if (error)
goto out;
- error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
- ask, used);
+ error = xfs_finobt_calc_reserves(mp, agno, &ask, &used);
if (error)
goto out;
+
+ error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
+ ask, used);
+ if (error) {
+ /*
+ * Because we didn't have per-AG reservations when the
+ * finobt feature was added we might not be able to
+ * reserve all needed blocks. Warn and fall back to the
+ * old and potentially buggy code in that case, but
+ * ensure we do have the reservation for the refcountbt.
+ */
+ ask = used = 0;
+
+ mp->m_inotbt_nores = true;
+
+ error = xfs_refcountbt_calc_reserves(mp, agno, &ask,
+ &used);
+ if (error)
+ goto out;
+
+ error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
+ ask, used);
+ if (error)
+ goto out;
+ }
}
/* Create the AGFL metadata reservation */
if (pag->pag_agfl_resv.ar_asked == 0) {
ask = used = 0;
- error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno,
- &ask, &used);
+ error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used);
if (error)
goto out;
@@ -261,9 +289,16 @@ xfs_ag_resv_init(
goto out;
}
+#ifdef DEBUG
+ /* need to read in the AGF for the ASSERT below to work */
+ error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0);
+ if (error)
+ return error;
+
ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <=
pag->pagf_freeblks + pag->pagf_flcount);
+#endif
out:
return error;
}
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 6c6b959..b9c351f 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -82,11 +82,12 @@ xfs_finobt_set_root(
}
STATIC int
-xfs_inobt_alloc_block(
+__xfs_inobt_alloc_block(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *start,
union xfs_btree_ptr *new,
- int *stat)
+ int *stat,
+ enum xfs_ag_resv_type resv)
{
xfs_alloc_arg_t args; /* block allocation args */
int error; /* error return value */
@@ -103,6 +104,7 @@ xfs_inobt_alloc_block(
args.maxlen = 1;
args.prod = 1;
args.type = XFS_ALLOCTYPE_NEAR_BNO;
+ args.resv = resv;
error = xfs_alloc_vextent(&args);
if (error) {
@@ -123,6 +125,27 @@ xfs_inobt_alloc_block(
}
STATIC int
+xfs_inobt_alloc_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int *stat)
+{
+ return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE);
+}
+
+STATIC int
+xfs_finobt_alloc_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int *stat)
+{
+ return __xfs_inobt_alloc_block(cur, start, new, stat,
+ XFS_AG_RESV_METADATA);
+}
+
+STATIC int
xfs_inobt_free_block(
struct xfs_btree_cur *cur,
struct xfs_buf *bp)
@@ -328,7 +351,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
.dup_cursor = xfs_inobt_dup_cursor,
.set_root = xfs_finobt_set_root,
- .alloc_block = xfs_inobt_alloc_block,
+ .alloc_block = xfs_finobt_alloc_block,
.free_block = xfs_inobt_free_block,
.get_minrecs = xfs_inobt_get_minrecs,
.get_maxrecs = xfs_inobt_get_maxrecs,
@@ -478,3 +501,64 @@ xfs_inobt_rec_check_count(
return 0;
}
#endif /* DEBUG */
+
+static xfs_extlen_t
+xfs_inobt_max_size(
+ struct xfs_mount *mp)
+{
+ /* Bail out if we're uninitialized, which can happen in mkfs. */
+ if (mp->m_inobt_mxr[0] == 0)
+ return 0;
+
+ return xfs_btree_calc_size(mp, mp->m_inobt_mnr,
+ (uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock /
+ XFS_INODES_PER_CHUNK);
+}
+
+static int
+xfs_inobt_count_blocks(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ xfs_btnum_t btnum,
+ xfs_extlen_t *tree_blocks)
+{
+ struct xfs_buf *agbp;
+ struct xfs_btree_cur *cur;
+ int error;
+
+ error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
+ if (error)
+ return error;
+
+ cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum);
+ error = xfs_btree_count_blocks(cur, tree_blocks);
+ xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+ xfs_buf_relse(agbp);
+
+ return error;
+}
+
+/*
+ * Figure out how many blocks to reserve and how many are used by this btree.
+ */
+int
+xfs_finobt_calc_reserves(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ xfs_extlen_t *ask,
+ xfs_extlen_t *used)
+{
+ xfs_extlen_t tree_len = 0;
+ int error;
+
+ if (!xfs_sb_version_hasfinobt(&mp->m_sb))
+ return 0;
+
+ error = xfs_inobt_count_blocks(mp, agno, XFS_BTNUM_FINO, &tree_len);
+ if (error)
+ return error;
+
+ *ask += xfs_inobt_max_size(mp);
+ *used += tree_len;
+ return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index bd88453..aa81e2e 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -72,4 +72,7 @@ int xfs_inobt_rec_check_count(struct xfs_mount *,
#define xfs_inobt_rec_check_count(mp, rec) 0
#endif /* DEBUG */
+int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno,
+ xfs_extlen_t *ask, xfs_extlen_t *used);
+
#endif /* __XFS_IALLOC_BTREE_H__ */