summaryrefslogtreecommitdiff
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--fs/ext4/inode.c225
1 files changed, 126 insertions, 99 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ab80796..c818972 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1009,77 +1009,88 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
return &EXT4_I(inode)->i_reserved_quota;
}
#endif
+
/*
* Calculate the number of metadata blocks need to reserve
- * to allocate @blocks for non extent file based file
+ * to allocate a new block at @lblocks for non extent file based file
*/
-static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+static int ext4_indirect_calc_metadata_amount(struct inode *inode,
+ sector_t lblock)
{
- int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- int ind_blks, dind_blks, tind_blks;
-
- /* number of new indirect blocks needed */
- ind_blks = (blocks + icap - 1) / icap;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
+ int blk_bits;
- dind_blks = (ind_blks + icap - 1) / icap;
+ if (lblock < EXT4_NDIR_BLOCKS)
+ return 0;
- tind_blks = 1;
+ lblock -= EXT4_NDIR_BLOCKS;
- return ind_blks + dind_blks + tind_blks;
+ if (ei->i_da_metadata_calc_len &&
+ (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
+ ei->i_da_metadata_calc_len++;
+ return 0;
+ }
+ ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
+ ei->i_da_metadata_calc_len = 1;
+ blk_bits = roundup_pow_of_two(lblock + 1);
+ return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
}
/*
* Calculate the number of metadata blocks need to reserve
- * to allocate given number of blocks
+ * to allocate a block located at @lblock
*/
-static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
{
- if (!blocks)
- return 0;
-
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
- return ext4_ext_calc_metadata_amount(inode, blocks);
+ return ext4_ext_calc_metadata_amount(inode, lblock);
- return ext4_indirect_calc_metadata_amount(inode, blocks);
+ return ext4_indirect_calc_metadata_amount(inode, lblock);
}
+/*
+ * Called with i_data_sem down, which is important since we can call
+ * ext4_discard_preallocations() from here.
+ */
static void ext4_da_update_reserve_space(struct inode *inode, int used)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- int total, mdb, mdb_free, mdb_claim = 0;
-
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- /* recalculate the number of metablocks still need to be reserved */
- total = EXT4_I(inode)->i_reserved_data_blocks - used;
- mdb = ext4_calc_metadata_amount(inode, total);
-
- /* figure out how many metablocks to release */
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
- mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
- if (mdb_free) {
- /* Account for allocated meta_blocks */
- mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks;
- BUG_ON(mdb_free < mdb_claim);
- mdb_free -= mdb_claim;
-
- /* update fs dirty blocks counter */
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ int mdb_free = 0;
+
+ spin_lock(&ei->i_block_reservation_lock);
+ if (unlikely(used > ei->i_reserved_data_blocks)) {
+ ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
+ "with only %d reserved data blocks\n",
+ __func__, inode->i_ino, used,
+ ei->i_reserved_data_blocks);
+ WARN_ON(1);
+ used = ei->i_reserved_data_blocks;
+ }
+
+ /* Update per-inode reservations */
+ ei->i_reserved_data_blocks -= used;
+ used += ei->i_allocated_meta_blocks;
+ ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
+ ei->i_allocated_meta_blocks = 0;
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
+
+ if (ei->i_reserved_data_blocks == 0) {
+ /*
+ * We can release all of the reserved metadata blocks
+ * only when we have written all of the delayed
+ * allocation blocks.
+ */
+ mdb_free = ei->i_reserved_meta_blocks;
+ ei->i_reserved_meta_blocks = 0;
+ ei->i_da_metadata_calc_len = 0;
percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
- EXT4_I(inode)->i_allocated_meta_blocks = 0;
- EXT4_I(inode)->i_reserved_meta_blocks = mdb;
}
-
- /* update per-inode reservations */
- BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
- EXT4_I(inode)->i_reserved_data_blocks -= used;
- percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim);
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- vfs_dq_claim_block(inode, used + mdb_claim);
-
- /*
- * free those over-booking quota for metadata blocks
- */
+ /* Update quota subsystem */
+ vfs_dq_claim_block(inode, used);
if (mdb_free)
vfs_dq_release_reservation_block(inode, mdb_free);
@@ -1088,7 +1099,8 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
* there aren't any writers on the inode, we can discard the
* inode's preallocations.
*/
- if (!total && (atomic_read(&inode->i_writecount) == 0))
+ if ((ei->i_reserved_data_blocks == 0) &&
+ (atomic_read(&inode->i_writecount) == 0))
ext4_discard_preallocations(inode);
}
@@ -1797,11 +1809,15 @@ static int ext4_journalled_write_end(struct file *file,
return ret ? ret : copied;
}
-static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
+/*
+ * Reserve a single block located at lblock
+ */
+static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
{
int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- unsigned long md_needed, mdblocks, total = 0;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ unsigned long md_needed, md_reserved;
/*
* recalculate the amount of metadata blocks to reserve
@@ -1809,35 +1825,43 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
* worse case is one extent per block
*/
repeat:
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
- mdblocks = ext4_calc_metadata_amount(inode, total);
- BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
-
- md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
- total = md_needed + nrblocks;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ spin_lock(&ei->i_block_reservation_lock);
+ md_reserved = ei->i_reserved_meta_blocks;
+ md_needed = ext4_calc_metadata_amount(inode, lblock);
+ spin_unlock(&ei->i_block_reservation_lock);
/*
* Make quota reservation here to prevent quota overflow
* later. Real quota accounting is done at pages writeout
* time.
*/
- if (vfs_dq_reserve_block(inode, total))
+ if (vfs_dq_reserve_block(inode, md_needed + 1)) {
+ /*
+ * We tend to badly over-estimate the amount of
+ * metadata blocks which are needed, so if we have
+ * reserved any metadata blocks, try to force out the
+ * inode and see if we have any better luck.
+ */
+ if (md_reserved && retries++ <= 3)
+ goto retry;
return -EDQUOT;
+ }
- if (ext4_claim_free_blocks(sbi, total)) {
- vfs_dq_release_reservation_block(inode, total);
+ if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
+ vfs_dq_release_reservation_block(inode, md_needed + 1);
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
+ retry:
+ if (md_reserved)
+ write_inode_now(inode, (retries == 3));
yield();
goto repeat;
}
return -ENOSPC;
}
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
- EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ spin_lock(&ei->i_block_reservation_lock);
+ ei->i_reserved_data_blocks++;
+ ei->i_reserved_meta_blocks += md_needed;
+ spin_unlock(&ei->i_block_reservation_lock);
return 0; /* success */
}
@@ -1845,49 +1869,46 @@ repeat:
static void ext4_da_release_space(struct inode *inode, int to_free)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- int total, mdb, mdb_free, release;
+ struct ext4_inode_info *ei = EXT4_I(inode);
if (!to_free)
return; /* Nothing to release, exit */
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- if (!EXT4_I(inode)->i_reserved_data_blocks) {
+ if (unlikely(to_free > ei->i_reserved_data_blocks)) {
/*
- * if there is no reserved blocks, but we try to free some
- * then the counter is messed up somewhere.
- * but since this function is called from invalidate
- * page, it's harmless to return without any action
+ * if there aren't enough reserved blocks, then the
+ * counter is messed up somewhere. Since this
+ * function is called from invalidate page, it's
+ * harmless to return without any action.
*/
- printk(KERN_INFO "ext4 delalloc try to release %d reserved "
- "blocks for inode %lu, but there is no reserved "
- "data blocks\n", to_free, inode->i_ino);
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- return;
+ ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
+ "ino %lu, to_free %d with only %d reserved "
+ "data blocks\n", inode->i_ino, to_free,
+ ei->i_reserved_data_blocks);
+ WARN_ON(1);
+ to_free = ei->i_reserved_data_blocks;
}
+ ei->i_reserved_data_blocks -= to_free;
- /* recalculate the number of metablocks still need to be reserved */
- total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
- mdb = ext4_calc_metadata_amount(inode, total);
-
- /* figure out how many metablocks to release */
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
- mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
- release = to_free + mdb_free;
-
- /* update fs dirty blocks counter for truncate case */
- percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
+ if (ei->i_reserved_data_blocks == 0) {
+ /*
+ * We can release all of the reserved metadata blocks
+ * only when we have written all of the delayed
+ * allocation blocks.
+ */
+ to_free += ei->i_reserved_meta_blocks;
+ ei->i_reserved_meta_blocks = 0;
+ ei->i_da_metadata_calc_len = 0;
+ }
- /* update per-inode reservations */
- BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
- EXT4_I(inode)->i_reserved_data_blocks -= to_free;
+ /* update fs dirty blocks counter */
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
- EXT4_I(inode)->i_reserved_meta_blocks = mdb;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- vfs_dq_release_reservation_block(inode, release);
+ vfs_dq_release_reservation_block(inode, to_free);
}
static void ext4_da_page_release_reservation(struct page *page,
@@ -2493,7 +2514,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
* XXX: __block_prepare_write() unmaps passed block,
* is it OK?
*/
- ret = ext4_da_reserve_space(inode, 1);
+ ret = ext4_da_reserve_space(inode, iblock);
if (ret)
/* not enough space to reserve */
return ret;
@@ -2967,8 +2988,7 @@ retry:
out_writepages:
if (!no_nrwrite_index_update)
wbc->no_nrwrite_index_update = 0;
- if (wbc->nr_to_write > nr_to_writebump)
- wbc->nr_to_write -= nr_to_writebump;
+ wbc->nr_to_write -= nr_to_writebump;
wbc->range_start = range_start;
trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
return ret;
@@ -2993,11 +3013,18 @@ static int ext4_nonda_switch(struct super_block *sb)
if (2 * free_blocks < 3 * dirty_blocks ||
free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
/*
- * free block count is less that 150% of dirty blocks
- * or free blocks is less that watermark
+ * free block count is less than 150% of dirty blocks
+ * or free blocks is less than watermark
*/
return 1;
}
+ /*
+ * Even if we don't switch but are nearing capacity,
+ * start pushing delalloc when 1/2 of free blocks are dirty.
+ */
+ if (free_blocks < 2 * dirty_blocks)
+ writeback_inodes_sb_if_idle(sb);
+
return 0;
}