From f4b42421d80cac4291dd82bd97baa1bc06b351fc Mon Sep 17 00:00:00 2001 From: Mark Tinguely Date: Tue, 4 Dec 2012 17:18:02 -0600 Subject: xfs: use b_maps[] for discontiguous buffers Commits starting at 77c1a08 introduced multiple segment support to xfs_buf. xfs_trans_buf_item_match() could not find a multi-segment buffer in the transaction because it was looking at the single segment block number rather than the multi-segment b_maps[0].bm_bn. This results in a recursive buffer lock that can never be satisfied. This patch: 1) Changes the remaining b_map accesses to be b_maps[0] accesses. 2) Renames the single segment b_map structure to __b_map to avoid future confusion. Signed-off-by: Mark Tinguely Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 26673a0..56d1614 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -175,7 +175,7 @@ xfs_buf_get_maps( bp->b_map_count = map_count; if (map_count == 1) { - bp->b_maps = &bp->b_map; + bp->b_maps = &bp->__b_map; return 0; } @@ -193,7 +193,7 @@ static void xfs_buf_free_maps( struct xfs_buf *bp) { - if (bp->b_maps != &bp->b_map) { + if (bp->b_maps != &bp->__b_map) { kmem_free(bp->b_maps); bp->b_maps = NULL; } @@ -377,8 +377,8 @@ xfs_buf_allocate_memory( } use_alloc_page: - start = BBTOB(bp->b_map.bm_bn) >> PAGE_SHIFT; - end = (BBTOB(bp->b_map.bm_bn + bp->b_length) + PAGE_SIZE - 1) + start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT; + end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1) >> PAGE_SHIFT; page_count = end - start; error = _xfs_buf_get_pages(bp, page_count, flags); @@ -640,7 +640,7 @@ _xfs_buf_read( xfs_buf_flags_t flags) { ASSERT(!(flags & XBF_WRITE)); - ASSERT(bp->b_map.bm_bn != XFS_BUF_DADDR_NULL); + ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL); bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); @@ -1709,7 +1709,7 @@ xfs_buf_cmp( 
struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); xfs_daddr_t diff; - diff = ap->b_map.bm_bn - bp->b_map.bm_bn; + diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn; if (diff < 0) return -1; if (diff > 0) diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 23f5642..433a12e 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -151,7 +151,7 @@ typedef struct xfs_buf { struct page **b_pages; /* array of page pointers */ struct page *b_page_array[XB_PAGES]; /* inline pages */ struct xfs_buf_map *b_maps; /* compound buffer map */ - struct xfs_buf_map b_map; /* inline compound buffer map */ + struct xfs_buf_map __b_map; /* inline compound buffer map */ int b_map_count; int b_io_length; /* IO size in BBs */ atomic_t b_pin_count; /* pin count */ @@ -330,8 +330,8 @@ void xfs_buf_stale(struct xfs_buf *bp); * In future, uncached buffers will pass the block number directly to the io * request function and hence these macros will go away at that point. */ -#define XFS_BUF_ADDR(bp) ((bp)->b_map.bm_bn) -#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_map.bm_bn = (xfs_daddr_t)(bno)) +#define XFS_BUF_ADDR(bp) ((bp)->b_maps[0].bm_bn) +#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_maps[0].bm_bn = (xfs_daddr_t)(bno)) static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) { -- cgit v0.10.2 From b94381737e9c4d014a4003e8ece9ba88670a2dd4 Mon Sep 17 00:00:00 2001 From: Mark Tinguely Date: Tue, 4 Dec 2012 17:18:03 -0600 Subject: xfs: rename bli_format to avoid confusion with bli_formats Rename the bli_format structure to __bli_format to avoid accidentally confusing it with the bli_formats pointer. 
Signed-off-by: Mark Tinguely Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index becf4a9..1975b3d 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -71,7 +71,7 @@ xfs_buf_item_log_debug( chunk_num = byte >> XFS_BLF_SHIFT; word_num = chunk_num >> BIT_TO_WORD_SHIFT; bit_num = chunk_num & (NBWORD - 1); - wordp = &(bip->bli_format.blf_data_map[word_num]); + wordp = &(bip->__bli_format.blf_data_map[word_num]); bit_set = *wordp & (1 << bit_num); ASSERT(bit_set); byte++; @@ -237,7 +237,7 @@ xfs_buf_item_size( * cancel flag in it. */ trace_xfs_buf_item_size_stale(bip); - ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); + ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); return bip->bli_format_count; } @@ -278,7 +278,7 @@ xfs_buf_item_format_segment( uint buffer_offset; /* copy the flags across from the base format item */ - blfp->blf_flags = bip->bli_format.blf_flags; + blfp->blf_flags = bip->__bli_format.blf_flags; /* * Base size is the actual size of the ondisk structure - it reflects @@ -371,7 +371,7 @@ xfs_buf_item_format_segment( nbits++; } } - bip->bli_format.blf_size = nvecs; + bip->__bli_format.blf_size = nvecs; return vecp; } @@ -405,7 +405,7 @@ xfs_buf_item_format( if (bip->bli_flags & XFS_BLI_INODE_BUF) { if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && xfs_log_item_in_current_chkpt(lip))) - bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF; + bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF; bip->bli_flags &= ~XFS_BLI_INODE_BUF; } @@ -485,7 +485,7 @@ xfs_buf_item_unpin( ASSERT(bip->bli_flags & XFS_BLI_STALE); ASSERT(xfs_buf_islocked(bp)); ASSERT(XFS_BUF_ISSTALE(bp)); - ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); + ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); trace_xfs_buf_item_unpin_stale(bip); @@ -631,7 +631,7 @@ xfs_buf_item_unlock( */ if (bip->bli_flags & XFS_BLI_STALE) { trace_xfs_buf_item_unlock_stale(bip); - 
ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); + ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); if (!aborted) { atomic_dec(&bip->bli_refcount); return; @@ -644,8 +644,8 @@ xfs_buf_item_unlock( * If the buf item isn't tracking any data, free it, otherwise drop the * reference we hold to it. */ - if (xfs_bitmap_empty(bip->bli_format.blf_data_map, - bip->bli_format.blf_map_size)) + if (xfs_bitmap_empty(bip->__bli_format.blf_data_map, + bip->__bli_format.blf_map_size)) xfs_buf_item_relse(bp); else atomic_dec(&bip->bli_refcount); @@ -716,7 +716,7 @@ xfs_buf_item_get_format( bip->bli_format_count = count; if (count == 1) { - bip->bli_formats = &bip->bli_format; + bip->bli_formats = &bip->__bli_format; return 0; } @@ -731,7 +731,7 @@ STATIC void xfs_buf_item_free_format( struct xfs_buf_log_item *bip) { - if (bip->bli_formats != &bip->bli_format) { + if (bip->bli_formats != &bip->__bli_format) { kmem_free(bip->bli_formats); bip->bli_formats = NULL; } diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 6850f49..16def43 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -104,7 +104,7 @@ typedef struct xfs_buf_log_item { #endif int bli_format_count; /* count of headers */ struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */ - struct xfs_buf_log_format bli_format; /* embedded in-log header */ + struct xfs_buf_log_format __bli_format; /* embedded in-log header */ } xfs_buf_log_item_t; void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 4fc17d4..f7510bf 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -93,7 +93,7 @@ _xfs_trans_bjoin( xfs_buf_item_init(bp, tp->t_mountp); bip = bp->b_fspriv; ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); - ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); + ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); if (reset_recur) bip->bli_recur = 0; 
@@ -432,7 +432,7 @@ xfs_trans_brelse(xfs_trans_t *tp, bip = bp->b_fspriv; ASSERT(bip->bli_item.li_type == XFS_LI_BUF); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); - ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); + ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(atomic_read(&bip->bli_refcount) > 0); trace_xfs_trans_brelse(bip); @@ -519,7 +519,7 @@ xfs_trans_bhold(xfs_trans_t *tp, ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); - ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); + ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_HOLD; @@ -539,7 +539,7 @@ xfs_trans_bhold_release(xfs_trans_t *tp, ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); - ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); + ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT(bip->bli_flags & XFS_BLI_HOLD); @@ -598,7 +598,7 @@ xfs_trans_log_buf(xfs_trans_t *tp, bip->bli_flags &= ~XFS_BLI_STALE; ASSERT(XFS_BUF_ISSTALE(bp)); XFS_BUF_UNSTALE(bp); - bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL; + bip->__bli_format.blf_flags &= ~XFS_BLF_CANCEL; } tp->t_flags |= XFS_TRANS_DIRTY; @@ -657,8 +657,8 @@ xfs_trans_binval( */ ASSERT(XFS_BUF_ISSTALE(bp)); ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); - ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); - ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); + ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF)); + ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY); ASSERT(tp->t_flags & XFS_TRANS_DIRTY); return; @@ -668,10 +668,10 @@ xfs_trans_binval( bip->bli_flags |= XFS_BLI_STALE; bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); - bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; - 
bip->bli_format.blf_flags |= XFS_BLF_CANCEL; - memset((char *)(bip->bli_format.blf_data_map), 0, - (bip->bli_format.blf_map_size * sizeof(uint))); + bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; + bip->__bli_format.blf_flags |= XFS_BLF_CANCEL; + memset((char *)(bip->__bli_format.blf_data_map), 0, + (bip->__bli_format.blf_map_size * sizeof(uint))); bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY; tp->t_flags |= XFS_TRANS_DIRTY; } @@ -775,5 +775,5 @@ xfs_trans_dquot_buf( type == XFS_BLF_GDQUOT_BUF); ASSERT(atomic_read(&bip->bli_refcount) > 0); - bip->bli_format.blf_flags |= type; + bip->__bli_format.blf_flags |= type; } -- cgit v0.10.2 From 820a554f2f83d21f7e9e608377c8c86af70917bd Mon Sep 17 00:00:00 2001 From: Mark Tinguely Date: Tue, 4 Dec 2012 17:18:04 -0600 Subject: xfs: fix segment in xfs_buf_item_format_segment Not every segment in a multi-segment buffer is dirty in a transaction, and the clean segments will not be output. The assert in xfs_buf_item_format_segment() that checks for at least one chunk of data in the segment to be used is not necessarily true for multi-segment buffers. Signed-off-by: Mark Tinguely Reviewed-by: Dave Chinner Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 1975b3d..c48e60b 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -287,6 +287,17 @@ xfs_buf_item_format_segment( */ base_size = offsetof(struct xfs_buf_log_format, blf_data_map) + (blfp->blf_map_size * sizeof(blfp->blf_data_map[0])); + + nvecs = 0; + first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); + if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) { + /* + * If the map is not be dirty in the transaction, mark + * the size as zero and do not advance the vector pointer. 
+ */ + goto out; + } + vecp->i_addr = blfp; vecp->i_len = base_size; vecp->i_type = XLOG_REG_TYPE_BFORMAT; @@ -301,15 +312,13 @@ xfs_buf_item_format_segment( */ trace_xfs_buf_item_format_stale(bip); ASSERT(blfp->blf_flags & XFS_BLF_CANCEL); - blfp->blf_size = nvecs; - return vecp; + goto out; } /* * Fill in an iovec for each set of contiguous chunks. */ - first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); - ASSERT(first_bit != -1); + last_bit = first_bit; nbits = 1; for (;;) { @@ -371,7 +380,8 @@ xfs_buf_item_format_segment( nbits++; } } - bip->__bli_format.blf_size = nvecs; +out: + blfp->blf_size = nvecs; return vecp; } -- cgit v0.10.2 From c883d0c400fc8cdbd5bbe71e179c9e64ace58e86 Mon Sep 17 00:00:00 2001 From: Mark Tinguely Date: Tue, 4 Dec 2012 17:18:05 -0600 Subject: xfs: fix the multi-segment log buffer format Per Dave Chinner suggestion, this patch: 1) Corrects the detection of whether a multi-segment buffer is still tracking data. 2) Clears all the buffer log formats for a multi-segment buffer. Signed-off-by: Mark Tinguely Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index c48e60b..77b0975 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -611,7 +611,7 @@ xfs_buf_item_unlock( { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; - int aborted; + int aborted, clean, i; uint hold; /* Clear the buffer's association with this transaction. */ @@ -654,8 +654,15 @@ xfs_buf_item_unlock( * If the buf item isn't tracking any data, free it, otherwise drop the * reference we hold to it. 
*/ - if (xfs_bitmap_empty(bip->__bli_format.blf_data_map, - bip->__bli_format.blf_map_size)) + clean = 1; + for (i = 0; i < bip->bli_format_count; i++) { + if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map, + bip->bli_formats[i].blf_map_size)) { + clean = 0; + break; + } + } + if (clean) xfs_buf_item_relse(bp); else atomic_dec(&bip->bli_refcount); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index f7510bf..3edf5db 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -643,6 +643,7 @@ xfs_trans_binval( xfs_buf_t *bp) { xfs_buf_log_item_t *bip = bp->b_fspriv; + int i; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -670,8 +671,10 @@ xfs_trans_binval( bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; bip->__bli_format.blf_flags |= XFS_BLF_CANCEL; - memset((char *)(bip->__bli_format.blf_data_map), 0, - (bip->__bli_format.blf_map_size * sizeof(uint))); + for (i = 0; i < bip->bli_format_count; i++) { + memset(bip->bli_formats[i].blf_data_map, 0, + (bip->bli_formats[i].blf_map_size * sizeof(uint))); + } bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY; tp->t_flags |= XFS_TRANS_DIRTY; } -- cgit v0.10.2 From ec47eb6b0b450a4e82340b6de674104de3f0dc0a Mon Sep 17 00:00:00 2001 From: Mark Tinguely Date: Tue, 4 Dec 2012 17:18:06 -0600 Subject: xfs remove the XFS_TRANS_DEBUG routines Remove the XFS_TRANS_DEBUG routines. 
They are no longer appropriate and have not been used in years Signed-off-by: Mark Tinguely Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 77b0975..63c86c4 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -37,109 +37,6 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip) return container_of(lip, struct xfs_buf_log_item, bli_item); } - -#ifdef XFS_TRANS_DEBUG -/* - * This function uses an alternate strategy for tracking the bytes - * that the user requests to be logged. This can then be used - * in conjunction with the bli_orig array in the buf log item to - * catch bugs in our callers' code. - * - * We also double check the bits set in xfs_buf_item_log using a - * simple algorithm to check that every byte is accounted for. - */ -STATIC void -xfs_buf_item_log_debug( - xfs_buf_log_item_t *bip, - uint first, - uint last) -{ - uint x; - uint byte; - uint nbytes; - uint chunk_num; - uint word_num; - uint bit_num; - uint bit_set; - uint *wordp; - - ASSERT(bip->bli_logged != NULL); - byte = first; - nbytes = last - first + 1; - bfset(bip->bli_logged, first, nbytes); - for (x = 0; x < nbytes; x++) { - chunk_num = byte >> XFS_BLF_SHIFT; - word_num = chunk_num >> BIT_TO_WORD_SHIFT; - bit_num = chunk_num & (NBWORD - 1); - wordp = &(bip->__bli_format.blf_data_map[word_num]); - bit_set = *wordp & (1 << bit_num); - ASSERT(bit_set); - byte++; - } -} - -/* - * This function is called when we flush something into a buffer without - * logging it. This happens for things like inodes which are logged - * separately from the buffer. 
- */ -void -xfs_buf_item_flush_log_debug( - xfs_buf_t *bp, - uint first, - uint last) -{ - xfs_buf_log_item_t *bip = bp->b_fspriv; - uint nbytes; - - if (bip == NULL || (bip->bli_item.li_type != XFS_LI_BUF)) - return; - - ASSERT(bip->bli_logged != NULL); - nbytes = last - first + 1; - bfset(bip->bli_logged, first, nbytes); -} - -/* - * This function is called to verify that our callers have logged - * all the bytes that they changed. - * - * It does this by comparing the original copy of the buffer stored in - * the buf log item's bli_orig array to the current copy of the buffer - * and ensuring that all bytes which mismatch are set in the bli_logged - * array of the buf log item. - */ -STATIC void -xfs_buf_item_log_check( - xfs_buf_log_item_t *bip) -{ - char *orig; - char *buffer; - int x; - xfs_buf_t *bp; - - ASSERT(bip->bli_orig != NULL); - ASSERT(bip->bli_logged != NULL); - - bp = bip->bli_buf; - ASSERT(bp->b_length > 0); - ASSERT(bp->b_addr != NULL); - orig = bip->bli_orig; - buffer = bp->b_addr; - for (x = 0; x < BBTOB(bp->b_length); x++) { - if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) { - xfs_emerg(bp->b_mount, - "%s: bip %x buffer %x orig %x index %d", - __func__, bip, bp, orig, x); - ASSERT(0); - } - } -} -#else -#define xfs_buf_item_log_debug(x,y,z) -#define xfs_buf_item_log_check(x) -#endif - STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); /* @@ -429,7 +326,6 @@ xfs_buf_item_format( * Check to make sure everything is consistent. 
*/ trace_xfs_buf_item_format(bip); - xfs_buf_item_log_check(bip); } /* @@ -915,8 +811,6 @@ xfs_buf_item_log_segment( mask = (1 << end_bit) - 1; *wordp |= mask; } - - xfs_buf_item_log_debug(bip, first, last); } /* diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 16def43..ee36c88 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -98,10 +98,6 @@ typedef struct xfs_buf_log_item { unsigned int bli_flags; /* misc flags */ unsigned int bli_recur; /* lock recursion count */ atomic_t bli_refcount; /* cnt of tp refs */ -#ifdef XFS_TRANS_DEBUG - char *bli_orig; /* original buffer copy */ - char *bli_logged; /* bytes logged (bitmap) */ -#endif int bli_format_count; /* count of headers */ struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */ struct xfs_buf_log_format __bli_format; /* embedded in-log header */ @@ -117,16 +113,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *, void xfs_buf_iodone_callbacks(struct xfs_buf *); void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); -#ifdef XFS_TRANS_DEBUG -void -xfs_buf_item_flush_log_debug( - struct xfs_buf *bp, - uint first, - uint last); -#else -#define xfs_buf_item_flush_log_debug(bp, first, last) -#endif - #endif /* __KERNEL__ */ #endif /* __XFS_BUF_ITEM_H__ */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 66282dc..4f20165 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2379,9 +2379,6 @@ xfs_iflush_fork( char *cp; xfs_ifork_t *ifp; xfs_mount_t *mp; -#ifdef XFS_TRANS_DEBUG - int first; -#endif static const short brootflag[2] = { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; static const short dataflag[2] = @@ -2724,9 +2721,6 @@ xfs_iflush_int( xfs_inode_log_item_t *iip; xfs_dinode_t *dip; xfs_mount_t *mp; -#ifdef XFS_TRANS_DEBUG - int first; -#endif ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isiflocked(ip)); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index d041d47..f034bd1 100644 --- a/fs/xfs/xfs_inode_item.c 
+++ b/fs/xfs/xfs_inode_item.c @@ -269,17 +269,6 @@ xfs_inode_item_format( } else { ASSERT(!(iip->ili_fields & XFS_ILOG_DBROOT)); -#ifdef XFS_TRANS_DEBUG - if (iip->ili_root_size > 0) { - ASSERT(iip->ili_root_size == - ip->i_df.if_broot_bytes); - ASSERT(memcmp(iip->ili_orig_root, - ip->i_df.if_broot, - iip->ili_root_size) == 0); - } else { - ASSERT(ip->i_df.if_broot_bytes == 0); - } -#endif iip->ili_fields &= ~XFS_ILOG_DBROOT; } break; @@ -678,11 +667,6 @@ void xfs_inode_item_destroy( xfs_inode_t *ip) { -#ifdef XFS_TRANS_DEBUG - if (ip->i_itemp->ili_root_size != 0) { - kmem_free(ip->i_itemp->ili_orig_root); - } -#endif kmem_zone_free(xfs_ili_zone, ip->i_itemp); } diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 376d4d0..779812f 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -148,10 +148,6 @@ typedef struct xfs_inode_log_item { data exts */ struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged attr exts */ -#ifdef XFS_TRANS_DEBUG - int ili_root_size; - char *ili_orig_root; -#endif xfs_inode_log_format_t ili_format; /* logged structure */ } xfs_inode_log_item_t; diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 6011ee6..0eda725 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -55,20 +55,6 @@ xfs_ail_check( ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); -#ifdef XFS_TRANS_DEBUG - /* - * Walk the list checking lsn ordering, and that every entry has the - * XFS_LI_IN_AIL flag set. This is really expensive, so only do it - * when specifically debugging the transaction subsystem. 
- */ - prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); - list_for_each_entry(lip, &ailp->xa_ail, li_ail) { - if (&prev_lip->li_ail != &ailp->xa_ail) - ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); - ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); - prev_lip = lip; - } -#endif /* XFS_TRANS_DEBUG */ } #else /* !DEBUG */ #define xfs_ail_check(a,l) diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index d2eee20..ac6d567 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -33,14 +33,6 @@ #include "xfs_inode_item.h" #include "xfs_trace.h" -#ifdef XFS_TRANS_DEBUG -STATIC void -xfs_trans_inode_broot_debug( - xfs_inode_t *ip); -#else -#define xfs_trans_inode_broot_debug(ip) -#endif - /* * Add a locked inode to the transaction. * @@ -67,8 +59,6 @@ xfs_trans_ijoin( * Get a log_item_desc to point at the new item. */ xfs_trans_add_item(tp, &iip->ili_item); - - xfs_trans_inode_broot_debug(ip); } /* @@ -135,34 +125,3 @@ xfs_trans_log_inode( flags |= ip->i_itemp->ili_last_fields; ip->i_itemp->ili_fields |= flags; } - -#ifdef XFS_TRANS_DEBUG -/* - * Keep track of the state of the inode btree root to make sure we - * log it properly. 
- */ -STATIC void -xfs_trans_inode_broot_debug( - xfs_inode_t *ip) -{ - xfs_inode_log_item_t *iip; - - ASSERT(ip->i_itemp != NULL); - iip = ip->i_itemp; - if (iip->ili_root_size != 0) { - ASSERT(iip->ili_orig_root != NULL); - kmem_free(iip->ili_orig_root); - iip->ili_root_size = 0; - iip->ili_orig_root = NULL; - } - if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { - ASSERT((ip->i_df.if_broot != NULL) && - (ip->i_df.if_broot_bytes > 0)); - iip->ili_root_size = ip->i_df.if_broot_bytes; - iip->ili_orig_root = - (char*)kmem_alloc(iip->ili_root_size, KM_SLEEP); - memcpy(iip->ili_orig_root, (char*)(ip->i_df.if_broot), - iip->ili_root_size); - } -} -#endif -- cgit v0.10.2 From f755503206ef705c20db622637d80a3e1b94a6f5 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 21 Dec 2012 10:45:17 -0500 Subject: xfs: remove int casts from debug dquot soft limit timer asserts The int casts here make it easy to trigger an assert with a large soft limit. For example, set a >4TB soft limit on an empty volume to reproduce a (0 > -x) comparison due to an overflow of d_blk_softlimit. 
Signed-off-by: Brian Foster Reviewed-by: Ben Myers Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 5f53e75..8a59f85 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -784,11 +784,11 @@ xfs_qm_scall_getquota( (XFS_IS_OQUOTA_ENFORCED(mp) && (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && dst->d_id != 0) { - if (((int) dst->d_bcount > (int) dst->d_blk_softlimit) && + if ((dst->d_bcount > dst->d_blk_softlimit) && (dst->d_blk_softlimit > 0)) { ASSERT(dst->d_btimer != 0); } - if (((int) dst->d_icount > (int) dst->d_ino_softlimit) && + if ((dst->d_icount > dst->d_ino_softlimit) && (dst->d_ino_softlimit > 0)) { ASSERT(dst->d_itimer != 0); } -- cgit v0.10.2 From 83a9ba00573904953c58565226f1d3bab9f10983 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 10 Dec 2012 14:49:15 -0600 Subject: xfs: don't zero structure members after a memset(0) Commit 408cc4e97a3ccd172d2d676e4b585badf439271b added memset(0, ...) to allocation args structures, so there is no need to explicitly set any of the fields to 0 after that. 
Signed-off-by: Eric Sandeen Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 393055f..0ad2325 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -1925,8 +1925,6 @@ xfs_alloc_fix_freelist( targs.mp = mp; targs.agbp = agbp; targs.agno = args->agno; - targs.mod = targs.minleft = targs.wasdel = targs.userdata = - targs.minalignslop = 0; targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; targs.type = XFS_ALLOCTYPE_THIS_AG; targs.pag = pag; diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 0e92d12..c507720 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -3099,8 +3099,6 @@ xfs_bmap_extents_to_btree( args.fsbno = *firstblock; } args.minlen = args.maxlen = args.prod = 1; - args.total = args.minleft = args.alignment = args.mod = args.isfl = - args.minalignslop = 0; args.wasdel = wasdel; *logflagsp = 0; if ((error = xfs_alloc_vextent(&args))) { @@ -3259,8 +3257,6 @@ xfs_bmap_local_to_extents( args.type = XFS_ALLOCTYPE_NEAR_BNO; } args.total = total; - args.mod = args.minleft = args.alignment = args.wasdel = - args.isfl = args.minalignslop = 0; args.minlen = args.maxlen = args.prod = 1; if ((error = xfs_alloc_vextent(&args))) goto done; diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index a815412..515bf71 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -279,8 +279,6 @@ xfs_ialloc_ag_alloc( (args.agbno < be32_to_cpu(agi->agi_length)))) { args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); args.type = XFS_ALLOCTYPE_THIS_BNO; - args.mod = args.total = args.wasdel = args.isfl = - args.userdata = args.minalignslop = 0; args.prod = 1; /* @@ -333,8 +331,6 @@ xfs_ialloc_ag_alloc( * Allocate a fixed-size extent of inodes. */ args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.mod = args.total = args.wasdel = args.isfl = - args.userdata = args.minalignslop = 0; args.prod = 1; /* * Allow space for the inode btree to split. 
-- cgit v0.10.2 From d4608632ec8f4ae3ffecdd343ede34e60eabc64f Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 10 Jan 2013 10:41:48 -0600 Subject: xfs: recalculate leaf entry pointer after compacting a dir2 block Dave Jones hit this assert when doing a compile on recent git, with CONFIG_XFS_DEBUG enabled: XFS: Assertion failed: (char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)), file: fs/xfs/xfs_dir2_data.c, line: 828 Upon further digging, the tag found by xfs_dir2_data_unused_tag_p(dup) contained "2" and not the proper offset, and I found that this value was changed after the memmoves under "Use a stale leaf for our new entry." in xfs_dir2_block_addname(), i.e. memmove(&blp[mid + 1], &blp[mid], (highstale - mid) * sizeof(*blp)); overwrote it. What has happened is that the previous call to xfs_dir2_block_compact() has rearranged things; it changes btp->count as well as the blp array. So after we make that call, we must recalculate the proper pointer to the leaf entries by making another call to xfs_dir2_block_leaf_p(). Dave provided a metadump image which led to a simple reproducer (create a particular filename in the affected directory) and this resolves the testcase as well as the bug on his live system. Thanks also to dchinner for looking at this one with me. Signed-off-by: Eric Sandeen Tested-by: Dave Jones Reviewed-by: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 7536faa..12afe07 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -355,10 +355,12 @@ xfs_dir2_block_addname( /* * If need to compact the leaf entries, do it now. */ - if (compact) + if (compact) { xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog, &lfloghigh, &lfloglow); - else if (btp->stale) { + /* recalculate blp post-compaction */ + blp = xfs_dir2_block_leaf_p(btp); + } else if (btp->stale) { /* * Set leaf logging boundaries to impossible state. 
* For the no-stale case they're set explicitly. -- cgit v0.10.2 From a17164e54bf0e3c2cbc72c35b9f67c2873a122dd Mon Sep 17 00:00:00 2001 From: Abhijit Pawar Date: Wed, 9 Jan 2013 19:34:42 +0530 Subject: fs/xfs remove obsolete simple_strto This patch replaces usages of obsolete simple_strtoul with kstrtoint in xfs_args and suffix_strtoul. Signed-off-by: Abhijit Pawar Reviewed-by: Jie Liu Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index ab8839b..c407121 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -139,9 +139,9 @@ static const match_table_t tokens = { STATIC unsigned long -suffix_strtoul(char *s, char **endp, unsigned int base) +suffix_kstrtoint(char *s, unsigned int base, int *res) { - int last, shift_left_factor = 0; + int last, shift_left_factor = 0, _res; char *value = s; last = strlen(value) - 1; @@ -158,7 +158,10 @@ suffix_strtoul(char *s, char **endp, unsigned int base) value[last] = '\0'; } - return simple_strtoul((const char *)s, endp, base) << shift_left_factor; + if (kstrtoint(s, base, &_res)) + return -EINVAL; + *res = _res << shift_left_factor; + return 0; } /* @@ -174,7 +177,7 @@ xfs_parseargs( char *options) { struct super_block *sb = mp->m_super; - char *this_char, *value, *eov; + char *this_char, *value; int dsunit = 0; int dswidth = 0; int iosize = 0; @@ -230,14 +233,16 @@ xfs_parseargs( this_char); return EINVAL; } - mp->m_logbufs = simple_strtoul(value, &eov, 10); + if (kstrtoint(value, 10, &mp->m_logbufs)) + return EINVAL; } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } - mp->m_logbsize = suffix_strtoul(value, &eov, 10); + if (suffix_kstrtoint(value, 10, &mp->m_logbsize)) + return EINVAL; } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", @@ -266,7 +271,8 @@ xfs_parseargs( this_char); return EINVAL; } - iosize = 
simple_strtoul(value, &eov, 10); + if (kstrtoint(value, 10, &iosize)) + return EINVAL; iosizelog = ffs(iosize) - 1; } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { if (!value || !*value) { @@ -274,7 +280,8 @@ xfs_parseargs( this_char); return EINVAL; } - iosize = suffix_strtoul(value, &eov, 10); + if (suffix_kstrtoint(value, 10, &iosize)) + return EINVAL; iosizelog = ffs(iosize) - 1; } else if (!strcmp(this_char, MNTOPT_GRPID) || !strcmp(this_char, MNTOPT_BSDGROUPS)) { @@ -296,14 +303,16 @@ xfs_parseargs( this_char); return EINVAL; } - dsunit = simple_strtoul(value, &eov, 10); + if (kstrtoint(value, 10, &dsunit)) + return EINVAL; } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } - dswidth = simple_strtoul(value, &eov, 10); + if (kstrtoint(value, 10, &dswidth)) + return EINVAL; } else if (!strcmp(this_char, MNTOPT_32BITINODE)) { mp->m_flags |= XFS_MOUNT_SMALL_INUMS; } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { -- cgit v0.10.2 From aeb4f20a02b4c984c48995ad54f40caf5ffa0705 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 16 Jan 2013 17:33:53 -0600 Subject: xfs: Do not return EFSCORRUPTED when filesystem probe finds no XFS magic 9802182 changed the return value from EWRONGFS (aka EINVAL) to EFSCORRUPTED which doesn't seem to be handled properly by the root filesystem probe. Signed-off-by: Eric Sandeen Tested-by: Sergei Trofimovich Reviewed-by: Ben Myers Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index da50846..7d6df7c 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -658,7 +658,7 @@ xfs_sb_quiet_read_verify( return; } /* quietly fail */ - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, EWRONGFS); } static void -- cgit v0.10.2 From 667a9291c5b38318a47a2df730bc064e5d37d2a8 Mon Sep 17 00:00:00 2001 From: Thiago Farina Date: Mon, 12 Nov 2012 21:32:59 -0200 Subject: xfs: Remove boolean_t typedef completely. 
Since we are using C99 we have one builtin defined in include/linux/types.h, use that instead. v2: you missed one in fs/xfs/xfs_qm_bhv.c, cleaned up. -bpm Signed-off-by: Thiago Farina Reviewed-by: Ben Myers Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 46bd9d5..dafdae3 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -120,7 +120,7 @@ xlog_verify_iclog( struct xlog *log, struct xlog_in_core *iclog, int count, - boolean_t syncing); + bool syncing); STATIC void xlog_verify_tail_lsn( struct xlog *log, @@ -3611,7 +3611,7 @@ xlog_verify_iclog( struct xlog *log, struct xlog_in_core *iclog, int count, - boolean_t syncing) + bool syncing) { xlog_op_header_t *ophead; xlog_in_core_t *icptr; @@ -3659,7 +3659,7 @@ xlog_verify_iclog( /* clientid is only 1 byte */ field_offset = (__psint_t) ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr); - if (syncing == B_FALSE || (field_offset & 0x1ff)) { + if (!syncing || (field_offset & 0x1ff)) { clientid = ophead->oh_clientid; } else { idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap); @@ -3682,7 +3682,7 @@ xlog_verify_iclog( /* check length */ field_offset = (__psint_t) ((xfs_caddr_t)&(ophead->oh_len) - base_ptr); - if (syncing == B_FALSE || (field_offset & 0x1ff)) { + if (!syncing || (field_offset & 0x1ff)) { op_len = be32_to_cpu(ophead->oh_len); } else { idx = BTOBBT((__psint_t)&ophead->oh_len - diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index 6b39115..2d02eac 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -146,7 +146,7 @@ xfs_qm_newmount( * inode goes inactive and wants to free blocks, * or via xfs_log_mount_finish. 
*/ - *needquotamount = B_TRUE; + *needquotamount = true; *quotaflags = mp->m_qflags; mp->m_qflags = 0; } diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 8a59f85..33d9c2b 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -408,10 +408,10 @@ xfs_qm_scall_getqstat( { struct xfs_quotainfo *q = mp->m_quotainfo; struct xfs_inode *uip, *gip; - boolean_t tempuqip, tempgqip; + bool tempuqip, tempgqip; uip = gip = NULL; - tempuqip = tempgqip = B_FALSE; + tempuqip = tempgqip = false; memset(out, 0, sizeof(fs_quota_stat_t)); out->qs_version = FS_QSTAT_VERSION; @@ -434,12 +434,12 @@ xfs_qm_scall_getqstat( if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &uip) == 0) - tempuqip = B_TRUE; + tempuqip = true; } if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &gip) == 0) - tempgqip = B_TRUE; + tempgqip = true; } if (uip) { out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 0c7fa54..642c2d6 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -516,7 +516,7 @@ xfs_trans_unreserve_and_mod_dquots( int i, j; xfs_dquot_t *dqp; xfs_dqtrx_t *qtrx, *qa; - boolean_t locked; + bool locked; if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) return; @@ -537,17 +537,17 @@ xfs_trans_unreserve_and_mod_dquots( * about the number of blocks used field, or deltas. * Also we don't bother to zero the fields. 
*/ - locked = B_FALSE; + locked = false; if (qtrx->qt_blk_res) { xfs_dqlock(dqp); - locked = B_TRUE; + locked = true; dqp->q_res_bcount -= (xfs_qcnt_t)qtrx->qt_blk_res; } if (qtrx->qt_ino_res) { if (!locked) { xfs_dqlock(dqp); - locked = B_TRUE; + locked = true; } dqp->q_res_icount -= (xfs_qcnt_t)qtrx->qt_ino_res; @@ -556,7 +556,7 @@ xfs_trans_unreserve_and_mod_dquots( if (qtrx->qt_rtblk_res) { if (!locked) { xfs_dqlock(dqp); - locked = B_TRUE; + locked = true; } dqp->q_res_rtbcount -= (xfs_qcnt_t)qtrx->qt_rtblk_res; diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 7a41874..61ba1cf 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h @@ -32,7 +32,6 @@ typedef unsigned int __uint32_t; typedef signed long long int __int64_t; typedef unsigned long long int __uint64_t; -typedef enum { B_FALSE,B_TRUE } boolean_t; typedef __uint32_t prid_t; /* project ID */ typedef __uint32_t inst_t; /* an instruction */ diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index d95f565..77ad748 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -725,7 +725,7 @@ xfs_create( int error; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; - boolean_t unlock_dp_on_error = B_FALSE; + bool unlock_dp_on_error = false; uint cancel_flags; int committed; prid_t prid; @@ -794,7 +794,7 @@ xfs_create( } xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); - unlock_dp_on_error = B_TRUE; + unlock_dp_on_error = true; xfs_bmap_init(&free_list, &first_block); @@ -830,7 +830,7 @@ xfs_create( * error path. */ xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); - unlock_dp_on_error = B_FALSE; + unlock_dp_on_error = false; error = xfs_dir_createname(tp, dp, name, ip->i_ino, &first_block, &free_list, resblks ? 
@@ -1367,7 +1367,7 @@ xfs_symlink( int pathlen; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; - boolean_t unlock_dp_on_error = B_FALSE; + bool unlock_dp_on_error = false; uint cancel_flags; int committed; xfs_fileoff_t first_fsb; @@ -1438,7 +1438,7 @@ xfs_symlink( } xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); - unlock_dp_on_error = B_TRUE; + unlock_dp_on_error = true; /* * Check whether the directory allows new symlinks or not. @@ -1484,7 +1484,7 @@ xfs_symlink( * error path. */ xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); - unlock_dp_on_error = B_FALSE; + unlock_dp_on_error = false; /* * Also attach the dquot(s) to it, if applicable. -- cgit v0.10.2 From 9e96fe6df44425b69ed89f6ac20352cec1f127d7 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 17 Jan 2013 13:11:29 -0500 Subject: xfs: pull up stack_switch check into xfs_bmapi_write The stack_switch check currently occurs in __xfs_bmapi_allocate, which means the stack switch only occurs when xfs_bmapi_allocate() is called in a loop. Pull the check up before the loop in xfs_bmapi_write() such that the first iteration of the loop has consistent behavior. 
Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index c507720..491f35e 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4676,9 +4676,6 @@ __xfs_bmapi_allocate( return error; } - if (bma->flags & XFS_BMAPI_STACK_SWITCH) - bma->stack_switch = 1; - error = xfs_bmap_alloc(bma); if (error) return error; @@ -4952,6 +4949,9 @@ xfs_bmapi_write( bma.flist = flist; bma.firstblock = firstblock; + if (flags & XFS_BMAPI_STACK_SWITCH) + bma.stack_switch = 1; + while (bno < end && n < *nmap) { inhole = eof || bma.got.br_startoff > bno; wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); -- cgit v0.10.2 From 003fd6c8be14a348c56cb1d171605ab13fca906f Mon Sep 17 00:00:00 2001 From: Ben Myers Date: Fri, 18 Jan 2013 14:17:46 -0600 Subject: xfs: fix fs/xfs/xfs_log.c:1740:39: error: 'B_TRUE' undeclared Commit 667a9291c5b3 "xfs: Remove boolean_t typedef completely." didn't. Remove a stray B_TRUE that breaks CONFIG_XFS_DEBUG=y. Signed-off-by: Ben Myers Reported-by: Wu Fengguang Reviewed-by: Mark Tinguely diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index dafdae3..eec226f 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1737,7 +1737,7 @@ xlog_sync( ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); - xlog_verify_iclog(log, iclog, count, B_TRUE); + xlog_verify_iclog(log, iclog, count, true); /* account for log which doesn't start at block #0 */ XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); -- cgit v0.10.2 From 10616b806d1d7835b1d23b8d75ef638f92cb98b6 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 21 Jan 2013 23:53:52 +1100 Subject: xfs: fix _xfs_buf_find oops on blocks beyond the filesystem end When _xfs_buf_find is passed an out of range address, it will fail to find a relevant struct xfs_perag and oops with a null dereference. 
This can happen when trying to walk a filesystem with a metadata inode that has a partially corrupted extent map (i.e. the block number returned is corrupt, but is otherwise intact) and we try to read from the corrupted block address. In this case, just fail the lookup. If it is readahead being issued, it will simply not be done, but if it is real read that fails we will get an error being reported. Ideally this case should result in an EFSCORRUPTED error being reported, but we cannot return an error through xfs_buf_read() or xfs_buf_get() so this lookup failure may result in ENOMEM or EIO errors being reported instead. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Ben Myers Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 56d1614..689d726 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -487,6 +487,7 @@ _xfs_buf_find( struct rb_node *parent; xfs_buf_t *bp; xfs_daddr_t blkno = map[0].bm_bn; + xfs_daddr_t eofs; int numblks = 0; int i; @@ -498,6 +499,23 @@ _xfs_buf_find( ASSERT(!(numbytes < (1 << btp->bt_sshift))); ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask)); + /* + * Corrupted block numbers can get through to here, unfortunately, so we + * have to check that the buffer falls within the filesystem bounds. + */ + eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks); + if (blkno >= eofs) { + /* + * XXX (dgc): we should really be returning EFSCORRUPTED here, + * but none of the higher level infrastructure supports + * returning a specific error on buffer lookup failures. 
+ */ + xfs_alert(btp->bt_mount, + "%s: Block out of range: block 0x%llx, EOFS 0x%llx ", + __func__, blkno, eofs); + return NULL; + } + /* get tree root */ pag = xfs_perag_get(btp->bt_mount, xfs_daddr_to_agno(btp->bt_mount, blkno)); -- cgit v0.10.2 From 4d559a3bcb7383f34334092af07e68fb60910684 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 21 Jan 2013 23:53:54 +1100 Subject: xfs: limit speculative prealloc near ENOSPC thresholds There is a window on small filesystems where speculative preallocation can be larger than the ENOSPC throttling thresholds, resulting in speculative preallocation trying to reserve more space than there is space available. This causes immediate ENOSPC to be triggered, prealloc to be turned off and flushing to occur. On the next write (i.e. next 4k page), we do exactly the same thing, and so effectively drive into synchronous 4k writes by triggering ENOSPC flushing on every page while in the window between the prealloc size and the ENOSPC prealloc throttle threshold. Fix this by checking to see if the prealloc size would consume all free space, and throttle it appropriately to avoid premature ENOSPC... Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index add06b4..364818e 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -351,6 +351,15 @@ xfs_iomap_prealloc_size( } if (shift) alloc_blocks >>= shift; + + /* + * If we are still trying to allocate more space than is + * available, squash the prealloc hard. This can happen if we + * have a large file on a small filesystem and the above + * lowspace thresholds are smaller than MAXEXTLEN.
+ */ + while (alloc_blocks >= freesp) + alloc_blocks >>= 4; } if (alloc_blocks < mp->m_writeio_blocks) -- cgit v0.10.2 From 3b19034d4f4554e39ca244fb28962bbf2ccba046 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 21 Jan 2013 23:53:55 +1100 Subject: xfs: fix shutdown hang on invalid inode during create When the new inode verify in xfs_iread() fails, the create transaction is aborted and a shutdown occurs. The subsequent unmount then hangs in xfs_wait_buftarg() on a buffer that has an elevated hold count. Debug showed that it was an AGI buffer getting stuck: [ 22.576147] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck [ 22.976213] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck [ 23.376206] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck [ 23.776325] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck The trace of this buffer leading up to the shutdown (trimmed for brevity) looks like: xfs_buf_init: bno 0x2 nblks 0x1 hold 1 caller xfs_buf_get_map xfs_buf_get: bno 0x2 len 0x200 hold 1 caller xfs_buf_read_map xfs_buf_read: bno 0x2 len 0x200 hold 1 caller xfs_trans_read_buf_map xfs_buf_iorequest: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_read xfs_buf_hold: bno 0x2 nblks 0x1 hold 1 caller xfs_buf_iorequest xfs_buf_rele: bno 0x2 nblks 0x1 hold 2 caller xfs_buf_iorequest xfs_buf_iowait: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_read xfs_buf_ioerror: bno 0x2 len 0x200 hold 1 caller xfs_buf_bio_end_io xfs_buf_iodone: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_ioend xfs_buf_iowait_done: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_read xfs_buf_hold: bno 0x2 nblks 0x1 hold 1 caller xfs_buf_item_init xfs_trans_read_buf: bno 0x2 len 0x200 hold 2 recur 0 refcount 1 xfs_trans_brelse: bno 0x2 len 0x200 hold 2 recur 0 refcount 1 xfs_buf_item_relse: bno 0x2 nblks 0x1 hold 2 caller xfs_trans_brelse xfs_buf_rele: bno 0x2 nblks 0x1 hold 2 caller xfs_buf_item_relse xfs_buf_unlock: bno 0x2 nblks 0x1 hold 1 caller xfs_trans_brelse xfs_buf_rele: bno 0x2 nblks 0x1 hold 1 caller xfs_trans_brelse xfs_buf_trylock: bno 0x2 
nblks 0x1 hold 2 caller _xfs_buf_find xfs_buf_find: bno 0x2 len 0x200 hold 2 caller xfs_buf_get_map xfs_buf_get: bno 0x2 len 0x200 hold 2 caller xfs_buf_read_map xfs_buf_read: bno 0x2 len 0x200 hold 2 caller xfs_trans_read_buf_map xfs_buf_hold: bno 0x2 nblks 0x1 hold 2 caller xfs_buf_item_init xfs_trans_read_buf: bno 0x2 len 0x200 hold 3 recur 0 refcount 1 xfs_trans_log_buf: bno 0x2 len 0x200 hold 3 recur 0 refcount 1 xfs_buf_item_unlock: bno 0x2 len 0x200 hold 3 flags DIRTY liflags ABORTED xfs_buf_unlock: bno 0x2 nblks 0x1 hold 3 caller xfs_buf_item_unlock xfs_buf_rele: bno 0x2 nblks 0x1 hold 3 caller xfs_buf_item_unlock And that is the AGI buffer from cold cache read into memory to transaction abort. You can see at transaction abort the bli is dirty and only has a single reference. The item is not pinned, and it's not in the AIL. Hence the only reference to it is this transaction. The problem is that the xfs_buf_item_unlock() call is dropping the last reference to the xfs_buf_log_item attached to the buffer (which holds a reference to the buffer), but it is not freeing the xfs_buf_log_item. Hence nothing will ever release the buffer, and the unmount hangs waiting for this reference to go away. The fix is simple - xfs_buf_item_unlock needs to detect the last reference going away in this case and free the xfs_buf_log_item to release the reference it holds on the buffer. 
Signed-off-by: Dave Chinner Reviewed-by: Ben Myers Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 689d726..fbbb9eb 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1505,6 +1505,8 @@ restart: while (!list_empty(&btp->bt_lru)) { bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); if (atomic_read(&bp->b_hold) > 1) { + trace_xfs_buf_wait_buftarg(bp, _RET_IP_); + list_move_tail(&bp->b_lru, &btp->bt_lru); spin_unlock(&btp->bt_lru_lock); delay(100); goto restart; diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 63c86c4..9c4c050 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -548,7 +548,10 @@ xfs_buf_item_unlock( /* * If the buf item isn't tracking any data, free it, otherwise drop the - * reference we hold to it. + * reference we hold to it. If we are aborting the transaction, this may + * be the only reference to the buf item, so we free it anyway + * regardless of whether it is dirty or not. A dirty abort implies a + * shutdown, anyway. 
*/ clean = 1; for (i = 0; i < bip->bli_format_count; i++) { @@ -560,7 +563,12 @@ xfs_buf_item_unlock( } if (clean) xfs_buf_item_relse(bp); - else + else if (aborted) { + if (atomic_dec_and_test(&bip->bli_refcount)) { + ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp)); + xfs_buf_item_relse(bp); + } + } else atomic_dec(&bip->bli_refcount); if (!hold) diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 2e137d4..16a8129 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -341,6 +341,7 @@ DEFINE_BUF_EVENT(xfs_buf_item_relse); DEFINE_BUF_EVENT(xfs_buf_item_iodone); DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); DEFINE_BUF_EVENT(xfs_buf_error_relse); +DEFINE_BUF_EVENT(xfs_buf_wait_buftarg); DEFINE_BUF_EVENT(xfs_trans_read_buf_io); DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); -- cgit v0.10.2 From ced55f38d6bde7c10a14ea51c2edcd51a98575e3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 23 Jan 2013 13:56:18 +0100 Subject: xfs: Fix possible use-after-free with AIO Running AIO is pinning inode in memory using file reference. Once AIO is completed using aio_complete(), file reference is put and inode can be freed from memory. So we have to be sure that calling aio_complete() is the last thing we do with the inode. CC: xfs@oss.sgi.com CC: Ben Myers CC: stable@vger.kernel.org Signed-off-by: Jan Kara Reviewed-by: Ben Myers Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 4111a40..5f707e5 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -86,11 +86,11 @@ xfs_destroy_ioend( } if (ioend->io_iocb) { + inode_dio_done(ioend->io_inode); if (ioend->io_isasync) { aio_complete(ioend->io_iocb, ioend->io_error ? 
ioend->io_error : ioend->io_result, 0); } - inode_dio_done(ioend->io_inode); } mempool_free(ioend, xfs_ioend_pool); -- cgit v0.10.2 From 2729423cf2ef3ac51d040f24a5ddd84c1b2acc70 Mon Sep 17 00:00:00 2001 From: Torsten Kaiser Date: Sun, 20 Jan 2013 10:24:49 +0100 Subject: xfs: Fix xfs_swap_extents() after removal of xfs_flushinval_pages() Commit fb59581404ab7ec5075299065c22cb211a9262a9 removed xfs_flushinval_pages() and changed its callers to use filemap_write_and_wait() and truncate_pagecache_range() directly. But in xfs_swap_extents() this change accidentally switched the argument for 'tip' to 'ip'. This patch switches it back to 'tip'. Signed-off-by: Torsten Kaiser Reviewed-by: Ben Myers Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index d0e9c74..a8bd26b 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -246,10 +246,10 @@ xfs_swap_extents( goto out_unlock; } - error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); + error = -filemap_write_and_wait(VFS_I(tip)->i_mapping); if (error) goto out_unlock; - truncate_pagecache_range(VFS_I(ip), 0, -1); + truncate_pagecache_range(VFS_I(tip), 0, -1); /* Verify O_DIRECT for ftmp */ if (VN_CACHED(VFS_I(tip)) != 0) { -- cgit v0.10.2 From 4f3b57832ba39223c6f8823d07b9fb206e282ced Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Mon, 28 Jan 2013 21:25:35 +0800 Subject: xfs: add a helper to figure out the space log reservation per item Add a new helper xfs_calc_buf_res() to calculate the transaction space reservations per item. xfs_buf_log_overhead() is used to figure out the extra space for struct xfs_buf_log_format that gets written into the log for every buffer as well as a log opheader, i.e. struct xlog_op_header.
Signed-off-by: Jie Liu CC: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 06ed520..6c601ea 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -37,14 +37,45 @@ #include "xfs_extent_busy.h" #include "xfs_bmap.h" #include "xfs_quota.h" +#include "xfs_qm.h" #include "xfs_trans_priv.h" #include "xfs_trans_space.h" #include "xfs_inode_item.h" +#include "xfs_log_priv.h" +#include "xfs_buf_item.h" #include "xfs_trace.h" kmem_zone_t *xfs_trans_zone; kmem_zone_t *xfs_log_item_desc_zone; +/* + * A buffer has a format structure overhead in the log in addition + * to the data, so we need to take this into account when reserving + * space in a transaction for a buffer. Round the space required up + * to a multiple of 128 bytes so that we don't change the historical + * reservation that has been used for this overhead. + */ +STATIC uint +xfs_buf_log_overhead(void) +{ + return round_up(sizeof(struct xlog_op_header) + + sizeof(struct xfs_buf_log_format), 128); +} + +/* + * Calculate out transaction log reservation per item in bytes. + * + * The nbufs argument is used to indicate the number of items that + * will be changed in a transaction. size is used to tell how many + * bytes should be reserved per item. + */ +STATIC uint +xfs_calc_buf_res( + uint nbufs, + uint size) +{ + return nbufs * (size + xfs_buf_log_overhead()); +} /* * Various log reservation values. 
-- cgit v0.10.2 From 5b292ae3a951a58e32119d73c7ac8f5bec7395a3 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Fri, 1 Feb 2013 14:39:29 -0600 Subject: xfs: make use of xfs_calc_buf_res() in xfs_trans.c Refining the existing reservations with xfs_calc_buf_res() in xfs_trans.c Signed-off-by: Jie Liu CC: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 6c601ea..a81625c 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -116,18 +116,15 @@ xfs_calc_write_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - MAX((mp->m_sb.sb_inodesize + - XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + - 2 * mp->m_sb.sb_sectsize + - mp->m_sb.sb_sectsize + - XFS_ALLOCFREE_LOG_RES(mp, 2) + - 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + - XFS_ALLOCFREE_LOG_COUNT(mp, 2))), - (2 * mp->m_sb.sb_sectsize + - 2 * mp->m_sb.sb_sectsize + - mp->m_sb.sb_sectsize + - XFS_ALLOCFREE_LOG_RES(mp, 2) + - 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))); + MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) + + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), + XFS_FSB_TO_B(mp, 1))), + (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), + XFS_FSB_TO_B(mp, 1)))); } /* @@ -148,18 +145,17 @@ xfs_calc_itruncate_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - MAX((mp->m_sb.sb_inodesize + - XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + - 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), - (4 * mp->m_sb.sb_sectsize + - 4 * mp->m_sb.sb_sectsize + - mp->m_sb.sb_sectsize + - XFS_ALLOCFREE_LOG_RES(mp, 4) + - 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) + - 128 * 5 + - XFS_ALLOCFREE_LOG_RES(mp, 1) + - 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + - XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); + MAX((xfs_calc_buf_res(1, 
mp->m_sb.sb_inodesize) + + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, + XFS_FSB_TO_B(mp, 1))), + (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4), + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(5, 0) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + + mp->m_in_maxlevels, 0))); } /* @@ -179,14 +175,12 @@ xfs_calc_rename_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - MAX((4 * mp->m_sb.sb_inodesize + - 2 * XFS_DIROP_LOG_RES(mp) + - 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))), - (3 * mp->m_sb.sb_sectsize + - 3 * mp->m_sb.sb_sectsize + - mp->m_sb.sb_sectsize + - XFS_ALLOCFREE_LOG_RES(mp, 3) + - 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3)))); + MAX((xfs_calc_buf_res(4, mp->m_sb.sb_inodesize) + + xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp), + XFS_FSB_TO_B(mp, 1))), + (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3), + XFS_FSB_TO_B(mp, 1)))); } /* @@ -206,15 +200,12 @@ xfs_calc_link_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - MAX((mp->m_sb.sb_inodesize + - mp->m_sb.sb_inodesize + - XFS_DIROP_LOG_RES(mp) + - 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), - (mp->m_sb.sb_sectsize + - mp->m_sb.sb_sectsize + - mp->m_sb.sb_sectsize + - XFS_ALLOCFREE_LOG_RES(mp, 1) + - 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); + MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), + XFS_FSB_TO_B(mp, 1))), + (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)))); } /* @@ -234,15 +225,12 @@ xfs_calc_remove_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - MAX((mp->m_sb.sb_inodesize + - mp->m_sb.sb_inodesize + - XFS_DIROP_LOG_RES(mp) + - 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), - (2 * mp->m_sb.sb_sectsize + - 2 * mp->m_sb.sb_sectsize + - mp->m_sb.sb_sectsize + - 
XFS_ALLOCFREE_LOG_RES(mp, 2) + - 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))); + MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), + XFS_FSB_TO_B(mp, 1))), + (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), + XFS_FSB_TO_B(mp, 1)))); } /* @@ -264,18 +252,18 @@ xfs_calc_symlink_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - MAX((mp->m_sb.sb_inodesize + - mp->m_sb.sb_inodesize + - XFS_FSB_TO_B(mp, 1) + - XFS_DIROP_LOG_RES(mp) + - 1024 + - 128 * (4 + XFS_DIROP_LOG_COUNT(mp))), - (2 * mp->m_sb.sb_sectsize + - XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + - XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + - XFS_ALLOCFREE_LOG_RES(mp, 1) + - 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + - XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); + MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + + xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(1, 1024)), + (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(mp->m_in_maxlevels, + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)))); } /* @@ -298,18 +286,19 @@ xfs_calc_create_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - MAX((mp->m_sb.sb_inodesize + - mp->m_sb.sb_inodesize + + MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + + (uint)XFS_FSB_TO_B(mp, 1) + + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), + XFS_FSB_TO_B(mp, 1))), + (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + mp->m_sb.sb_sectsize + - XFS_FSB_TO_B(mp, 1) + - XFS_DIROP_LOG_RES(mp) + - 128 * (3 + XFS_DIROP_LOG_COUNT(mp))), - (3 * mp->m_sb.sb_sectsize + - XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + - XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + - XFS_ALLOCFREE_LOG_RES(mp, 1) + - 128 * (2 + XFS_IALLOC_BLOCKS(mp) + 
mp->m_in_maxlevels + - XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); + xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(mp->m_in_maxlevels, + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)))); } /* @@ -337,16 +326,16 @@ xfs_calc_ifree_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - mp->m_sb.sb_inodesize + - mp->m_sb.sb_sectsize + - mp->m_sb.sb_sectsize + - XFS_FSB_TO_B(mp, 1) + + xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) + + xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) + - 128 * 5 + - XFS_ALLOCFREE_LOG_RES(mp, 1) + - 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + - XFS_ALLOCFREE_LOG_COUNT(mp, 1)); + xfs_calc_buf_res(1, 0) + + xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + + mp->m_in_maxlevels, 0) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)); } /* @@ -374,9 +363,9 @@ STATIC uint xfs_calc_growdata_reservation( struct xfs_mount *mp) { - return mp->m_sb.sb_sectsize * 3 + - XFS_ALLOCFREE_LOG_RES(mp, 1) + - 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)); + return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)); } /* @@ -393,12 +382,12 @@ STATIC uint xfs_calc_growrtalloc_reservation( struct xfs_mount *mp) { - return 2 * mp->m_sb.sb_sectsize + - XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + - mp->m_sb.sb_inodesize + - XFS_ALLOCFREE_LOG_RES(mp, 1) + - 128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + - XFS_ALLOCFREE_LOG_COUNT(mp, 1)); + return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)); } /* @@ -410,7 +399,7 @@ STATIC uint xfs_calc_growrtzero_reservation( struct 
xfs_mount *mp) { - return mp->m_sb.sb_blocksize + 128; + return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize); } /* @@ -427,11 +416,10 @@ STATIC uint xfs_calc_growrtfree_reservation( struct xfs_mount *mp) { - return mp->m_sb.sb_sectsize + - 2 * mp->m_sb.sb_inodesize + - mp->m_sb.sb_blocksize + - mp->m_rsumsize + - 128 * 5; + return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + + xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) + + xfs_calc_buf_res(1, mp->m_rsumsize); } /* @@ -442,7 +430,7 @@ STATIC uint xfs_calc_swrite_reservation( struct xfs_mount *mp) { - return mp->m_sb.sb_inodesize + 128; + return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize); } /* @@ -452,7 +440,7 @@ xfs_calc_swrite_reservation( STATIC uint xfs_calc_writeid_reservation(xfs_mount_t *mp) { - return mp->m_sb.sb_inodesize + 128; + return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize); } /* @@ -468,13 +456,13 @@ xfs_calc_addafork_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - mp->m_sb.sb_inodesize + - mp->m_sb.sb_sectsize * 2 + - mp->m_dirblksize + - XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + - XFS_ALLOCFREE_LOG_RES(mp, 1) + - 128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 + - XFS_ALLOCFREE_LOG_COUNT(mp, 1)); + xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) + + xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(1, mp->m_dirblksize) + + xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1, + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)); } /* @@ -492,14 +480,12 @@ STATIC uint xfs_calc_attrinval_reservation( struct xfs_mount *mp) { - return MAX((mp->m_sb.sb_inodesize + - XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + - 128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))), - (4 * mp->m_sb.sb_sectsize + - 4 * mp->m_sb.sb_sectsize + - mp->m_sb.sb_sectsize + - XFS_ALLOCFREE_LOG_RES(mp, 4) + - 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)))); + return 
MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) + + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK), + XFS_FSB_TO_B(mp, 1))), + (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4), + XFS_FSB_TO_B(mp, 1)))); } /* @@ -517,10 +503,9 @@ xfs_calc_attrset_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - mp->m_sb.sb_inodesize + - mp->m_sb.sb_sectsize + - XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + - 128 * (2 + XFS_DA_NODE_MAXDEPTH); + xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) + + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)); } /* @@ -539,16 +524,15 @@ xfs_calc_attrrm_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - MAX((mp->m_sb.sb_inodesize + - XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + - XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + - 128 * (1 + XFS_DA_NODE_MAXDEPTH + - XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), - (2 * mp->m_sb.sb_sectsize + - 2 * mp->m_sb.sb_sectsize + - mp->m_sb.sb_sectsize + - XFS_ALLOCFREE_LOG_RES(mp, 2) + - 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))); + MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) + + xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, + XFS_FSB_TO_B(mp, 1)) + + (uint)XFS_FSB_TO_B(mp, + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)), + (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), + XFS_FSB_TO_B(mp, 1)))); } /* @@ -558,7 +542,7 @@ STATIC uint xfs_calc_clear_agi_bucket_reservation( struct xfs_mount *mp) { - return mp->m_sb.sb_sectsize + 128; + return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); } /* -- cgit v0.10.2 From b0c10b983a3e5cc35f239999df1b8bad1ba5b8f6 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Mon, 28 Jan 2013 21:26:16 +0800 Subject: xfs: calculate XFS_TRANS_QM_SBCHANGE space log reservation at mount time The transaction log space for clearing/reseting the quota flags is calculated 
out at runtime, this patch can figure it out at mount time. Signed-off-by: Jie Liu CC: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 7d6df7c..0db4020 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1109,8 +1109,8 @@ xfs_mount_reset_sbqflags( return 0; tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); - error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, - XFS_DEFAULT_LOG_COUNT); + error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp), + 0, 0, XFS_DEFAULT_LOG_COUNT); if (error) { xfs_trans_cancel(tp, 0); xfs_alert(mp, "%s: Superblock update failed!", __func__); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index bab8314..4e9aa95 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -40,6 +40,7 @@ typedef struct xfs_trans_reservations { uint tr_growrtalloc; /* grow realtime allocations */ uint tr_growrtzero; /* grow realtime zeroing */ uint tr_growrtfree; /* grow realtime freeing */ + uint tr_qm_sbchange; /* change quota flags */ } xfs_trans_reservations_t; #ifndef __KERNEL__ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index a81625c..6ba62c0 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -546,6 +546,17 @@ xfs_calc_clear_agi_bucket_reservation( } /* + * Clearing the quotaflags in the superblock. + * the super block for changing quota flags: sector size + */ +STATIC uint +xfs_calc_qm_sbchange_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); +} + +/* * Initialize the precomputed transaction reservation values * in the mount structure. 
*/ @@ -576,6 +587,7 @@ xfs_trans_init( resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp); resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp); resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp); + resp->tr_qm_sbchange = xfs_calc_qm_sbchange_reservation(mp); } /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index c6c0601..a690f28 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -262,7 +262,7 @@ struct xfs_log_item_desc { (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))))) #define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm) #define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi) - +#define XFS_QM_SBCHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_qm_sbchange) /* * Various log count values. -- cgit v0.10.2 From f910a8c620e21fdb53384ad741ff9e4995cc6e67 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Mon, 28 Jan 2013 21:26:34 +0800 Subject: xfs: calculate xfs_qm_write_sb_changes() space log reservation at mount time For the transaction that writes the incore superblock changes of quota flags to disk, it would reserve the same log space as the clear/reset quota flags transaction, hence we can use XFS_QM_SBCHANGE_LOG_RES() for it as well.
Signed-off-by: Jie Liu CC: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 60eff47..e5b5cf9 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1584,10 +1584,9 @@ xfs_qm_write_sb_changes( int error; tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); - if ((error = xfs_trans_reserve(tp, 0, - mp->m_sb.sb_sectsize + 128, 0, - 0, - XFS_DEFAULT_LOG_COUNT))) { + error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp), + 0, 0, XFS_DEFAULT_LOG_COUNT); + if (error) { xfs_trans_cancel(tp, 0); return error; } -- cgit v0.10.2 From f0f2df94faca43fd26f85af7e83df240777c8c37 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Mon, 28 Jan 2013 21:26:49 +0800 Subject: xfs: calculate XFS_TRANS_QM_SETQLIM space log reservation at mount time For adjusting quota limits transactions, we calculate out the log space reservation at runtime, this patch does it at mount time. Signed-off-by: Jie Liu CC: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 4e9aa95..9baa7e0 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -41,6 +41,7 @@ typedef struct xfs_trans_reservations { uint tr_growrtzero; /* grow realtime zeroing */ uint tr_growrtfree; /* grow realtime freeing */ uint tr_qm_sbchange; /* change quota flags */ + uint tr_qm_setqlim; /* adjust quota limits */ } xfs_trans_reservations_t; #ifndef __KERNEL__ diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 33d9c2b..1476b6f 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -490,8 +490,9 @@ xfs_qm_scall_setqlim( return 0; tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); - if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, - 0, 0, XFS_DEFAULT_LOG_COUNT))) { + error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp), + 0, 0, XFS_DEFAULT_LOG_COUNT); + if (error) { xfs_trans_cancel(tp, 0); return (error); } diff --git a/fs/xfs/xfs_trans.c
b/fs/xfs/xfs_trans.c index 6ba62c0..1838850 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -557,6 +557,17 @@ xfs_calc_qm_sbchange_reservation( } /* + * Adjusting quota limits. + * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot) + */ +STATIC uint +xfs_calc_qm_setqlim_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot)); +} + +/* * Initialize the precomputed transaction reservation values * in the mount structure. */ @@ -588,6 +599,7 @@ xfs_trans_init( resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp); resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp); resp->tr_qm_sbchange = xfs_calc_qm_sbchange_reservation(mp); + resp->tr_qm_setqlim = xfs_calc_qm_setqlim_reservation(mp); } /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index a690f28..93a0ec7 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -263,6 +263,7 @@ struct xfs_log_item_desc { #define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm) #define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi) #define XFS_QM_SBCHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_qm_sbchange) +#define XFS_QM_SETQLIM_LOG_RES(mp) ((mp)->m_reservations.tr_qm_setqlim) /* * Various log count values. -- cgit v0.10.2 From 4800104438a4467ffa5ae1e51d5a59c0f64e5f9a Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Mon, 28 Jan 2013 21:27:04 +0800 Subject: xfs: calculate XFS_TRANS_QM_DQALLOC space log reservation at mount time The disk quota allocation log space reservation is calculated at runtime, this patch does it at mount time.
Signed-off-by: Jie Liu CC: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 9e1bf52..8025eb2 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -612,15 +612,9 @@ xfs_qm_dqread( if (flags & XFS_QMOPT_DQALLOC) { tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), - XFS_WRITE_LOG_RES(mp) + - /* - * Round the chunklen up to the next multiple - * of 128 (buf log item chunk size)). - */ - BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128, - 0, - XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); + XFS_QM_DQALLOC_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_WRITE_LOG_COUNT); if (error) goto error1; cancelflags = XFS_TRANS_RELEASE_LOG_RES; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 9baa7e0..fc500c6 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -42,6 +42,7 @@ typedef struct xfs_trans_reservations { uint tr_growrtfree; /* grow realtime freeing */ uint tr_qm_sbchange; /* change quota flags */ uint tr_qm_setqlim; /* adjust quota limits */ + uint tr_qm_dqalloc; /* allocate quota on disk */ } xfs_trans_reservations_t; #ifndef __KERNEL__ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 1838850..6dd2c04 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -568,6 +568,20 @@ xfs_calc_qm_setqlim_reservation( } /* + * Allocating quota on disk if needed. + * the write transaction log space: XFS_WRITE_LOG_RES(mp) + * the unit of quota allocation: one system block size + */ +STATIC uint +xfs_calc_qm_dqalloc_reservation( + struct xfs_mount *mp) +{ + return XFS_WRITE_LOG_RES(mp) + + xfs_calc_buf_res(1, + XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1); +} + +/* * Initialize the precomputed transaction reservation values * in the mount structure. 
*/ @@ -600,6 +614,7 @@ xfs_trans_init( resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp); resp->tr_qm_sbchange = xfs_calc_qm_sbchange_reservation(mp); resp->tr_qm_setqlim = xfs_calc_qm_setqlim_reservation(mp); + resp->tr_qm_dqalloc = xfs_calc_qm_dqalloc_reservation(mp); } /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 93a0ec7..04575db 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -264,6 +264,7 @@ struct xfs_log_item_desc { #define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi) #define XFS_QM_SBCHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_qm_sbchange) #define XFS_QM_SETQLIM_LOG_RES(mp) ((mp)->m_reservations.tr_qm_setqlim) +#define XFS_QM_DQALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_qm_dqalloc) /* * Various log count values. -- cgit v0.10.2 From a1bd9557544d59140c4ac87fe405069b9e1aaf99 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Mon, 28 Jan 2013 21:27:15 +0800 Subject: xfs: calculate XFS_TRANS_QM_QUOTAOFF space log reservation at mount time Convert the calculation of quota off transaction log space reservation from runtime to mount time. 
Signed-off-by: Jie Liu CC: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index fc500c6..4f5e148 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -43,6 +43,7 @@ typedef struct xfs_trans_reservations { uint tr_qm_sbchange; /* change quota flags */ uint tr_qm_setqlim; /* adjust quota limits */ uint tr_qm_dqalloc; /* allocate quota on disk */ + uint tr_qm_quotaoff; /* turn quota off */ } xfs_trans_reservations_t; #ifndef __KERNEL__ diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 1476b6f..4605f89 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -672,14 +672,10 @@ xfs_qm_log_quotaoff( uint oldsbqflag=0; tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); - if ((error = xfs_trans_reserve(tp, 0, - sizeof(xfs_qoff_logitem_t) * 2 + - mp->m_sb.sb_sectsize + 128, - 0, - 0, - XFS_DEFAULT_LOG_COUNT))) { + error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_LOG_RES(mp), + 0, 0, XFS_DEFAULT_LOG_COUNT); + if (error) goto error0; - } qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); xfs_trans_log_quotaoff_item(tp, qoffi); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 6dd2c04..a557c82 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -582,6 +582,19 @@ xfs_calc_qm_dqalloc_reservation( } /* + * Turning off quotas. + * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2 + * the superblock for the quota flags: sector size + */ +STATIC uint +xfs_calc_qm_quotaoff_reservation( + struct xfs_mount *mp) +{ + return sizeof(struct xfs_qoff_logitem) * 2 + + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); +} + +/* * Initialize the precomputed transaction reservation values * in the mount structure. 
*/ @@ -615,6 +628,7 @@ xfs_trans_init( resp->tr_qm_sbchange = xfs_calc_qm_sbchange_reservation(mp); resp->tr_qm_setqlim = xfs_calc_qm_setqlim_reservation(mp); resp->tr_qm_dqalloc = xfs_calc_qm_dqalloc_reservation(mp); + resp->tr_qm_quotaoff = xfs_calc_qm_quotaoff_reservation(mp); } /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 04575db..1e103da 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -265,6 +265,7 @@ struct xfs_log_item_desc { #define XFS_QM_SBCHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_qm_sbchange) #define XFS_QM_SETQLIM_LOG_RES(mp) ((mp)->m_reservations.tr_qm_setqlim) #define XFS_QM_DQALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_qm_dqalloc) +#define XFS_QM_QUOTAOFF_LOG_RES(mp) ((mp)->m_reservations.tr_qm_quotaoff) /* * Various log count values. -- cgit v0.10.2 From 762d7ba657ed4a0934b4da7dcef058012f252e0f Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Mon, 28 Jan 2013 21:27:21 +0800 Subject: xfs: calculate XFS_TRANS_QM_QUOTAOFF_END space log reservation at mount time Convert the calculation for end of quotaoff log space reservation from runtime to mount time. 
Signed-off-by: Jie Liu CC: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 4f5e148..b46009d 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -44,6 +44,7 @@ typedef struct xfs_trans_reservations { uint tr_qm_setqlim; /* adjust quota limits */ uint tr_qm_dqalloc; /* allocate quota on disk */ uint tr_qm_quotaoff; /* turn quota off */ + uint tr_qm_equotaoff;/* end of turn quota off */ } xfs_trans_reservations_t; #ifndef __KERNEL__ diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 4605f89..cf9a340 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -639,8 +639,9 @@ xfs_qm_log_quotaoff_end( tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END); - if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2, - 0, 0, XFS_DEFAULT_LOG_COUNT))) { + error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_END_LOG_RES(mp), + 0, 0, XFS_DEFAULT_LOG_COUNT); + if (error) { xfs_trans_cancel(tp, 0); return (error); } diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index a557c82..84643fa 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -595,6 +595,17 @@ xfs_calc_qm_quotaoff_reservation( } /* + * End of turning off quotas. + * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2 + */ +STATIC uint +xfs_calc_qm_quotaoff_end_reservation( + struct xfs_mount *mp) +{ + return sizeof(struct xfs_qoff_logitem) * 2; +} + +/* * Initialize the precomputed transaction reservation values * in the mount structure. 
*/ @@ -629,6 +640,7 @@ xfs_trans_init( resp->tr_qm_setqlim = xfs_calc_qm_setqlim_reservation(mp); resp->tr_qm_dqalloc = xfs_calc_qm_dqalloc_reservation(mp); resp->tr_qm_quotaoff = xfs_calc_qm_quotaoff_reservation(mp); + resp->tr_qm_equotaoff = xfs_calc_qm_quotaoff_end_reservation(mp); } /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 1e103da..57339e4 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -266,6 +266,7 @@ struct xfs_log_item_desc { #define XFS_QM_SETQLIM_LOG_RES(mp) ((mp)->m_reservations.tr_qm_setqlim) #define XFS_QM_DQALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_qm_dqalloc) #define XFS_QM_QUOTAOFF_LOG_RES(mp) ((mp)->m_reservations.tr_qm_quotaoff) +#define XFS_QM_QUOTAOFF_END_LOG_RES(mp) ((mp)->m_reservations.tr_qm_equotaoff) /* * Various log count values. -- cgit v0.10.2 From a7bd794a0f489a66ad595f2bcab0eac8f232e409 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Mon, 28 Jan 2013 21:27:25 +0800 Subject: xfs: introduce XFS_SB_LOG_RES() for transactions that modify sb on disk Introduce a new transaction space reservation XFS_SB_LOG_RES() for those transactions that need to modify the superblock on disk. Signed-off-by: Jie Liu CC: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index b46009d..2e30a9a 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -45,6 +45,7 @@ typedef struct xfs_trans_reservations { uint tr_qm_dqalloc; /* allocate quota on disk */ uint tr_qm_quotaoff; /* turn quota off */ uint tr_qm_equotaoff;/* end of turn quota off */ + uint tr_sb; /* modify superblock */ } xfs_trans_reservations_t; #ifndef __KERNEL__ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 84643fa..72da2aa 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -606,6 +606,17 @@ xfs_calc_qm_quotaoff_end_reservation( } /* + * Syncing the incore super block changes to disk. 
+ * the super block to reflect the changes: sector size + */ +STATIC uint +xfs_calc_sb_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); +} + +/* * Initialize the precomputed transaction reservation values * in the mount structure. */ @@ -641,6 +652,7 @@ xfs_trans_init( resp->tr_qm_dqalloc = xfs_calc_qm_dqalloc_reservation(mp); resp->tr_qm_quotaoff = xfs_calc_qm_quotaoff_reservation(mp); resp->tr_qm_equotaoff = xfs_calc_qm_quotaoff_end_reservation(mp); + resp->tr_sb = xfs_calc_sb_reservation(mp); } /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 57339e4..d06919e 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -267,6 +267,7 @@ struct xfs_log_item_desc { #define XFS_QM_DQALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_qm_dqalloc) #define XFS_QM_QUOTAOFF_LOG_RES(mp) ((mp)->m_reservations.tr_qm_quotaoff) #define XFS_QM_QUOTAOFF_END_LOG_RES(mp) ((mp)->m_reservations.tr_qm_equotaoff) +#define XFS_SB_LOG_RES(mp) ((mp)->m_reservations.tr_sb) /* * Various log count values. -- cgit v0.10.2 From e457274b60a3e3046d9451a199826281ce92023d Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Mon, 28 Jan 2013 21:27:31 +0800 Subject: xfs: make use of XFS_SB_LOG_RES() at xfs_log_sbcount() Make use of XFS_SB_LOG_RES() at xfs_log_sbcount(). 
Signed-off-by: Jie Liu CC: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 0db4020..a670461 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1583,8 +1583,8 @@ xfs_log_sbcount(xfs_mount_t *mp) return 0; tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP); - error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, - XFS_DEFAULT_LOG_COUNT); + error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0, + XFS_DEFAULT_LOG_COUNT); if (error) { xfs_trans_cancel(tp, 0); return error; -- cgit v0.10.2 From 5166ab06558cfe4a3745252a91ad6b495ba49290 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Mon, 28 Jan 2013 21:27:39 +0800 Subject: xfs: make use of XFS_SB_LOG_RES() at xfs_mount_log_sb() Make use of XFS_SB_LOG_RES() at xfs_mount_log_sb(). Signed-off-by: Jie Liu CC: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index a670461..3806088 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1945,8 +1945,8 @@ xfs_mount_log_sb( XFS_SB_VERSIONNUM)); tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); - error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, - XFS_DEFAULT_LOG_COUNT); + error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0, + XFS_DEFAULT_LOG_COUNT); if (error) { xfs_trans_cancel(tp, 0); return error; -- cgit v0.10.2 From 762c585b187cde93bbb8cadd3cb3871bbae1b75a Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Mon, 28 Jan 2013 21:27:46 +0800 Subject: xfs: make use of XFS_SB_LOG_RES() at xfs_fs_log_dummy() Make use of XFS_SB_LOG_RES() at xfs_fs_log_dummy(). 
Signed-off-by: Jie Liu CC: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 94eaeed..2866b8c 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -709,8 +709,8 @@ xfs_fs_log_dummy( int error; tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); - error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, - XFS_DEFAULT_LOG_COUNT); + error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0, + XFS_DEFAULT_LOG_COUNT); if (error) { xfs_trans_cancel(tp, 0); return error; -- cgit v0.10.2 From a21cd503678286c56b1d0cca1c99349a4aa042f4 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Mon, 28 Jan 2013 21:27:53 +0800 Subject: xfs: refactor space log reservation for XFS_TRANS_ATTR_SET Currently, we calculate the attribute set transaction log space reservation at runtime in two parts: 1) XFS_ATTRSET_LOG_RES() which is calculated out at mount time. 2) ((ext * (mp)->m_sb.sb_sectsize) + \ (ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \ (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))) which is calculated out at runtime since it depends on the given extent length in blocks. This patch renamed XFS_ATTRSET_LOG_RES(mp) to XFS_ATTRSETM_LOG_RES(mp) to indicate that it is figured out at mount time. Introduce XFS_ATTRSETRT_LOG_RES(mp) which would be used to calculate out the unit of the log space reservation for one block.
In this way, the total runtime space for the given extent length can be figured out by: XFS_ATTRSETM_LOG_RES(mp) + XFS_ATTRSETRT_LOG_RES(mp) * ext Signed-off-by: Jie Liu CC: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index aaf4725..8886838 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -300,9 +300,12 @@ xfs_attr_set_int( if (rsvd) args.trans->t_flags |= XFS_TRANS_RESERVE; - if ((error = xfs_trans_reserve(args.trans, args.total, - XFS_ATTRSET_LOG_RES(mp, args.total), 0, - XFS_TRANS_PERM_LOG_RES, XFS_ATTRSET_LOG_COUNT))) { + error = xfs_trans_reserve(args.trans, args.total, + XFS_ATTRSETM_LOG_RES(mp) + + XFS_ATTRSETRT_LOG_RES(mp) * args.total, + 0, XFS_TRANS_PERM_LOG_RES, + XFS_ATTRSET_LOG_COUNT); + if (error) { xfs_trans_cancel(args.trans, 0); return(error); } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 2e30a9a..bc90706 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -34,7 +34,8 @@ typedef struct xfs_trans_reservations { uint tr_addafork; /* cvt inode to attributed trans */ uint tr_writeid; /* write setuid/setgid file */ uint tr_attrinval; /* attr fork buffer invalidation */ - uint tr_attrset; /* set/create an attribute */ + uint tr_attrsetm; /* set/create an attribute at mount time */ + uint tr_attrsetrt; /* set/create an attribute at runtime */ uint tr_attrrm; /* remove an attribute */ uint tr_clearagi; /* clear bad agi unlinked ino bucket */ uint tr_growrtalloc; /* grow realtime allocations */ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 72da2aa..2fd7c1f 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -489,17 +489,18 @@ xfs_calc_attrinval_reservation( } /* - * Setting an attribute. + * Setting an attribute at mount time. 
* the inode getting the attribute * the superblock for allocations * the agfs extents are allocated from * the attribute btree * max depth * the inode allocation btree * Since attribute transaction space is dependent on the size of the attribute, - * the calculation is done partially at mount time and partially at runtime. + * the calculation is done partially at mount time and partially at runtime (see + * below). */ STATIC uint -xfs_calc_attrset_reservation( +xfs_calc_attrsetm_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + @@ -509,6 +510,24 @@ xfs_calc_attrset_reservation( } /* + * Setting an attribute at runtime, transaction space unit per block. + * the superblock for allocations: sector size + * the inode bmap btree could join or split: max depth * block size + * Since the runtime attribute transaction space is dependent on the total + * blocks needed for the 1st bmap, here we calculate out the space unit for + * one block so that the caller could figure out the total space according + * to the attribute extent length in blocks by: ext * XFS_ATTRSETRT_LOG_RES(mp). + */ +STATIC uint +xfs_calc_attrsetrt_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK), + XFS_FSB_TO_B(mp, 1)); +} + +/* * Removing an attribute.
* the inode: inode size * the attribute btree could join: max depth * block size @@ -641,7 +660,8 @@ xfs_trans_init( resp->tr_writeid = xfs_calc_writeid_reservation(mp); resp->tr_addafork = xfs_calc_addafork_reservation(mp); resp->tr_attrinval = xfs_calc_attrinval_reservation(mp); - resp->tr_attrset = xfs_calc_attrset_reservation(mp); + resp->tr_attrsetm = xfs_calc_attrsetm_reservation(mp); + resp->tr_attrsetrt = xfs_calc_attrsetrt_reservation(mp); resp->tr_attrrm = xfs_calc_attrrm_reservation(mp); resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp); resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp); diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index d06919e..cd29f61 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -252,15 +252,12 @@ struct xfs_log_item_desc { * as long as SWRITE logs the entire inode core */ #define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) -#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) +#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) #define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork) #define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval) -#define XFS_ATTRSET_LOG_RES(mp, ext) \ - ((mp)->m_reservations.tr_attrset + \ - (ext * (mp)->m_sb.sb_sectsize) + \ - (ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \ - (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))))) -#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm) +#define XFS_ATTRSETM_LOG_RES(mp) ((mp)->m_reservations.tr_attrsetm) +#define XFS_ATTRSETRT_LOG_RES(mp) ((mp)->m_reservations.tr_attrsetrt) +#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm) #define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi) #define XFS_QM_SBCHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_qm_sbchange) #define XFS_QM_SETQLIM_LOG_RES(mp) ((mp)->m_reservations.tr_qm_setqlim) -- cgit v0.10.2 From 311f08acde635e4e5ccea9b9d8c856cc2e0ced95 
Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 4 Feb 2013 10:13:11 -0600 Subject: xfs: memory barrier before wake_up_bit() In xfs_ifunlock() there is a call to wake_up_bit() after clearing the flush lock on the xfs inode. This is not guaranteed to be safe, as noted in the comments above wake_up_bit() beginning with: In order for this to function properly, as it uses waitqueue_active() internally, some kind of memory barrier must be done prior to calling this. Signed-off-by: Alex Elder Reviewed-by: Dave Chinner Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 22baf6e..237e7f6 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -419,6 +419,7 @@ static inline void xfs_iflock(struct xfs_inode *ip) static inline void xfs_ifunlock(struct xfs_inode *ip) { xfs_iflags_clear(ip, XFS_IFLOCK); + smp_mb(); wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT); } -- cgit v0.10.2 From a1e16c26660b301cc8423185924cf1b0b16ea92b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Feb 2013 16:05:01 +1100 Subject: xfs: limit speculative prealloc size on sparse files Speculative preallocation based on the current file size works well for contiguous files, but is sub-optimal for sparse files where the EOF preallocation can fill holes and result in large amounts of zeros being written when it is not necessary. The algorithm is modified to prevent EOF speculative preallocation from triggering larger allocations on IO patterns of truncate--to-zero-seek-write-seek-write-.... which results in non-sparse files for large files. This, unfortunately, is the way cp now behaves when copying sparse files and so needs to be fixed. What this code does is that it looks at the existing extent adjacent to the current EOF and if it determines that it is a hole we disable speculative preallocation altogether. To avoid the next write from doing a large prealloc, it takes the size of subsequent preallocations from the current size of the existing EOF extent. 
IOWs, if you leave a hole in the file, it resets preallocation behaviour to the same as if it was a zero size file. Example new behaviour: $ xfs_io -f -c "pwrite 0 31m" \ -c "pwrite 33m 1m" \ -c "pwrite 128m 1m" \ -c "fiemap -v" /mnt/scratch/blah wrote 32505856/32505856 bytes at offset 0 31 MiB, 7936 ops; 0.0000 sec (1.608 GiB/sec and 421432.7439 ops/sec) wrote 1048576/1048576 bytes at offset 34603008 1 MiB, 256 ops; 0.0000 sec (1.462 GiB/sec and 383233.5329 ops/sec) wrote 1048576/1048576 bytes at offset 134217728 1 MiB, 256 ops; 0.0000 sec (1.719 GiB/sec and 450704.2254 ops/sec) /mnt/scratch/blah: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..65535]: 96..65631 65536 0x0 1: [65536..67583]: hole 2048 2: [67584..69631]: 67680..69727 2048 0x0 3: [69632..262143]: hole 192512 4: [262144..264191]: 262240..264287 2048 0x1 Signed-off-by: Dave Chinner Reviewed-by: Mark Tinguely Reviewed-by: Brian Foster Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 364818e..912d83d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -311,6 +311,62 @@ xfs_iomap_eof_want_preallocate( } /* + * Determine the initial size of the preallocation. We are beyond the current + * EOF here, but we need to take into account whether this is a sparse write or + * an extending write when determining the preallocation size. Hence we need to + * look up the extent that ends at the current write offset and use the result + * to determine the preallocation size. + * + * If the extent is a hole, then preallocation is essentially disabled. + * Otherwise we take the size of the preceeding data extent as the basis for the + * preallocation size. If the size of the extent is greater than half the + * maximum extent length, then use the current offset as the basis. This ensures + * that for large files the preallocation size always extends to MAXEXTLEN + * rather than falling short due to things like stripe unit/width alignment of + * real extents. 
+ */ +STATIC int +xfs_iomap_eof_prealloc_initial_size( + struct xfs_mount *mp, + struct xfs_inode *ip, + xfs_off_t offset, + xfs_bmbt_irec_t *imap, + int nimaps) +{ + xfs_fileoff_t start_fsb; + int imaps = 1; + int error; + + ASSERT(nimaps >= imaps); + + /* if we are using a specific prealloc size, return now */ + if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) + return 0; + + /* + * As we write multiple pages, the offset will always align to the + * start of a page and hence point to a hole at EOF. i.e. if the size is + * 4096 bytes, we only have one block at FSB 0, but XFS_B_TO_FSB(4096) + * will return FSB 1. Hence if there are blocks in the file, we want to + * point to the block prior to the EOF block and not the hole that maps + * directly at @offset. + */ + start_fsb = XFS_B_TO_FSB(mp, offset); + if (start_fsb) + start_fsb--; + error = xfs_bmapi_read(ip, start_fsb, 1, imap, &imaps, XFS_BMAPI_ENTIRE); + if (error) + return 0; + + ASSERT(imaps == 1); + if (imap[0].br_startblock == HOLESTARTBLOCK) + return 0; + if (imap[0].br_blockcount <= (MAXEXTLEN >> 1)) + return imap[0].br_blockcount; + return XFS_B_TO_FSB(mp, offset); +} + +/* * If we don't have a user specified preallocation size, dynamically increase * the preallocation size as the size of the file grows. Cap the maximum size * at a single extent or less if the filesystem is near full. The closer the @@ -319,20 +375,19 @@ xfs_iomap_eof_want_preallocate( STATIC xfs_fsblock_t xfs_iomap_prealloc_size( struct xfs_mount *mp, - struct xfs_inode *ip) + struct xfs_inode *ip, + xfs_off_t offset, + struct xfs_bmbt_irec *imap, + int nimaps) { xfs_fsblock_t alloc_blocks = 0; - if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { + alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset, + imap, nimaps); + if (alloc_blocks > 0) { int shift = 0; int64_t freesp; - /* - * rounddown_pow_of_two() returns an undefined result - * if we pass in alloc_blocks = 0. Hence the "+ 1" to - * ensure we always pass in a non-zero value. 
- */ - alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1; alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, rounddown_pow_of_two(alloc_blocks)); @@ -399,7 +454,6 @@ xfs_iomap_write_delay( extsz = xfs_get_extsz_hint(ip); offset_fsb = XFS_B_TO_FSBT(mp, offset); - error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, imap, XFS_WRITE_IMAPS, &prealloc); if (error) @@ -407,7 +461,10 @@ xfs_iomap_write_delay( retry: if (prealloc) { - xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip); + xfs_fsblock_t alloc_blocks; + + alloc_blocks = xfs_iomap_prealloc_size(mp, ip, offset, imap, + XFS_WRITE_IMAPS); aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); ioalign = XFS_B_TO_FSBT(mp, aligned_offset); -- cgit v0.10.2 From 5337fe9b108d602c483fe9d62ffef9227acf3a74 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 11 Feb 2013 10:08:21 -0500 Subject: xfs: recheck buffer pinned status after push trylock failure The buffer pinned check and trylock sequence in xfs_buf_item_push() can race with an active transaction on marking the buffer pinned. This can result in the buffer becoming pinned and stale after the initial check and the trylock failure, but before the check in xfs_buf_trylock() that issues a log force. If the log force is issued from this context, a spinlock recursion occurs on xa_lock. Prepare xfs_buf_item_push() to handle the race by detecting a pinned buffer after the trylock failure so xfsaild issues a log force from a safe context. This, along with various previous fixes, renders the log force in xfs_buf_trylock() redundant. 
Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 9c4c050..cf26347 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -469,8 +469,18 @@ xfs_buf_item_push( if (xfs_buf_ispinned(bp)) return XFS_ITEM_PINNED; - if (!xfs_buf_trylock(bp)) + if (!xfs_buf_trylock(bp)) { + /* + * If we have just raced with a buffer being pinned and it has + * been marked stale, we could end up stalling until someone else + * issues a log force to unpin the stale buffer. Check for the + * race condition here so xfsaild recognizes the buffer is pinned + * and queues a log force to move it along. + */ + if (xfs_buf_ispinned(bp)) + return XFS_ITEM_PINNED; return XFS_ITEM_LOCKED; + } ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); -- cgit v0.10.2 From fa5566e4ffb918131a054413eb42075a77a41413 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 11 Feb 2013 10:08:22 -0500 Subject: xfs: remove log force from xfs_buf_trylock() The trylock log force invoked via xfs_buf_item_push() can attempt to acquire xa_lock, thus leading to a recursion bug when called with xa_lock held. This log force was originally added to xfs_buf_trylock() to address xfsaild stalls due to pinned and stale buffers. Since the addition of this behavior, the log item pushing code had been reworked to detect and track pinned items to inform xfsaild to issue a log force itself when necessary. As such, the log force on trylock failure is redundant and safe to remove. 
Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index fbbb9eb..4e8f0df 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -951,8 +951,6 @@ xfs_buf_trylock( locked = down_trylock(&bp->b_sema) == 0; if (locked) XB_SET_OWNER(bp); - else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) - xfs_log_force(bp->b_target->bt_mount, 0); trace_xfs_buf_trylock(bp, _RET_IP_); return locked; -- cgit v0.10.2 From 1e82379b018ceed0f0912327c60d73107dacbcb3 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Feb 2013 15:58:13 +1100 Subject: xfs: xfs_bmap_add_attrfork_local is too generic When we are converting local data to an extent format as a result of adding an attribute, the type of data contained in the local fork determines the behaviour that needs to occur. xfs_bmap_add_attrfork_local() already handles the directory data case specially by using S_ISDIR() and calling out to xfs_dir2_sf_to_block(), but with verifiers we now need to handle each different type of metadata specially and different metadata formats require different verifiers (and eventually block header initialisation). There is only a single place that we add an attribute fork to the inode, but that is in the attribute code and it knows nothing about the specific contents of the data fork. It is only the case of local data that is the issue here, so adding code to handle this case in the attribute specific code is wrong. Hence we are really stuck trying to detect the data fork contents in xfs_bmap_add_attrfork_local() and performing the correct callout there. Luckily the current cases can be determined by S_IS* macros, and we can push the work off to data specific callouts, but each of those callouts does a lot of work in common with xfs_bmap_local_to_extents().
The only reason that this fails for symlinks right now is that xfs_bmap_local_to_extents() assumes the data fork contains extent data, and so attaches a bmap extent data verifier to the buffer and simply copies the data fork information straight into it. To fix this, allow us to pass a "formatting" callback into xfs_bmap_local_to_extents() which is responsible for setting the buffer type, initialising it and copying the data fork contents over to the new buffer. This allows callers to specify how they want to format the new buffer (which is necessary for the upcoming CRC enabled metadata blocks) and hence make xfs_bmap_local_to_extents() useful for any type of data fork content. Signed-off-by: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 491f35e..b44af92 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -147,7 +147,10 @@ xfs_bmap_local_to_extents( xfs_fsblock_t *firstblock, /* first block allocated in xaction */ xfs_extlen_t total, /* total blocks needed by transaction */ int *logflagsp, /* inode logging flags */ - int whichfork); /* data or attr fork */ + int whichfork, /* data or attr fork */ + void (*init_fn)(struct xfs_buf *bp, + struct xfs_inode *ip, + struct xfs_ifork *ifp)); /* * Search the extents list for the inode, for the extent containing bno. @@ -357,7 +360,42 @@ xfs_bmap_add_attrfork_extents( } /* - * Called from xfs_bmap_add_attrfork to handle local format files. + * Block initialisation functions for local to extent format conversion. + * As these get more complex, they will be moved to the relevant files, + * but for now they are too simple to worry about.
+ */ +STATIC void +xfs_bmap_local_to_extents_init_fn( + struct xfs_buf *bp, + struct xfs_inode *ip, + struct xfs_ifork *ifp) +{ + bp->b_ops = &xfs_bmbt_buf_ops; + memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); +} + +STATIC void +xfs_symlink_local_to_remote( + struct xfs_buf *bp, + struct xfs_inode *ip, + struct xfs_ifork *ifp) +{ + /* remote symlink blocks are not verifiable until CRCs come along */ + bp->b_ops = NULL; + memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); +} + +/* + * Called from xfs_bmap_add_attrfork to handle local format files. Each + * different data fork content type needs a different callout to do the + * conversion. Some are basic and only require special block initialisation + * callouts for the data formating, others (directories) are so specialised they + * handle everything themselves. + * + * XXX (dgc): investigate whether directory conversion can use the generic + * formatting callout. It should be possible - it's just a very complex + * formatter. it would also require passing the transaction through to the init + * function. 
*/ STATIC int /* error */ xfs_bmap_add_attrfork_local( @@ -368,25 +406,29 @@ xfs_bmap_add_attrfork_local( int *flags) /* inode logging flags */ { xfs_da_args_t dargs; /* args for dir/attr code */ - int error; /* error return value */ - xfs_mount_t *mp; /* mount structure pointer */ if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) return 0; + if (S_ISDIR(ip->i_d.di_mode)) { - mp = ip->i_mount; memset(&dargs, 0, sizeof(dargs)); dargs.dp = ip; dargs.firstblock = firstblock; dargs.flist = flist; - dargs.total = mp->m_dirblkfsbs; + dargs.total = ip->i_mount->m_dirblkfsbs; dargs.whichfork = XFS_DATA_FORK; dargs.trans = tp; - error = xfs_dir2_sf_to_block(&dargs); - } else - error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, - XFS_DATA_FORK); - return error; + return xfs_dir2_sf_to_block(&dargs); + } + + if (S_ISLNK(ip->i_d.di_mode)) + return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, + flags, XFS_DATA_FORK, + xfs_symlink_local_to_remote); + + return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, + XFS_DATA_FORK, + xfs_bmap_local_to_extents_init_fn); } /* @@ -3219,7 +3261,10 @@ xfs_bmap_local_to_extents( xfs_fsblock_t *firstblock, /* first block allocated in xaction */ xfs_extlen_t total, /* total blocks needed by transaction */ int *logflagsp, /* inode logging flags */ - int whichfork) /* data or attr fork */ + int whichfork, + void (*init_fn)(struct xfs_buf *bp, + struct xfs_inode *ip, + struct xfs_ifork *ifp)) { int error; /* error return value */ int flags; /* logging flags returned */ @@ -3239,12 +3284,12 @@ xfs_bmap_local_to_extents( xfs_buf_t *bp; /* buffer for extent block */ xfs_bmbt_rec_host_t *ep;/* extent record pointer */ + ASSERT((ifp->if_flags & + (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE); memset(&args, 0, sizeof(args)); args.tp = tp; args.mp = ip->i_mount; args.firstblock = *firstblock; - ASSERT((ifp->if_flags & - (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE); /* * Allocate a block. 
We know we need only one, since the * file currently fits in an inode. @@ -3258,17 +3303,20 @@ xfs_bmap_local_to_extents( } args.total = total; args.minlen = args.maxlen = args.prod = 1; - if ((error = xfs_alloc_vextent(&args))) + error = xfs_alloc_vextent(&args); + if (error) goto done; - /* - * Can't fail, the space was reserved. - */ + + /* Can't fail, the space was reserved. */ ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(args.len == 1); *firstblock = args.fsbno; bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); - bp->b_ops = &xfs_bmbt_buf_ops; - memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); + + /* initialise the block and copy the data */ + init_fn(bp, ip, ifp); + + /* account for the change in fork size and log everything */ xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); xfs_bmap_forkoff_reset(args.mp, ip, whichfork); xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); @@ -4915,8 +4963,32 @@ xfs_bmapi_write( XFS_STATS_INC(xs_blk_mapw); if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { + /* + * XXX (dgc): This assumes we are only called for inodes that + * contain content neutral data in local format. Anything that + * contains caller-specific data in local format that needs + * transformation to move to a block format needs to do the + * conversion to extent format itself. + * + * Directory data forks and attribute forks handle this + * themselves, but with the addition of metadata verifiers every + * data fork in local format now contains caller specific data + * and as such conversion through this function is likely to be + * broken. + * + * The only likely user of this branch is for remote symlinks, + * but we cannot overwrite the data fork contents of the symlink + * (EEXIST occurs higher up the stack) and so it will never go + * from local format to extent format here. 
Hence I don't think + * this branch is ever executed intentionally and we should + * consider removing it and asserting that xfs_bmapi_write() + * cannot be called directly on local format forks. i.e. callers + * are completely responsible for local to extent format + * conversion, not xfs_bmapi_write(). + */ error = xfs_bmap_local_to_extents(tp, ip, firstblock, total, - &bma.logflags, whichfork); + &bma.logflags, whichfork, + xfs_bmap_local_to_extents_init_fn); if (error) goto error0; } -- cgit v0.10.2