From dbe1b5ca26396b6c61d711c8ac4de13ebb02e9f6 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Wed, 24 Dec 2014 08:51:38 +1100 Subject: xfs: Make xfs_vn_rename compliant with renameat2() syscall To be able to support RENAME_EXCHANGE flag from renameat2() system call, XFS must have its inode_operations updated, exporting .rename2 method, instead of .rename. This patch just replaces the (now old) .rename method by .rename2, using the same infra-structure, but checking rename flags. Calls to .rename2 using RENAME_EXCHANGE flag, although now handled inside XFS, still return -EINVAL. RENAME_NOREPLACE is handled via VFS and we don't need to care about it inside xfs_vn_rename. Signed-off-by: Carlos Maiolino Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index c50311c..abb838a 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -380,18 +380,23 @@ xfs_vn_rename( struct inode *odir, struct dentry *odentry, struct inode *ndir, - struct dentry *ndentry) + struct dentry *ndentry, + unsigned int flags) { struct inode *new_inode = ndentry->d_inode; struct xfs_name oname; struct xfs_name nname; + /* XFS does not support RENAME_EXCHANGE yet */ + if (flags & ~RENAME_NOREPLACE) + return -EINVAL; + xfs_dentry_to_name(&oname, odentry, 0); xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode); return xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), - XFS_I(ndir), &nname, new_inode ? - XFS_I(new_inode) : NULL); + XFS_I(ndir), &nname, + new_inode ? 
XFS_I(new_inode) : NULL); } /* @@ -1144,7 +1149,7 @@ static const struct inode_operations xfs_dir_inode_operations = { */ .rmdir = xfs_vn_unlink, .mknod = xfs_vn_mknod, - .rename = xfs_vn_rename, + .rename2 = xfs_vn_rename, .get_acl = xfs_get_acl, .set_acl = xfs_set_acl, .getattr = xfs_vn_getattr, @@ -1172,7 +1177,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { */ .rmdir = xfs_vn_unlink, .mknod = xfs_vn_mknod, - .rename = xfs_vn_rename, + .rename2 = xfs_vn_rename, .get_acl = xfs_get_acl, .set_acl = xfs_set_acl, .getattr = xfs_vn_getattr, -- cgit v0.10.2 From d31a1825450062b85282b4afed1c840fd306d012 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Wed, 24 Dec 2014 08:51:42 +1100 Subject: xfs: Add support to RENAME_EXCHANGE flag Adds a new function named xfs_cross_rename(), responsible for handling requests from sys_renameat2() using RENAME_EXCHANGE flag. Signed-off-by: Carlos Maiolino Reviewed-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 41f804e..9916aef 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2656,6 +2656,124 @@ xfs_sort_for_rename( } /* + * xfs_cross_rename() + * + * responsible for handling RENAME_EXCHANGE flag in renameat2() system call + */ +STATIC int +xfs_cross_rename( + struct xfs_trans *tp, + struct xfs_inode *dp1, + struct xfs_name *name1, + struct xfs_inode *ip1, + struct xfs_inode *dp2, + struct xfs_name *name2, + struct xfs_inode *ip2, + struct xfs_bmap_free *free_list, + xfs_fsblock_t *first_block, + int spaceres) +{ + int error = 0; + int ip1_flags = 0; + int ip2_flags = 0; + int dp2_flags = 0; + + /* Swap inode number for dirent in first parent */ + error = xfs_dir_replace(tp, dp1, name1, + ip2->i_ino, + first_block, free_list, spaceres); + if (error) + goto out; + + /* Swap inode number for dirent in second parent */ + error = xfs_dir_replace(tp, dp2, name2, + ip1->i_ino, + first_block, free_list, spaceres); + if 
(error) + goto out; + + /* + * If we're renaming one or more directories across different parents, + * update the respective ".." entries (and link counts) to match the new + * parents. + */ + if (dp1 != dp2) { + dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; + + if (S_ISDIR(ip2->i_d.di_mode)) { + error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot, + dp1->i_ino, first_block, + free_list, spaceres); + if (error) + goto out; + + /* transfer ip2 ".." reference to dp1 */ + if (!S_ISDIR(ip1->i_d.di_mode)) { + error = xfs_droplink(tp, dp2); + if (error) + goto out; + error = xfs_bumplink(tp, dp1); + if (error) + goto out; + } + + /* + * Although ip1 isn't changed here, userspace needs + * to be warned about the change, so that applications + * relying on it (like backup ones), will properly + * notify the change + */ + ip1_flags |= XFS_ICHGTIME_CHG; + ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; + } + + if (S_ISDIR(ip1->i_d.di_mode)) { + error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot, + dp2->i_ino, first_block, + free_list, spaceres); + if (error) + goto out; + + /* transfer ip1 ".." 
reference to dp2 */ + if (!S_ISDIR(ip2->i_d.di_mode)) { + error = xfs_droplink(tp, dp1); + if (error) + goto out; + error = xfs_bumplink(tp, dp2); + if (error) + goto out; + } + + /* + * Although ip2 isn't changed here, userspace needs + * to be warned about the change, so that applications + * relying on it (like backup ones), will properly + * notify the change + */ + ip1_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; + ip2_flags |= XFS_ICHGTIME_CHG; + } + } + + if (ip1_flags) { + xfs_trans_ichgtime(tp, ip1, ip1_flags); + xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE); + } + if (ip2_flags) { + xfs_trans_ichgtime(tp, ip2, ip2_flags); + xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE); + } + if (dp2_flags) { + xfs_trans_ichgtime(tp, dp2, dp2_flags); + xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE); + } + xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); +out: + return error; +} + +/* * xfs_rename */ int @@ -2665,7 +2783,8 @@ xfs_rename( xfs_inode_t *src_ip, xfs_inode_t *target_dp, struct xfs_name *target_name, - xfs_inode_t *target_ip) + xfs_inode_t *target_ip, + unsigned int flags) { xfs_trans_t *tp = NULL; xfs_mount_t *mp = src_dp->i_mount; @@ -2743,6 +2862,18 @@ xfs_rename( } /* + * Handle RENAME_EXCHANGE flags + */ + if (flags & RENAME_EXCHANGE) { + error = xfs_cross_rename(tp, src_dp, src_name, src_ip, + target_dp, target_name, target_ip, + &free_list, &first_block, spaceres); + if (error) + goto abort_return; + goto finish_rename; + } + + /* * Set up the target. 
*/ if (target_ip == NULL) { @@ -2881,6 +3012,7 @@ xfs_rename( if (new_parent) xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); +finish_rename: /* * If this is a synchronous mount, make sure that the * rename transaction goes to disk before returning to diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 4ed2ba9..f772296 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -338,7 +338,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, struct xfs_inode *target_dp, struct xfs_name *target_name, - struct xfs_inode *target_ip); + struct xfs_inode *target_ip, unsigned int flags); void xfs_ilock(xfs_inode_t *, uint); int xfs_ilock_nowait(xfs_inode_t *, uint); diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index abb838a..ce80eeb 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -384,19 +384,23 @@ xfs_vn_rename( unsigned int flags) { struct inode *new_inode = ndentry->d_inode; + int omode = 0; struct xfs_name oname; struct xfs_name nname; - /* XFS does not support RENAME_EXCHANGE yet */ - if (flags & ~RENAME_NOREPLACE) + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) return -EINVAL; - xfs_dentry_to_name(&oname, odentry, 0); + /* if we are exchanging files, we need to set i_mode of both files */ + if (flags & RENAME_EXCHANGE) + omode = ndentry->d_inode->i_mode; + + xfs_dentry_to_name(&oname, odentry, omode); xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode); return xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), XFS_I(ndir), &nname, - new_inode ? XFS_I(new_inode) : NULL); + new_inode ? 
XFS_I(new_inode) : NULL, flags); } /* -- cgit v0.10.2 From 96ab7954bca0eeedfb17094719db1351fba361d3 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 24 Dec 2014 09:46:23 +1100 Subject: xfs: initialize log buf I/O completion wq on log alloc Log buffer I/O completion passes through the high priority m_log_workqueue rather than the default metadata buffer workqueue. The log buffer wq is initialized at I/O submission time. The log buffers are reused once initialized, however, so this is not necessary. Initialize the log buffer I/O completion workqueue pointers once when the log is allocated and log buffers initialized rather than on every log buffer I/O submission. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index e408bf5..4f09e0f 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1395,6 +1395,8 @@ xlog_alloc_log( ASSERT(xfs_buf_islocked(bp)); xfs_buf_unlock(bp); + /* use high priority wq for log I/O completion */ + bp->b_ioend_wq = mp->m_log_workqueue; bp->b_iodone = xlog_iodone; log->l_xbuf = bp; @@ -1427,6 +1429,8 @@ xlog_alloc_log( ASSERT(xfs_buf_islocked(bp)); xfs_buf_unlock(bp); + /* use high priority wq for log I/O completion */ + bp->b_ioend_wq = mp->m_log_workqueue; bp->b_iodone = xlog_iodone; iclog->ic_bp = bp; iclog->ic_data = bp->b_addr; @@ -1806,8 +1810,6 @@ xlog_sync( XFS_BUF_ZEROFLAGS(bp); XFS_BUF_ASYNC(bp); bp->b_flags |= XBF_SYNCIO; - /* use high priority completion wq */ - bp->b_ioend_wq = log->l_mp->m_log_workqueue; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { bp->b_flags |= XBF_FUA; @@ -1856,8 +1858,6 @@ xlog_sync( bp->b_flags |= XBF_SYNCIO; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) bp->b_flags |= XBF_FUA; - /* use high priority completion wq */ - bp->b_ioend_wq = log->l_mp->m_log_workqueue; ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); -- cgit v0.10.2 From 
77af574eef78c404ea630f7955a5ed0c926a63fe Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 24 Dec 2014 09:47:27 +1100 Subject: xfs: remove extra newlines from xfs messages xfs_warn() and friends add a newline by default, but some messages add another one. Particularly for the failing write message below, this can waste a lot of console real estate! Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 3f9bd58..925ead2 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -535,7 +535,7 @@ xfs_buf_item_push( if ((bp->b_flags & XBF_WRITE_FAIL) && ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) { xfs_warn(bp->b_target->bt_mount, -"Detected failing async write on buffer block 0x%llx. Retrying async write.\n", +"Detected failing async write on buffer block 0x%llx. Retrying async write.", (long long)bp->b_bn); } diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 4f09e0f..8fbbfb2 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2027,7 +2027,7 @@ xlog_print_tic_res( " total reg = %u bytes (o/flow = %u bytes)\n" " ophdrs = %u (ophdr space = %u bytes)\n" " ophdr + reg = %u bytes\n" - " num regions = %u\n", + " num regions = %u", ((ticket->t_trans_type <= 0 || ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? 
"bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 19cbda1..22e6aca 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -685,7 +685,7 @@ xfs_blkdev_get( mp); if (IS_ERR(*bdevp)) { error = PTR_ERR(*bdevp); - xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error); + xfs_warn(mp, "Invalid device [%s], error=%d", name, error); } return error; -- cgit v0.10.2 From 1a43ec03ddd40793db00dbc7340685f5accf6fc3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 24 Dec 2014 09:48:35 +1100 Subject: xfs: Keep sb_bad_features2 consistent with sb_features2 Currently when we modify sb_features2, we store the same value also in sb_bad_features2. However in most places we forget to mark field sb_bad_features2 for logging and thus it can happen that a change to it is lost. This results in inconsistent sb_features2 and sb_bad_features2 fields e.g. after xfstests test xfs/187. Fix the problem by changing XFS_SB_FEATURES2 to actually mean both sb_features2 and sb_bad_features2 fields since this is always what we want to log. This isn't ideal because the fact that XFS_SB_FEATURES2 means two fields could cause some problem in future however the code is hopefully less error prone than it is now. 
Signed-off-by: Jan Kara Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index fbd6da2..4762732 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -304,8 +304,8 @@ typedef enum { #define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT) #define XFS_SB_IFREE XFS_SB_MVAL(IFREE) #define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS) -#define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2) -#define XFS_SB_BAD_FEATURES2 XFS_SB_MVAL(BAD_FEATURES2) +#define XFS_SB_FEATURES2 (XFS_SB_MVAL(FEATURES2) | \ + XFS_SB_MVAL(BAD_FEATURES2)) #define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT) #define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT) #define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT) @@ -319,9 +319,9 @@ typedef enum { XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \ XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \ XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \ - XFS_SB_BAD_FEATURES2 | XFS_SB_FEATURES_COMPAT | \ - XFS_SB_FEATURES_RO_COMPAT | XFS_SB_FEATURES_INCOMPAT | \ - XFS_SB_FEATURES_LOG_INCOMPAT | XFS_SB_PQUOTINO) + XFS_SB_FEATURES_COMPAT | XFS_SB_FEATURES_RO_COMPAT | \ + XFS_SB_FEATURES_INCOMPAT | XFS_SB_FEATURES_LOG_INCOMPAT | \ + XFS_SB_PQUOTINO) /* diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index d3d3883..71d2c97 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -678,7 +678,7 @@ xfs_mountfs( xfs_warn(mp, "correcting sb_features alignment problem"); sbp->sb_features2 |= sbp->sb_bad_features2; sbp->sb_bad_features2 = sbp->sb_features2; - mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2; + mp->m_update_flags |= XFS_SB_FEATURES2; /* * Re-check for ATTR2 in case it was found in bad_features2 @@ -1436,8 +1436,7 @@ xfs_mount_log_sb( int error; ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID | - XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2 | - XFS_SB_VERSIONNUM)); + XFS_SB_FEATURES2 | 
XFS_SB_VERSIONNUM)); tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); -- cgit v0.10.2 From 2155355fda502e75cd942db101fbb08e1a826ba8 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 9 Jan 2015 10:45:13 +1100 Subject: xfs: move xfs_fs.h to libxfs Ioctl API definitions are shared with userspace, so move the header file that defines them all to libxfs along with all the other code shared with userspace. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h new file mode 100644 index 0000000..18dc721 --- /dev/null +++ b/fs/xfs/libxfs/xfs_fs.h @@ -0,0 +1,576 @@ +/* + * Copyright (c) 1995-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_FS_H__ +#define __XFS_FS_H__ + +/* + * SGI's XFS filesystem's major stuff (constants, structures) + */ + +/* + * Direct I/O attribute record used with XFS_IOC_DIOINFO + * d_miniosz is the min xfer size, xfer size multiple and file seek offset + * alignment. + */ +#ifndef HAVE_DIOATTR +struct dioattr { + __u32 d_mem; /* data buffer memory alignment */ + __u32 d_miniosz; /* min xfer size */ + __u32 d_maxiosz; /* max xfer size */ +}; +#endif + +/* + * Structure for XFS_IOC_FSGETXATTR[A] and XFS_IOC_FSSETXATTR. 
+ */ +#ifndef HAVE_FSXATTR +struct fsxattr { + __u32 fsx_xflags; /* xflags field value (get/set) */ + __u32 fsx_extsize; /* extsize field value (get/set)*/ + __u32 fsx_nextents; /* nextents field value (get) */ + __u32 fsx_projid; /* project identifier (get/set) */ + unsigned char fsx_pad[12]; +}; +#endif + +/* + * Flags for the bs_xflags/fsx_xflags field + * There should be a one-to-one correspondence between these flags and the + * XFS_DIFLAG_s. + */ +#define XFS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */ +#define XFS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */ +#define XFS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */ +#define XFS_XFLAG_APPEND 0x00000010 /* all writes append */ +#define XFS_XFLAG_SYNC 0x00000020 /* all writes synchronous */ +#define XFS_XFLAG_NOATIME 0x00000040 /* do not update access time */ +#define XFS_XFLAG_NODUMP 0x00000080 /* do not include in backups */ +#define XFS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */ +#define XFS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ +#define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ +#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ +#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ +#define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ +#define XFS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ +#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ + +/* + * Structure for XFS_IOC_GETBMAP. + * On input, fill in bmv_offset and bmv_length of the first structure + * to indicate the area of interest in the file, and bmv_entries with + * the number of array elements given back. The first structure is + * updated on return to give the offset and length for the next call. 
+ */ +#ifndef HAVE_GETBMAP +struct getbmap { + __s64 bmv_offset; /* file offset of segment in blocks */ + __s64 bmv_block; /* starting block (64-bit daddr_t) */ + __s64 bmv_length; /* length of segment, blocks */ + __s32 bmv_count; /* # of entries in array incl. 1st */ + __s32 bmv_entries; /* # of entries filled in (output) */ +}; +#endif + +/* + * Structure for XFS_IOC_GETBMAPX. Fields bmv_offset through bmv_entries + * are used exactly as in the getbmap structure. The getbmapx structure + * has additional bmv_iflags and bmv_oflags fields. The bmv_iflags field + * is only used for the first structure. It contains input flags + * specifying XFS_IOC_GETBMAPX actions. The bmv_oflags field is filled + * in by the XFS_IOC_GETBMAPX command for each returned structure after + * the first. + */ +#ifndef HAVE_GETBMAPX +struct getbmapx { + __s64 bmv_offset; /* file offset of segment in blocks */ + __s64 bmv_block; /* starting block (64-bit daddr_t) */ + __s64 bmv_length; /* length of segment, blocks */ + __s32 bmv_count; /* # of entries in array incl. 1st */ + __s32 bmv_entries; /* # of entries filled in (output). */ + __s32 bmv_iflags; /* input flags (1st structure) */ + __s32 bmv_oflags; /* output flags (after 1st structure)*/ + __s32 bmv_unused1; /* future use */ + __s32 bmv_unused2; /* future use */ +}; +#endif + +/* bmv_iflags values - set by XFS_IOC_GETBMAPX caller. 
*/ +#define BMV_IF_ATTRFORK 0x1 /* return attr fork rather than data */ +#define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ +#define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ +#define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */ +#define BMV_IF_NO_HOLES 0x10 /* Do not return holes */ +#define BMV_IF_VALID \ + (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC| \ + BMV_IF_DELALLOC|BMV_IF_NO_HOLES) + +/* bmv_oflags values - returned for each non-header segment */ +#define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ +#define BMV_OF_DELALLOC 0x2 /* segment = delayed allocation */ +#define BMV_OF_LAST 0x4 /* segment is the last in the file */ + +/* + * Structure for XFS_IOC_FSSETDM. + * For use by backup and restore programs to set the XFS on-disk inode + * fields di_dmevmask and di_dmstate. These must be set to exactly and + * only values previously obtained via xfs_bulkstat! (Specifically the + * xfs_bstat_t fields bs_dmevmask and bs_dmstate.) + */ +#ifndef HAVE_FSDMIDATA +struct fsdmidata { + __u32 fsd_dmevmask; /* corresponds to di_dmevmask */ + __u16 fsd_padding; + __u16 fsd_dmstate; /* corresponds to di_dmstate */ +}; +#endif + +/* + * File segment locking set data type for 64 bit access. + * Also used for all the RESV/FREE interfaces. 
+ */ +typedef struct xfs_flock64 { + __s16 l_type; + __s16 l_whence; + __s64 l_start; + __s64 l_len; /* len == 0 means until end of file */ + __s32 l_sysid; + __u32 l_pid; + __s32 l_pad[4]; /* reserve area */ +} xfs_flock64_t; + +/* + * Output for XFS_IOC_FSGEOMETRY_V1 + */ +typedef struct xfs_fsop_geom_v1 { + __u32 blocksize; /* filesystem (data) block size */ + __u32 rtextsize; /* realtime extent size */ + __u32 agblocks; /* fsblocks in an AG */ + __u32 agcount; /* number of allocation groups */ + __u32 logblocks; /* fsblocks in the log */ + __u32 sectsize; /* (data) sector size, bytes */ + __u32 inodesize; /* inode size in bytes */ + __u32 imaxpct; /* max allowed inode space(%) */ + __u64 datablocks; /* fsblocks in data subvolume */ + __u64 rtblocks; /* fsblocks in realtime subvol */ + __u64 rtextents; /* rt extents in realtime subvol*/ + __u64 logstart; /* starting fsblock of the log */ + unsigned char uuid[16]; /* unique id of the filesystem */ + __u32 sunit; /* stripe unit, fsblocks */ + __u32 swidth; /* stripe width, fsblocks */ + __s32 version; /* structure version */ + __u32 flags; /* superblock version flags */ + __u32 logsectsize; /* log sector size, bytes */ + __u32 rtsectsize; /* realtime sector size, bytes */ + __u32 dirblocksize; /* directory block size, bytes */ +} xfs_fsop_geom_v1_t; + +/* + * Output for XFS_IOC_FSGEOMETRY + */ +typedef struct xfs_fsop_geom { + __u32 blocksize; /* filesystem (data) block size */ + __u32 rtextsize; /* realtime extent size */ + __u32 agblocks; /* fsblocks in an AG */ + __u32 agcount; /* number of allocation groups */ + __u32 logblocks; /* fsblocks in the log */ + __u32 sectsize; /* (data) sector size, bytes */ + __u32 inodesize; /* inode size in bytes */ + __u32 imaxpct; /* max allowed inode space(%) */ + __u64 datablocks; /* fsblocks in data subvolume */ + __u64 rtblocks; /* fsblocks in realtime subvol */ + __u64 rtextents; /* rt extents in realtime subvol*/ + __u64 logstart; /* starting fsblock of the log */ + 
unsigned char uuid[16]; /* unique id of the filesystem */ + __u32 sunit; /* stripe unit, fsblocks */ + __u32 swidth; /* stripe width, fsblocks */ + __s32 version; /* structure version */ + __u32 flags; /* superblock version flags */ + __u32 logsectsize; /* log sector size, bytes */ + __u32 rtsectsize; /* realtime sector size, bytes */ + __u32 dirblocksize; /* directory block size, bytes */ + __u32 logsunit; /* log stripe unit, bytes */ +} xfs_fsop_geom_t; + +/* Output for XFS_FS_COUNTS */ +typedef struct xfs_fsop_counts { + __u64 freedata; /* free data section blocks */ + __u64 freertx; /* free rt extents */ + __u64 freeino; /* free inodes */ + __u64 allocino; /* total allocated inodes */ +} xfs_fsop_counts_t; + +/* Input/Output for XFS_GET_RESBLKS and XFS_SET_RESBLKS */ +typedef struct xfs_fsop_resblks { + __u64 resblks; + __u64 resblks_avail; +} xfs_fsop_resblks_t; + +#define XFS_FSOP_GEOM_VERSION 0 + +#define XFS_FSOP_GEOM_FLAGS_ATTR 0x0001 /* attributes in use */ +#define XFS_FSOP_GEOM_FLAGS_NLINK 0x0002 /* 32-bit nlink values */ +#define XFS_FSOP_GEOM_FLAGS_QUOTA 0x0004 /* quotas enabled */ +#define XFS_FSOP_GEOM_FLAGS_IALIGN 0x0008 /* inode alignment */ +#define XFS_FSOP_GEOM_FLAGS_DALIGN 0x0010 /* large data alignment */ +#define XFS_FSOP_GEOM_FLAGS_SHARED 0x0020 /* read-only shared */ +#define XFS_FSOP_GEOM_FLAGS_EXTFLG 0x0040 /* special extent flag */ +#define XFS_FSOP_GEOM_FLAGS_DIRV2 0x0080 /* directory version 2 */ +#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */ +#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */ +#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */ +#define XFS_FSOP_GEOM_FLAGS_PROJID32 0x0800 /* 32-bit project IDs */ +#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */ +#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ +#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */ +#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode 
directory types */ +#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */ + +/* + * Minimum and maximum sizes needed for growth checks. + * + * Block counts are in units of filesystem blocks, not basic blocks. + */ +#define XFS_MIN_AG_BLOCKS 64 +#define XFS_MIN_LOG_BLOCKS 512ULL +#define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL) +#define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL) + +/* keep the maximum size under 2^31 by a small amount */ +#define XFS_MAX_LOG_BYTES \ + ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) + +/* Used for sanity checks on superblock */ +#define XFS_MAX_DBLOCKS(s) ((xfs_rfsblock_t)(s)->sb_agcount * (s)->sb_agblocks) +#define XFS_MIN_DBLOCKS(s) ((xfs_rfsblock_t)((s)->sb_agcount - 1) * \ + (s)->sb_agblocks + XFS_MIN_AG_BLOCKS) + +/* + * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT + */ +typedef struct xfs_growfs_data { + __u64 newblocks; /* new data subvol size, fsblocks */ + __u32 imaxpct; /* new inode space percentage limit */ +} xfs_growfs_data_t; + +typedef struct xfs_growfs_log { + __u32 newblocks; /* new log size, fsblocks */ + __u32 isint; /* 1 if new log is internal */ +} xfs_growfs_log_t; + +typedef struct xfs_growfs_rt { + __u64 newblocks; /* new realtime size, fsblocks */ + __u32 extsize; /* new realtime extent size, fsblocks */ +} xfs_growfs_rt_t; + + +/* + * Structures returned from ioctl XFS_IOC_FSBULKSTAT & XFS_IOC_FSBULKSTAT_SINGLE + */ +typedef struct xfs_bstime { + time_t tv_sec; /* seconds */ + __s32 tv_nsec; /* and nanoseconds */ +} xfs_bstime_t; + +typedef struct xfs_bstat { + __u64 bs_ino; /* inode number */ + __u16 bs_mode; /* type and mode */ + __u16 bs_nlink; /* number of links */ + __u32 bs_uid; /* user id */ + __u32 bs_gid; /* group id */ + __u32 bs_rdev; /* device value */ + __s32 bs_blksize; /* block size */ + __s64 bs_size; /* file size */ + xfs_bstime_t bs_atime; /* access time */ + xfs_bstime_t bs_mtime; /* modify time */ + xfs_bstime_t bs_ctime; /* inode change time */ + 
int64_t bs_blocks; /* number of blocks */ + __u32 bs_xflags; /* extended flags */ + __s32 bs_extsize; /* extent size */ + __s32 bs_extents; /* number of extents */ + __u32 bs_gen; /* generation count */ + __u16 bs_projid_lo; /* lower part of project id */ +#define bs_projid bs_projid_lo /* (previously just bs_projid) */ + __u16 bs_forkoff; /* inode fork offset in bytes */ + __u16 bs_projid_hi; /* higher part of project id */ + unsigned char bs_pad[10]; /* pad space, unused */ + __u32 bs_dmevmask; /* DMIG event mask */ + __u16 bs_dmstate; /* DMIG state info */ + __u16 bs_aextents; /* attribute number of extents */ +} xfs_bstat_t; + +/* + * Project quota id helpers (previously projid was 16bit only + * and using two 16bit values to hold new 32bit projid was chosen + * to retain compatibility with "old" filesystems). + */ +static inline __uint32_t +bstat_get_projid(struct xfs_bstat *bs) +{ + return (__uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo; +} + +/* + * The user-level BulkStat Request interface structure. + */ +typedef struct xfs_fsop_bulkreq { + __u64 __user *lastip; /* last inode # pointer */ + __s32 icount; /* count of entries in buffer */ + void __user *ubuffer;/* user buffer for inode desc. */ + __s32 __user *ocount; /* output count pointer */ +} xfs_fsop_bulkreq_t; + + +/* + * Structures returned from xfs_inumbers routine (XFS_IOC_FSINUMBERS). + */ +typedef struct xfs_inogrp { + __u64 xi_startino; /* starting inode number */ + __s32 xi_alloccount; /* # bits set in allocmask */ + __u64 xi_allocmask; /* mask of allocated inodes */ +} xfs_inogrp_t; + + +/* + * Error injection. + */ +typedef struct xfs_error_injection { + __s32 fd; + __s32 errtag; +} xfs_error_injection_t; + + +/* + * Speculative preallocation trimming. 
+ */ +#define XFS_EOFBLOCKS_VERSION 1 +struct xfs_fs_eofblocks { + __u32 eof_version; + __u32 eof_flags; + uid_t eof_uid; + gid_t eof_gid; + prid_t eof_prid; + __u32 pad32; + __u64 eof_min_file_size; + __u64 pad64[12]; +}; + +/* eof_flags values */ +#define XFS_EOF_FLAGS_SYNC (1 << 0) /* sync/wait mode scan */ +#define XFS_EOF_FLAGS_UID (1 << 1) /* filter by uid */ +#define XFS_EOF_FLAGS_GID (1 << 2) /* filter by gid */ +#define XFS_EOF_FLAGS_PRID (1 << 3) /* filter by project id */ +#define XFS_EOF_FLAGS_MINFILESIZE (1 << 4) /* filter by min file size */ +#define XFS_EOF_FLAGS_UNION (1 << 5) /* union filter algorithm; + * kernel only, not included in + * valid mask */ +#define XFS_EOF_FLAGS_VALID \ + (XFS_EOF_FLAGS_SYNC | \ + XFS_EOF_FLAGS_UID | \ + XFS_EOF_FLAGS_GID | \ + XFS_EOF_FLAGS_PRID | \ + XFS_EOF_FLAGS_MINFILESIZE) + + +/* + * The user-level Handle Request interface structure. + */ +typedef struct xfs_fsop_handlereq { + __u32 fd; /* fd for FD_TO_HANDLE */ + void __user *path; /* user pathname */ + __u32 oflags; /* open flags */ + void __user *ihandle;/* user supplied handle */ + __u32 ihandlen; /* user supplied length */ + void __user *ohandle;/* user buffer for handle */ + __u32 __user *ohandlen;/* user buffer length */ +} xfs_fsop_handlereq_t; + +/* + * Compound structures for passing args through Handle Request interfaces + * xfs_fssetdm_by_handle, xfs_attrlist_by_handle, xfs_attrmulti_by_handle + * - ioctls: XFS_IOC_FSSETDM_BY_HANDLE, XFS_IOC_ATTRLIST_BY_HANDLE, and + * XFS_IOC_ATTRMULTI_BY_HANDLE + */ + +typedef struct xfs_fsop_setdm_handlereq { + struct xfs_fsop_handlereq hreq; /* handle information */ + struct fsdmidata __user *data; /* DMAPI data */ +} xfs_fsop_setdm_handlereq_t; + +typedef struct xfs_attrlist_cursor { + __u32 opaque[4]; +} xfs_attrlist_cursor_t; + +typedef struct xfs_fsop_attrlist_handlereq { + struct xfs_fsop_handlereq hreq; /* handle interface structure */ + struct xfs_attrlist_cursor pos; /* opaque cookie, list offset */ + 
__u32 flags; /* which namespace to use */ + __u32 buflen; /* length of buffer supplied */ + void __user *buffer; /* returned names */ +} xfs_fsop_attrlist_handlereq_t; + +typedef struct xfs_attr_multiop { + __u32 am_opcode; +#define ATTR_OP_GET 1 /* return the indicated attr's value */ +#define ATTR_OP_SET 2 /* set/create the indicated attr/value pair */ +#define ATTR_OP_REMOVE 3 /* remove the indicated attr */ + __s32 am_error; + void __user *am_attrname; + void __user *am_attrvalue; + __u32 am_length; + __u32 am_flags; +} xfs_attr_multiop_t; + +typedef struct xfs_fsop_attrmulti_handlereq { + struct xfs_fsop_handlereq hreq; /* handle interface structure */ + __u32 opcount;/* count of following multiop */ + struct xfs_attr_multiop __user *ops; /* attr_multi data */ +} xfs_fsop_attrmulti_handlereq_t; + +/* + * per machine unique filesystem identifier types. + */ +typedef struct { __u32 val[2]; } xfs_fsid_t; /* file system id type */ + +typedef struct xfs_fid { + __u16 fid_len; /* length of remainder */ + __u16 fid_pad; + __u32 fid_gen; /* generation number */ + __u64 fid_ino; /* 64 bits inode number */ +} xfs_fid_t; + +typedef struct xfs_handle { + union { + __s64 align; /* force alignment of ha_fid */ + xfs_fsid_t _ha_fsid; /* unique file system identifier */ + } ha_u; + xfs_fid_t ha_fid; /* file system specific file ID */ +} xfs_handle_t; +#define ha_fsid ha_u._ha_fsid + +#define XFS_HSIZE(handle) (((char *) &(handle).ha_fid.fid_pad \ + - (char *) &(handle)) \ + + (handle).ha_fid.fid_len) + +/* + * Structure passed to XFS_IOC_SWAPEXT + */ +typedef struct xfs_swapext +{ + __int64_t sx_version; /* version */ +#define XFS_SX_VERSION 0 + __int64_t sx_fdtarget; /* fd of target file */ + __int64_t sx_fdtmp; /* fd of tmp file */ + xfs_off_t sx_offset; /* offset into file */ + xfs_off_t sx_length; /* leng from offset */ + char sx_pad[16]; /* pad space, unused */ + xfs_bstat_t sx_stat; /* stat of target b4 copy */ +} xfs_swapext_t; + +/* + * Flags for going down operation 
+ */ +#define XFS_FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */ +#define XFS_FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ +#define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ + +/* + * ioctl commands that are used by Linux filesystems + */ +#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS +#define XFS_IOC_SETXFLAGS FS_IOC_SETFLAGS +#define XFS_IOC_GETVERSION FS_IOC_GETVERSION + +/* + * ioctl commands that replace IRIX fcntl()'s + * For 'documentation' purposed more than anything else, + * the "cmd #" field reflects the IRIX fcntl number. + */ +#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64) +#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64) +#define XFS_IOC_DIOINFO _IOR ('X', 30, struct dioattr) +#define XFS_IOC_FSGETXATTR _IOR ('X', 31, struct fsxattr) +#define XFS_IOC_FSSETXATTR _IOW ('X', 32, struct fsxattr) +#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64) +#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64) +#define XFS_IOC_GETBMAP _IOWR('X', 38, struct getbmap) +#define XFS_IOC_FSSETDM _IOW ('X', 39, struct fsdmidata) +#define XFS_IOC_RESVSP _IOW ('X', 40, struct xfs_flock64) +#define XFS_IOC_UNRESVSP _IOW ('X', 41, struct xfs_flock64) +#define XFS_IOC_RESVSP64 _IOW ('X', 42, struct xfs_flock64) +#define XFS_IOC_UNRESVSP64 _IOW ('X', 43, struct xfs_flock64) +#define XFS_IOC_GETBMAPA _IOWR('X', 44, struct getbmap) +#define XFS_IOC_FSGETXATTRA _IOR ('X', 45, struct fsxattr) +/* XFS_IOC_SETBIOSIZE ---- deprecated 46 */ +/* XFS_IOC_GETBIOSIZE ---- deprecated 47 */ +#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) +#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64) +#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks) + +/* + * ioctl commands that replace IRIX syssgi()'s + */ +#define XFS_IOC_FSGEOMETRY_V1 _IOR ('X', 100, struct xfs_fsop_geom_v1) +#define XFS_IOC_FSBULKSTAT _IOWR('X', 101, struct xfs_fsop_bulkreq) +#define XFS_IOC_FSBULKSTAT_SINGLE _IOWR('X', 
102, struct xfs_fsop_bulkreq) +#define XFS_IOC_FSINUMBERS _IOWR('X', 103, struct xfs_fsop_bulkreq) +#define XFS_IOC_PATH_TO_FSHANDLE _IOWR('X', 104, struct xfs_fsop_handlereq) +#define XFS_IOC_PATH_TO_HANDLE _IOWR('X', 105, struct xfs_fsop_handlereq) +#define XFS_IOC_FD_TO_HANDLE _IOWR('X', 106, struct xfs_fsop_handlereq) +#define XFS_IOC_OPEN_BY_HANDLE _IOWR('X', 107, struct xfs_fsop_handlereq) +#define XFS_IOC_READLINK_BY_HANDLE _IOWR('X', 108, struct xfs_fsop_handlereq) +#define XFS_IOC_SWAPEXT _IOWR('X', 109, struct xfs_swapext) +#define XFS_IOC_FSGROWFSDATA _IOW ('X', 110, struct xfs_growfs_data) +#define XFS_IOC_FSGROWFSLOG _IOW ('X', 111, struct xfs_growfs_log) +#define XFS_IOC_FSGROWFSRT _IOW ('X', 112, struct xfs_growfs_rt) +#define XFS_IOC_FSCOUNTS _IOR ('X', 113, struct xfs_fsop_counts) +#define XFS_IOC_SET_RESBLKS _IOWR('X', 114, struct xfs_fsop_resblks) +#define XFS_IOC_GET_RESBLKS _IOR ('X', 115, struct xfs_fsop_resblks) +#define XFS_IOC_ERROR_INJECTION _IOW ('X', 116, struct xfs_error_injection) +#define XFS_IOC_ERROR_CLEARALL _IOW ('X', 117, struct xfs_error_injection) +/* XFS_IOC_ATTRCTL_BY_HANDLE -- deprecated 118 */ + +/* XFS_IOC_FREEZE -- FIFREEZE 119 */ +/* XFS_IOC_THAW -- FITHAW 120 */ +#ifndef FIFREEZE +#define XFS_IOC_FREEZE _IOWR('X', 119, int) +#define XFS_IOC_THAW _IOWR('X', 120, int) +#endif + +#define XFS_IOC_FSSETDM_BY_HANDLE _IOW ('X', 121, struct xfs_fsop_setdm_handlereq) +#define XFS_IOC_ATTRLIST_BY_HANDLE _IOW ('X', 122, struct xfs_fsop_attrlist_handlereq) +#define XFS_IOC_ATTRMULTI_BY_HANDLE _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq) +#define XFS_IOC_FSGEOMETRY _IOR ('X', 124, struct xfs_fsop_geom) +#define XFS_IOC_GOINGDOWN _IOR ('X', 125, __uint32_t) +/* XFS_IOC_GETFSUUID ---------- deprecated 140 */ + + +#ifndef HAVE_BBMACROS +/* + * Block I/O parameterization. A basic block (BB) is the lowest size of + * filesystem allocation, and must equal 512. Length units given to bio + * routines are in BB's. 
+ */ +#define BBSHIFT 9 +#define BBSIZE (1<<BBSHIFT) +#define BBMASK (BBSIZE-1) +#define BTOBB(bytes) (((__u64)(bytes) + BBSIZE - 1) >> BBSHIFT) +#define BTOBBT(bytes) ((__u64)(bytes) >> BBSHIFT) +#define BBTOB(bbs) ((bbs) << BBSHIFT) +#endif + +#endif /* __XFS_FS_H__ */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h deleted file mode 100644 index 18dc721..0000000 --- a/fs/xfs/xfs_fs.h +++ /dev/null @@ -1,576 +0,0 @@ -/* - * Copyright (c) 1995-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_FS_H__ -#define __XFS_FS_H__ - -/* - * SGI's XFS filesystem's major stuff (constants, structures) - */ - -/* - * Direct I/O attribute record used with XFS_IOC_DIOINFO - * d_miniosz is the min xfer size, xfer size multiple and file seek offset - * alignment. - */ -#ifndef HAVE_DIOATTR -struct dioattr { - __u32 d_mem; /* data buffer memory alignment */ - __u32 d_miniosz; /* min xfer size */ - __u32 d_maxiosz; /* max xfer size */ -}; -#endif - -/* - * Structure for XFS_IOC_FSGETXATTR[A] and XFS_IOC_FSSETXATTR.
- */ -#ifndef HAVE_FSXATTR -struct fsxattr { - __u32 fsx_xflags; /* xflags field value (get/set) */ - __u32 fsx_extsize; /* extsize field value (get/set)*/ - __u32 fsx_nextents; /* nextents field value (get) */ - __u32 fsx_projid; /* project identifier (get/set) */ - unsigned char fsx_pad[12]; -}; -#endif - -/* - * Flags for the bs_xflags/fsx_xflags field - * There should be a one-to-one correspondence between these flags and the - * XFS_DIFLAG_s. - */ -#define XFS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */ -#define XFS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */ -#define XFS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */ -#define XFS_XFLAG_APPEND 0x00000010 /* all writes append */ -#define XFS_XFLAG_SYNC 0x00000020 /* all writes synchronous */ -#define XFS_XFLAG_NOATIME 0x00000040 /* do not update access time */ -#define XFS_XFLAG_NODUMP 0x00000080 /* do not include in backups */ -#define XFS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */ -#define XFS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ -#define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ -#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ -#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ -#define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ -#define XFS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ -#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ - -/* - * Structure for XFS_IOC_GETBMAP. - * On input, fill in bmv_offset and bmv_length of the first structure - * to indicate the area of interest in the file, and bmv_entries with - * the number of array elements given back. The first structure is - * updated on return to give the offset and length for the next call. 
- */ -#ifndef HAVE_GETBMAP -struct getbmap { - __s64 bmv_offset; /* file offset of segment in blocks */ - __s64 bmv_block; /* starting block (64-bit daddr_t) */ - __s64 bmv_length; /* length of segment, blocks */ - __s32 bmv_count; /* # of entries in array incl. 1st */ - __s32 bmv_entries; /* # of entries filled in (output) */ -}; -#endif - -/* - * Structure for XFS_IOC_GETBMAPX. Fields bmv_offset through bmv_entries - * are used exactly as in the getbmap structure. The getbmapx structure - * has additional bmv_iflags and bmv_oflags fields. The bmv_iflags field - * is only used for the first structure. It contains input flags - * specifying XFS_IOC_GETBMAPX actions. The bmv_oflags field is filled - * in by the XFS_IOC_GETBMAPX command for each returned structure after - * the first. - */ -#ifndef HAVE_GETBMAPX -struct getbmapx { - __s64 bmv_offset; /* file offset of segment in blocks */ - __s64 bmv_block; /* starting block (64-bit daddr_t) */ - __s64 bmv_length; /* length of segment, blocks */ - __s32 bmv_count; /* # of entries in array incl. 1st */ - __s32 bmv_entries; /* # of entries filled in (output). */ - __s32 bmv_iflags; /* input flags (1st structure) */ - __s32 bmv_oflags; /* output flags (after 1st structure)*/ - __s32 bmv_unused1; /* future use */ - __s32 bmv_unused2; /* future use */ -}; -#endif - -/* bmv_iflags values - set by XFS_IOC_GETBMAPX caller. 
*/ -#define BMV_IF_ATTRFORK 0x1 /* return attr fork rather than data */ -#define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ -#define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ -#define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */ -#define BMV_IF_NO_HOLES 0x10 /* Do not return holes */ -#define BMV_IF_VALID \ - (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC| \ - BMV_IF_DELALLOC|BMV_IF_NO_HOLES) - -/* bmv_oflags values - returned for each non-header segment */ -#define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ -#define BMV_OF_DELALLOC 0x2 /* segment = delayed allocation */ -#define BMV_OF_LAST 0x4 /* segment is the last in the file */ - -/* - * Structure for XFS_IOC_FSSETDM. - * For use by backup and restore programs to set the XFS on-disk inode - * fields di_dmevmask and di_dmstate. These must be set to exactly and - * only values previously obtained via xfs_bulkstat! (Specifically the - * xfs_bstat_t fields bs_dmevmask and bs_dmstate.) - */ -#ifndef HAVE_FSDMIDATA -struct fsdmidata { - __u32 fsd_dmevmask; /* corresponds to di_dmevmask */ - __u16 fsd_padding; - __u16 fsd_dmstate; /* corresponds to di_dmstate */ -}; -#endif - -/* - * File segment locking set data type for 64 bit access. - * Also used for all the RESV/FREE interfaces. 
- */ -typedef struct xfs_flock64 { - __s16 l_type; - __s16 l_whence; - __s64 l_start; - __s64 l_len; /* len == 0 means until end of file */ - __s32 l_sysid; - __u32 l_pid; - __s32 l_pad[4]; /* reserve area */ -} xfs_flock64_t; - -/* - * Output for XFS_IOC_FSGEOMETRY_V1 - */ -typedef struct xfs_fsop_geom_v1 { - __u32 blocksize; /* filesystem (data) block size */ - __u32 rtextsize; /* realtime extent size */ - __u32 agblocks; /* fsblocks in an AG */ - __u32 agcount; /* number of allocation groups */ - __u32 logblocks; /* fsblocks in the log */ - __u32 sectsize; /* (data) sector size, bytes */ - __u32 inodesize; /* inode size in bytes */ - __u32 imaxpct; /* max allowed inode space(%) */ - __u64 datablocks; /* fsblocks in data subvolume */ - __u64 rtblocks; /* fsblocks in realtime subvol */ - __u64 rtextents; /* rt extents in realtime subvol*/ - __u64 logstart; /* starting fsblock of the log */ - unsigned char uuid[16]; /* unique id of the filesystem */ - __u32 sunit; /* stripe unit, fsblocks */ - __u32 swidth; /* stripe width, fsblocks */ - __s32 version; /* structure version */ - __u32 flags; /* superblock version flags */ - __u32 logsectsize; /* log sector size, bytes */ - __u32 rtsectsize; /* realtime sector size, bytes */ - __u32 dirblocksize; /* directory block size, bytes */ -} xfs_fsop_geom_v1_t; - -/* - * Output for XFS_IOC_FSGEOMETRY - */ -typedef struct xfs_fsop_geom { - __u32 blocksize; /* filesystem (data) block size */ - __u32 rtextsize; /* realtime extent size */ - __u32 agblocks; /* fsblocks in an AG */ - __u32 agcount; /* number of allocation groups */ - __u32 logblocks; /* fsblocks in the log */ - __u32 sectsize; /* (data) sector size, bytes */ - __u32 inodesize; /* inode size in bytes */ - __u32 imaxpct; /* max allowed inode space(%) */ - __u64 datablocks; /* fsblocks in data subvolume */ - __u64 rtblocks; /* fsblocks in realtime subvol */ - __u64 rtextents; /* rt extents in realtime subvol*/ - __u64 logstart; /* starting fsblock of the log */ - 
unsigned char uuid[16]; /* unique id of the filesystem */ - __u32 sunit; /* stripe unit, fsblocks */ - __u32 swidth; /* stripe width, fsblocks */ - __s32 version; /* structure version */ - __u32 flags; /* superblock version flags */ - __u32 logsectsize; /* log sector size, bytes */ - __u32 rtsectsize; /* realtime sector size, bytes */ - __u32 dirblocksize; /* directory block size, bytes */ - __u32 logsunit; /* log stripe unit, bytes */ -} xfs_fsop_geom_t; - -/* Output for XFS_FS_COUNTS */ -typedef struct xfs_fsop_counts { - __u64 freedata; /* free data section blocks */ - __u64 freertx; /* free rt extents */ - __u64 freeino; /* free inodes */ - __u64 allocino; /* total allocated inodes */ -} xfs_fsop_counts_t; - -/* Input/Output for XFS_GET_RESBLKS and XFS_SET_RESBLKS */ -typedef struct xfs_fsop_resblks { - __u64 resblks; - __u64 resblks_avail; -} xfs_fsop_resblks_t; - -#define XFS_FSOP_GEOM_VERSION 0 - -#define XFS_FSOP_GEOM_FLAGS_ATTR 0x0001 /* attributes in use */ -#define XFS_FSOP_GEOM_FLAGS_NLINK 0x0002 /* 32-bit nlink values */ -#define XFS_FSOP_GEOM_FLAGS_QUOTA 0x0004 /* quotas enabled */ -#define XFS_FSOP_GEOM_FLAGS_IALIGN 0x0008 /* inode alignment */ -#define XFS_FSOP_GEOM_FLAGS_DALIGN 0x0010 /* large data alignment */ -#define XFS_FSOP_GEOM_FLAGS_SHARED 0x0020 /* read-only shared */ -#define XFS_FSOP_GEOM_FLAGS_EXTFLG 0x0040 /* special extent flag */ -#define XFS_FSOP_GEOM_FLAGS_DIRV2 0x0080 /* directory version 2 */ -#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */ -#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */ -#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */ -#define XFS_FSOP_GEOM_FLAGS_PROJID32 0x0800 /* 32-bit project IDs */ -#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */ -#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ -#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */ -#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode 
directory types */ -#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */ - -/* - * Minimum and maximum sizes need for growth checks. - * - * Block counts are in units of filesystem blocks, not basic blocks. - */ -#define XFS_MIN_AG_BLOCKS 64 -#define XFS_MIN_LOG_BLOCKS 512ULL -#define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL) -#define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL) - -/* keep the maximum size under 2^31 by a small amount */ -#define XFS_MAX_LOG_BYTES \ - ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) - -/* Used for sanity checks on superblock */ -#define XFS_MAX_DBLOCKS(s) ((xfs_rfsblock_t)(s)->sb_agcount * (s)->sb_agblocks) -#define XFS_MIN_DBLOCKS(s) ((xfs_rfsblock_t)((s)->sb_agcount - 1) * \ - (s)->sb_agblocks + XFS_MIN_AG_BLOCKS) - -/* - * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT - */ -typedef struct xfs_growfs_data { - __u64 newblocks; /* new data subvol size, fsblocks */ - __u32 imaxpct; /* new inode space percentage limit */ -} xfs_growfs_data_t; - -typedef struct xfs_growfs_log { - __u32 newblocks; /* new log size, fsblocks */ - __u32 isint; /* 1 if new log is internal */ -} xfs_growfs_log_t; - -typedef struct xfs_growfs_rt { - __u64 newblocks; /* new realtime size, fsblocks */ - __u32 extsize; /* new realtime extent size, fsblocks */ -} xfs_growfs_rt_t; - - -/* - * Structures returned from ioctl XFS_IOC_FSBULKSTAT & XFS_IOC_FSBULKSTAT_SINGLE - */ -typedef struct xfs_bstime { - time_t tv_sec; /* seconds */ - __s32 tv_nsec; /* and nanoseconds */ -} xfs_bstime_t; - -typedef struct xfs_bstat { - __u64 bs_ino; /* inode number */ - __u16 bs_mode; /* type and mode */ - __u16 bs_nlink; /* number of links */ - __u32 bs_uid; /* user id */ - __u32 bs_gid; /* group id */ - __u32 bs_rdev; /* device value */ - __s32 bs_blksize; /* block size */ - __s64 bs_size; /* file size */ - xfs_bstime_t bs_atime; /* access time */ - xfs_bstime_t bs_mtime; /* modify time */ - xfs_bstime_t bs_ctime; /* inode change time */ - 
int64_t bs_blocks; /* number of blocks */ - __u32 bs_xflags; /* extended flags */ - __s32 bs_extsize; /* extent size */ - __s32 bs_extents; /* number of extents */ - __u32 bs_gen; /* generation count */ - __u16 bs_projid_lo; /* lower part of project id */ -#define bs_projid bs_projid_lo /* (previously just bs_projid) */ - __u16 bs_forkoff; /* inode fork offset in bytes */ - __u16 bs_projid_hi; /* higher part of project id */ - unsigned char bs_pad[10]; /* pad space, unused */ - __u32 bs_dmevmask; /* DMIG event mask */ - __u16 bs_dmstate; /* DMIG state info */ - __u16 bs_aextents; /* attribute number of extents */ -} xfs_bstat_t; - -/* - * Project quota id helpers (previously projid was 16bit only - * and using two 16bit values to hold new 32bit projid was choosen - * to retain compatibility with "old" filesystems). - */ -static inline __uint32_t -bstat_get_projid(struct xfs_bstat *bs) -{ - return (__uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo; -} - -/* - * The user-level BulkStat Request interface structure. - */ -typedef struct xfs_fsop_bulkreq { - __u64 __user *lastip; /* last inode # pointer */ - __s32 icount; /* count of entries in buffer */ - void __user *ubuffer;/* user buffer for inode desc. */ - __s32 __user *ocount; /* output count pointer */ -} xfs_fsop_bulkreq_t; - - -/* - * Structures returned from xfs_inumbers routine (XFS_IOC_FSINUMBERS). - */ -typedef struct xfs_inogrp { - __u64 xi_startino; /* starting inode number */ - __s32 xi_alloccount; /* # bits set in allocmask */ - __u64 xi_allocmask; /* mask of allocated inodes */ -} xfs_inogrp_t; - - -/* - * Error injection. - */ -typedef struct xfs_error_injection { - __s32 fd; - __s32 errtag; -} xfs_error_injection_t; - - -/* - * Speculative preallocation trimming. 
- */ -#define XFS_EOFBLOCKS_VERSION 1 -struct xfs_fs_eofblocks { - __u32 eof_version; - __u32 eof_flags; - uid_t eof_uid; - gid_t eof_gid; - prid_t eof_prid; - __u32 pad32; - __u64 eof_min_file_size; - __u64 pad64[12]; -}; - -/* eof_flags values */ -#define XFS_EOF_FLAGS_SYNC (1 << 0) /* sync/wait mode scan */ -#define XFS_EOF_FLAGS_UID (1 << 1) /* filter by uid */ -#define XFS_EOF_FLAGS_GID (1 << 2) /* filter by gid */ -#define XFS_EOF_FLAGS_PRID (1 << 3) /* filter by project id */ -#define XFS_EOF_FLAGS_MINFILESIZE (1 << 4) /* filter by min file size */ -#define XFS_EOF_FLAGS_UNION (1 << 5) /* union filter algorithm; - * kernel only, not included in - * valid mask */ -#define XFS_EOF_FLAGS_VALID \ - (XFS_EOF_FLAGS_SYNC | \ - XFS_EOF_FLAGS_UID | \ - XFS_EOF_FLAGS_GID | \ - XFS_EOF_FLAGS_PRID | \ - XFS_EOF_FLAGS_MINFILESIZE) - - -/* - * The user-level Handle Request interface structure. - */ -typedef struct xfs_fsop_handlereq { - __u32 fd; /* fd for FD_TO_HANDLE */ - void __user *path; /* user pathname */ - __u32 oflags; /* open flags */ - void __user *ihandle;/* user supplied handle */ - __u32 ihandlen; /* user supplied length */ - void __user *ohandle;/* user buffer for handle */ - __u32 __user *ohandlen;/* user buffer length */ -} xfs_fsop_handlereq_t; - -/* - * Compound structures for passing args through Handle Request interfaces - * xfs_fssetdm_by_handle, xfs_attrlist_by_handle, xfs_attrmulti_by_handle - * - ioctls: XFS_IOC_FSSETDM_BY_HANDLE, XFS_IOC_ATTRLIST_BY_HANDLE, and - * XFS_IOC_ATTRMULTI_BY_HANDLE - */ - -typedef struct xfs_fsop_setdm_handlereq { - struct xfs_fsop_handlereq hreq; /* handle information */ - struct fsdmidata __user *data; /* DMAPI data */ -} xfs_fsop_setdm_handlereq_t; - -typedef struct xfs_attrlist_cursor { - __u32 opaque[4]; -} xfs_attrlist_cursor_t; - -typedef struct xfs_fsop_attrlist_handlereq { - struct xfs_fsop_handlereq hreq; /* handle interface structure */ - struct xfs_attrlist_cursor pos; /* opaque cookie, list offset */ - 
__u32 flags; /* which namespace to use */ - __u32 buflen; /* length of buffer supplied */ - void __user *buffer; /* returned names */ -} xfs_fsop_attrlist_handlereq_t; - -typedef struct xfs_attr_multiop { - __u32 am_opcode; -#define ATTR_OP_GET 1 /* return the indicated attr's value */ -#define ATTR_OP_SET 2 /* set/create the indicated attr/value pair */ -#define ATTR_OP_REMOVE 3 /* remove the indicated attr */ - __s32 am_error; - void __user *am_attrname; - void __user *am_attrvalue; - __u32 am_length; - __u32 am_flags; -} xfs_attr_multiop_t; - -typedef struct xfs_fsop_attrmulti_handlereq { - struct xfs_fsop_handlereq hreq; /* handle interface structure */ - __u32 opcount;/* count of following multiop */ - struct xfs_attr_multiop __user *ops; /* attr_multi data */ -} xfs_fsop_attrmulti_handlereq_t; - -/* - * per machine unique filesystem identifier types. - */ -typedef struct { __u32 val[2]; } xfs_fsid_t; /* file system id type */ - -typedef struct xfs_fid { - __u16 fid_len; /* length of remainder */ - __u16 fid_pad; - __u32 fid_gen; /* generation number */ - __u64 fid_ino; /* 64 bits inode number */ -} xfs_fid_t; - -typedef struct xfs_handle { - union { - __s64 align; /* force alignment of ha_fid */ - xfs_fsid_t _ha_fsid; /* unique file system identifier */ - } ha_u; - xfs_fid_t ha_fid; /* file system specific file ID */ -} xfs_handle_t; -#define ha_fsid ha_u._ha_fsid - -#define XFS_HSIZE(handle) (((char *) &(handle).ha_fid.fid_pad \ - - (char *) &(handle)) \ - + (handle).ha_fid.fid_len) - -/* - * Structure passed to XFS_IOC_SWAPEXT - */ -typedef struct xfs_swapext -{ - __int64_t sx_version; /* version */ -#define XFS_SX_VERSION 0 - __int64_t sx_fdtarget; /* fd of target file */ - __int64_t sx_fdtmp; /* fd of tmp file */ - xfs_off_t sx_offset; /* offset into file */ - xfs_off_t sx_length; /* leng from offset */ - char sx_pad[16]; /* pad space, unused */ - xfs_bstat_t sx_stat; /* stat of target b4 copy */ -} xfs_swapext_t; - -/* - * Flags for going down operation 
- */ -#define XFS_FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */ -#define XFS_FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ -#define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ - -/* - * ioctl commands that are used by Linux filesystems - */ -#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS -#define XFS_IOC_SETXFLAGS FS_IOC_SETFLAGS -#define XFS_IOC_GETVERSION FS_IOC_GETVERSION - -/* - * ioctl commands that replace IRIX fcntl()'s - * For 'documentation' purposed more than anything else, - * the "cmd #" field reflects the IRIX fcntl number. - */ -#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64) -#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64) -#define XFS_IOC_DIOINFO _IOR ('X', 30, struct dioattr) -#define XFS_IOC_FSGETXATTR _IOR ('X', 31, struct fsxattr) -#define XFS_IOC_FSSETXATTR _IOW ('X', 32, struct fsxattr) -#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64) -#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64) -#define XFS_IOC_GETBMAP _IOWR('X', 38, struct getbmap) -#define XFS_IOC_FSSETDM _IOW ('X', 39, struct fsdmidata) -#define XFS_IOC_RESVSP _IOW ('X', 40, struct xfs_flock64) -#define XFS_IOC_UNRESVSP _IOW ('X', 41, struct xfs_flock64) -#define XFS_IOC_RESVSP64 _IOW ('X', 42, struct xfs_flock64) -#define XFS_IOC_UNRESVSP64 _IOW ('X', 43, struct xfs_flock64) -#define XFS_IOC_GETBMAPA _IOWR('X', 44, struct getbmap) -#define XFS_IOC_FSGETXATTRA _IOR ('X', 45, struct fsxattr) -/* XFS_IOC_SETBIOSIZE ---- deprecated 46 */ -/* XFS_IOC_GETBIOSIZE ---- deprecated 47 */ -#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) -#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64) -#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks) - -/* - * ioctl commands that replace IRIX syssgi()'s - */ -#define XFS_IOC_FSGEOMETRY_V1 _IOR ('X', 100, struct xfs_fsop_geom_v1) -#define XFS_IOC_FSBULKSTAT _IOWR('X', 101, struct xfs_fsop_bulkreq) -#define XFS_IOC_FSBULKSTAT_SINGLE _IOWR('X', 
102, struct xfs_fsop_bulkreq) -#define XFS_IOC_FSINUMBERS _IOWR('X', 103, struct xfs_fsop_bulkreq) -#define XFS_IOC_PATH_TO_FSHANDLE _IOWR('X', 104, struct xfs_fsop_handlereq) -#define XFS_IOC_PATH_TO_HANDLE _IOWR('X', 105, struct xfs_fsop_handlereq) -#define XFS_IOC_FD_TO_HANDLE _IOWR('X', 106, struct xfs_fsop_handlereq) -#define XFS_IOC_OPEN_BY_HANDLE _IOWR('X', 107, struct xfs_fsop_handlereq) -#define XFS_IOC_READLINK_BY_HANDLE _IOWR('X', 108, struct xfs_fsop_handlereq) -#define XFS_IOC_SWAPEXT _IOWR('X', 109, struct xfs_swapext) -#define XFS_IOC_FSGROWFSDATA _IOW ('X', 110, struct xfs_growfs_data) -#define XFS_IOC_FSGROWFSLOG _IOW ('X', 111, struct xfs_growfs_log) -#define XFS_IOC_FSGROWFSRT _IOW ('X', 112, struct xfs_growfs_rt) -#define XFS_IOC_FSCOUNTS _IOR ('X', 113, struct xfs_fsop_counts) -#define XFS_IOC_SET_RESBLKS _IOWR('X', 114, struct xfs_fsop_resblks) -#define XFS_IOC_GET_RESBLKS _IOR ('X', 115, struct xfs_fsop_resblks) -#define XFS_IOC_ERROR_INJECTION _IOW ('X', 116, struct xfs_error_injection) -#define XFS_IOC_ERROR_CLEARALL _IOW ('X', 117, struct xfs_error_injection) -/* XFS_IOC_ATTRCTL_BY_HANDLE -- deprecated 118 */ - -/* XFS_IOC_FREEZE -- FIFREEZE 119 */ -/* XFS_IOC_THAW -- FITHAW 120 */ -#ifndef FIFREEZE -#define XFS_IOC_FREEZE _IOWR('X', 119, int) -#define XFS_IOC_THAW _IOWR('X', 120, int) -#endif - -#define XFS_IOC_FSSETDM_BY_HANDLE _IOW ('X', 121, struct xfs_fsop_setdm_handlereq) -#define XFS_IOC_ATTRLIST_BY_HANDLE _IOW ('X', 122, struct xfs_fsop_attrlist_handlereq) -#define XFS_IOC_ATTRMULTI_BY_HANDLE _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq) -#define XFS_IOC_FSGEOMETRY _IOR ('X', 124, struct xfs_fsop_geom) -#define XFS_IOC_GOINGDOWN _IOR ('X', 125, __uint32_t) -/* XFS_IOC_GETFSUUID ---------- deprecated 140 */ - - -#ifndef HAVE_BBMACROS -/* - * Block I/O parameterization. A basic block (BB) is the lowest size of - * filesystem allocation, and must equal 512. Length units given to bio - * routines are in BB's. 
- */ -#define BBSHIFT 9 -#define BBSIZE (1<<BBSHIFT) -#define BBMASK (BBSIZE-1) -#define BTOBB(bytes) (((__u64)(bytes) + BBSIZE - 1) >> BBSHIFT) -#define BTOBBT(bytes) ((__u64)(bytes) >> BBSHIFT) -#define BBTOB(bbs) ((bbs) << BBSHIFT) -#endif - -#endif /* __XFS_FS_H__ */ -- cgit v0.10.2 From 5ebdc213ac02877e23fe7594d4b92b120488aac9 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 9 Jan 2015 10:46:31 +1100 Subject: xfs: move xfs_types.h to libxfs The types used by the core XFS code are common between kernel and userspace. xfs_types.h is duplicated in both kernel and userspace, so move it to libxfs along with all the other shared code. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h new file mode 100644 index 0000000..b79dc66 --- /dev/null +++ b/fs/xfs/libxfs/xfs_types.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_TYPES_H__ +#define __XFS_TYPES_H__ + +typedef __uint32_t prid_t; /* project ID */ + +typedef __uint32_t xfs_agblock_t; /* blockno in alloc.
group */ +typedef __uint32_t xfs_agino_t; /* inode # within allocation grp */ +typedef __uint32_t xfs_extlen_t; /* extent length in blocks */ +typedef __uint32_t xfs_agnumber_t; /* allocation group number */ +typedef __int32_t xfs_extnum_t; /* # of extents in a file */ +typedef __int16_t xfs_aextnum_t; /* # extents in an attribute fork */ +typedef __int64_t xfs_fsize_t; /* bytes in a file */ +typedef __uint64_t xfs_ufsize_t; /* unsigned bytes in a file */ + +typedef __int32_t xfs_suminfo_t; /* type of bitmap summary info */ +typedef __int32_t xfs_rtword_t; /* word type for bitmap manipulations */ + +typedef __int64_t xfs_lsn_t; /* log sequence number */ +typedef __int32_t xfs_tid_t; /* transaction identifier */ + +typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ +typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ + +typedef __uint64_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */ +typedef __uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */ +typedef __uint64_t xfs_rtblock_t; /* extent (block) in realtime area */ +typedef __uint64_t xfs_fileoff_t; /* block number in a file */ +typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ + +typedef __int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ +typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ + +/* + * Null values for the types. + */ +#define NULLFSBLOCK ((xfs_fsblock_t)-1) +#define NULLRFSBLOCK ((xfs_rfsblock_t)-1) +#define NULLRTBLOCK ((xfs_rtblock_t)-1) +#define NULLFILEOFF ((xfs_fileoff_t)-1) + +#define NULLAGBLOCK ((xfs_agblock_t)-1) +#define NULLAGNUMBER ((xfs_agnumber_t)-1) +#define NULLEXTNUM ((xfs_extnum_t)-1) + +#define NULLCOMMITLSN ((xfs_lsn_t)-1) + +#define NULLFSINO ((xfs_ino_t)-1) +#define NULLAGINO ((xfs_agino_t)-1) + +/* + * Max values for extlen, extnum, aextnum. 
+ */ +#define MAXEXTLEN ((xfs_extlen_t)0x001fffff) /* 21 bits */ +#define MAXEXTNUM ((xfs_extnum_t)0x7fffffff) /* signed int */ +#define MAXAEXTNUM ((xfs_aextnum_t)0x7fff) /* signed short */ + +/* + * Minimum and maximum blocksize and sectorsize. + * The blocksize upper limit is pretty much arbitrary. + * The sectorsize upper limit is due to sizeof(sb_sectsize). + */ +#define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */ +#define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 65536 bytes */ +#define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG) +#define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG) +#define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */ +#define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 32768 bytes */ +#define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG) +#define XFS_MAX_SECTORSIZE (1 << XFS_MAX_SECTORSIZE_LOG) + +/* + * Inode fork identifiers. + */ +#define XFS_DATA_FORK 0 +#define XFS_ATTR_FORK 1 + +/* + * Min numbers of data/attr fork btree root pointers. + */ +#define MINDBTPTRS 3 +#define MINABTPTRS 2 + +/* + * MAXNAMELEN is the length (including the terminating null) of + * the longest permissible file (component) name. + */ +#define MAXNAMELEN 256 + +typedef enum { + XFS_LOOKUP_EQi, XFS_LOOKUP_LEi, XFS_LOOKUP_GEi +} xfs_lookup_t; + +typedef enum { + XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi, + XFS_BTNUM_FINOi, XFS_BTNUM_MAX +} xfs_btnum_t; + +struct xfs_name { + const unsigned char *name; + int len; + int type; +}; + +/* + * uid_t and gid_t are hard-coded to 32 bits in the inode. + * Hence, an 'id' in a dquot is 32 bits.. + */ +typedef __uint32_t xfs_dqid_t; + +/* + * Constants for bit manipulations. 
+ */ +#define XFS_NBBYLOG 3 /* log2(NBBY) */ +#define XFS_WORDLOG 2 /* log2(sizeof(xfs_rtword_t)) */ +#define XFS_NBWORDLOG (XFS_NBBYLOG + XFS_WORDLOG) +#define XFS_NBWORD (1 << XFS_NBWORDLOG) +#define XFS_WORDMASK ((1 << XFS_WORDLOG) - 1) + + +#endif /* __XFS_TYPES_H__ */ diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h deleted file mode 100644 index b79dc66..0000000 --- a/fs/xfs/xfs_types.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_TYPES_H__ -#define __XFS_TYPES_H__ - -typedef __uint32_t prid_t; /* project ID */ - -typedef __uint32_t xfs_agblock_t; /* blockno in alloc. 
group */ -typedef __uint32_t xfs_agino_t; /* inode # within allocation grp */ -typedef __uint32_t xfs_extlen_t; /* extent length in blocks */ -typedef __uint32_t xfs_agnumber_t; /* allocation group number */ -typedef __int32_t xfs_extnum_t; /* # of extents in a file */ -typedef __int16_t xfs_aextnum_t; /* # extents in an attribute fork */ -typedef __int64_t xfs_fsize_t; /* bytes in a file */ -typedef __uint64_t xfs_ufsize_t; /* unsigned bytes in a file */ - -typedef __int32_t xfs_suminfo_t; /* type of bitmap summary info */ -typedef __int32_t xfs_rtword_t; /* word type for bitmap manipulations */ - -typedef __int64_t xfs_lsn_t; /* log sequence number */ -typedef __int32_t xfs_tid_t; /* transaction identifier */ - -typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ -typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ - -typedef __uint64_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */ -typedef __uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */ -typedef __uint64_t xfs_rtblock_t; /* extent (block) in realtime area */ -typedef __uint64_t xfs_fileoff_t; /* block number in a file */ -typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ - -typedef __int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ -typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ - -/* - * Null values for the types. - */ -#define NULLFSBLOCK ((xfs_fsblock_t)-1) -#define NULLRFSBLOCK ((xfs_rfsblock_t)-1) -#define NULLRTBLOCK ((xfs_rtblock_t)-1) -#define NULLFILEOFF ((xfs_fileoff_t)-1) - -#define NULLAGBLOCK ((xfs_agblock_t)-1) -#define NULLAGNUMBER ((xfs_agnumber_t)-1) -#define NULLEXTNUM ((xfs_extnum_t)-1) - -#define NULLCOMMITLSN ((xfs_lsn_t)-1) - -#define NULLFSINO ((xfs_ino_t)-1) -#define NULLAGINO ((xfs_agino_t)-1) - -/* - * Max values for extlen, extnum, aextnum. 
- */ -#define MAXEXTLEN ((xfs_extlen_t)0x001fffff) /* 21 bits */ -#define MAXEXTNUM ((xfs_extnum_t)0x7fffffff) /* signed int */ -#define MAXAEXTNUM ((xfs_aextnum_t)0x7fff) /* signed short */ - -/* - * Minimum and maximum blocksize and sectorsize. - * The blocksize upper limit is pretty much arbitrary. - * The sectorsize upper limit is due to sizeof(sb_sectsize). - */ -#define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */ -#define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 65536 bytes */ -#define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG) -#define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG) -#define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */ -#define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 32768 bytes */ -#define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG) -#define XFS_MAX_SECTORSIZE (1 << XFS_MAX_SECTORSIZE_LOG) - -/* - * Inode fork identifiers. - */ -#define XFS_DATA_FORK 0 -#define XFS_ATTR_FORK 1 - -/* - * Min numbers of data/attr fork btree root pointers. - */ -#define MINDBTPTRS 3 -#define MINABTPTRS 2 - -/* - * MAXNAMELEN is the length (including the terminating null) of - * the longest permissible file (component) name. - */ -#define MAXNAMELEN 256 - -typedef enum { - XFS_LOOKUP_EQi, XFS_LOOKUP_LEi, XFS_LOOKUP_GEi -} xfs_lookup_t; - -typedef enum { - XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi, - XFS_BTNUM_FINOi, XFS_BTNUM_MAX -} xfs_btnum_t; - -struct xfs_name { - const unsigned char *name; - int len; - int type; -}; - -/* - * uid_t and gid_t are hard-coded to 32 bits in the inode. - * Hence, an 'id' in a dquot is 32 bits.. - */ -typedef __uint32_t xfs_dqid_t; - -/* - * Constants for bit manipulations. 
- */ -#define XFS_NBBYLOG 3 /* log2(NBBY) */ -#define XFS_WORDLOG 2 /* log2(sizeof(xfs_rtword_t)) */ -#define XFS_NBWORDLOG (XFS_NBBYLOG + XFS_WORDLOG) -#define XFS_NBWORD (1 << XFS_NBWORDLOG) -#define XFS_WORDMASK ((1 << XFS_WORDLOG) - 1) - - -#endif /* __XFS_TYPES_H__ */ -- cgit v0.10.2 From 9799b438ce21662fa173ffc0b30d93567a71dfa0 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 9 Jan 2015 10:46:49 +1100 Subject: xfs: move struct xfs_bmalloca to libxfs It is no longer used for stack splits, so strip the kernel workqueue bits from it and push it back into libxfs/xfs_bmap.h so that it can be shared with the userspace code. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 44db6db..856f53c 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -28,6 +28,37 @@ struct xfs_trans; extern kmem_zone_t *xfs_bmap_free_item_zone; /* + * Argument structure for xfs_bmap_alloc. 
+ */ +struct xfs_bmalloca { + xfs_fsblock_t *firstblock; /* i/o first block allocated */ + struct xfs_bmap_free *flist; /* bmap freelist */ + struct xfs_trans *tp; /* transaction pointer */ + struct xfs_inode *ip; /* incore inode pointer */ + struct xfs_bmbt_irec prev; /* extent before the new one */ + struct xfs_bmbt_irec got; /* extent after, or delayed */ + + xfs_fileoff_t offset; /* offset in file filling in */ + xfs_extlen_t length; /* i/o length asked/allocated */ + xfs_fsblock_t blkno; /* starting block of new extent */ + + struct xfs_btree_cur *cur; /* btree cursor */ + xfs_extnum_t idx; /* current extent index */ + int nallocs;/* number of extents alloc'd */ + int logflags;/* flags for transaction logging */ + + xfs_extlen_t total; /* total blocks needed for xaction */ + xfs_extlen_t minlen; /* minimum allocation size (blocks) */ + xfs_extlen_t minleft; /* amount must be left after alloc */ + bool eof; /* set if allocating past last extent */ + bool wasdel; /* replacing a delayed allocation */ + bool userdata;/* set if is user data */ + bool aeof; /* allocated space at eof */ + bool conv; /* overwriting unwritten extents */ + int flags; +}; + +/* * List of extents to be free "later". * The list is kept sorted on xbf_startblock. */ diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 2fdb72d..1fce0f3 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -26,40 +26,7 @@ struct xfs_ifork; struct xfs_inode; struct xfs_mount; struct xfs_trans; - -/* - * Argument structure for xfs_bmap_alloc. 
- */ -struct xfs_bmalloca { - xfs_fsblock_t *firstblock; /* i/o first block allocated */ - struct xfs_bmap_free *flist; /* bmap freelist */ - struct xfs_trans *tp; /* transaction pointer */ - struct xfs_inode *ip; /* incore inode pointer */ - struct xfs_bmbt_irec prev; /* extent before the new one */ - struct xfs_bmbt_irec got; /* extent after, or delayed */ - - xfs_fileoff_t offset; /* offset in file filling in */ - xfs_extlen_t length; /* i/o length asked/allocated */ - xfs_fsblock_t blkno; /* starting block of new extent */ - - struct xfs_btree_cur *cur; /* btree cursor */ - xfs_extnum_t idx; /* current extent index */ - int nallocs;/* number of extents alloc'd */ - int logflags;/* flags for transaction logging */ - - xfs_extlen_t total; /* total blocks needed for xaction */ - xfs_extlen_t minlen; /* minimum allocation size (blocks) */ - xfs_extlen_t minleft; /* amount must be left after alloc */ - bool eof; /* set if allocating past last extent */ - bool wasdel; /* replacing a delayed allocation */ - bool userdata;/* set if is user data */ - bool aeof; /* allocated space at eof */ - bool conv; /* overwriting unwritten extents */ - int flags; - struct completion *done; - struct work_struct work; - int result; -}; +struct xfs_bmalloca; int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, int *committed); -- cgit v0.10.2 From aa5d95c1b57792119804c587b9a468019219d7e0 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 9 Jan 2015 10:47:14 +1100 Subject: xfs: move xfs_bmap_finish prototype This function is used by libxfs code, but is implemented separately in userspace. Move the function prototype to xfs_bmap.h so that the prototype is shared even if the implementations aren't. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 856f53c..b9d8a49 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -180,6 +180,8 @@ void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len, struct xfs_bmap_free *flist, struct xfs_mount *mp); void xfs_bmap_cancel(struct xfs_bmap_free *flist); +int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, + int *committed); void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 1fce0f3..736429a 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -28,8 +28,6 @@ struct xfs_mount; struct xfs_trans; struct xfs_bmalloca; -int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, - int *committed); int xfs_bmap_rtalloc(struct xfs_bmalloca *ap); int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, int whichfork, int *eof); -- cgit v0.10.2 From 64af7a6ea5a4c7e12ae79415250d054424b7e0c2 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 9 Jan 2015 10:47:43 +1100 Subject: xfs: remove deprecated sysctls xfsbufd_centisecs and age_buffer_centisecs were due for removal in 3.14. We forgot to do that - it's now well past time to remove these deprecated, unused sysctls. 
Signed-off-by: Dave Chinner Reviewed-by: Eric Sandeen Signed-off-by: Dave Chinner diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt index 5be51fd..7077103 100644 --- a/Documentation/filesystems/xfs.txt +++ b/Documentation/filesystems/xfs.txt @@ -348,16 +348,13 @@ The following sysctls are available for the XFS filesystem: Deprecated Sysctls ================== - fs.xfs.xfsbufd_centisecs (Min: 50 Default: 100 Max: 3000) - Dirty metadata is now tracked by the log subsystem and - flushing is driven by log space and idling demands. The - xfsbufd no longer exists, so this syctl does nothing. +None at present. - Due for removal in 3.14. - fs.xfs.age_buffer_centisecs (Min: 100 Default: 1500 Max: 720000) - Dirty metadata is now tracked by the log subsystem and - flushing is driven by log space and idling demands. The - xfsbufd no longer exists, so this syctl does nothing. +Removed Sysctls +=============== - Due for removal in 3.14. + Name Removed + ---- ------- + fs.xfs.xfsbufd_centisecs v3.20 + fs.xfs.age_buffer_centisecs v3.20 diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index 1743b9f..a0c8067 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -149,24 +149,6 @@ static struct ctl_table xfs_table[] = { .extra2 = &xfs_params.inherit_noatim.max }, { - .procname = "xfsbufd_centisecs", - .data = &xfs_params.xfs_buf_timer.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.xfs_buf_timer.min, - .extra2 = &xfs_params.xfs_buf_timer.max - }, - { - .procname = "age_buffer_centisecs", - .data = &xfs_params.xfs_buf_age.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.xfs_buf_age.min, - .extra2 = &xfs_params.xfs_buf_age.max - }, - { .procname = "inherit_nosymlinks", .data = &xfs_params.inherit_nosym.val, .maxlen = sizeof(int), -- cgit v0.10.2 From d32057fc84c141af22ddf07b58e52570e52369cd Mon Sep 17 00:00:00 2001 From: 
Christoph Hellwig Date: Fri, 9 Jan 2015 10:48:12 +1100 Subject: xfs: pass a 64-bit count argument to xfs_iomap_write_unwritten The code is already ready for it, and the pnfs layout commit code expects to be able to pass a larger than 32-bit argument. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index c980e2a..ccb1dd0 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -802,7 +802,7 @@ int xfs_iomap_write_unwritten( xfs_inode_t *ip, xfs_off_t offset, - size_t count) + xfs_off_t count) { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb; diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 411fbb8..8688e66 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -27,6 +27,6 @@ int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, struct xfs_bmbt_irec *); int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, struct xfs_bmbt_irec *); -int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); +int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t); #endif /* __XFS_IOMAP_H__*/ -- cgit v0.10.2 From 43fd1fce9643586e0995ee5d11fb40641575348a Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Fri, 9 Jan 2015 10:48:58 +1100 Subject: xfs: fix implicit bool to int conversion try_wait_for_completion returns bool so the wrapper function xfs_dqflock_nowait should probably also return bool and not int. 
Signed-off-by: Nicholas Mc Guire Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index c24c67e..2f536f3 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -86,7 +86,7 @@ static inline void xfs_dqflock(xfs_dquot_t *dqp) wait_for_completion(&dqp->q_flush); } -static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp) +static inline bool xfs_dqflock_nowait(xfs_dquot_t *dqp) { return try_wait_for_completion(&dqp->q_flush); } -- cgit v0.10.2 From 4d11a40239405e531fc0e9dcd07921f00b965931 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 22 Jan 2015 09:10:26 +1100 Subject: xfs: remove bitfield based superblock updates When we log changes to the superblock, we first have to write them to the on-disk buffer, and then log that. Right now we have a complex bitfield based arrangement to only write the modified field to the buffer before we log it. This used to be necessary as a performance optimisation because we logged the superblock buffer in every extent or inode allocation or freeing, and so performance was extremely important. We haven't done this for years, however, ever since the lazy superblock counters pulled the superblock logging out of the transaction commit fast path. Hence we have a bunch of complexity that is not necessary that makes writing the in-core superblock to disk much more complex than it needs to be. We only need to log the superblock now during management operations (e.g. during mount, unmount or quota control operations) so it is not a performance critical path anymore. As such, remove the complex field based logging mechanism and replace it with a simple conversion function similar to what we use for all other on-disk structures. This means we always log the entirety of the superblock, but again because we rarely modify the superblock this is not an issue for log bandwidth or CPU time. 
Indeed, if we do log the superblock frequently, delayed logging will minimise the impact of this overhead. [Fixed gquota/pquota inode sharing regression noticed by bfoster.] Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 5d38e8b..c914422 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -403,7 +403,7 @@ xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp) if (!xfs_sb_version_hasattr2(&mp->m_sb)) { xfs_sb_version_addattr2(&mp->m_sb); spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); + xfs_mod_sb(tp); } else spin_unlock(&mp->m_sb_lock); } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index b5eb474..8c39cc8 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1221,22 +1221,20 @@ xfs_bmap_add_attrfork( goto bmap_cancel; if (!xfs_sb_version_hasattr(&mp->m_sb) || (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { - __int64_t sbfields = 0; + bool mod_sb = false; spin_lock(&mp->m_sb_lock); if (!xfs_sb_version_hasattr(&mp->m_sb)) { xfs_sb_version_addattr(&mp->m_sb); - sbfields |= XFS_SB_VERSIONNUM; + mod_sb = true; } if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) { xfs_sb_version_addattr2(&mp->m_sb); - sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); + mod_sb = true; } - if (sbfields) { - spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp, sbfields); - } else - spin_unlock(&mp->m_sb_lock); + spin_unlock(&mp->m_sb_lock); + if (mod_sb) + xfs_mod_sb(tp); } error = xfs_bmap_finish(&tp, &flist, &committed); diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 752915f..115a7cd 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -40,69 +40,6 @@ * Physical superblock buffer manipulations. Shared with libxfs in userspace. 
*/ -static const struct { - short offset; - short type; /* 0 = integer - * 1 = binary / string (no translation) - */ -} xfs_sb_info[] = { - { offsetof(xfs_sb_t, sb_magicnum), 0 }, - { offsetof(xfs_sb_t, sb_blocksize), 0 }, - { offsetof(xfs_sb_t, sb_dblocks), 0 }, - { offsetof(xfs_sb_t, sb_rblocks), 0 }, - { offsetof(xfs_sb_t, sb_rextents), 0 }, - { offsetof(xfs_sb_t, sb_uuid), 1 }, - { offsetof(xfs_sb_t, sb_logstart), 0 }, - { offsetof(xfs_sb_t, sb_rootino), 0 }, - { offsetof(xfs_sb_t, sb_rbmino), 0 }, - { offsetof(xfs_sb_t, sb_rsumino), 0 }, - { offsetof(xfs_sb_t, sb_rextsize), 0 }, - { offsetof(xfs_sb_t, sb_agblocks), 0 }, - { offsetof(xfs_sb_t, sb_agcount), 0 }, - { offsetof(xfs_sb_t, sb_rbmblocks), 0 }, - { offsetof(xfs_sb_t, sb_logblocks), 0 }, - { offsetof(xfs_sb_t, sb_versionnum), 0 }, - { offsetof(xfs_sb_t, sb_sectsize), 0 }, - { offsetof(xfs_sb_t, sb_inodesize), 0 }, - { offsetof(xfs_sb_t, sb_inopblock), 0 }, - { offsetof(xfs_sb_t, sb_fname[0]), 1 }, - { offsetof(xfs_sb_t, sb_blocklog), 0 }, - { offsetof(xfs_sb_t, sb_sectlog), 0 }, - { offsetof(xfs_sb_t, sb_inodelog), 0 }, - { offsetof(xfs_sb_t, sb_inopblog), 0 }, - { offsetof(xfs_sb_t, sb_agblklog), 0 }, - { offsetof(xfs_sb_t, sb_rextslog), 0 }, - { offsetof(xfs_sb_t, sb_inprogress), 0 }, - { offsetof(xfs_sb_t, sb_imax_pct), 0 }, - { offsetof(xfs_sb_t, sb_icount), 0 }, - { offsetof(xfs_sb_t, sb_ifree), 0 }, - { offsetof(xfs_sb_t, sb_fdblocks), 0 }, - { offsetof(xfs_sb_t, sb_frextents), 0 }, - { offsetof(xfs_sb_t, sb_uquotino), 0 }, - { offsetof(xfs_sb_t, sb_gquotino), 0 }, - { offsetof(xfs_sb_t, sb_qflags), 0 }, - { offsetof(xfs_sb_t, sb_flags), 0 }, - { offsetof(xfs_sb_t, sb_shared_vn), 0 }, - { offsetof(xfs_sb_t, sb_inoalignmt), 0 }, - { offsetof(xfs_sb_t, sb_unit), 0 }, - { offsetof(xfs_sb_t, sb_width), 0 }, - { offsetof(xfs_sb_t, sb_dirblklog), 0 }, - { offsetof(xfs_sb_t, sb_logsectlog), 0 }, - { offsetof(xfs_sb_t, sb_logsectsize), 0 }, - { offsetof(xfs_sb_t, sb_logsunit), 0 }, - { offsetof(xfs_sb_t, 
sb_features2), 0 }, - { offsetof(xfs_sb_t, sb_bad_features2), 0 }, - { offsetof(xfs_sb_t, sb_features_compat), 0 }, - { offsetof(xfs_sb_t, sb_features_ro_compat), 0 }, - { offsetof(xfs_sb_t, sb_features_incompat), 0 }, - { offsetof(xfs_sb_t, sb_features_log_incompat), 0 }, - { offsetof(xfs_sb_t, sb_crc), 0 }, - { offsetof(xfs_sb_t, sb_pad), 0 }, - { offsetof(xfs_sb_t, sb_pquotino), 0 }, - { offsetof(xfs_sb_t, sb_lsn), 0 }, - { sizeof(xfs_sb_t), 0 } -}; - /* * Reference counting access wrappers to the perag structures. * Because we never free per-ag structures, the only thing we @@ -461,58 +398,49 @@ xfs_sb_from_disk( __xfs_sb_from_disk(to, from, true); } -static inline void +static void xfs_sb_quota_to_disk( - xfs_dsb_t *to, - xfs_sb_t *from, - __int64_t *fields) + struct xfs_dsb *to, + struct xfs_sb *from) { __uint16_t qflags = from->sb_qflags; + to->sb_uquotino = cpu_to_be64(from->sb_uquotino); + if (xfs_sb_version_has_pquotino(from)) { + to->sb_qflags = cpu_to_be16(from->sb_qflags); + to->sb_gquotino = cpu_to_be64(from->sb_gquotino); + to->sb_pquotino = cpu_to_be64(from->sb_pquotino); + return; + } + /* - * We need to do these manipilations only if we are working - * with an older version of on-disk superblock. + * The in-core version of sb_qflags do not have XFS_OQUOTA_* + * flags, whereas the on-disk version does. So, convert incore + * XFS_{PG}QUOTA_* flags to on-disk XFS_OQUOTA_* flags. */ - if (xfs_sb_version_has_pquotino(from)) - return; + qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD | + XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD); - if (*fields & XFS_SB_QFLAGS) { - /* - * The in-core version of sb_qflags do not have - * XFS_OQUOTA_* flags, whereas the on-disk version - * does. So, convert incore XFS_{PG}QUOTA_* flags - * to on-disk XFS_OQUOTA_* flags. 
- */ - qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD | - XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD); - - if (from->sb_qflags & - (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD)) - qflags |= XFS_OQUOTA_ENFD; - if (from->sb_qflags & - (XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) - qflags |= XFS_OQUOTA_CHKD; - to->sb_qflags = cpu_to_be16(qflags); - *fields &= ~XFS_SB_QFLAGS; - } + if (from->sb_qflags & + (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD)) + qflags |= XFS_OQUOTA_ENFD; + if (from->sb_qflags & + (XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) + qflags |= XFS_OQUOTA_CHKD; + to->sb_qflags = cpu_to_be16(qflags); /* - * GQUOTINO and PQUOTINO cannot be used together in versions of - * superblock that do not have pquotino. from->sb_flags tells us which - * quota is active and should be copied to disk. If neither are active, - * make sure we write NULLFSINO to the sb_gquotino field as a quota - * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature - * bit is set. + * GQUOTINO and PQUOTINO cannot be used together in versions + * of superblock that do not have pquotino. from->sb_flags + * tells us which quota is active and should be copied to + * disk. If neither are active, we should NULL the inode. * - * Note that we don't need to handle the sb_uquotino or sb_pquotino here - * as they do not require any translation. Hence the main sb field loop - * will write them appropriately from the in-core superblock. + * In all cases, the separate pquotino must remain 0 because it + * it beyond the "end" of the valid non-pquotino superblock. 
*/ - if ((*fields & XFS_SB_GQUOTINO) && - (from->sb_qflags & XFS_GQUOTA_ACCT)) + if (from->sb_qflags & XFS_GQUOTA_ACCT) to->sb_gquotino = cpu_to_be64(from->sb_gquotino); - else if ((*fields & XFS_SB_PQUOTINO) && - (from->sb_qflags & XFS_PQUOTA_ACCT)) + else if (from->sb_qflags & XFS_PQUOTA_ACCT) to->sb_gquotino = cpu_to_be64(from->sb_pquotino); else { /* @@ -526,63 +454,72 @@ xfs_sb_quota_to_disk( to->sb_gquotino = cpu_to_be64(NULLFSINO); } - *fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO); + to->sb_pquotino = 0; } -/* - * Copy in core superblock to ondisk one. - * - * The fields argument is mask of superblock fields to copy. - */ void xfs_sb_to_disk( - xfs_dsb_t *to, - xfs_sb_t *from, - __int64_t fields) + struct xfs_dsb *to, + struct xfs_sb *from) { - xfs_caddr_t to_ptr = (xfs_caddr_t)to; - xfs_caddr_t from_ptr = (xfs_caddr_t)from; - xfs_sb_field_t f; - int first; - int size; - - ASSERT(fields); - if (!fields) - return; + xfs_sb_quota_to_disk(to, from); + + to->sb_magicnum = cpu_to_be32(from->sb_magicnum); + to->sb_blocksize = cpu_to_be32(from->sb_blocksize); + to->sb_dblocks = cpu_to_be64(from->sb_dblocks); + to->sb_rblocks = cpu_to_be64(from->sb_rblocks); + to->sb_rextents = cpu_to_be64(from->sb_rextents); + memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid)); + to->sb_logstart = cpu_to_be64(from->sb_logstart); + to->sb_rootino = cpu_to_be64(from->sb_rootino); + to->sb_rbmino = cpu_to_be64(from->sb_rbmino); + to->sb_rsumino = cpu_to_be64(from->sb_rsumino); + to->sb_rextsize = cpu_to_be32(from->sb_rextsize); + to->sb_agblocks = cpu_to_be32(from->sb_agblocks); + to->sb_agcount = cpu_to_be32(from->sb_agcount); + to->sb_rbmblocks = cpu_to_be32(from->sb_rbmblocks); + to->sb_logblocks = cpu_to_be32(from->sb_logblocks); + to->sb_versionnum = cpu_to_be16(from->sb_versionnum); + to->sb_sectsize = cpu_to_be16(from->sb_sectsize); + to->sb_inodesize = cpu_to_be16(from->sb_inodesize); + to->sb_inopblock = cpu_to_be16(from->sb_inopblock); + memcpy(&to->sb_fname, 
&from->sb_fname, sizeof(to->sb_fname)); + to->sb_blocklog = from->sb_blocklog; + to->sb_sectlog = from->sb_sectlog; + to->sb_inodelog = from->sb_inodelog; + to->sb_inopblog = from->sb_inopblog; + to->sb_agblklog = from->sb_agblklog; + to->sb_rextslog = from->sb_rextslog; + to->sb_inprogress = from->sb_inprogress; + to->sb_imax_pct = from->sb_imax_pct; + to->sb_icount = cpu_to_be64(from->sb_icount); + to->sb_ifree = cpu_to_be64(from->sb_ifree); + to->sb_fdblocks = cpu_to_be64(from->sb_fdblocks); + to->sb_frextents = cpu_to_be64(from->sb_frextents); - /* We should never write the crc here, it's updated in the IO path */ - fields &= ~XFS_SB_CRC; - - xfs_sb_quota_to_disk(to, from, &fields); - while (fields) { - f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); - first = xfs_sb_info[f].offset; - size = xfs_sb_info[f + 1].offset - first; - - ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1); - - if (size == 1 || xfs_sb_info[f].type == 1) { - memcpy(to_ptr + first, from_ptr + first, size); - } else { - switch (size) { - case 2: - *(__be16 *)(to_ptr + first) = - cpu_to_be16(*(__u16 *)(from_ptr + first)); - break; - case 4: - *(__be32 *)(to_ptr + first) = - cpu_to_be32(*(__u32 *)(from_ptr + first)); - break; - case 8: - *(__be64 *)(to_ptr + first) = - cpu_to_be64(*(__u64 *)(from_ptr + first)); - break; - default: - ASSERT(0); - } - } - fields &= ~(1LL << f); + to->sb_flags = from->sb_flags; + to->sb_shared_vn = from->sb_shared_vn; + to->sb_inoalignmt = cpu_to_be32(from->sb_inoalignmt); + to->sb_unit = cpu_to_be32(from->sb_unit); + to->sb_width = cpu_to_be32(from->sb_width); + to->sb_dirblklog = from->sb_dirblklog; + to->sb_logsectlog = from->sb_logsectlog; + to->sb_logsectsize = cpu_to_be16(from->sb_logsectsize); + to->sb_logsunit = cpu_to_be32(from->sb_logsunit); + to->sb_features2 = cpu_to_be32(from->sb_features2); + to->sb_bad_features2 = cpu_to_be32(from->sb_bad_features2); + + if (xfs_sb_version_hascrc(from)) { + to->sb_features_compat = 
cpu_to_be32(from->sb_features_compat); + to->sb_features_ro_compat = + cpu_to_be32(from->sb_features_ro_compat); + to->sb_features_incompat = + cpu_to_be32(from->sb_features_incompat); + to->sb_features_log_incompat = + cpu_to_be32(from->sb_features_log_incompat); + to->sb_pad = 0; + to->sb_lsn = cpu_to_be64(from->sb_lsn); } } @@ -823,35 +760,13 @@ xfs_initialize_perag_data( * access. */ void -xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) +xfs_mod_sb( + struct xfs_trans *tp) { - xfs_buf_t *bp; - int first; - int last; - xfs_mount_t *mp; - xfs_sb_field_t f; - - ASSERT(fields); - if (!fields) - return; - mp = tp->t_mountp; - bp = xfs_trans_getsb(tp, mp, 0); - first = sizeof(xfs_sb_t); - last = 0; - - /* translate/copy */ - - xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields); - - /* find modified range */ - f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields); - ASSERT((1LL << f) & XFS_SB_MOD_BITS); - last = xfs_sb_info[f + 1].offset - 1; - - f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); - ASSERT((1LL << f) & XFS_SB_MOD_BITS); - first = xfs_sb_info[f].offset; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); + xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); - xfs_trans_log_buf(tp, bp, first, last); + xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb)); } diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 8eb1c54..e193caa 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -27,11 +27,11 @@ extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t, extern void xfs_perag_put(struct xfs_perag *pag); extern int xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t); -extern void xfs_sb_calc_crc(struct xfs_buf *); -extern void xfs_mod_sb(struct xfs_trans *, __int64_t); -extern void xfs_sb_mount_common(struct xfs_mount *, struct xfs_sb *); -extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); -extern void 
xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); +extern void xfs_sb_calc_crc(struct xfs_buf *bp); +extern void xfs_mod_sb(struct xfs_trans *tp); +extern void xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp); +extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from); +extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index fdc6422..82af857 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -541,7 +541,7 @@ xfs_growfs_data_private( saved_error = error; continue; } - xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, XFS_SB_ALL_BITS); + xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); error = xfs_bwrite(bp); xfs_buf_relse(bp); @@ -780,9 +780,7 @@ xfs_fs_log_dummy( xfs_trans_cancel(tp, 0); return error; } - - /* log the UUID because it is an unchanging field */ - xfs_mod_sb(tp, XFS_SB_UUID); + xfs_mod_sb(tp); xfs_trans_set_sync(tp); return xfs_trans_commit(tp, 0); } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index d3d3883..2953d46 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -613,7 +613,7 @@ xfs_mount_reset_sbqflags( return error; } - xfs_mod_sb(tp, XFS_SB_QFLAGS); + xfs_mod_sb(tp); return xfs_trans_commit(tp, 0); } @@ -896,7 +896,7 @@ xfs_mountfs( * perform the update e.g. for the root filesystem. 
*/ if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { - error = xfs_mount_log_sb(mp, mp->m_update_flags); + error = xfs_mount_log_sb(mp); if (error) { xfs_warn(mp, "failed to write sb changes"); goto out_rtunmount; @@ -1126,7 +1126,7 @@ xfs_log_sbcount(xfs_mount_t *mp) return error; } - xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); + xfs_mod_sb(tp); xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); return error; @@ -1429,15 +1429,10 @@ xfs_freesb( */ int xfs_mount_log_sb( - xfs_mount_t *mp, - __int64_t fields) + struct xfs_mount *mp) { - xfs_trans_t *tp; - int error; - - ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID | - XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2 | - XFS_SB_VERSIONNUM)); + struct xfs_trans *tp; + int error; tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); @@ -1445,9 +1440,8 @@ xfs_mount_log_sb( xfs_trans_cancel(tp, 0); return error; } - xfs_mod_sb(tp, fields); - error = xfs_trans_commit(tp, 0); - return error; + xfs_mod_sb(tp); + return xfs_trans_commit(tp, 0); } /* diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 22ccf69..28b341b 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -378,7 +378,7 @@ extern void xfs_unmountfs(xfs_mount_t *); extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, uint, int); -extern int xfs_mount_log_sb(xfs_mount_t *, __int64_t); +extern int xfs_mount_log_sb(xfs_mount_t *); extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); extern int xfs_readsb(xfs_mount_t *, int); extern void xfs_freesb(xfs_mount_t *); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 79fb19d..c815a80 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -714,7 +714,6 @@ STATIC int xfs_qm_qino_alloc( xfs_mount_t *mp, xfs_inode_t **ip, - __int64_t sbfields, uint flags) { xfs_trans_t *tp; @@ -777,11 +776,6 @@ xfs_qm_qino_alloc( 
spin_lock(&mp->m_sb_lock); if (flags & XFS_QMOPT_SBVERSION) { ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); - ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | - XFS_SB_GQUOTINO | XFS_SB_PQUOTINO | XFS_SB_QFLAGS)) == - (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | - XFS_SB_GQUOTINO | XFS_SB_PQUOTINO | - XFS_SB_QFLAGS)); xfs_sb_version_addquota(&mp->m_sb); mp->m_sb.sb_uquotino = NULLFSINO; @@ -798,7 +792,7 @@ xfs_qm_qino_alloc( else mp->m_sb.sb_pquotino = (*ip)->i_ino; spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp, sbfields); + xfs_mod_sb(tp); if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { xfs_alert(mp, "%s failed (error %d)!", __func__, error); @@ -1451,7 +1445,7 @@ xfs_qm_mount_quotas( spin_unlock(&mp->m_sb_lock); if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { - if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { + if (xfs_qm_write_sb_changes(mp)) { /* * We could only have been turning quotas off. * We aren't in very good shape actually because @@ -1482,7 +1476,6 @@ xfs_qm_init_quotainos( struct xfs_inode *gip = NULL; struct xfs_inode *pip = NULL; int error; - __int64_t sbflags = 0; uint flags = 0; ASSERT(mp->m_quotainfo); @@ -1517,9 +1510,6 @@ xfs_qm_init_quotainos( } } else { flags |= XFS_QMOPT_SBVERSION; - sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | - XFS_SB_GQUOTINO | XFS_SB_PQUOTINO | - XFS_SB_QFLAGS); } /* @@ -1530,7 +1520,6 @@ xfs_qm_init_quotainos( */ if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { error = xfs_qm_qino_alloc(mp, &uip, - sbflags | XFS_SB_UQUOTINO, flags | XFS_QMOPT_UQUOTA); if (error) goto error_rele; @@ -1539,7 +1528,6 @@ xfs_qm_init_quotainos( } if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) { error = xfs_qm_qino_alloc(mp, &gip, - sbflags | XFS_SB_GQUOTINO, flags | XFS_QMOPT_GQUOTA); if (error) goto error_rele; @@ -1548,7 +1536,6 @@ xfs_qm_init_quotainos( } if (XFS_IS_PQUOTA_ON(mp) && pip == NULL) { error = xfs_qm_qino_alloc(mp, &pip, - sbflags | XFS_SB_PQUOTINO, flags | XFS_QMOPT_PQUOTA); if (error) goto error_rele; @@ 
-1593,8 +1580,7 @@ xfs_qm_dqfree_one( */ int xfs_qm_write_sb_changes( - xfs_mount_t *mp, - __int64_t flags) + struct xfs_mount *mp) { xfs_trans_t *tp; int error; @@ -1606,10 +1592,8 @@ xfs_qm_write_sb_changes( return error; } - xfs_mod_sb(tp, flags); - error = xfs_trans_commit(tp, 0); - - return error; + xfs_mod_sb(tp); + return xfs_trans_commit(tp, 0); } diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 3a07a93..bddd23f 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -157,7 +157,7 @@ struct xfs_dquot_acct { #define XFS_QM_RTBWARNLIMIT 5 extern void xfs_qm_destroy_quotainfo(struct xfs_mount *); -extern int xfs_qm_write_sb_changes(struct xfs_mount *, __int64_t); +extern int xfs_qm_write_sb_changes(struct xfs_mount *); /* dquot stuff */ extern void xfs_qm_dqpurge_all(struct xfs_mount *, uint); diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 74fca68..8d7e5f0 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -92,8 +92,7 @@ xfs_qm_scall_quotaoff( mutex_unlock(&q->qi_quotaofflock); /* XXX what to do if error ? Revert back to old vals incore ? */ - error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS); - return error; + return xfs_qm_write_sb_changes(mp); } dqtype = 0; @@ -314,7 +313,6 @@ xfs_qm_scall_quotaon( { int error; uint qf; - __int64_t sbflags; flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); /* @@ -322,8 +320,6 @@ xfs_qm_scall_quotaon( */ flags &= ~(XFS_ALL_QUOTA_ACCT); - sbflags = 0; - if (flags == 0) { xfs_debug(mp, "%s: zero flags, m_qflags=%x", __func__, mp->m_qflags); @@ -370,11 +366,10 @@ xfs_qm_scall_quotaon( /* * There's nothing to change if it's the same. */ - if ((qf & flags) == flags && sbflags == 0) + if ((qf & flags) == flags) return -EEXIST; - sbflags |= XFS_SB_QFLAGS; - if ((error = xfs_qm_write_sb_changes(mp, sbflags))) + if ((error = xfs_qm_write_sb_changes(mp))) return error; /* * If we aren't trying to switch on quota enforcement, we are done. 
@@ -801,7 +796,7 @@ xfs_qm_log_quotaoff( mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp, XFS_SB_QFLAGS); + xfs_mod_sb(tp); /* * We have to make sure that the transaction is secure on disk before we diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 19cbda1..6fb2989 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1258,7 +1258,7 @@ xfs_fs_remount( * might have some superblock changes to update. */ if (mp->m_update_flags) { - error = xfs_mount_log_sb(mp, mp->m_update_flags); + error = xfs_mount_log_sb(mp); if (error) { xfs_warn(mp, "failed to write sb changes"); return error; -- cgit v0.10.2 From 61e63ecb577f9b56bfb3182f1215b64e37a12c38 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 22 Jan 2015 09:10:31 +1100 Subject: xfs: consolidate superblock logging functions We now have several superblock logging functions that are identical except for the transaction reservation and whether it should be a synchronous transaction or not. Consolidate these all into a single function, a single reservation and a sync flag and call it xfs_sync_sb(). Also, xfs_mod_sb() is not really a modification function - it's the operation of logging the superblock buffer. Hence change the name of it to reflect this. Note that we have to change the mp->m_update_flags that are passed around at mount time to a boolean simply to indicate a superblock update is needed.
Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index c914422..15105db 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -403,7 +403,7 @@ xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp) if (!xfs_sb_version_hasattr2(&mp->m_sb)) { xfs_sb_version_addattr2(&mp->m_sb); spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp); + xfs_log_sb(tp); } else spin_unlock(&mp->m_sb_lock); } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 8c39cc8..63a5bb9 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1221,20 +1221,20 @@ xfs_bmap_add_attrfork( goto bmap_cancel; if (!xfs_sb_version_hasattr(&mp->m_sb) || (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { - bool mod_sb = false; + bool log_sb = false; spin_lock(&mp->m_sb_lock); if (!xfs_sb_version_hasattr(&mp->m_sb)) { xfs_sb_version_addattr(&mp->m_sb); - mod_sb = true; + log_sb = true; } if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) { xfs_sb_version_addattr2(&mp->m_sb); - mod_sb = true; + log_sb = true; } spin_unlock(&mp->m_sb_lock); - if (mod_sb) - xfs_mod_sb(tp); + if (log_sb) + xfs_log_sb(tp); } error = xfs_bmap_finish(&tp, &flist, &committed); diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 115a7cd..63f8148 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -753,14 +753,13 @@ xfs_initialize_perag_data( } /* - * xfs_mod_sb() can be used to copy arbitrary changes to the - * in-core superblock into the superblock buffer to be logged. - * It does not provide the higher level of locking that is - * needed to protect the in-core superblock from concurrent - * access. + * xfs_log_sb() can be used to copy arbitrary changes to the in-core superblock + * into the superblock buffer to be logged. 
It does not provide the higher + * level of locking that is needed to protect the in-core superblock from + * concurrent access. */ void -xfs_mod_sb( +xfs_log_sb( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; @@ -770,3 +769,35 @@ xfs_mod_sb( xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb)); } + +/* + * xfs_sync_sb + * + * Sync the superblock to disk. + * + * Note that the caller is responsible for checking the frozen state of the + * filesystem. This procedure uses the non-blocking transaction allocator and + * thus will allow modifications to a frozen fs. This is required because this + * code can be called during the process of freezing where use of the high-level + * allocator would deadlock. + */ +int +xfs_sync_sb( + struct xfs_mount *mp, + bool wait) +{ + struct xfs_trans *tp; + int error; + + tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_CHANGE, KM_SLEEP); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } + + xfs_log_sb(tp); + if (wait) + xfs_trans_set_sync(tp); + return xfs_trans_commit(tp, 0); +} diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index e193caa..b25bb9a 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -28,7 +28,8 @@ extern void xfs_perag_put(struct xfs_perag *pag); extern int xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t); extern void xfs_sb_calc_crc(struct xfs_buf *bp); -extern void xfs_mod_sb(struct xfs_trans *tp); +extern void xfs_log_sb(struct xfs_trans *tp); +extern int xfs_sync_sb(struct xfs_mount *mp, bool wait); extern void xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp); extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from); extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 82404da..8dda4b3 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ 
b/fs/xfs/libxfs/xfs_shared.h @@ -82,7 +82,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; #define XFS_TRANS_ATTR_RM 23 #define XFS_TRANS_ATTR_FLAG 24 #define XFS_TRANS_CLEAR_AGI_BUCKET 25 -#define XFS_TRANS_QM_SBCHANGE 26 +#define XFS_TRANS_SB_CHANGE 26 /* * Dummy entries since we use the transaction type to index into the * trans_type[] in xlog_recover_print_trans_head() @@ -95,17 +95,15 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; #define XFS_TRANS_QM_DQCLUSTER 32 #define XFS_TRANS_QM_QINOCREATE 33 #define XFS_TRANS_QM_QUOTAOFF_END 34 -#define XFS_TRANS_SB_UNIT 35 -#define XFS_TRANS_FSYNC_TS 36 -#define XFS_TRANS_GROWFSRT_ALLOC 37 -#define XFS_TRANS_GROWFSRT_ZERO 38 -#define XFS_TRANS_GROWFSRT_FREE 39 -#define XFS_TRANS_SWAPEXT 40 -#define XFS_TRANS_SB_COUNT 41 -#define XFS_TRANS_CHECKPOINT 42 -#define XFS_TRANS_ICREATE 43 -#define XFS_TRANS_CREATE_TMPFILE 44 -#define XFS_TRANS_TYPE_MAX 44 +#define XFS_TRANS_FSYNC_TS 35 +#define XFS_TRANS_GROWFSRT_ALLOC 36 +#define XFS_TRANS_GROWFSRT_ZERO 37 +#define XFS_TRANS_GROWFSRT_FREE 38 +#define XFS_TRANS_SWAPEXT 39 +#define XFS_TRANS_CHECKPOINT 40 +#define XFS_TRANS_ICREATE 41 +#define XFS_TRANS_CREATE_TMPFILE 42 +#define XFS_TRANS_TYPE_MAX 43 /* new transaction types need to be reflected in xfs_logprint(8) */ #define XFS_TRANS_TYPES \ @@ -113,7 +111,6 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ { XFS_TRANS_INACTIVE, "INACTIVE" }, \ { XFS_TRANS_CREATE, "CREATE" }, \ - { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \ { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ { XFS_TRANS_REMOVE, "REMOVE" }, \ @@ -134,23 +131,23 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \ { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \ { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \ - { XFS_TRANS_QM_SBCHANGE, "QM_SBCHANGE" }, \ + { XFS_TRANS_SB_CHANGE, "SBCHANGE" }, \ + { 
XFS_TRANS_DUMMY1, "DUMMY1" }, \ + { XFS_TRANS_DUMMY2, "DUMMY2" }, \ { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \ { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \ { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \ { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \ { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \ { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \ - { XFS_TRANS_SB_UNIT, "SB_UNIT" }, \ { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \ { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \ { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \ { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ - { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \ { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \ - { XFS_TRANS_DUMMY1, "DUMMY1" }, \ - { XFS_TRANS_DUMMY2, "DUMMY2" }, \ + { XFS_TRANS_ICREATE, "ICREATE" }, \ + { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \ { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } /* diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 6c1330f..68cb1e7 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -716,17 +716,6 @@ xfs_calc_clear_agi_bucket_reservation( } /* - * Clearing the quotaflags in the superblock. - * the super block for changing quota flags: sector size - */ -STATIC uint -xfs_calc_qm_sbchange_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); -} - -/* * Adjusting quota limits. * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot) */ @@ -864,9 +853,6 @@ xfs_trans_resv_calc( * The following transactions are logged in logical format with * a default log count. 
*/ - resp->tr_qm_sbchange.tr_logres = xfs_calc_qm_sbchange_reservation(mp); - resp->tr_qm_sbchange.tr_logcount = XFS_DEFAULT_LOG_COUNT; - resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(mp); resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT; diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h index 1097d14..2d5bdfc 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.h +++ b/fs/xfs/libxfs/xfs_trans_resv.h @@ -56,7 +56,6 @@ struct xfs_trans_resv { struct xfs_trans_res tr_growrtalloc; /* grow realtime allocations */ struct xfs_trans_res tr_growrtzero; /* grow realtime zeroing */ struct xfs_trans_res tr_growrtfree; /* grow realtime freeing */ - struct xfs_trans_res tr_qm_sbchange; /* change quota flags */ struct xfs_trans_res tr_qm_setqlim; /* adjust quota limits */ struct xfs_trans_res tr_qm_dqalloc; /* allocate quota on disk */ struct xfs_trans_res tr_qm_quotaoff; /* turn quota off */ diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 82af857..f711452 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -756,35 +756,6 @@ out: return 0; } -/* - * Dump a transaction into the log that contains no real change. This is needed - * to be able to make the log dirty or stamp the current tail LSN into the log - * during the covering operation. - * - * We cannot use an inode here for this - that will push dirty state back up - * into the VFS and then periodic inode flushing will prevent log covering from - * making progress. Hence we log a field in the superblock instead and use a - * synchronous transaction to ensure the superblock is immediately unpinned - * and can be written back. 
- */ -int -xfs_fs_log_dummy( - xfs_mount_t *mp) -{ - xfs_trans_t *tp; - int error; - - tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - xfs_mod_sb(tp); - xfs_trans_set_sync(tp); - return xfs_trans_commit(tp, 0); -} - int xfs_fs_goingdown( xfs_mount_t *mp, diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index e408bf5..2b8dcf2 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -33,6 +33,7 @@ #include "xfs_fsops.h" #include "xfs_cksum.h" #include "xfs_sysfs.h" +#include "xfs_sb.h" kmem_zone_t *xfs_log_ticket_zone; @@ -1290,9 +1291,20 @@ xfs_log_worker( struct xfs_mount *mp = log->l_mp; /* dgc: errors ignored - not fatal and nowhere to report them */ - if (xfs_log_need_covered(mp)) - xfs_fs_log_dummy(mp); - else + if (xfs_log_need_covered(mp)) { + /* + * Dump a transaction into the log that contains no real change. + * This is needed to stamp the current tail LSN into the log + * during the covering operation. + * + * We cannot use an inode here for this - that will push dirty + * state back up into the VFS and then periodic inode flushing + * will prevent log covering from making progress. Hence we + * synchronously log the superblock instead to ensure the + * superblock is immediately unpinned and can be written back. 
+ */ + xfs_sync_sb(mp, true); + } else xfs_log_force(mp, 0); /* start pushing all the metadata that is currently dirty */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 2953d46..5ef9aa2 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -408,11 +408,11 @@ xfs_update_alignment(xfs_mount_t *mp) if (xfs_sb_version_hasdalign(sbp)) { if (sbp->sb_unit != mp->m_dalign) { sbp->sb_unit = mp->m_dalign; - mp->m_update_flags |= XFS_SB_UNIT; + mp->m_update_sb = true; } if (sbp->sb_width != mp->m_swidth) { sbp->sb_width = mp->m_swidth; - mp->m_update_flags |= XFS_SB_WIDTH; + mp->m_update_sb = true; } } else { xfs_warn(mp, @@ -583,38 +583,19 @@ int xfs_mount_reset_sbqflags( struct xfs_mount *mp) { - int error; - struct xfs_trans *tp; - mp->m_qflags = 0; - /* - * It is OK to look at sb_qflags here in mount path, - * without m_sb_lock. - */ + /* It is OK to look at sb_qflags in the mount path without m_sb_lock. */ if (mp->m_sb.sb_qflags == 0) return 0; spin_lock(&mp->m_sb_lock); mp->m_sb.sb_qflags = 0; spin_unlock(&mp->m_sb_lock); - /* - * If the fs is readonly, let the incore superblock run - * with quotas off but don't flush the update out to disk - */ - if (mp->m_flags & XFS_MOUNT_RDONLY) + if (!xfs_fs_writable(mp, SB_FREEZE_WRITE)) return 0; - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_sbchange, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - xfs_alert(mp, "%s: Superblock update failed!", __func__); - return error; - } - - xfs_mod_sb(tp); - return xfs_trans_commit(tp, 0); + return xfs_sync_sb(mp, false); } __uint64_t @@ -678,7 +659,7 @@ xfs_mountfs( xfs_warn(mp, "correcting sb_features alignment problem"); sbp->sb_features2 |= sbp->sb_bad_features2; sbp->sb_bad_features2 = sbp->sb_features2; - mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2; + mp->m_update_sb = true; /* * Re-check for ATTR2 in case it was found in bad_features2 @@ -692,17 +673,17 @@ xfs_mountfs( if 
(xfs_sb_version_hasattr2(&mp->m_sb) && (mp->m_flags & XFS_MOUNT_NOATTR2)) { xfs_sb_version_removeattr2(&mp->m_sb); - mp->m_update_flags |= XFS_SB_FEATURES2; + mp->m_update_sb = true; /* update sb_versionnum for the clearing of the morebits */ if (!sbp->sb_features2) - mp->m_update_flags |= XFS_SB_VERSIONNUM; + mp->m_update_sb = true; } /* always use v2 inodes by default now */ if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) { mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT; - mp->m_update_flags |= XFS_SB_VERSIONNUM; + mp->m_update_sb = true; } /* @@ -895,8 +876,8 @@ xfs_mountfs( * the next remount into writeable mode. Otherwise we would never * perform the update e.g. for the root filesystem. */ - if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { - error = xfs_mount_log_sb(mp); + if (mp->m_update_sb && !(mp->m_flags & XFS_MOUNT_RDONLY)) { + error = xfs_sync_sb(mp, false); if (error) { xfs_warn(mp, "failed to write sb changes"); goto out_rtunmount; @@ -1103,9 +1084,6 @@ xfs_fs_writable( int xfs_log_sbcount(xfs_mount_t *mp) { - xfs_trans_t *tp; - int error; - /* allow this to proceed during the freeze sequence... */ if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE)) return 0; @@ -1119,17 +1097,7 @@ xfs_log_sbcount(xfs_mount_t *mp) if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) return 0; - tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - - xfs_mod_sb(tp); - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - return error; + return xfs_sync_sb(mp, true); } /* @@ -1423,28 +1391,6 @@ xfs_freesb( } /* - * Used to log changes to the superblock unit and width fields which could - * be altered by the mount options, as well as any potential sb_features2 - * fixup. Only the first superblock is updated. 
- */ -int -xfs_mount_log_sb( - struct xfs_mount *mp) -{ - struct xfs_trans *tp; - int error; - - tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - xfs_mod_sb(tp); - return xfs_trans_commit(tp, 0); -} - -/* * If the underlying (data/log/rt) device is readonly, there are some * operations that cannot proceed. */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 28b341b..a5b2ff8 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -162,8 +162,7 @@ typedef struct xfs_mount { struct delayed_work m_reclaim_work; /* background inode reclaim */ struct delayed_work m_eofblocks_work; /* background eof blocks trimming */ - __int64_t m_update_flags; /* sb flags we need to update - on the next remount,rw */ + bool m_update_sb; /* sb needs update in mount */ int64_t m_low_space[XFS_LOWSP_MAX]; /* low free space thresholds */ struct xfs_kobj m_kobj; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index c815a80..3e81862 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -792,7 +792,7 @@ xfs_qm_qino_alloc( else mp->m_sb.sb_pquotino = (*ip)->i_ino; spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp); + xfs_log_sb(tp); if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { xfs_alert(mp, "%s failed (error %d)!", __func__, error); @@ -1445,7 +1445,7 @@ xfs_qm_mount_quotas( spin_unlock(&mp->m_sb_lock); if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { - if (xfs_qm_write_sb_changes(mp)) { + if (xfs_sync_sb(mp, false)) { /* * We could only have been turning quotas off. * We aren't in very good shape actually because @@ -1574,29 +1574,6 @@ xfs_qm_dqfree_one( xfs_qm_dqdestroy(dqp); } -/* - * Start a transaction and write the incore superblock changes to - * disk. flags parameter indicates which fields have changed. 
- */ -int -xfs_qm_write_sb_changes( - struct xfs_mount *mp) -{ - xfs_trans_t *tp; - int error; - - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_sbchange, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - - xfs_mod_sb(tp); - return xfs_trans_commit(tp, 0); -} - - /* --------------- utility functions for vnodeops ---------------- */ diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index bddd23f..d6e4d88 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -157,7 +157,6 @@ struct xfs_dquot_acct { #define XFS_QM_RTBWARNLIMIT 5 extern void xfs_qm_destroy_quotainfo(struct xfs_mount *); -extern int xfs_qm_write_sb_changes(struct xfs_mount *); /* dquot stuff */ extern void xfs_qm_dqpurge_all(struct xfs_mount *, uint); diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 8d7e5f0..b8a565e 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -92,7 +92,7 @@ xfs_qm_scall_quotaoff( mutex_unlock(&q->qi_quotaofflock); /* XXX what to do if error ? Revert back to old vals incore ? */ - return xfs_qm_write_sb_changes(mp); + return xfs_sync_sb(mp, false); } dqtype = 0; @@ -369,7 +369,8 @@ xfs_qm_scall_quotaon( if ((qf & flags) == flags) return -EEXIST; - if ((error = xfs_qm_write_sb_changes(mp))) + error = xfs_sync_sb(mp, false); + if (error) return error; /* * If we aren't trying to switch on quota enforcement, we are done. @@ -796,7 +797,7 @@ xfs_qm_log_quotaoff( mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp); + xfs_log_sb(tp); /* * We have to make sure that the transaction is secure on disk before we diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 6fb2989..a3b791b 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1257,13 +1257,13 @@ xfs_fs_remount( * If this is the first remount to writeable state we * might have some superblock changes to update. 
*/ - if (mp->m_update_flags) { - error = xfs_mount_log_sb(mp); + if (mp->m_update_sb) { + error = xfs_sync_sb(mp, false); if (error) { xfs_warn(mp, "failed to write sb changes"); return error; } - mp->m_update_flags = 0; + mp->m_update_sb = false; } /* @@ -1293,8 +1293,9 @@ xfs_fs_remount( /* * Second stage of a freeze. The data is already frozen so we only - * need to take care of the metadata. Once that's done write a dummy - * record to dirty the log in case of a crash while frozen. + * need to take care of the metadata. Once that's done sync the superblock + * to the log to dirty it in case of a crash while frozen. This ensures that we + * will recover the unlinked inode lists on the next mount. */ STATIC int xfs_fs_freeze( @@ -1304,7 +1305,7 @@ xfs_fs_freeze( xfs_save_resvblks(mp); xfs_quiesce_attr(mp); - return xfs_fs_log_dummy(mp); + return xfs_sync_sb(mp, true); } STATIC int -- cgit v0.10.2 From 074e427ba7f7398427e4f8e2aec071edcc509673 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 22 Jan 2015 09:10:33 +1100 Subject: xfs: sanitise sb_bad_features2 handling We currently have to ensure that every time we update sb_features2 that we update sb_bad_features2. Now that we log and format the superblock in its entirety we actually don't have to care because we can simply update the sb_bad_features2 when we format it into the buffer. This removes the need for anything but the mount and superblock formatting code to care about sb_bad_features2, and hence removes the possibility that we forget to update bad_features2 when necessary in the future.
Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index fbd6da2..749c861 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -151,10 +151,13 @@ typedef struct xfs_sb { __uint32_t sb_features2; /* additional feature bits */ /* - * bad features2 field as a result of failing to pad the sb - * structure to 64 bits. Some machines will be using this field - * for features2 bits. Easiest just to mark it bad and not use - * it for anything else. + * bad features2 field as a result of failing to pad the sb structure to + * 64 bits. Some machines will be using this field for features2 bits. + * Easiest just to mark it bad and not use it for anything else. + * + * This is not kept up to date in memory; it is always overwritten by + * the value in sb_features2 when formatting the incore superblock to + * the disk buffer. */ __uint32_t sb_bad_features2; @@ -453,13 +456,11 @@ static inline void xfs_sb_version_addattr2(struct xfs_sb *sbp) { sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT; - sbp->sb_bad_features2 |= XFS_SB_VERSION2_ATTR2BIT; } static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp) { sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; - sbp->sb_bad_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; if (!sbp->sb_features2) sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; } @@ -475,7 +476,6 @@ static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp) { sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT; - sbp->sb_bad_features2 |= XFS_SB_VERSION2_PROJID32BIT; } /* diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 63f8148..b0a5fe9 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -497,7 +497,6 @@ xfs_sb_to_disk( to->sb_fdblocks = cpu_to_be64(from->sb_fdblocks); to->sb_frextents = 
cpu_to_be64(from->sb_frextents); - to->sb_flags = from->sb_flags; to->sb_shared_vn = from->sb_shared_vn; to->sb_inoalignmt = cpu_to_be32(from->sb_inoalignmt); @@ -507,6 +506,13 @@ xfs_sb_to_disk( to->sb_logsectlog = from->sb_logsectlog; to->sb_logsectsize = cpu_to_be16(from->sb_logsectsize); to->sb_logsunit = cpu_to_be32(from->sb_logsunit); + + /* + * We need to ensure that bad_features2 always matches features2. + * Hence we enforce that here rather than having to remember to do it + * everywhere else that updates features2. + */ + from->sb_bad_features2 = from->sb_features2; to->sb_features2 = cpu_to_be32(from->sb_features2); to->sb_bad_features2 = cpu_to_be32(from->sb_bad_features2); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 5ef9aa2..4fa80e6 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -640,25 +640,24 @@ xfs_mountfs( xfs_sb_mount_common(mp, sbp); /* - * Check for a mismatched features2 values. Older kernels - * read & wrote into the wrong sb offset for sb_features2 - * on some platforms due to xfs_sb_t not being 64bit size aligned - * when sb_features2 was added, which made older superblock - * reading/writing routines swap it as a 64-bit value. + * Check for a mismatched features2 values. Older kernels read & wrote + * into the wrong sb offset for sb_features2 on some platforms due to + * xfs_sb_t not being 64bit size aligned when sb_features2 was added, + * which made older superblock reading/writing routines swap it as a + * 64-bit value. * * For backwards compatibility, we make both slots equal. * - * If we detect a mismatched field, we OR the set bits into the - * existing features2 field in case it has already been modified; we - * don't want to lose any features. We then update the bad location - * with the ORed value so that older kernels will see any features2 - * flags, and mark the two fields as needing updates once the - * transaction subsystem is online. 
+ * If we detect a mismatched field, we OR the set bits into the existing + * features2 field in case it has already been modified; we don't want + * to lose any features. We then update the bad location with the ORed + * value so that older kernels will see any features2 flags. The + * superblock writeback code ensures the new sb_features2 is copied to + * sb_bad_features2 before it is logged or written to disk. */ if (xfs_sb_has_mismatched_features2(sbp)) { xfs_warn(mp, "correcting sb_features alignment problem"); sbp->sb_features2 |= sbp->sb_bad_features2; - sbp->sb_bad_features2 = sbp->sb_features2; mp->m_update_sb = true; /* -- cgit v0.10.2 From 0d612fb570b71ea2e49554a770cff4c489018b2c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 22 Jan 2015 09:29:05 +1100 Subject: xfs: ensure buffer types are set correctly Jan Kara reported that log recovery was finding buffers with invalid types in them. This should not happen, and indicates a bug in the logging of buffers. To catch this, add asserts to the buffer formatting code to ensure that the buffer type is in range when the transaction is committed. We don't set a type on buffers being marked stale - they are not going to get replayed, the format item exists only for recovery to be able to prevent replay of the buffer, so the type does not matter. Hence that needs special casing here. 
cc: # 3.10 to current Reported-by: Jan Kara Tested-by: Jan Kara Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 3f9bd58..744352b 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -319,6 +319,10 @@ xfs_buf_item_format( ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || (bip->bli_flags & XFS_BLI_STALE)); + ASSERT((bip->bli_flags & XFS_BLI_STALE) || + (xfs_blft_from_flags(&bip->__bli_format) > XFS_BLFT_UNKNOWN_BUF + && xfs_blft_from_flags(&bip->__bli_format) < XFS_BLFT_MAX_BUF)); + /* * If it is an inode buffer, transfer the in-memory state to the -- cgit v0.10.2 From f19b872b086711bb4b22c3a0f52f16aa920bcc61 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 22 Jan 2015 09:29:40 +1100 Subject: xfs: inode unlink does not set AGI buffer type This leads to log recovery throwing errors like: XFS (md0): Mounting V5 Filesystem XFS (md0): Starting recovery (logdev: internal) XFS (md0): Unknown buffer type 0! XFS (md0): _xfs_buf_ioapply: no ops on block 0xaea8802/0x1 ffff8800ffc53800: 58 41 47 49 ..... Which is the AGI buffer magic number. Ensure that we set the type appropriately in both unlink list addition and removal. 
cc: # 3.10 to current Tested-by: Jan Kara Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 41f804e..d745e1a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1995,6 +1995,7 @@ xfs_iunlink( agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket_index); + xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); return 0; @@ -2086,6 +2087,7 @@ xfs_iunlink_remove( agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket_index); + xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); } else { -- cgit v0.10.2 From fe22d552b82d7cc7de1851233ae8bef579198637 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 22 Jan 2015 09:30:06 +1100 Subject: xfs: set buf types when converting extent formats Conversion from local to extent format does not set the buffer type correctly on the new extent buffer when a symlink data is moved out of line. Fix the symlink code and leave a comment in the generic bmap code reminding us that the format-specific data copy needs to set the destination buffer type appropriately. cc: # 3.10 to current Tested-by: Jan Kara Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index b5eb474..4e20fe7 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -973,7 +973,11 @@ xfs_bmap_local_to_extents( *firstblock = args.fsbno; bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); - /* initialise the block and copy the data */ + /* + * Initialise the block and copy the data + * + * Note: init_fn must set the buffer log item type correctly! 
+ */ init_fn(tp, bp, ip, ifp); /* account for the change in fork size and log everything */ diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index c80c523..e7e26bd 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -178,6 +178,8 @@ xfs_symlink_local_to_remote( struct xfs_mount *mp = ip->i_mount; char *buf; + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SYMLINK_BUF); + if (!xfs_sb_version_hascrc(&mp->m_sb)) { bp->b_ops = NULL; memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); -- cgit v0.10.2 From 3443a3bca54588f43286b725d8648d33a38c86f1 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 22 Jan 2015 09:30:23 +1100 Subject: xfs: set superblock buffer type correctly When the superblock is modified in a transaction, the commonly modified fields are not actually copied to the superblock buffer to avoid the buffer lock becoming a serialisation point. However, there are some other operations that modify the superblock fields within the transaction that don't directly log to the superblock but rely on the changes to be applied during the transaction commit (to minimise the buffer lock hold time). When we do this, we fail to mark the buffer log item as being a superblock buffer and that can lead to the buffer not being marked with the correct type in the log and hence causing recovery issues. Fix it by setting the type correctly, similar to xfs_mod_sb()... cc: # 3.10 to current Tested-by: Jan Kara Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index fa3135b..eb90cd5 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -472,6 +472,7 @@ xfs_trans_apply_sb_deltas( whole = 1; } + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); if (whole) /* * Log the whole thing, the fields are noncontiguous.
-- cgit v0.10.2 From 4d949021aac8b63c9c3b8a83cc8a29759c7e1d5e Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 22 Jan 2015 10:04:24 +1100 Subject: xfs: remove incorrect error negation in attr_multi ioctl xfs_compat_attrmulti_by_handle() calls memdup_user() which returns a negative error code. The error code is negated by the caller and thus incorrectly converted to a positive error code. Remove the error negation such that the negative error is passed correctly back up to userspace. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index ec67728..bfc7c7c 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -423,7 +423,7 @@ xfs_compat_attrmulti_by_handle( ops = memdup_user(compat_ptr(am_hreq.ops), size); if (IS_ERR(ops)) { - error = -PTR_ERR(ops); + error = PTR_ERR(ops); goto out_dput; } -- cgit v0.10.2 From 8add71ca3fd67c5f222622711f95cfd6cec2a996 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2015 09:53:56 +1100 Subject: xfs: factor out a xfs_update_prealloc_flags() helper This logic is duplicated in xfs_file_fallocate and xfs_ioc_space, and we'll need another copy of it for pNFS block support. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 13e974e..712d312 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -127,6 +127,42 @@ xfs_iozero( return (-status); } +int +xfs_update_prealloc_flags( + struct xfs_inode *ip, + enum xfs_prealloc_flags flags) +{ + struct xfs_trans *tp; + int error; + + tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); + error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + + if (!(flags & XFS_PREALLOC_INVISIBLE)) { + ip->i_d.di_mode &= ~S_ISUID; + if (ip->i_d.di_mode & S_IXGRP) + ip->i_d.di_mode &= ~S_ISGID; + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + } + + if (flags & XFS_PREALLOC_SET) + ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; + if (flags & XFS_PREALLOC_CLEAR) + ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + if (flags & XFS_PREALLOC_SYNC) + xfs_trans_set_sync(tp); + return xfs_trans_commit(tp, 0); +} + /* * Fsync operations on directories are much simpler than on regular files, * as there is no file data to flush, and thus also no need for explicit @@ -784,8 +820,8 @@ xfs_file_fallocate( { struct inode *inode = file_inode(file); struct xfs_inode *ip = XFS_I(inode); - struct xfs_trans *tp; long error; + enum xfs_prealloc_flags flags = 0; loff_t new_size = 0; if (!S_ISREG(inode->i_mode)) @@ -822,6 +858,8 @@ xfs_file_fallocate( if (error) goto out_unlock; } else { + flags |= XFS_PREALLOC_SET; + if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) { new_size = offset + len; @@ -839,28 +877,10 @@ xfs_file_fallocate( goto out_unlock; } - tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); - error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); - if (error) { - 
xfs_trans_cancel(tp, 0); - goto out_unlock; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - ip->i_d.di_mode &= ~S_ISUID; - if (ip->i_d.di_mode & S_IXGRP) - ip->i_d.di_mode &= ~S_ISGID; - - if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE))) - ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; - - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (file->f_flags & O_DSYNC) - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); + flags |= XFS_PREALLOC_SYNC; + + error = xfs_update_prealloc_flags(ip, flags); if (error) goto out_unlock; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 4ed2ba9..bc220bc 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -377,6 +377,15 @@ int xfs_droplink(struct xfs_trans *, struct xfs_inode *); int xfs_bumplink(struct xfs_trans *, struct xfs_inode *); /* from xfs_file.c */ +enum xfs_prealloc_flags { + XFS_PREALLOC_SET = (1 << 1), + XFS_PREALLOC_CLEAR = (1 << 2), + XFS_PREALLOC_SYNC = (1 << 3), + XFS_PREALLOC_INVISIBLE = (1 << 4), +}; + +int xfs_update_prealloc_flags(struct xfs_inode *, + enum xfs_prealloc_flags); int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); int xfs_iozero(struct xfs_inode *, loff_t, size_t); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index a183198..d58bcd2 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -606,11 +606,8 @@ xfs_ioc_space( unsigned int cmd, xfs_flock64_t *bf) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; struct iattr iattr; - bool setprealloc = false; - bool clrprealloc = false; + enum xfs_prealloc_flags flags = 0; int error; /* @@ -630,6 +627,11 @@ xfs_ioc_space( if (!S_ISREG(inode->i_mode)) return -EINVAL; + if (filp->f_flags & O_DSYNC) + flags |= XFS_PREALLOC_SYNC; + if (ioflags & XFS_IO_INVIS) + flags |= XFS_PREALLOC_INVISIBLE; + error = mnt_want_write_file(filp); if (error) return error; @@ -673,25 +675,23 @@ 
xfs_ioc_space( } if (bf->l_start < 0 || - bf->l_start > mp->m_super->s_maxbytes || + bf->l_start > inode->i_sb->s_maxbytes || bf->l_start + bf->l_len < 0 || - bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) { + bf->l_start + bf->l_len >= inode->i_sb->s_maxbytes) { error = -EINVAL; goto out_unlock; } switch (cmd) { case XFS_IOC_ZERO_RANGE: + flags |= XFS_PREALLOC_SET; error = xfs_zero_file_space(ip, bf->l_start, bf->l_len); - if (!error) - setprealloc = true; break; case XFS_IOC_RESVSP: case XFS_IOC_RESVSP64: + flags |= XFS_PREALLOC_SET; error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len, XFS_BMAPI_PREALLOC); - if (!error) - setprealloc = true; break; case XFS_IOC_UNRESVSP: case XFS_IOC_UNRESVSP64: @@ -701,6 +701,7 @@ xfs_ioc_space( case XFS_IOC_ALLOCSP64: case XFS_IOC_FREESP: case XFS_IOC_FREESP64: + flags |= XFS_PREALLOC_CLEAR; if (bf->l_start > XFS_ISIZE(ip)) { error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), bf->l_start - XFS_ISIZE(ip), 0); @@ -712,8 +713,6 @@ xfs_ioc_space( iattr.ia_size = bf->l_start; error = xfs_setattr_size(ip, &iattr); - if (!error) - clrprealloc = true; break; default: ASSERT(0); @@ -723,32 +722,7 @@ xfs_ioc_space( if (error) goto out_unlock; - tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - goto out_unlock; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - - if (!(ioflags & XFS_IO_INVIS)) { - ip->i_d.di_mode &= ~S_ISUID; - if (ip->i_d.di_mode & S_IXGRP) - ip->i_d.di_mode &= ~S_ISGID; - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - } - - if (setprealloc) - ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; - else if (clrprealloc) - ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (filp->f_flags & O_DSYNC) - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); + error = xfs_update_prealloc_flags(ip, flags); out_unlock: xfs_iunlock(ip, 
XFS_IOLOCK_EXCL); -- cgit v0.10.2 From f3d215526e6955028dfbbfd446db8716275fb0c7 Mon Sep 17 00:00:00 2001 From: "Wang, Yalin" Date: Mon, 2 Feb 2015 09:54:18 +1100 Subject: xfs: change kmem_free to use generic kvfree() Change kmem_free to use kvfree() generic function, remove the duplicated code. Signed-off-by: Yalin Wang Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 53e95b2..a7a3a63 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -91,16 +91,6 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags) return ptr; } -void -kmem_free(const void *ptr) -{ - if (!is_vmalloc_addr(ptr)) { - kfree(ptr); - } else { - vfree(ptr); - } -} - void * kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, xfs_km_flags_t flags) diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 64db0e5..cc6b768 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -63,7 +63,10 @@ kmem_flags_convert(xfs_km_flags_t flags) extern void *kmem_alloc(size_t, xfs_km_flags_t); extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t); extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t); -extern void kmem_free(const void *); +static inline void kmem_free(const void *ptr) +{ + kvfree(ptr); +} extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); -- cgit v0.10.2 From 2ba66237029d1ad6c1a5e2241b0ffbbfff55f750 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2015 10:02:09 +1100 Subject: xfs: don't allocate an ioend for direct I/O completions Back in the days when the direct I/O ->end_io callback could be called from interrupt context for AIO we needed a structure to hand off to the workqueue, and reused the ioend structure for this purpose. These days ->end_io is always called from user or workqueue context, which allows us to avoid this memory allocation and simplify the code significantly. [dchinner: removed now unused xfs_finish_ioend_sync() function after Brian Foster did an initial review. 
] Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 18e2f3b..3a9b7a1 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -135,30 +135,22 @@ xfs_setfilesize_trans_alloc( */ STATIC int xfs_setfilesize( - struct xfs_ioend *ioend) + struct xfs_inode *ip, + struct xfs_trans *tp, + xfs_off_t offset, + size_t size) { - struct xfs_inode *ip = XFS_I(ioend->io_inode); - struct xfs_trans *tp = ioend->io_append_trans; xfs_fsize_t isize; - /* - * The transaction may have been allocated in the I/O submission thread, - * thus we need to mark ourselves as beeing in a transaction manually. - * Similarly for freeze protection. - */ - current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); - rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], - 0, 1, _THIS_IP_); - xfs_ilock(ip, XFS_ILOCK_EXCL); - isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); + isize = xfs_new_eof(ip, offset + size); if (!isize) { xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_trans_cancel(tp, 0); return 0; } - trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); + trace_xfs_setfilesize(ip, offset, size); ip->i_d.di_size = isize; xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); @@ -167,6 +159,25 @@ xfs_setfilesize( return xfs_trans_commit(tp, 0); } +STATIC int +xfs_setfilesize_ioend( + struct xfs_ioend *ioend) +{ + struct xfs_inode *ip = XFS_I(ioend->io_inode); + struct xfs_trans *tp = ioend->io_append_trans; + + /* + * The transaction may have been allocated in the I/O submission thread, + * thus we need to mark ourselves as being in a transaction manually. + * Similarly for freeze protection. + */ + current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); + rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], + 0, 1, _THIS_IP_); + + return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); +} + /* * Schedule IO completion handling on the final put of an ioend. 
* @@ -182,8 +193,7 @@ xfs_finish_ioend( if (ioend->io_type == XFS_IO_UNWRITTEN) queue_work(mp->m_unwritten_workqueue, &ioend->io_work); - else if (ioend->io_append_trans || - (ioend->io_isdirect && xfs_ioend_is_append(ioend))) + else if (ioend->io_append_trans) queue_work(mp->m_data_workqueue, &ioend->io_work); else xfs_destroy_ioend(ioend); @@ -215,22 +225,8 @@ xfs_end_io( if (ioend->io_type == XFS_IO_UNWRITTEN) { error = xfs_iomap_write_unwritten(ip, ioend->io_offset, ioend->io_size); - } else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) { - /* - * For direct I/O we do not know if we need to allocate blocks - * or not so we can't preallocate an append transaction as that - * results in nested reservations and log space deadlocks. Hence - * allocate the transaction here. While this is sub-optimal and - * can block IO completion for some time, we're stuck with doing - * it this way until we can pass the ioend to the direct IO - * allocation callbacks and avoid nesting that way. - */ - error = xfs_setfilesize_trans_alloc(ioend); - if (error) - goto done; - error = xfs_setfilesize(ioend); } else if (ioend->io_append_trans) { - error = xfs_setfilesize(ioend); + error = xfs_setfilesize_ioend(ioend); } else { ASSERT(!xfs_ioend_is_append(ioend)); } @@ -242,17 +238,6 @@ done: } /* - * Call IO completion handling in caller context on the final put of an ioend. - */ -STATIC void -xfs_finish_ioend_sync( - struct xfs_ioend *ioend) -{ - if (atomic_dec_and_test(&ioend->io_remaining)) - xfs_end_io(&ioend->io_work); -} - -/* * Allocate and initialise an IO completion structure. * We need to track unwritten extent write completion here initially. * We'll need to extend this for updating the ondisk inode size later @@ -273,7 +258,6 @@ xfs_alloc_ioend( * all the I/O from calling the completion routine too early. 
*/ atomic_set(&ioend->io_remaining, 1); - ioend->io_isdirect = 0; ioend->io_error = 0; ioend->io_list = NULL; ioend->io_type = type; @@ -1459,11 +1443,7 @@ xfs_get_blocks_direct( * * If the private argument is non-NULL __xfs_get_blocks signals us that we * need to issue a transaction to convert the range from unwritten to written - * extents. In case this is regular synchronous I/O we just call xfs_end_io - * to do this and we are done. But in case this was a successful AIO - * request this handler is called from interrupt context, from which we - * can't start transactions. In that case offload the I/O completion to - * the workqueues we also use for buffered I/O completion. + * extents. */ STATIC void xfs_end_io_direct_write( @@ -1472,7 +1452,12 @@ xfs_end_io_direct_write( ssize_t size, void *private) { - struct xfs_ioend *ioend = iocb->private; + struct inode *inode = file_inode(iocb->ki_filp); + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + + if (XFS_FORCED_SHUTDOWN(mp)) + return; /* * While the generic direct I/O code updates the inode size, it does @@ -1480,22 +1465,33 @@ xfs_end_io_direct_write( * end_io handler thinks the on-disk size is outside the in-core * size. To prevent this just update it a little bit earlier here. */ - if (offset + size > i_size_read(ioend->io_inode)) - i_size_write(ioend->io_inode, offset + size); + if (offset + size > i_size_read(inode)) + i_size_write(inode, offset + size); /* - * blockdev_direct_IO can return an error even after the I/O - * completion handler was called. Thus we need to protect - * against double-freeing. + * For direct I/O we do not know if we need to allocate blocks or not, + * so we can't preallocate an append transaction, as that results in + * nested reservations and log space deadlocks. Hence allocate the + * transaction here. 
While this is sub-optimal and can block IO + * completion for some time, we're stuck with doing it this way until + * we can pass the ioend to the direct IO allocation callbacks and + * avoid nesting that way. */ - iocb->private = NULL; - - ioend->io_offset = offset; - ioend->io_size = size; - if (private && size > 0) - ioend->io_type = XFS_IO_UNWRITTEN; + if (private && size > 0) { + xfs_iomap_write_unwritten(ip, offset, size); + } else if (offset + size > ip->i_d.di_size) { + struct xfs_trans *tp; + int error; + + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return; + } - xfs_finish_ioend_sync(ioend); + xfs_setfilesize(ip, tp, offset, size); + } } STATIC ssize_t @@ -1507,39 +1503,16 @@ xfs_vm_direct_IO( { struct inode *inode = iocb->ki_filp->f_mapping->host; struct block_device *bdev = xfs_find_bdev_for_inode(inode); - struct xfs_ioend *ioend = NULL; - ssize_t ret; if (rw & WRITE) { - size_t size = iov_iter_count(iter); - - /* - * We cannot preallocate a size update transaction here as we - * don't know whether allocation is necessary or not. Hence we - * can only tell IO completion that one is necessary if we are - * not doing unwritten extent conversion. 
- */ - iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT); - if (offset + size > XFS_I(inode)->i_d.di_size) - ioend->io_isdirect = 1; - - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, + return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset, xfs_get_blocks_direct, xfs_end_io_direct_write, NULL, DIO_ASYNC_EXTEND); - if (ret != -EIOCBQUEUED && iocb->private) - goto out_destroy_ioend; - } else { - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, - offset, xfs_get_blocks_direct, - NULL, NULL, 0); } - - return ret; - -out_destroy_ioend: - xfs_destroy_ioend(ioend); - return ret; + return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, + offset, xfs_get_blocks_direct, + NULL, NULL, 0); } /* diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index f94dd45..ac644e0 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -24,14 +24,12 @@ extern mempool_t *xfs_ioend_pool; * Types of I/O for bmap clustering and I/O completion tracking. */ enum { - XFS_IO_DIRECT = 0, /* special case for direct I/O ioends */ XFS_IO_DELALLOC, /* covers delalloc region */ XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */ XFS_IO_OVERWRITE, /* covers already allocated extent */ }; #define XFS_IO_TYPES \ - { 0, "" }, \ { XFS_IO_DELALLOC, "delalloc" }, \ { XFS_IO_UNWRITTEN, "unwritten" }, \ { XFS_IO_OVERWRITE, "overwrite" } @@ -45,7 +43,6 @@ typedef struct xfs_ioend { unsigned int io_type; /* delalloc / unwritten */ int io_error; /* I/O error code */ atomic_t io_remaining; /* hold count */ - unsigned int io_isdirect : 1;/* direct I/O */ struct inode *io_inode; /* file being written to */ struct buffer_head *io_buffer_head;/* buffer linked list head */ struct buffer_head *io_buffer_tail;/* buffer linked list tail */ -- cgit v0.10.2 From 817b6c480e330a5325ed9acb0cef8143923a52de Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 2 Feb 2015 10:14:04 +1100 Subject: xfs: FSX_NONBLOCK is not used It is set if the filp is set to non-blocking, but the
flag is not used anywhere. Hence we can kill it. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index a183198..0c0e74f 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1016,7 +1016,6 @@ xfs_diflags_to_linux( #define FSX_PROJID 1 #define FSX_EXTSIZE 2 #define FSX_XFLAGS 4 -#define FSX_NONBLOCK 8 STATIC int xfs_ioctl_setattr( @@ -1299,8 +1298,6 @@ xfs_ioc_fssetxattr( return -EFAULT; mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID; - if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) - mask |= FSX_NONBLOCK; error = mnt_want_write_file(filp); if (error) @@ -1343,8 +1340,6 @@ xfs_ioc_setxflags( return -EOPNOTSUPP; mask = FSX_XFLAGS; - if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) - mask |= FSX_NONBLOCK; fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); error = mnt_want_write_file(filp); -- cgit v0.10.2 From 29a17c00d4b1b8eab61b85b71cb5a83455a7dc5e Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 2 Feb 2015 10:14:25 +1100 Subject: xfs: separate xflags from xfs_ioctl_setattr The setting of the extended flags is done through two separate interfaces, but they are munged together into xfs_ioctl_setattr and make that function far more complex than it needs to be. Separate it out into a helper function along with all the other common inode changes and transaction manipulations in xfs_ioctl_setattr(). Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 0c0e74f..b0064bd 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1013,6 +1013,44 @@ xfs_diflags_to_linux( inode->i_flags &= ~S_NOATIME; } +static int +xfs_ioctl_setattr_xflags( + struct xfs_trans *tp, + struct xfs_inode *ip, + struct fsxattr *fa) +{ + struct xfs_mount *mp = ip->i_mount; + + /* Can't change realtime flag if any extents are allocated.
*/ + if ((ip->i_d.di_nextents || ip->i_delayed_blks) && + XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & XFS_XFLAG_REALTIME)) + return -EINVAL; + + /* If realtime flag is set then must have realtime device */ + if (fa->fsx_xflags & XFS_XFLAG_REALTIME) { + if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 || + (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) + return -EINVAL; + } + + /* + * Can't modify an immutable/append-only file unless + * we have appropriate permission. + */ + if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) || + (fa->fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && + !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + + xfs_trans_ijoin(tp, ip, 0); + xfs_set_diflags(ip, fa->fsx_xflags); + xfs_diflags_to_linux(ip); + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + XFS_STATS_INC(xs_ig_attrchg); + return 0; +} + #define FSX_PROJID 1 #define FSX_EXTSIZE 2 #define FSX_XFLAGS 4 @@ -1159,44 +1197,9 @@ xfs_ioctl_setattr( } - if (mask & FSX_XFLAGS) { - /* - * Can't change realtime flag if any extents are allocated. - */ - if ((ip->i_d.di_nextents || ip->i_delayed_blks) && - (XFS_IS_REALTIME_INODE(ip)) != - (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { - code = -EINVAL; /* EFBIG? */ - goto error_return; - } - - /* - * If realtime flag is set then must have realtime data. - */ - if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) { - if ((mp->m_sb.sb_rblocks == 0) || - (mp->m_sb.sb_rextsize == 0) || - (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { - code = -EINVAL; - goto error_return; - } - } - - /* - * Can't modify an immutable/append-only file unless - * we have appropriate permission. 
- */ - if ((ip->i_d.di_flags & - (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) || - (fa->fsx_xflags & - (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && - !capable(CAP_LINUX_IMMUTABLE)) { - code = -EPERM; - goto error_return; - } - } - - xfs_trans_ijoin(tp, ip, 0); + code = xfs_ioctl_setattr_xflags(tp, ip, fa); + if (code) + goto error_return; /* * Change file ownership. Must be the owner or privileged. @@ -1227,11 +1230,6 @@ xfs_ioctl_setattr( } - if (mask & FSX_XFLAGS) { - xfs_set_diflags(ip, fa->fsx_xflags); - xfs_diflags_to_linux(ip); - } - /* * Only set the extent size hint if we've already determined that the * extent size hint should be set on the inode. If no extent size flags @@ -1246,11 +1244,6 @@ xfs_ioctl_setattr( ip->i_d.di_extsize = extsize; } - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - XFS_STATS_INC(xs_ig_attrchg); - /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. -- cgit v0.10.2 From 8f3d17ab060ec21cead88b81c65050a6ff77e9be Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 2 Feb 2015 10:15:35 +1100 Subject: xfs: factor out xfs_ioctl_setattr transaciton preamble The setup of the transaction is done after a random smattering of checks and before another bunch of operation specific validity checks. Pull all the preamble out into a helper function that returns a transaction or error.
Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index b0064bd..0f62f5b 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1042,7 +1042,6 @@ xfs_ioctl_setattr_xflags( !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; - xfs_trans_ijoin(tp, ip, 0); xfs_set_diflags(ip, fa->fsx_xflags); xfs_diflags_to_linux(ip); xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); @@ -1051,6 +1050,54 @@ xfs_ioctl_setattr_xflags( return 0; } +/* + * Set up the transaction structure for the setattr operation, checking that we + * have permission to do so. On success, return a clean transaction and the + * inode locked exclusively ready for further operation specific checks. On + * failure, return an error without modifying or locking the inode. + */ +static struct xfs_trans * +xfs_ioctl_setattr_get_trans( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return ERR_PTR(-EROFS); + if (XFS_FORCED_SHUTDOWN(mp)) + return ERR_PTR(-EIO); + + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); + if (error) + goto out_cancel; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + + /* + * CAP_FOWNER overrides the following restrictions: + * + * The user ID of the calling process must be equal to the file owner + * ID, except in cases where the CAP_FSETID capability is applicable. 
+ */ + if (!inode_owner_or_capable(VFS_I(ip))) { + error = -EPERM; + goto out_cancel; + } + + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + + return tp; + +out_cancel: + xfs_trans_cancel(tp, 0); + return ERR_PTR(error); +} + #define FSX_PROJID 1 #define FSX_EXTSIZE 2 #define FSX_XFLAGS 4 @@ -1063,7 +1110,6 @@ xfs_ioctl_setattr( { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; - unsigned int lock_flags = 0; struct xfs_dquot *udqp = NULL; struct xfs_dquot *pdqp = NULL; struct xfs_dquot *olddquot = NULL; @@ -1071,11 +1117,6 @@ xfs_ioctl_setattr( trace_xfs_ioctl_setattr(ip); - if (mp->m_flags & XFS_MOUNT_RDONLY) - return -EROFS; - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - /* * Disallow 32bit project ids when projid32bit feature is not enabled. */ @@ -1099,28 +1140,10 @@ xfs_ioctl_setattr( return code; } - /* - * For the other attributes, we acquire the inode lock and - * first do an error checking pass. - */ - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); - code = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); - if (code) - goto error_return; - - lock_flags = XFS_ILOCK_EXCL; - xfs_ilock(ip, lock_flags); - - /* - * CAP_FOWNER overrides the following restrictions: - * - * The user ID of the calling process must be equal - * to the file owner ID, except in cases where the - * CAP_FSETID capability is applicable. - */ - if (!inode_owner_or_capable(VFS_I(ip))) { - code = -EPERM; - goto error_return; + tp = xfs_ioctl_setattr_get_trans(ip); + if (IS_ERR(tp)) { + code = PTR_ERR(tp); + goto error_free_dquots; } /* @@ -1244,20 +1267,7 @@ xfs_ioctl_setattr( ip->i_d.di_extsize = extsize; } - /* - * If this is a synchronous mount, make sure that the - * transaction goes to disk before returning to the user. - * This is slightly sub-optimal in that truncates require - * two sync transactions instead of one for wsync filesystems. 
- * One for the truncate and one for the timestamps since we - * don't want to change the timestamps unless we're sure the - * truncate worked. Truncates are less than 1% of the laddis - * mix so this probably isn't worth the trouble to optimize. - */ - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(tp); code = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, lock_flags); /* * Release any dquot(s) the inode had kept before chown. @@ -1268,12 +1278,11 @@ xfs_ioctl_setattr( return code; - error_return: +error_return: + xfs_trans_cancel(tp, 0); +error_free_dquots: xfs_qm_dqrele(udqp); xfs_qm_dqrele(pdqp); - xfs_trans_cancel(tp, 0); - if (lock_flags) - xfs_iunlock(ip, lock_flags); return code; } -- cgit v0.10.2 From f96291f6a39c2b60bede851efa059ba89e5f8277 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 2 Feb 2015 10:15:56 +1100 Subject: xfs: disaggregate xfs_ioctl_setattr xfs_ioctl_setxflags doesn't need all of the functionality in xfs_ioctl_setattr() and now we have separate helper functions that share the checks and modifications that xfs_ioctl_setxflags requires. Hence disaggregate it from xfs_ioctl_setattr() to allow further work to be done on xfs_ioctl_setattr.
Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 0f62f5b..383e61f 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1324,14 +1324,14 @@ xfs_ioc_getxflags( STATIC int xfs_ioc_setxflags( - xfs_inode_t *ip, + struct xfs_inode *ip, struct file *filp, void __user *arg) { + struct xfs_trans *tp; struct fsxattr fa; unsigned int flags; - unsigned int mask; - int error; + int error; if (copy_from_user(&flags, arg, sizeof(flags))) return -EFAULT; @@ -1341,13 +1341,26 @@ xfs_ioc_setxflags( FS_SYNC_FL)) return -EOPNOTSUPP; - mask = FSX_XFLAGS; fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); error = mnt_want_write_file(filp); if (error) return error; - error = xfs_ioctl_setattr(ip, &fa, mask); + + tp = xfs_ioctl_setattr_get_trans(ip); + if (IS_ERR(tp)) { + error = PTR_ERR(tp); + goto out_drop_write; + } + + error = xfs_ioctl_setattr_xflags(tp, ip, &fa); + if (error) { + xfs_trans_cancel(tp, 0); + goto out_drop_write; + } + + error = xfs_trans_commit(tp, 0); +out_drop_write: mnt_drop_write_file(filp); return error; } -- cgit v0.10.2 From fd179b9c3bdab682ae5bb3e10380a31853be179a Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 2 Feb 2015 10:16:25 +1100 Subject: xfs: kill xfs_ioctl_setattr behaviour mask Now there is only one caller to xfs_ioctl_setattr that uses all the functionality of the function we can kill the behaviour mask and start cleaning up the code.
Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 383e61f..a1f2854 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1098,15 +1098,10 @@ out_cancel: return ERR_PTR(error); } -#define FSX_PROJID 1 -#define FSX_EXTSIZE 2 -#define FSX_XFLAGS 4 - STATIC int xfs_ioctl_setattr( xfs_inode_t *ip, - struct fsxattr *fa, - int mask) + struct fsxattr *fa) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; @@ -1120,8 +1115,8 @@ xfs_ioctl_setattr( /* * Disallow 32bit project ids when projid32bit feature is not enabled. */ - if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) && - !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) + if (fa->fsx_projid > (__uint16_t)-1 && + !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) return -EINVAL; /* @@ -1132,7 +1127,7 @@ xfs_ioctl_setattr( * If the IDs do change before we take the ilock, we're covered * because the i_*dquot fields will get updated anyway. */ - if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { + if (XFS_IS_QUOTA_ON(mp)) { code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, ip->i_d.di_gid, fa->fsx_projid, XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp); @@ -1151,72 +1146,53 @@ xfs_ioctl_setattr( * Only allow changing of projid from init_user_ns since it is a * non user namespace aware identifier. */ - if (mask & FSX_PROJID) { - if (current_user_ns() != &init_user_ns) { - code = -EINVAL; - goto error_return; - } - - if (XFS_IS_QUOTA_RUNNING(mp) && - XFS_IS_PQUOTA_ON(mp) && - xfs_get_projid(ip) != fa->fsx_projid) { - ASSERT(tp); - code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, - pdqp, capable(CAP_FOWNER) ? - XFS_QMOPT_FORCE_RES : 0); - if (code) /* out of quota */ - goto error_return; - } + if (current_user_ns() != &init_user_ns) { + code = -EINVAL; + goto error_return; } - if (mask & FSX_EXTSIZE) { - /* - * Can't change extent size if any extents are allocated. 
- */ - if (ip->i_d.di_nextents && - ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != - fa->fsx_extsize)) { - code = -EINVAL; /* EFBIG? */ + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) && + xfs_get_projid(ip) != fa->fsx_projid) { + code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, pdqp, + capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); + if (code) /* out of quota */ goto error_return; - } + } - /* - * Extent size must be a multiple of the appropriate block - * size, if set at all. It must also be smaller than the - * maximum extent size supported by the filesystem. - * - * Also, for non-realtime files, limit the extent size hint to - * half the size of the AGs in the filesystem so alignment - * doesn't result in extents larger than an AG. - */ - if (fa->fsx_extsize != 0) { - xfs_extlen_t size; - xfs_fsblock_t extsize_fsb; + /* Can't change extent size if any extents are allocated. */ + code = -EINVAL; + if (ip->i_d.di_nextents && + ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) + goto error_return; - extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); - if (extsize_fsb > MAXEXTLEN) { - code = -EINVAL; - goto error_return; - } - - if (XFS_IS_REALTIME_INODE(ip) || - ((mask & FSX_XFLAGS) && - (fa->fsx_xflags & XFS_XFLAG_REALTIME))) { - size = mp->m_sb.sb_rextsize << - mp->m_sb.sb_blocklog; - } else { - size = mp->m_sb.sb_blocksize; - if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { - code = -EINVAL; - goto error_return; - } - } - - if (fa->fsx_extsize % size) { - code = -EINVAL; + /* + * Extent size must be a multiple of the appropriate block size, if set + * at all. It must also be smaller than the maximum extent size + * supported by the filesystem. + * + * Also, for non-realtime files, limit the extent size hint to half the + * size of the AGs in the filesystem so alignment doesn't result in + * extents larger than an AG. 
+ */ + if (fa->fsx_extsize != 0) { + xfs_extlen_t size; + xfs_fsblock_t extsize_fsb; + + extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); + if (extsize_fsb > MAXEXTLEN) + goto error_return; + + if (XFS_IS_REALTIME_INODE(ip) || + (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { + size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; + } else { + size = mp->m_sb.sb_blocksize; + if (extsize_fsb > mp->m_sb.sb_agblocks / 2) goto error_return; - } } + + if (fa->fsx_extsize % size) + goto error_return; } @@ -1225,32 +1201,25 @@ xfs_ioctl_setattr( goto error_return; /* - * Change file ownership. Must be the owner or privileged. + * Change file ownership. Must be the owner or privileged. CAP_FSETID + * overrides the following restrictions: + * + * The set-user-ID and set-group-ID bits of a file will be cleared upon + * successful return from chown() */ - if (mask & FSX_PROJID) { - /* - * CAP_FSETID overrides the following restrictions: - * - * The set-user-ID and set-group-ID bits of a file will be - * cleared upon successful return from chown() - */ - if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && - !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID)) - ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); - /* - * Change the ownerships and register quota modifications - * in the transaction. 
- */ - if (xfs_get_projid(ip) != fa->fsx_projid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { - olddquot = xfs_qm_vop_chown(tp, ip, - &ip->i_pdquot, pdqp); - } - ASSERT(ip->i_d.di_version > 1); - xfs_set_projid(ip, fa->fsx_projid); - } + if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && + !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID)) + ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); + /* Change the ownerships and register project quota modifications */ + if (xfs_get_projid(ip) != fa->fsx_projid) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { + olddquot = xfs_qm_vop_chown(tp, ip, + &ip->i_pdquot, pdqp); + } + ASSERT(ip->i_d.di_version > 1); + xfs_set_projid(ip, fa->fsx_projid); } /* @@ -1258,14 +1227,10 @@ xfs_ioctl_setattr( * extent size hint should be set on the inode. If no extent size flags * are set on the inode then unconditionally clear the extent size hint. */ - if (mask & FSX_EXTSIZE) { - int extsize = 0; - - if (ip->i_d.di_flags & - (XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT)) - extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; - ip->i_d.di_extsize = extsize; - } + if (ip->i_d.di_flags & (XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT)) + ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; + else + ip->i_d.di_extsize = 0; code = xfs_trans_commit(tp, 0); @@ -1293,18 +1258,15 @@ xfs_ioc_fssetxattr( void __user *arg) { struct fsxattr fa; - unsigned int mask; int error; if (copy_from_user(&fa, arg, sizeof(fa))) return -EFAULT; - mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID; - error = mnt_want_write_file(filp); if (error) return error; - error = xfs_ioctl_setattr(ip, &fa, mask); + error = xfs_ioctl_setattr(ip, &fa); mnt_drop_write_file(filp); return error; } -- cgit v0.10.2 From 41c145271d79eae508321340b727d3e3c9a66664 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 2 Feb 2015 10:17:51 +1100 Subject: xfs: XFS_IOCTL_SETXATTR can run in user namespaces Currently XFS_IOCTL_SETXATTR will fail if run in a user namespace as it is
not allowed to change project IDs. The current code, however, also prevents any other change being made as well, so things like extent size hints cannot be set in user namespaces. This is wrong, so only disallow access to project IDs and related flags from outside the init namespace. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index a1f2854..b65817c 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1120,6 +1120,19 @@ xfs_ioctl_setattr( return -EINVAL; /* + * Project Quota ID state is only allowed to change from within the init + * namespace. Enforce that restriction only if we are trying to change + * the quota ID state. Everything else is allowed in user namespaces. + */ + if (current_user_ns() != &init_user_ns) { + if (xfs_get_projid(ip) != fa->fsx_projid) + return -EINVAL; + if ((fa->fsx_xflags & XFS_XFLAG_PROJINHERIT) != + (ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)) + return -EINVAL; + } + + /* * If disk quotas is on, we make sure that the dquots do exist on disk, * before we start any other transactions. Trying to do this later * is messy. We don't care to take a readlock to look at the ids @@ -1141,15 +1154,6 @@ xfs_ioctl_setattr( goto error_free_dquots; } - /* - * Do a quota reservation only if projid is actually going to change. - * Only allow changing of projid from init_user_ns since it is a - * non user namespace aware identifier. - */ - if (current_user_ns() != &init_user_ns) { - code = -EINVAL; - goto error_return; - } if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) && xfs_get_projid(ip) != fa->fsx_projid) { -- cgit v0.10.2 From d4388d3c0988ec00787ad1f8e63b5e2a6abef1dc Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 2 Feb 2015 10:22:20 +1100 Subject: xfs: factor extsize hint checking out of xfs_ioctl_setattr The extent size hint change checking is fairly complex, so isolate that into its own function.
This simplifies the logic flow of the setattr code, making it easier to read. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index b65817c..9f80853 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1098,6 +1098,51 @@ out_cancel: return ERR_PTR(error); } +int +xfs_ioctl_setattr_check_extsize( + struct xfs_inode *ip, + struct fsxattr *fa) +{ + struct xfs_mount *mp = ip->i_mount; + + /* Can't change extent size if any extents are allocated. */ + if (ip->i_d.di_nextents && + ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) + return -EINVAL; + + /* + * Extent size must be a multiple of the appropriate block size, if set + * at all. It must also be smaller than the maximum extent size + * supported by the filesystem. + * + * Also, for non-realtime files, limit the extent size hint to half the + * size of the AGs in the filesystem so alignment doesn't result in + * extents larger than an AG. + */ + if (fa->fsx_extsize != 0) { + xfs_extlen_t size; + xfs_fsblock_t extsize_fsb; + + extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); + if (extsize_fsb > MAXEXTLEN) + return -EINVAL; + + if (XFS_IS_REALTIME_INODE(ip) || + (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { + size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; + } else { + size = mp->m_sb.sb_blocksize; + if (extsize_fsb > mp->m_sb.sb_agblocks / 2) + return -EINVAL; + } + + if (fa->fsx_extsize % size) + return -EINVAL; + } + return 0; +} + + STATIC int xfs_ioctl_setattr( xfs_inode_t *ip, @@ -1160,49 +1205,16 @@ xfs_ioctl_setattr( code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, pdqp, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (code) /* out of quota */ - goto error_return; - } - - /* Can't change extent size if any extents are allocated. 
*/ - code = -EINVAL; - if (ip->i_d.di_nextents && - ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) - goto error_return; - - /* - * Extent size must be a multiple of the appropriate block size, if set - * at all. It must also be smaller than the maximum extent size - * supported by the filesystem. - * - * Also, for non-realtime files, limit the extent size hint to half the - * size of the AGs in the filesystem so alignment doesn't result in - * extents larger than an AG. - */ - if (fa->fsx_extsize != 0) { - xfs_extlen_t size; - xfs_fsblock_t extsize_fsb; - - extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); - if (extsize_fsb > MAXEXTLEN) - goto error_return; - - if (XFS_IS_REALTIME_INODE(ip) || - (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { - size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; - } else { - size = mp->m_sb.sb_blocksize; - if (extsize_fsb > mp->m_sb.sb_agblocks / 2) - goto error_return; - } - - if (fa->fsx_extsize % size) - goto error_return; + goto error_trans_cancel; } + code = xfs_ioctl_setattr_check_extsize(ip, fa); + if (code) + goto error_trans_cancel; code = xfs_ioctl_setattr_xflags(tp, ip, fa); if (code) - goto error_return; + goto error_trans_cancel; /* * Change file ownership. Must be the owner or privileged. CAP_FSETID @@ -1247,7 +1259,7 @@ xfs_ioctl_setattr( return code; -error_return: +error_trans_cancel: xfs_trans_cancel(tp, 0); error_free_dquots: xfs_qm_dqrele(udqp); -- cgit v0.10.2 From 23bd0735cfdf5322170a9ef48c7d47c2e6567ba8 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 2 Feb 2015 10:22:53 +1100 Subject: xfs: factor projid hint checking out of xfs_ioctl_setattr The project ID change checking is one of the few remaining open coded checks in xfs_ioctl_setattr(). Factor it into a helper function so that the setattr code mostly becomes a flow of check and action helpers, making it easier to read and follow. 
Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 9f80853..1f186d2 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1142,6 +1142,34 @@ xfs_ioctl_setattr_check_extsize( return 0; } +int +xfs_ioctl_setattr_check_projid( + struct xfs_inode *ip, + struct fsxattr *fa) +{ + /* Disallow 32bit project ids if projid32bit feature is not enabled. */ + if (fa->fsx_projid > (__uint16_t)-1 && + !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) + return -EINVAL; + + /* + * Project Quota ID state is only allowed to change from within the init + * namespace. Enforce that restriction only if we are trying to change + * the quota ID state. Everything else is allowed in user namespaces. + */ + if (current_user_ns() == &init_user_ns) + return 0; + + if (xfs_get_projid(ip) != fa->fsx_projid) + return -EINVAL; + if ((fa->fsx_xflags & XFS_XFLAG_PROJINHERIT) != + (ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)) + return -EINVAL; + + return 0; +} + + STATIC int xfs_ioctl_setattr( @@ -1157,25 +1185,9 @@ xfs_ioctl_setattr( trace_xfs_ioctl_setattr(ip); - /* - * Disallow 32bit project ids when projid32bit feature is not enabled. - */ - if (fa->fsx_projid > (__uint16_t)-1 && - !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) - return -EINVAL; - - /* - * Project Quota ID state is only allowed to change from within the init - * namespace. Enforce that restriction only if we are trying to change - * the quota ID state. Everything else is allowed in user namespaces. 
- */ - if (current_user_ns() != &init_user_ns) { - if (xfs_get_projid(ip) != fa->fsx_projid) - return -EINVAL; - if ((fa->fsx_xflags & XFS_XFLAG_PROJINHERIT) != - (ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)) - return -EINVAL; - } + code = xfs_ioctl_setattr_check_projid(ip, fa); + if (code) + return code; /* * If disk quotas is on, we make sure that the dquots do exist on disk, -- cgit v0.10.2 From 9b94fcc39822b450af823b3d8cbef6b53ce87ed9 Mon Sep 17 00:00:00 2001 From: Iustin Pop Date: Mon, 2 Feb 2015 10:26:26 +1100 Subject: xfs: fix behaviour of XFS_IOC_FSSETXATTR on directories Currently, the ioctl handling code for XFS_IOC_FSSETXATTR treats all targets as regular files: it refuses to change the extent size if extents are allocated. This is wrong for directories, as there the extent size is only used as a default for children. The patch fixes this issue and improves validation of flag combinations: - only disallow extent size changes after extents have been allocated for regular files - only allow XFS_XFLAG_EXTSIZE for regular files - only allow XFS_XFLAG_EXTSZINHERIT for directories - automatically clear the flags if the extent size is zero Thanks to Dave Chinner for guidance on the proper fix for this issue. [dchinner: ported changes onto cleanup series. Makes changes clear and obvious.] [dchinner: added comments documenting validity checking rules.] Signed-off-by: Iustin Pop Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 1f186d2..0f6b6ab 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1098,6 +1098,20 @@ out_cancel: return ERR_PTR(error); } +/* + * extent size hint validation is somewhat cumbersome. Rules are: + * + * 1. extent size hint is only valid for directories and regular files + * 2. XFS_XFLAG_EXTSIZE is only valid for regular files + * 3. XFS_XFLAG_EXTSZINHERIT is only valid for directories. + * 4. 
can only be changed on regular files if no extents are allocated + * 5. can be changed on directories at any time + * 6. extsize hint of 0 turns off hints, clears inode flags. + * 7. Extent size must be a multiple of the appropriate block size. + * 8. for non-realtime files, the extent size hint must be limited + * to half the AG size to avoid alignment extending the extent beyond the + * limits of the AG. + */ int xfs_ioctl_setattr_check_extsize( struct xfs_inode *ip, @@ -1105,20 +1119,17 @@ xfs_ioctl_setattr_check_extsize( { struct xfs_mount *mp = ip->i_mount; - /* Can't change extent size if any extents are allocated. */ - if (ip->i_d.di_nextents && + if ((fa->fsx_xflags & XFS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode)) + return -EINVAL; + + if ((fa->fsx_xflags & XFS_XFLAG_EXTSZINHERIT) && + !S_ISDIR(ip->i_d.di_mode)) + return -EINVAL; + + if (S_ISREG(ip->i_d.di_mode) && ip->i_d.di_nextents && ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) return -EINVAL; - /* - * Extent size must be a multiple of the appropriate block size, if set - * at all. It must also be smaller than the maximum extent size - * supported by the filesystem. - * - * Also, for non-realtime files, limit the extent size hint to half the - * size of the AGs in the filesystem so alignment doesn't result in - * extents larger than an AG. 
- */ if (fa->fsx_extsize != 0) { xfs_extlen_t size; xfs_fsblock_t extsize_fsb; @@ -1138,7 +1149,9 @@ xfs_ioctl_setattr_check_extsize( if (fa->fsx_extsize % size) return -EINVAL; - } + } else + fa->fsx_xflags &= ~(XFS_XFLAG_EXTSIZE | XFS_XFLAG_EXTSZINHERIT); + return 0; } @@ -1169,8 +1182,6 @@ xfs_ioctl_setattr_check_projid( return 0; } - - STATIC int xfs_ioctl_setattr( xfs_inode_t *ip, -- cgit v0.10.2 From f8079b850c9e130423829c919f3c0802e677099d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 5 Feb 2015 11:13:21 +1100 Subject: xfs: growfs should use synchronous transactions Growfs updates the secondary superblocks using synchronous unlogged buffer writes after committing the updates to the primary superblock. Mark the transaction to the primary superblock as synchronous so that we guarantee it is committed to disk before we update the secondary superblocks. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index f711452..fba6532 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -488,6 +488,7 @@ xfs_growfs_data_private( xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, nfree); if (dpct) xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); + xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); if (error) return error; -- cgit v0.10.2 From f92090e95cd26ea0a80dc8305b685cc30f6a501f Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Thu, 5 Feb 2015 11:13:21 +1100 Subject: xfs: xfs_ioctl_setattr_check_projid can be static fs/xfs/xfs_ioctl.c:1146:1: sparse: symbol 'xfs_ioctl_setattr_check_projid' was not declared. Should it be static? Also fix xfs_ioctl_setattr_check_extsize at the same time. 
Signed-off-by: Fengguang Wu Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 0f6b6ab..f1dc90b 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1112,7 +1112,7 @@ out_cancel: * to half the AG size to avoid alignment extending the extent beyond the * limits of the AG. */ -int +static int xfs_ioctl_setattr_check_extsize( struct xfs_inode *ip, struct fsxattr *fa) @@ -1155,7 +1155,7 @@ xfs_ioctl_setattr_check_extsize( return 0; } -int +static int xfs_ioctl_setattr_check_projid( struct xfs_inode *ip, struct fsxattr *fa) -- cgit v0.10.2 From de8bd0eb699e6f20e2e2ce368fe35aa83732718b Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 6 Feb 2015 09:20:29 +1100 Subject: xfs: fix panic_mask documentation This bit of the docs didn't quite reflect reality. Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt index 5be51fd..2e568459 100644 --- a/Documentation/filesystems/xfs.txt +++ b/Documentation/filesystems/xfs.txt @@ -287,9 +287,9 @@ The following sysctls are available for the XFS filesystem: XFS_ERRLEVEL_LOW: 1 XFS_ERRLEVEL_HIGH: 5 - fs.xfs.panic_mask (Min: 0 Default: 0 Max: 127) + fs.xfs.panic_mask (Min: 0 Default: 0 Max: 255) Causes certain error conditions to call BUG(). Value is a bitmask; - AND together the tags which represent errors which should cause panics: + OR together the tags which represent errors which should cause panics: XFS_NO_PTAG 0 XFS_PTAG_IFLUSH 0x00000001 @@ -299,6 +299,7 @@ The following sysctls are available for the XFS filesystem: XFS_PTAG_SHUTDOWN_CORRUPT 0x00000010 XFS_PTAG_SHUTDOWN_IOERROR 0x00000020 XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040 + XFS_PTAG_FSBLOCK_ZERO 0x00000080 This option is intended for debugging only. 
-- cgit v0.10.2 From 01f9882eac84b85421a469cb65ee9bb555a5985f Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 6 Feb 2015 09:53:02 +1100 Subject: xfs: report proper f_files in statfs if we overshoot imaxpct Normally, a statfs syscall reports m_maxicount as f_files (total file nodes in file system) because it is supposed to be the upper limit for dynamically-allocated inodes. It's possible, however, to overshoot imaxpct / m_maxicount. If this happens, we should report the actual number of allocated inodes, which is contained in sb_icount. Add one more adjustment to the statfs code to make this happen. Reported-by: Alexander Tsvetkov Signed-off-by: Eric Sandeen Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index a3b791b..26afa43 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1111,6 +1111,11 @@ xfs_fs_statfs( statp->f_files, mp->m_maxicount); + /* If sb_icount overshot maxicount, report actual allocation */ + statp->f_files = max_t(typeof(statp->f_files), + statp->f_files, + sbp->sb_icount); + /* make sure statp->f_ffree does not underflow */ ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); statp->f_ffree = max_t(__int64_t, ffree, 0); -- cgit v0.10.2 From e9892d3cc853afdda2cc69e2576d9ddb5fafad71 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 10 Feb 2015 09:23:40 +1100 Subject: xfs: only trace buffer items if they exist The commit 2d3d0c5 ("xfs: lobotomise xfs_trans_read_buf_map()") left a landmine in the tracing code: trace_xfs_trans_read_buf() is now called on all buffers that are read through this interface rather than just buffers in transactions. For buffers outside transaction context, bp->b_fspriv is null, and so the buf log item tracing functions cannot be called. This causes a NULL pointer dereference in the trace_xfs_trans_read_buf() function when tracing is turned on.
cc: Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 0a4d4ab..7579841 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -327,9 +327,10 @@ xfs_trans_read_buf_map( return -EIO; } - if (tp) + if (tp) { _xfs_trans_bjoin(tp, bp, 1); - trace_xfs_trans_read_buf(bp->b_fspriv); + trace_xfs_trans_read_buf(bp->b_fspriv); + } *bpp = bp; return 0; -- cgit v0.10.2