From 334e580a6f97e2e84d1c19a8679603956acaa622 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 4 Jan 2016 16:44:15 +1100 Subject: fs: XFS_IOC_FS[SG]SETXATTR to FS_IOC_FS[SG]ETXATTR promotion Hoist the ioctl definitions for the XFS_IOC_FS[SG]SETXATTR API from fs/xfs/libxfs/xfs_fs.h to include/uapi/linux/fs.h so that the ioctls can be used by all filesystems, not just XFS. This enables (initially) ext4 to use the ioctl to set project IDs on inodes. Based-on-patch-from: Li Xi Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index b2b73a9..dd29d0a 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -36,38 +36,23 @@ struct dioattr { #endif /* - * Structure for XFS_IOC_FSGETXATTR[A] and XFS_IOC_FSSETXATTR. + * Flags for the bs_xflags/fsx_xflags field in FS_IOC_FS[GS]ETXATTR[A] */ -#ifndef HAVE_FSXATTR -struct fsxattr { - __u32 fsx_xflags; /* xflags field value (get/set) */ - __u32 fsx_extsize; /* extsize field value (get/set)*/ - __u32 fsx_nextents; /* nextents field value (get) */ - __u32 fsx_projid; /* project identifier (get/set) */ - unsigned char fsx_pad[12]; -}; -#endif - -/* - * Flags for the bs_xflags/fsx_xflags field - * There should be a one-to-one correspondence between these flags and the - * XFS_DIFLAG_s. - */ -#define XFS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */ -#define XFS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */ -#define XFS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */ -#define XFS_XFLAG_APPEND 0x00000010 /* all writes append */ -#define XFS_XFLAG_SYNC 0x00000020 /* all writes synchronous */ -#define XFS_XFLAG_NOATIME 0x00000040 /* do not update access time */ -#define XFS_XFLAG_NODUMP 0x00000080 /* do not include in backups */ -#define XFS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */ -#define XFS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ -#define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ -#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ -#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ -#define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ -#define XFS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ -#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ +#define XFS_XFLAG_REALTIME FS_XFLAG_REALTIME +#define XFS_XFLAG_PREALLOC FS_XFLAG_PREALLOC +#define XFS_XFLAG_IMMUTABLE FS_XFLAG_IMMUTABLE +#define XFS_XFLAG_APPEND FS_XFLAG_APPEND +#define XFS_XFLAG_SYNC FS_XFLAG_SYNC +#define XFS_XFLAG_NOATIME FS_XFLAG_NOATIME +#define XFS_XFLAG_NODUMP FS_XFLAG_NODUMP +#define XFS_XFLAG_RTINHERIT FS_XFLAG_RTINHERIT +#define XFS_XFLAG_PROJINHERIT FS_XFLAG_PROJINHERIT +#define XFS_XFLAG_NOSYMLINKS FS_XFLAG_NOSYMLINKS +#define XFS_XFLAG_EXTSIZE FS_XFLAG_EXTSIZE +#define XFS_XFLAG_EXTSZINHERIT FS_XFLAG_EXTSZINHERIT +#define XFS_XFLAG_NODEFRAG FS_XFLAG_NODEFRAG +#define XFS_XFLAG_FILESTREAM FS_XFLAG_FILESTREAM +#define XFS_XFLAG_HASATTR FS_XFLAG_HASATTR /* * Structure for XFS_IOC_GETBMAP. @@ -514,8 +499,8 @@ typedef struct xfs_swapext #define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64) #define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64) #define XFS_IOC_DIOINFO _IOR ('X', 30, struct dioattr) -#define XFS_IOC_FSGETXATTR _IOR ('X', 31, struct fsxattr) -#define XFS_IOC_FSSETXATTR _IOW ('X', 32, struct fsxattr) +#define XFS_IOC_FSGETXATTR FS_IOC_FSGETXATTR +#define XFS_IOC_FSSETXATTR FS_IOC_FSSETXATTR #define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64) #define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64) #define XFS_IOC_GETBMAP _IOWR('X', 38, struct getbmap) diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index f15d980..df175dd 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -110,6 +110,36 @@ struct inodes_stat_t { #define MS_MGC_VAL 0xC0ED0000 #define MS_MGC_MSK 0xffff0000 +/* + * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR. + */ +struct fsxattr { + __u32 fsx_xflags; /* xflags field value (get/set) */ + __u32 fsx_extsize; /* extsize field value (get/set)*/ + __u32 fsx_nextents; /* nextents field value (get) */ + __u32 fsx_projid; /* project identifier (get/set) */ + unsigned char fsx_pad[12]; +}; + +/* + * Flags for the fsx_xflags field + */ +#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */ +#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */ +#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */ +#define FS_XFLAG_APPEND 0x00000010 /* all writes append */ +#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */ +#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */ +#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */ +#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */ +#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ +#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ +#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ +#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ +#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ +#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ +#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ + /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file. */ @@ -169,6 +199,8 @@ struct inodes_stat_t { #define FS_IOC32_SETFLAGS _IOW('f', 2, int) #define FS_IOC32_GETVERSION _IOR('v', 1, int) #define FS_IOC32_SETVERSION _IOW('v', 2, int) +#define FS_IOC_FSGETXATTR _IOR ('X', 31, struct fsxattr) +#define FS_IOC_FSSETXATTR _IOW ('X', 32, struct fsxattr) /* * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) -- cgit v0.10.2 From e7b89481017b2111d188afd70bbd0da9e9b94cc9 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 4 Jan 2016 16:44:15 +1100 Subject: xfs: use FS_XFLAG definitions directly Now that the ioctls have been hoisted up to the VFS level, use the VFs definitions directly and remove the XFS specific definitions completely. Userspace is going to have to handle the change of this interface separately, so removing the definitions from xfs_fs.h is not an issue here at all. Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 8774498..f28eeab 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -984,8 +984,6 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) /* * Values for di_flags - * There should be a one-to-one correspondence between these flags and the - * XFS_XFLAG_s. */ #define XFS_DIFLAG_REALTIME_BIT 0 /* file's blocks come from rt area */ #define XFS_DIFLAG_PREALLOC_BIT 1 /* file space has been preallocated */ diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index dd29d0a..fffe3d0 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -36,25 +36,6 @@ struct dioattr { #endif /* - * Flags for the bs_xflags/fsx_xflags field in FS_IOC_FS[GS]ETXATTR[A] - */ -#define XFS_XFLAG_REALTIME FS_XFLAG_REALTIME -#define XFS_XFLAG_PREALLOC FS_XFLAG_PREALLOC -#define XFS_XFLAG_IMMUTABLE FS_XFLAG_IMMUTABLE -#define XFS_XFLAG_APPEND FS_XFLAG_APPEND -#define XFS_XFLAG_SYNC FS_XFLAG_SYNC -#define XFS_XFLAG_NOATIME FS_XFLAG_NOATIME -#define XFS_XFLAG_NODUMP FS_XFLAG_NODUMP -#define XFS_XFLAG_RTINHERIT FS_XFLAG_RTINHERIT -#define XFS_XFLAG_PROJINHERIT FS_XFLAG_PROJINHERIT -#define XFS_XFLAG_NOSYMLINKS FS_XFLAG_NOSYMLINKS -#define XFS_XFLAG_EXTSIZE FS_XFLAG_EXTSIZE -#define XFS_XFLAG_EXTSZINHERIT FS_XFLAG_EXTSZINHERIT -#define XFS_XFLAG_NODEFRAG FS_XFLAG_NODEFRAG -#define XFS_XFLAG_FILESTREAM FS_XFLAG_FILESTREAM -#define XFS_XFLAG_HASATTR FS_XFLAG_HASATTR - -/* * Structure for XFS_IOC_GETBMAP. * On input, fill in bmv_offset and bmv_length of the first structure * to indicate the area of interest in the file, and bmv_entries with diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 8ee3939..ca9ca5a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -616,33 +616,33 @@ _xfs_dic2xflags( if (di_flags & XFS_DIFLAG_ANY) { if (di_flags & XFS_DIFLAG_REALTIME) - flags |= XFS_XFLAG_REALTIME; + flags |= FS_XFLAG_REALTIME; if (di_flags & XFS_DIFLAG_PREALLOC) - flags |= XFS_XFLAG_PREALLOC; + flags |= FS_XFLAG_PREALLOC; if (di_flags & XFS_DIFLAG_IMMUTABLE) - flags |= XFS_XFLAG_IMMUTABLE; + flags |= FS_XFLAG_IMMUTABLE; if (di_flags & XFS_DIFLAG_APPEND) - flags |= XFS_XFLAG_APPEND; + flags |= FS_XFLAG_APPEND; if (di_flags & XFS_DIFLAG_SYNC) - flags |= XFS_XFLAG_SYNC; + flags |= FS_XFLAG_SYNC; if (di_flags & XFS_DIFLAG_NOATIME) - flags |= XFS_XFLAG_NOATIME; + flags |= FS_XFLAG_NOATIME; if (di_flags & XFS_DIFLAG_NODUMP) - flags |= XFS_XFLAG_NODUMP; + flags |= FS_XFLAG_NODUMP; if (di_flags & XFS_DIFLAG_RTINHERIT) - flags |= XFS_XFLAG_RTINHERIT; + flags |= FS_XFLAG_RTINHERIT; if (di_flags & XFS_DIFLAG_PROJINHERIT) - flags |= XFS_XFLAG_PROJINHERIT; + flags |= FS_XFLAG_PROJINHERIT; if (di_flags & XFS_DIFLAG_NOSYMLINKS) - flags |= XFS_XFLAG_NOSYMLINKS; + flags |= FS_XFLAG_NOSYMLINKS; if (di_flags & XFS_DIFLAG_EXTSIZE) - flags |= XFS_XFLAG_EXTSIZE; + flags |= FS_XFLAG_EXTSIZE; if (di_flags & XFS_DIFLAG_EXTSZINHERIT) - flags |= XFS_XFLAG_EXTSZINHERIT; + flags |= FS_XFLAG_EXTSZINHERIT; if (di_flags & XFS_DIFLAG_NODEFRAG) - flags |= XFS_XFLAG_NODEFRAG; + flags |= FS_XFLAG_NODEFRAG; if (di_flags & XFS_DIFLAG_FILESTREAM) - flags |= XFS_XFLAG_FILESTREAM; + flags |= FS_XFLAG_FILESTREAM; } return flags; @@ -655,7 +655,7 @@ xfs_ip2xflags( xfs_icdinode_t *dic = &ip->i_d; return _xfs_dic2xflags(dic->di_flags) | - (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0); + (XFS_IFORK_Q(ip) ? FS_XFLAG_HASATTR : 0); } uint @@ -663,7 +663,7 @@ xfs_dic2xflags( xfs_dinode_t *dip) { return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) | - (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); + (XFS_DFORK_Q(dip) ? FS_XFLAG_HASATTR : 0); } /* diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index d42738d..94b35eb3 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -859,25 +859,25 @@ xfs_merge_ioc_xflags( unsigned int xflags = start; if (flags & FS_IMMUTABLE_FL) - xflags |= XFS_XFLAG_IMMUTABLE; + xflags |= FS_XFLAG_IMMUTABLE; else - xflags &= ~XFS_XFLAG_IMMUTABLE; + xflags &= ~FS_XFLAG_IMMUTABLE; if (flags & FS_APPEND_FL) - xflags |= XFS_XFLAG_APPEND; + xflags |= FS_XFLAG_APPEND; else - xflags &= ~XFS_XFLAG_APPEND; + xflags &= ~FS_XFLAG_APPEND; if (flags & FS_SYNC_FL) - xflags |= XFS_XFLAG_SYNC; + xflags |= FS_XFLAG_SYNC; else - xflags &= ~XFS_XFLAG_SYNC; + xflags &= ~FS_XFLAG_SYNC; if (flags & FS_NOATIME_FL) - xflags |= XFS_XFLAG_NOATIME; + xflags |= FS_XFLAG_NOATIME; else - xflags &= ~XFS_XFLAG_NOATIME; + xflags &= ~FS_XFLAG_NOATIME; if (flags & FS_NODUMP_FL) - xflags |= XFS_XFLAG_NODUMP; + xflags |= FS_XFLAG_NODUMP; else - xflags &= ~XFS_XFLAG_NODUMP; + xflags &= ~FS_XFLAG_NODUMP; return xflags; } @@ -948,33 +948,33 @@ xfs_set_diflags( /* can't set PREALLOC this way, just preserve it */ di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); - if (xflags & XFS_XFLAG_IMMUTABLE) + if (xflags & FS_XFLAG_IMMUTABLE) di_flags |= XFS_DIFLAG_IMMUTABLE; - if (xflags & XFS_XFLAG_APPEND) + if (xflags & FS_XFLAG_APPEND) di_flags |= XFS_DIFLAG_APPEND; - if (xflags & XFS_XFLAG_SYNC) + if (xflags & FS_XFLAG_SYNC) di_flags |= XFS_DIFLAG_SYNC; - if (xflags & XFS_XFLAG_NOATIME) + if (xflags & FS_XFLAG_NOATIME) di_flags |= XFS_DIFLAG_NOATIME; - if (xflags & XFS_XFLAG_NODUMP) + if (xflags & FS_XFLAG_NODUMP) di_flags |= XFS_DIFLAG_NODUMP; - if (xflags & XFS_XFLAG_NODEFRAG) + if (xflags & FS_XFLAG_NODEFRAG) di_flags |= XFS_DIFLAG_NODEFRAG; - if (xflags & XFS_XFLAG_FILESTREAM) + if (xflags & FS_XFLAG_FILESTREAM) di_flags |= XFS_DIFLAG_FILESTREAM; if (S_ISDIR(ip->i_d.di_mode)) { - if (xflags & XFS_XFLAG_RTINHERIT) + if (xflags & FS_XFLAG_RTINHERIT) di_flags |= XFS_DIFLAG_RTINHERIT; - if (xflags & XFS_XFLAG_NOSYMLINKS) + if (xflags & FS_XFLAG_NOSYMLINKS) di_flags |= XFS_DIFLAG_NOSYMLINKS; - if (xflags & XFS_XFLAG_EXTSZINHERIT) + if (xflags & FS_XFLAG_EXTSZINHERIT) di_flags |= XFS_DIFLAG_EXTSZINHERIT; - if (xflags & XFS_XFLAG_PROJINHERIT) + if (xflags & FS_XFLAG_PROJINHERIT) di_flags |= XFS_DIFLAG_PROJINHERIT; } else if (S_ISREG(ip->i_d.di_mode)) { - if (xflags & XFS_XFLAG_REALTIME) + if (xflags & FS_XFLAG_REALTIME) di_flags |= XFS_DIFLAG_REALTIME; - if (xflags & XFS_XFLAG_EXTSIZE) + if (xflags & FS_XFLAG_EXTSIZE) di_flags |= XFS_DIFLAG_EXTSIZE; } @@ -988,19 +988,19 @@ xfs_diflags_to_linux( struct inode *inode = VFS_I(ip); unsigned int xflags = xfs_ip2xflags(ip); - if (xflags & XFS_XFLAG_IMMUTABLE) + if (xflags & FS_XFLAG_IMMUTABLE) inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; - if (xflags & XFS_XFLAG_APPEND) + if (xflags & FS_XFLAG_APPEND) inode->i_flags |= S_APPEND; else inode->i_flags &= ~S_APPEND; - if (xflags & XFS_XFLAG_SYNC) + if (xflags & FS_XFLAG_SYNC) inode->i_flags |= S_SYNC; else inode->i_flags &= ~S_SYNC; - if (xflags & XFS_XFLAG_NOATIME) + if (xflags & FS_XFLAG_NOATIME) inode->i_flags |= S_NOATIME; else inode->i_flags &= ~S_NOATIME; @@ -1016,11 +1016,11 @@ xfs_ioctl_setattr_xflags( /* Can't change realtime flag if any extents are allocated. */ if ((ip->i_d.di_nextents || ip->i_delayed_blks) && - XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & XFS_XFLAG_REALTIME)) + XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME)) return -EINVAL; /* If realtime flag is set then must have realtime device */ - if (fa->fsx_xflags & XFS_XFLAG_REALTIME) { + if (fa->fsx_xflags & FS_XFLAG_REALTIME) { if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 || (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) return -EINVAL; @@ -1031,7 +1031,7 @@ xfs_ioctl_setattr_xflags( * we have appropriate permission. */ if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) || - (fa->fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && + (fa->fsx_xflags & (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND))) && !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; @@ -1095,8 +1095,8 @@ out_cancel: * extent size hint validation is somewhat cumbersome. Rules are: * * 1. extent size hint is only valid for directories and regular files - * 2. XFS_XFLAG_EXTSIZE is only valid for regular files - * 3. XFS_XFLAG_EXTSZINHERIT is only valid for directories. + * 2. FS_XFLAG_EXTSIZE is only valid for regular files + * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories. * 4. can only be changed on regular files if no extents are allocated * 5. can be changed on directories at any time * 6. extsize hint of 0 turns off hints, clears inode flags. @@ -1112,10 +1112,10 @@ xfs_ioctl_setattr_check_extsize( { struct xfs_mount *mp = ip->i_mount; - if ((fa->fsx_xflags & XFS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode)) + if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode)) return -EINVAL; - if ((fa->fsx_xflags & XFS_XFLAG_EXTSZINHERIT) && + if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) && !S_ISDIR(ip->i_d.di_mode)) return -EINVAL; @@ -1132,7 +1132,7 @@ xfs_ioctl_setattr_check_extsize( return -EINVAL; if (XFS_IS_REALTIME_INODE(ip) || - (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { + (fa->fsx_xflags & FS_XFLAG_REALTIME)) { size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; } else { size = mp->m_sb.sb_blocksize; @@ -1143,7 +1143,7 @@ xfs_ioctl_setattr_check_extsize( if (fa->fsx_extsize % size) return -EINVAL; } else - fa->fsx_xflags &= ~(XFS_XFLAG_EXTSIZE | XFS_XFLAG_EXTSZINHERIT); + fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT); return 0; } @@ -1168,7 +1168,7 @@ xfs_ioctl_setattr_check_projid( if (xfs_get_projid(ip) != fa->fsx_projid) return -EINVAL; - if ((fa->fsx_xflags & XFS_XFLAG_PROJINHERIT) != + if ((fa->fsx_xflags & FS_XFLAG_PROJINHERIT) != (ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)) return -EINVAL; -- cgit v0.10.2 From 58f88ca2df7270881de2034c8286233a89efe71c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 4 Jan 2016 16:44:15 +1100 Subject: xfs: introduce per-inode DAX enablement Rather than just being able to turn DAX on and off via a mount option, some applications may only want to enable DAX for certain performance critical files in a filesystem. This patch introduces a new inode flag to enable DAX in the v3 inode di_flags2 field. It adds support for setting and clearing flags in the di_flags2 field via the XFS_IOC_FSSETXATTR ioctl, and sets the S_DAX inode flag appropriately when it is seen. When this flag is set on a directory, it acts as an "inherit flag". That is, inodes created in the directory will automatically inherit the on-disk inode DAX flag, enabling administrators to set up directory heirarchies that automatically use DAX. Setting this flag on an empty root directory will make the entire filesystem use DAX by default. Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index f28eeab..b4ae7ce 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1024,6 +1024,15 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM) /* + * Values for di_flags2 These start by being exposed to userspace in the upper + * 16 bits of the XFS_XFLAG_s range. + */ +#define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */ +#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT) + +#define XFS_DIFLAG2_ANY (XFS_DIFLAG2_DAX) + +/* * Inode number format: * low inopblog bits - offset in block * next agblklog bits - block number in ag diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ca9ca5a..8929908 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -610,7 +610,9 @@ __xfs_iflock( STATIC uint _xfs_dic2xflags( - __uint16_t di_flags) + __uint16_t di_flags, + uint64_t di_flags2, + bool has_attr) { uint flags = 0; @@ -645,25 +647,32 @@ _xfs_dic2xflags( flags |= FS_XFLAG_FILESTREAM; } + if (di_flags2 & XFS_DIFLAG2_ANY) { + if (di_flags2 & XFS_DIFLAG2_DAX) + flags |= FS_XFLAG_DAX; + } + + if (has_attr) + flags |= FS_XFLAG_HASATTR; + return flags; } uint xfs_ip2xflags( - xfs_inode_t *ip) + struct xfs_inode *ip) { - xfs_icdinode_t *dic = &ip->i_d; + struct xfs_icdinode *dic = &ip->i_d; - return _xfs_dic2xflags(dic->di_flags) | - (XFS_IFORK_Q(ip) ? FS_XFLAG_HASATTR : 0); + return _xfs_dic2xflags(dic->di_flags, dic->di_flags2, XFS_IFORK_Q(ip)); } uint xfs_dic2xflags( - xfs_dinode_t *dip) + struct xfs_dinode *dip) { - return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) | - (XFS_DFORK_Q(dip) ? FS_XFLAG_HASATTR : 0); + return _xfs_dic2xflags(be16_to_cpu(dip->di_flags), + be64_to_cpu(dip->di_flags2), XFS_DFORK_Q(dip)); } /* @@ -862,7 +871,8 @@ xfs_ialloc( case S_IFREG: case S_IFDIR: if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { - uint di_flags = 0; + uint64_t di_flags2 = 0; + uint di_flags = 0; if (S_ISDIR(mode)) { if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) @@ -898,7 +908,11 @@ xfs_ialloc( di_flags |= XFS_DIFLAG_NODEFRAG; if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM) di_flags |= XFS_DIFLAG_FILESTREAM; + if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX) + di_flags2 |= XFS_DIFLAG2_DAX; + ip->i_d.di_flags |= di_flags; + ip->i_d.di_flags2 |= di_flags2; } /* FALLTHROUGH */ case S_IFLNK: diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 94b35eb3..478d04e 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -945,6 +945,7 @@ xfs_set_diflags( unsigned int xflags) { unsigned int di_flags; + uint64_t di_flags2; /* can't set PREALLOC this way, just preserve it */ di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); @@ -977,8 +978,18 @@ xfs_set_diflags( if (xflags & FS_XFLAG_EXTSIZE) di_flags |= XFS_DIFLAG_EXTSIZE; } - ip->i_d.di_flags = di_flags; + + /* diflags2 only valid for v3 inodes. */ + if (ip->i_d.di_version < 3) + return; + + di_flags2 = 0; + if (xflags & FS_XFLAG_DAX) + di_flags2 |= XFS_DIFLAG2_DAX; + + ip->i_d.di_flags2 = di_flags2; + } STATIC void @@ -1004,6 +1015,11 @@ xfs_diflags_to_linux( inode->i_flags |= S_NOATIME; else inode->i_flags &= ~S_NOATIME; + if (xflags & FS_XFLAG_DAX) + inode->i_flags |= S_DAX; + else + inode->i_flags &= ~S_DAX; + } static int diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 245268a..a1b8af1 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1201,8 +1201,8 @@ xfs_diflags_to_iflags( inode->i_flags |= S_SYNC; if (flags & XFS_DIFLAG_NOATIME) inode->i_flags |= S_NOATIME; - /* XXX: Also needs an on-disk per inode flag! */ - if (ip->i_mount->m_flags & XFS_MOUNT_DAX) + if (ip->i_mount->m_flags & XFS_MOUNT_DAX || + ip->i_d.di_flags2 & XFS_DIFLAG2_DAX) inode->i_flags |= S_DAX; } diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index df175dd..4cad4c8 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -138,6 +138,7 @@ struct fsxattr { #define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ #define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ #define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ +#define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */ #define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* the read-only stuff doesn't really belong here, but any other place is -- cgit v0.10.2 From 3e85286e75224fa3f08bdad20e78c8327742634e Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 19 Jan 2016 08:21:46 +1100 Subject: Revert "xfs: clear PF_NOFREEZE for xfsaild kthread" This reverts commit 24ba16bb3d499c49974669cd8429c3e4138ab102 as it prevents machines from suspending. This regression occurs when the xfsaild is idle on entry to suspend, and so there s no activity to wake it from it's idle sleep and hence see that it is supposed to freeze. Hence the freezer times out waiting for it and suspend is cancelled. There is no obvious fix for this short of freezing the filesystem properly, so revert this change for now. cc: # 4.4 Signed-off-by: Dave Chinner Acked-by: Jiri Kosina Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index aa67339..4f18fd9 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -497,7 +497,6 @@ xfsaild( long tout = 0; /* milliseconds */ current->flags |= PF_MEMALLOC; - set_freezable(); while (!kthread_should_stop()) { if (tout && tout <= 20) -- cgit v0.10.2 From 85bec5460ad8e05e0a8d70fb0f6750eb719ad092 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 19 Jan 2016 08:28:10 +1100 Subject: xfs: log mount failures don't wait for buffers to be released Recently I've been seeing xfs/051 fail on 1k block size filesystems. Trying to trace the events during the test lead to the problem going away, indicating that it was a race condition that lead to this ASSERT failure: XFS: Assertion failed: atomic_read(&pag->pag_ref) == 0, file: fs/xfs/xfs_mount.c, line: 156 ..... [] xfs_free_perag+0x87/0xb0 [] xfs_mountfs+0x4d9/0x900 [] xfs_fs_fill_super+0x3bf/0x4d0 [] mount_bdev+0x180/0x1b0 [] xfs_fs_mount+0x15/0x20 [] mount_fs+0x38/0x170 [] vfs_kern_mount+0x67/0x120 [] do_mount+0x218/0xd60 [] SyS_mount+0x8b/0xd0 When I finally caught it with tracing enabled, I saw that AG 2 had an elevated reference count and a buffer was responsible for it. I tracked down the specific buffer, and found that it was missing the final reference count release that would put it back on the LRU and hence be found by xfs_wait_buftarg() calls in the log mount failure handling. The last four traces for the buffer before the assert were (trimmed for relevance) kworker/0:1-5259 xfs_buf_iodone: hold 2 lock 0 flags ASYNC kworker/0:1-5259 xfs_buf_ioerror: hold 2 lock 0 error -5 mount-7163 xfs_buf_lock_done: hold 2 lock 0 flags ASYNC mount-7163 xfs_buf_unlock: hold 2 lock 1 flags ASYNC This is an async write that is completing, so there's nobody waiting for it directly. Hence we call xfs_buf_relse() once all the processing is complete. That does: static inline void xfs_buf_relse(xfs_buf_t *bp) { xfs_buf_unlock(bp); xfs_buf_rele(bp); } Now, it's clear that mount is waiting on the buffer lock, and that it has been released by xfs_buf_relse() and gained by mount. This is expected, because at this point the mount process is in xfs_buf_delwri_submit() waiting for all the IO it submitted to complete. The mount process, however, is waiting on the lock for the buffer because it is in xfs_buf_delwri_submit(). This waits for IO completion, but it doesn't wait for the buffer reference owned by the IO to go away. The mount process collects all the completions, fails the log recovery, and the higher level code then calls xfs_wait_buftarg() to free all the remaining buffers in the filesystem. The issue is that on unlocking the buffer, the scheduler has decided that the mount process has higher priority than the the kworker thread that is running the IO completion, and so immediately switched contexts to the mount process from the semaphore unlock code, hence preventing the kworker thread from finishing the IO completion and releasing the IO reference to the buffer. Hence by the time that xfs_wait_buftarg() is run, the buffer still has an active reference and so isn't on the LRU list that the function walks to free the remaining buffers. Hence we miss that buffer and continue onwards to tear down the mount structures, at which time we get find a stray reference count on the perag structure. On a non-debug kernel, this will be ignored and the structure torn down and freed. Hence when the kworker thread is then rescheduled and the buffer released and freed, it will access a freed perag structure. The problem here is that when the log mount fails, we still need to quiesce the log to ensure that the IO workqueues have returned to idle before we run xfs_wait_buftarg(). By synchronising the workqueues, we ensure that all IO completions are fully processed, not just to the point where buffers have been unlocked. This ensures we don't end up in the situation above. cc: # 3.18 Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 3243cdf..cbddb91 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1520,6 +1520,16 @@ xfs_wait_buftarg( LIST_HEAD(dispose); int loop = 0; + /* + * We need to flush the buffer workqueue to ensure that all IO + * completion processing is 100% done. Just waiting on buffer locks is + * not sufficient for async IO as the reference count held over IO is + * not released until after the buffer lock is dropped. Hence we need to + * ensure here that all reference counts have been dropped before we + * start walking the LRU list. + */ + drain_workqueue(btp->bt_mount->m_buf_workqueue); + /* loop until there is nothing left on the lru list. */ while (list_lru_count(&btp->bt_lru)) { list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele, -- cgit v0.10.2