summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
author: Scott Wood <scottwood@freescale.com>, 2014-04-08 01:00:49 (GMT)
committer: Scott Wood <scottwood@freescale.com>, 2014-04-08 19:58:35 (GMT)
commit: 47d2261a3fa71cde24263559a4219a25e50d8c89 (patch)
tree: 28774d5b330ccf1b777a3af222d8356918328013 /fs
parent: fb7f27080adc65cd5f341bdf56a1d0c14f316c1b (diff)
parent: 5fb9d37f27351e42f002e372074249f92cbdf815 (diff)
download: linux-fsl-qoriq-47d2261a3fa71cde24263559a4219a25e50d8c89.tar.xz
Merge branch 'merge' into sdk-v1.6.x
This reverts v3.13-rc3+ (78fd82238d0e5716) to v3.12, except for commits which I noticed which appear relevant to the SDK.

Signed-off-by: Scott Wood <scottwood@freescale.com>

Conflicts:
	arch/powerpc/include/asm/kvm_host.h
	arch/powerpc/kvm/book3s_hv_rmhandlers.S
	arch/powerpc/kvm/book3s_interrupts.S
	arch/powerpc/kvm/e500.c
	arch/powerpc/kvm/e500mc.c
	arch/powerpc/sysdev/fsl_soc.h
	drivers/Kconfig
	drivers/cpufreq/ppc-corenet-cpufreq.c
	drivers/dma/fsldma.c
	drivers/dma/s3c24xx-dma.c
	drivers/misc/Makefile
	drivers/mmc/host/sdhci-of-esdhc.c
	drivers/mtd/devices/m25p80.c
	drivers/net/ethernet/freescale/gianfar.h
	drivers/platform/Kconfig
	drivers/platform/Makefile
	drivers/spi/spi-fsl-espi.c
	include/crypto/algapi.h
	include/linux/netdev_features.h
	include/linux/skbuff.h
	include/net/ip.h
	net/core/ethtool.c
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/cache.c6
-rw-r--r--fs/9p/cache.h12
-rw-r--r--fs/9p/vfs_dentry.c19
-rw-r--r--fs/9p/vfs_file.c2
-rw-r--r--fs/9p/vfs_inode.c6
-rw-r--r--fs/9p/vfs_inode_dotl.c4
-rw-r--r--fs/adfs/adfs.h9
-rw-r--r--fs/adfs/super.c3
-rw-r--r--fs/affs/Changes2
-rw-r--r--fs/afs/cell.c2
-rw-r--r--fs/afs/inode.c2
-rw-r--r--fs/afs/vlocation.c3
-rw-r--r--fs/afs/volume.c2
-rw-r--r--fs/aio.c201
-rw-r--r--fs/anon_inodes.c114
-rw-r--r--fs/attr.c25
-rw-r--r--fs/autofs4/autofs_i.h3
-rw-r--r--fs/autofs4/dev-ioctl.c6
-rw-r--r--fs/autofs4/inode.c13
-rw-r--r--fs/befs/linuxvfs.c61
-rw-r--r--fs/binfmt_aout.c13
-rw-r--r--fs/binfmt_elf.c127
-rw-r--r--fs/binfmt_elf_fdpic.c152
-rw-r--r--fs/binfmt_em86.c2
-rw-r--r--fs/bio.c48
-rw-r--r--fs/btrfs/Kconfig18
-rw-r--r--fs/btrfs/Makefile4
-rw-r--r--fs/btrfs/acl.c2
-rw-r--r--fs/btrfs/async-thread.c3
-rw-r--r--fs/btrfs/backref.c8
-rw-r--r--fs/btrfs/btrfs_inode.h20
-rw-r--r--fs/btrfs/check-integrity.c75
-rw-r--r--fs/btrfs/check-integrity.h2
-rw-r--r--fs/btrfs/compat.h7
-rw-r--r--fs/btrfs/compression.c3
-rw-r--r--fs/btrfs/ctree.c75
-rw-r--r--fs/btrfs/ctree.h47
-rw-r--r--fs/btrfs/delayed-inode.c19
-rw-r--r--fs/btrfs/dev-replace.c28
-rw-r--r--fs/btrfs/dir-item.c8
-rw-r--r--fs/btrfs/disk-io.c269
-rw-r--r--fs/btrfs/disk-io.h4
-rw-r--r--fs/btrfs/export.c1
-rw-r--r--fs/btrfs/extent-tree.c174
-rw-r--r--fs/btrfs/extent_io.c158
-rw-r--r--fs/btrfs/extent_io.h8
-rw-r--r--fs/btrfs/extent_map.h8
-rw-r--r--fs/btrfs/file-item.c7
-rw-r--r--fs/btrfs/file.c163
-rw-r--r--fs/btrfs/free-space-cache.c21
-rw-r--r--fs/btrfs/free-space-cache.h4
-rw-r--r--fs/btrfs/inode-item.c2
-rw-r--r--fs/btrfs/inode-map.c13
-rw-r--r--fs/btrfs/inode.c213
-rw-r--r--fs/btrfs/ioctl.c85
-rw-r--r--fs/btrfs/ordered-data.c55
-rw-r--r--fs/btrfs/ordered-data.h6
-rw-r--r--fs/btrfs/print-tree.c2
-rw-r--r--fs/btrfs/raid56.c1
-rw-r--r--fs/btrfs/relocation.c94
-rw-r--r--fs/btrfs/scrub.c85
-rw-r--r--fs/btrfs/send.c193
-rw-r--r--fs/btrfs/super.c31
-rw-r--r--fs/btrfs/tests/btrfs-tests.c74
-rw-r--r--fs/btrfs/tests/btrfs-tests.h25
-rw-r--r--fs/btrfs/tests/extent-buffer-tests.c229
-rw-r--r--fs/btrfs/tests/extent-io-tests.c276
-rw-r--r--fs/btrfs/tests/inode-tests.c955
-rw-r--r--fs/btrfs/transaction.c85
-rw-r--r--fs/btrfs/transaction.h2
-rw-r--r--fs/btrfs/tree-defrag.c5
-rw-r--r--fs/btrfs/tree-log.c153
-rw-r--r--fs/btrfs/uuid-tree.c6
-rw-r--r--fs/btrfs/volumes.c28
-rw-r--r--fs/btrfs/volumes.h24
-rw-r--r--fs/cachefiles/interface.c6
-rw-r--r--fs/cachefiles/namei.c4
-rw-r--r--fs/ceph/addr.c2
-rw-r--r--fs/ceph/cache.c7
-rw-r--r--fs/ceph/caps.c27
-rw-r--r--fs/ceph/dir.c11
-rw-r--r--fs/ceph/inode.c49
-rw-r--r--fs/ceph/mds_client.c61
-rw-r--r--fs/ceph/mds_client.h1
-rw-r--r--fs/ceph/super.h8
-rw-r--r--fs/char_dev.c9
-rw-r--r--fs/cifs/cifs_fs_sb.h1
-rw-r--r--fs/cifs/cifsencrypt.c40
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h38
-rw-r--r--fs/cifs/cifspdu.h42
-rw-r--r--fs/cifs/cifsproto.h2
-rw-r--r--fs/cifs/cifssmb.c62
-rw-r--r--fs/cifs/connect.c35
-rw-r--r--fs/cifs/dir.c2
-rw-r--r--fs/cifs/file.c22
-rw-r--r--fs/cifs/fscache.c8
-rw-r--r--fs/cifs/inode.c23
-rw-r--r--fs/cifs/ioctl.c170
-rw-r--r--fs/cifs/link.c7
-rw-r--r--fs/cifs/misc.c22
-rw-r--r--fs/cifs/netmisc.c2
-rw-r--r--fs/cifs/readdir.c40
-rw-r--r--fs/cifs/smb1ops.c33
-rw-r--r--fs/cifs/smb2inode.c16
-rw-r--r--fs/cifs/smb2maperror.c2
-rw-r--r--fs/cifs/smb2ops.c269
-rw-r--r--fs/cifs/smb2pdu.c219
-rw-r--r--fs/cifs/smb2pdu.h73
-rw-r--r--fs/cifs/smb2proto.h7
-rw-r--r--fs/cifs/smb2transport.c12
-rw-r--r--fs/cifs/smbfsctl.h2
-rw-r--r--fs/cifs/transport.c13
-rw-r--r--fs/coda/coda_linux.h2
-rw-r--r--fs/coda/dir.c6
-rw-r--r--fs/coda/file.c6
-rw-r--r--fs/coda/inode.c2
-rw-r--r--fs/compat_ioctl.c4
-rw-r--r--fs/configfs/dir.c28
-rw-r--r--fs/coredump.c71
-rw-r--r--fs/cramfs/Kconfig5
-rw-r--r--fs/dcache.c443
-rw-r--r--fs/debugfs/inode.c3
-rw-r--r--fs/devpts/inode.c1
-rw-r--r--fs/dlm/lockspace.c4
-rw-r--r--fs/dlm/netlink.c10
-rw-r--r--fs/ecryptfs/crypto.c2
-rw-r--r--fs/ecryptfs/dentry.c29
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h19
-rw-r--r--fs/ecryptfs/file.c16
-rw-r--r--fs/ecryptfs/inode.c29
-rw-r--r--fs/ecryptfs/main.c3
-rw-r--r--fs/efivarfs/super.c11
-rw-r--r--fs/eventpoll.c150
-rw-r--r--fs/exec.c47
-rw-r--r--fs/exportfs/expfs.c267
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/xip.c1
-rw-r--r--fs/ext3/super.c4
-rw-r--r--fs/ext4/balloc.c13
-rw-r--r--fs/ext4/ext4.h19
-rw-r--r--fs/ext4/extents.c35
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inline.c3
-rw-r--r--fs/ext4/inode.c54
-rw-r--r--fs/ext4/ioctl.c4
-rw-r--r--fs/ext4/mballoc.c4
-rw-r--r--fs/ext4/mmp.c2
-rw-r--r--fs/ext4/move_extent.c40
-rw-r--r--fs/ext4/page-io.c5
-rw-r--r--fs/ext4/super.c159
-rw-r--r--fs/ext4/xattr.c1
-rw-r--r--fs/f2fs/Kconfig8
-rw-r--r--fs/f2fs/acl.c36
-rw-r--r--fs/f2fs/acl.h9
-rw-r--r--fs/f2fs/checkpoint.c75
-rw-r--r--fs/f2fs/data.c29
-rw-r--r--fs/f2fs/dir.c4
-rw-r--r--fs/f2fs/f2fs.h117
-rw-r--r--fs/f2fs/file.c45
-rw-r--r--fs/f2fs/gc.c31
-rw-r--r--fs/f2fs/inode.c62
-rw-r--r--fs/f2fs/namei.c52
-rw-r--r--fs/f2fs/node.c142
-rw-r--r--fs/f2fs/recovery.c45
-rw-r--r--fs/f2fs/segment.c133
-rw-r--r--fs/f2fs/segment.h38
-rw-r--r--fs/f2fs/super.c143
-rw-r--r--fs/f2fs/xattr.c36
-rw-r--r--fs/fat/fat.h1
-rw-r--r--fs/fat/inode.c19
-rw-r--r--fs/fcntl.c5
-rw-r--r--fs/file_table.c129
-rw-r--r--fs/fs-writeback.c34
-rw-r--r--fs/fs_struct.c2
-rw-r--r--fs/fscache/cookie.c193
-rw-r--r--fs/fscache/fsdef.c1
-rw-r--r--fs/fscache/netfs.c1
-rw-r--r--fs/fscache/object.c9
-rw-r--r--fs/fscache/page.c59
-rw-r--r--fs/fuse/cuse.c7
-rw-r--r--fs/fuse/dir.c40
-rw-r--r--fs/fuse/file.c361
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/aops.c4
-rw-r--r--fs/gfs2/bmap.c7
-rw-r--r--fs/gfs2/file.c10
-rw-r--r--fs/gfs2/glock.c84
-rw-r--r--fs/gfs2/glock.h2
-rw-r--r--fs/gfs2/glops.c4
-rw-r--r--fs/gfs2/incore.h41
-rw-r--r--fs/gfs2/inode.c58
-rw-r--r--fs/gfs2/lock_dlm.c8
-rw-r--r--fs/gfs2/main.c19
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/gfs2/quota.c344
-rw-r--r--fs/gfs2/quota.h9
-rw-r--r--fs/gfs2/rgrp.c212
-rw-r--r--fs/gfs2/rgrp.h4
-rw-r--r--fs/gfs2/super.c2
-rw-r--r--fs/gfs2/sys.c2
-rw-r--r--fs/gfs2/util.c20
-rw-r--r--fs/gfs2/util.h2
-rw-r--r--fs/gfs2/xattr.c3
-rw-r--r--fs/hfs/btree.h5
-rw-r--r--fs/hfsplus/btree.c112
-rw-r--r--fs/hfsplus/hfsplus_fs.h10
-rw-r--r--fs/hfsplus/hfsplus_raw.h11
-rw-r--r--fs/hfsplus/super.c2
-rw-r--r--fs/hfsplus/wrapper.c17
-rw-r--r--fs/hfsplus/xattr.c210
-rw-r--r--fs/hostfs/hostfs_kern.c11
-rw-r--r--fs/hpfs/hpfs_fn.h1
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hpfs/super.c28
-rw-r--r--fs/inode.c62
-rw-r--r--fs/internal.h7
-rw-r--r--fs/ioctl.c4
-rw-r--r--fs/isofs/inode.c12
-rw-r--r--fs/jbd/transaction.c8
-rw-r--r--fs/jffs2/fs.c4
-rw-r--r--fs/libfs.c134
-rw-r--r--fs/locks.c70
-rw-r--r--fs/logfs/dev_bdev.c13
-rw-r--r--fs/minix/Kconfig2
-rw-r--r--fs/mount.h20
-rw-r--r--fs/namei.c322
-rw-r--r--fs/namespace.c390
-rw-r--r--fs/ncpfs/dir.c55
-rw-r--r--fs/ncpfs/file.c12
-rw-r--r--fs/ncpfs/inode.c19
-rw-r--r--fs/ncpfs/ncp_fs_sb.h2
-rw-r--r--fs/nfs/Kconfig17
-rw-r--r--fs/nfs/blocklayout/blocklayout.h1
-rw-r--r--fs/nfs/blocklayout/extents.c2
-rw-r--r--fs/nfs/callback.c3
-rw-r--r--fs/nfs/client.c10
-rw-r--r--fs/nfs/dir.c129
-rw-r--r--fs/nfs/direct.c17
-rw-r--r--fs/nfs/dns_resolve.c2
-rw-r--r--fs/nfs/file.c117
-rw-r--r--fs/nfs/fscache.c202
-rw-r--r--fs/nfs/fscache.h18
-rw-r--r--fs/nfs/inode.c18
-rw-r--r--fs/nfs/internal.h23
-rw-r--r--fs/nfs/namespace.c5
-rw-r--r--fs/nfs/nfs3proc.c8
-rw-r--r--fs/nfs/nfs4_fs.h25
-rw-r--r--fs/nfs/nfs4client.c138
-rw-r--r--fs/nfs/nfs4file.c6
-rw-r--r--fs/nfs/nfs4namespace.c125
-rw-r--r--fs/nfs/nfs4proc.c503
-rw-r--r--fs/nfs/nfs4state.c273
-rw-r--r--fs/nfs/nfs4super.c12
-rw-r--r--fs/nfs/nfs4xdr.c138
-rw-r--r--fs/nfs/proc.c8
-rw-r--r--fs/nfs/super.c200
-rw-r--r--fs/nfs/unlink.c12
-rw-r--r--fs/nfs/write.c6
-rw-r--r--fs/nfsd/Kconfig2
-rw-r--r--fs/nfsd/export.c24
-rw-r--r--fs/nfsd/nfs4recover.c12
-rw-r--r--fs/nfsd/nfs4state.c43
-rw-r--r--fs/nfsd/nfs4xdr.c135
-rw-r--r--fs/nfsd/nfsfh.c36
-rw-r--r--fs/nfsd/nfsfh.h4
-rw-r--r--fs/nfsd/vfs.c194
-rw-r--r--fs/ntfs/inode.c2
-rw-r--r--fs/ocfs2/alloc.c2
-rw-r--r--fs/ocfs2/aops.c32
-rw-r--r--fs/ocfs2/buffer_head_io.c4
-rw-r--r--fs/ocfs2/cluster/heartbeat.c40
-rw-r--r--fs/ocfs2/cluster/masklog.h3
-rw-r--r--fs/ocfs2/dir.c12
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c8
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c7
-rw-r--r--fs/ocfs2/dlmglue.c4
-rw-r--r--fs/ocfs2/file.c5
-rw-r--r--fs/ocfs2/inode.c10
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/move_extents.c11
-rw-r--r--fs/ocfs2/namei.c2
-rw-r--r--fs/ocfs2/refcounttree.c20
-rw-r--r--fs/ocfs2/resize.c12
-rw-r--r--fs/ocfs2/stackglue.c8
-rw-r--r--fs/ocfs2/suballoc.c4
-rw-r--r--fs/ocfs2/super.c4
-rw-r--r--fs/ocfs2/xattr.c28
-rw-r--r--fs/open.c32
-rw-r--r--fs/pipe.c39
-rw-r--r--fs/pnode.c13
-rw-r--r--fs/proc/Kconfig4
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c14
-rw-r--r--fs/proc/consoles.c10
-rw-r--r--fs/proc/generic.c18
-rw-r--r--fs/proc/inode.c16
-rw-r--r--fs/proc/kcore.c3
-rw-r--r--fs/proc/meminfo.c7
-rw-r--r--fs/proc/namespaces.c8
-rw-r--r--fs/proc/nommu.c12
-rw-r--r--fs/proc/proc_devtree.c3
-rw-r--r--fs/proc/self.c10
-rw-r--r--fs/proc/task_mmu.c53
-rw-r--r--fs/proc/task_nommu.c19
-rw-r--r--fs/proc_namespace.c8
-rw-r--r--fs/qnx4/namei.c4
-rw-r--r--fs/quota/netlink.c16
-rw-r--r--fs/quota/quota.c1
-rw-r--r--fs/read_write.c25
-rw-r--r--fs/readdir.c2
-rw-r--r--fs/select.c4
-rw-r--r--fs/seq_file.c18
-rw-r--r--fs/splice.c6
-rw-r--r--fs/squashfs/Kconfig72
-rw-r--r--fs/squashfs/Makefile5
-rw-r--r--fs/squashfs/block.c36
-rw-r--r--fs/squashfs/cache.c28
-rw-r--r--fs/squashfs/decompressor.c59
-rw-r--r--fs/squashfs/decompressor.h24
-rw-r--r--fs/squashfs/decompressor_multi.c198
-rw-r--r--fs/squashfs/decompressor_multi_percpu.c97
-rw-r--r--fs/squashfs/decompressor_single.c85
-rw-r--r--fs/squashfs/file.c142
-rw-r--r--fs/squashfs/file_cache.c38
-rw-r--r--fs/squashfs/file_direct.c176
-rw-r--r--fs/squashfs/lzo_wrapper.c47
-rw-r--r--fs/squashfs/page_actor.c100
-rw-r--r--fs/squashfs/page_actor.h81
-rw-r--r--fs/squashfs/squashfs.h20
-rw-r--r--fs/squashfs/squashfs_fs_sb.h4
-rw-r--r--fs/squashfs/super.c10
-rw-r--r--fs/squashfs/xz_wrapper.c105
-rw-r--r--fs/squashfs/zlib_wrapper.c64
-rw-r--r--fs/stat.c31
-rw-r--r--fs/super.c201
-rw-r--r--fs/sync.c17
-rw-r--r--fs/sysfs/Makefile3
-rw-r--r--fs/sysfs/bin.c502
-rw-r--r--fs/sysfs/dir.c350
-rw-r--r--fs/sysfs/file.c890
-rw-r--r--fs/sysfs/group.c33
-rw-r--r--fs/sysfs/inode.c30
-rw-r--r--fs/sysfs/symlink.c50
-rw-r--r--fs/sysfs/sysfs.h71
-rw-r--r--fs/ubifs/debug.c6
-rw-r--r--fs/ubifs/dir.c41
-rw-r--r--fs/ubifs/gc.c3
-rw-r--r--fs/ubifs/journal.c6
-rw-r--r--fs/ubifs/super.c8
-rw-r--r--fs/ubifs/xattr.c16
-rw-r--r--fs/udf/super.c45
-rw-r--r--fs/utimes.c9
-rw-r--r--fs/xfs/Makefile8
-rw-r--r--fs/xfs/kmem.c22
-rw-r--r--fs/xfs/kmem.h21
-rw-r--r--fs/xfs/xfs_acl.c8
-rw-r--r--fs/xfs/xfs_ag.h4
-rw-r--r--fs/xfs/xfs_alloc.c19
-rw-r--r--fs/xfs/xfs_alloc.h3
-rw-r--r--fs/xfs/xfs_alloc_btree.c14
-rw-r--r--fs/xfs/xfs_alloc_btree.h35
-rw-r--r--fs/xfs/xfs_aops.c16
-rw-r--r--fs/xfs/xfs_attr.c12
-rw-r--r--fs/xfs/xfs_attr_inactive.c21
-rw-r--r--fs/xfs/xfs_attr_leaf.c29
-rw-r--r--fs/xfs/xfs_attr_leaf.h232
-rw-r--r--fs/xfs/xfs_attr_list.c32
-rw-r--r--fs/xfs/xfs_attr_remote.c14
-rw-r--r--fs/xfs/xfs_attr_remote.h29
-rw-r--r--fs/xfs/xfs_bit.c4
-rw-r--r--fs/xfs/xfs_bmap.c60
-rw-r--r--fs/xfs/xfs_bmap_btree.c13
-rw-r--r--fs/xfs/xfs_bmap_btree.h105
-rw-r--r--fs/xfs/xfs_bmap_util.c293
-rw-r--r--fs/xfs/xfs_bmap_util.h9
-rw-r--r--fs/xfs/xfs_btree.c12
-rw-r--r--fs/xfs/xfs_btree.h79
-rw-r--r--fs/xfs/xfs_buf.c11
-rw-r--r--fs/xfs/xfs_buf_item.c9
-rw-r--r--fs/xfs/xfs_buf_item.h4
-rw-r--r--fs/xfs/xfs_da_btree.c264
-rw-r--r--fs/xfs/xfs_da_btree.h143
-rw-r--r--fs/xfs/xfs_da_format.c907
-rw-r--r--fs/xfs/xfs_dir2.c20
-rw-r--r--fs/xfs/xfs_dir2.h106
-rw-r--r--fs/xfs/xfs_dir2_block.c109
-rw-r--r--fs/xfs/xfs_dir2_data.c161
-rw-r--r--fs/xfs/xfs_dir2_format.h (renamed from fs/xfs/xfs_da_format.h)681
-rw-r--r--fs/xfs/xfs_dir2_leaf.c243
-rw-r--r--fs/xfs/xfs_dir2_node.c351
-rw-r--r--fs/xfs/xfs_dir2_priv.h20
-rw-r--r--fs/xfs/xfs_dir2_readdir.c42
-rw-r--r--fs/xfs/xfs_dir2_sf.c216
-rw-r--r--fs/xfs/xfs_discard.c11
-rw-r--r--fs/xfs/xfs_dquot.c133
-rw-r--r--fs/xfs/xfs_dquot.h2
-rw-r--r--fs/xfs/xfs_dquot_buf.c288
-rw-r--r--fs/xfs/xfs_dquot_item.c14
-rw-r--r--fs/xfs/xfs_error.c11
-rw-r--r--fs/xfs/xfs_export.c12
-rw-r--r--fs/xfs/xfs_extent_busy.c11
-rw-r--r--fs/xfs/xfs_extent_busy.h4
-rw-r--r--fs/xfs/xfs_extfree_item.c8
-rw-r--r--fs/xfs/xfs_file.c92
-rw-r--r--fs/xfs/xfs_filestream.c12
-rw-r--r--fs/xfs/xfs_format.h263
-rw-r--r--fs/xfs/xfs_fs.h4
-rw-r--r--fs/xfs/xfs_fsops.c45
-rw-r--r--fs/xfs/xfs_ialloc.c20
-rw-r--r--fs/xfs/xfs_ialloc.h5
-rw-r--r--fs/xfs/xfs_ialloc_btree.c13
-rw-r--r--fs/xfs/xfs_ialloc_btree.h51
-rw-r--r--fs/xfs/xfs_icache.c20
-rw-r--r--fs/xfs/xfs_icreate_item.c7
-rw-r--r--fs/xfs/xfs_inode.c338
-rw-r--r--fs/xfs/xfs_inode.h6
-rw-r--r--fs/xfs/xfs_inode_buf.c10
-rw-r--r--fs/xfs/xfs_inode_buf.h3
-rw-r--r--fs/xfs/xfs_inode_fork.c40
-rw-r--r--fs/xfs/xfs_inode_fork.h1
-rw-r--r--fs/xfs/xfs_inode_item.c12
-rw-r--r--fs/xfs/xfs_ioctl.c146
-rw-r--r--fs/xfs/xfs_ioctl32.c7
-rw-r--r--fs/xfs/xfs_iomap.c23
-rw-r--r--fs/xfs/xfs_iomap.h8
-rw-r--r--fs/xfs/xfs_iops.c70
-rw-r--r--fs/xfs/xfs_iops.h8
-rw-r--r--fs/xfs/xfs_itable.c15
-rw-r--r--fs/xfs/xfs_log.c75
-rw-r--r--fs/xfs/xfs_log.h10
-rw-r--r--fs/xfs/xfs_log_cil.c26
-rw-r--r--fs/xfs/xfs_log_format.h177
-rw-r--r--fs/xfs/xfs_log_priv.h17
-rw-r--r--fs/xfs/xfs_log_recover.c171
-rw-r--r--fs/xfs/xfs_log_rlimit.c9
-rw-r--r--fs/xfs/xfs_message.c5
-rw-r--r--fs/xfs/xfs_mount.c36
-rw-r--r--fs/xfs/xfs_mount.h5
-rw-r--r--fs/xfs/xfs_qm.c39
-rw-r--r--fs/xfs/xfs_qm.h2
-rw-r--r--fs/xfs/xfs_qm_bhv.c12
-rw-r--r--fs/xfs/xfs_qm_syscalls.c28
-rw-r--r--fs/xfs/xfs_quota.h4
-rw-r--r--fs/xfs/xfs_quota_defs.h4
-rw-r--r--fs/xfs/xfs_quotaops.c5
-rw-r--r--fs/xfs/xfs_rtalloc.c1552
-rw-r--r--fs/xfs/xfs_rtalloc.h24
-rw-r--r--fs/xfs/xfs_rtbitmap.c974
-rw-r--r--fs/xfs/xfs_sb.c46
-rw-r--r--fs/xfs/xfs_sb.h3
-rw-r--r--fs/xfs/xfs_shared.h244
-rw-r--r--fs/xfs/xfs_super.c40
-rw-r--r--fs/xfs/xfs_symlink.c102
-rw-r--r--fs/xfs/xfs_symlink.h2
-rw-r--r--fs/xfs/xfs_symlink_remote.c6
-rw-r--r--fs/xfs/xfs_trace.c16
-rw-r--r--fs/xfs/xfs_trace.h84
-rw-r--r--fs/xfs/xfs_trans.c23
-rw-r--r--fs/xfs/xfs_trans.h20
-rw-r--r--fs/xfs/xfs_trans_ail.c10
-rw-r--r--fs/xfs/xfs_trans_buf.c12
-rw-r--r--fs/xfs/xfs_trans_dquot.c15
-rw-r--r--fs/xfs/xfs_trans_extfree.c7
-rw-r--r--fs/xfs/xfs_trans_inode.c21
-rw-r--r--fs/xfs/xfs_trans_priv.h1
-rw-r--r--fs/xfs/xfs_trans_resv.c21
-rw-r--r--fs/xfs/xfs_vnode.h8
-rw-r--r--fs/xfs/xfs_xattr.c8
471 files changed, 11312 insertions, 17115 deletions
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 2b7a032..a9ea73d 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -90,7 +90,7 @@ void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index,
&v9fs_cache_session_index_def,
- v9ses, true);
+ v9ses);
p9_debug(P9_DEBUG_FSC, "session %p get cookie %p\n",
v9ses, v9ses->fscache);
}
@@ -204,7 +204,7 @@ void v9fs_cache_inode_get_cookie(struct inode *inode)
v9ses = v9fs_inode2v9ses(inode);
v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
&v9fs_cache_inode_index_def,
- v9inode, true);
+ v9inode);
p9_debug(P9_DEBUG_FSC, "inode %p get cookie %p\n",
inode, v9inode->fscache);
@@ -271,7 +271,7 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode)
v9ses = v9fs_inode2v9ses(inode);
v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
&v9fs_cache_inode_index_def,
- v9inode, true);
+ v9inode);
p9_debug(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p\n",
inode, old, v9inode->fscache);
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index 2f96754..40cc54c 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -101,18 +101,6 @@ static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
#else /* CONFIG_9P_FSCACHE */
-static inline void v9fs_cache_inode_get_cookie(struct inode *inode)
-{
-}
-
-static inline void v9fs_cache_inode_put_cookie(struct inode *inode)
-{
-}
-
-static inline void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *file)
-{
-}
-
static inline int v9fs_fscache_release_page(struct page *page,
gfp_t gfp) {
return 1;
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index b03dd23..f039b10 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -43,6 +43,23 @@
#include "fid.h"
/**
+ * v9fs_dentry_delete - called when dentry refcount equals 0
+ * @dentry: dentry in question
+ *
+ * By returning 1 here we should remove cacheing of unused
+ * dentry components.
+ *
+ */
+
+static int v9fs_dentry_delete(const struct dentry *dentry)
+{
+ p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n",
+ dentry->d_name.name, dentry);
+
+ return 1;
+}
+
+/**
* v9fs_cached_dentry_delete - called when dentry refcount equals 0
* @dentry: dentry in question
*
@@ -117,6 +134,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = {
};
const struct dentry_operations v9fs_dentry_operations = {
- .d_delete = always_delete_dentry,
+ .d_delete = v9fs_dentry_delete,
.d_release = v9fs_dentry_release,
};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index a0df3e7..aa5ecf4 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -105,8 +105,10 @@ int v9fs_file_open(struct inode *inode, struct file *file)
v9inode->writeback_fid = (void *) fid;
}
mutex_unlock(&v9inode->v_mutex);
+#ifdef CONFIG_9P_FSCACHE
if (v9ses->cache)
v9fs_cache_inode_set_cookie(inode, file);
+#endif
return 0;
out_error:
p9_client_clunk(file->private_data);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 4e65aa9..94de6d1 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -448,7 +448,9 @@ void v9fs_evict_inode(struct inode *inode)
clear_inode(inode);
filemap_fdatawrite(inode->i_mapping);
+#ifdef CONFIG_9P_FSCACHE
v9fs_cache_inode_put_cookie(inode);
+#endif
/* clunk the fid stashed in writeback_fid */
if (v9inode->writeback_fid) {
p9_client_clunk(v9inode->writeback_fid);
@@ -529,7 +531,9 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
goto error;
v9fs_stat2inode(st, inode, sb);
+#ifdef CONFIG_9P_FSCACHE
v9fs_cache_inode_get_cookie(inode);
+#endif
unlock_new_inode(inode);
return inode;
error:
@@ -901,8 +905,10 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
goto error;
file->private_data = fid;
+#ifdef CONFIG_9P_FSCACHE
if (v9ses->cache)
v9fs_cache_inode_set_cookie(dentry->d_inode, file);
+#endif
*opened |= FILE_CREATED;
out:
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 4c10edec2..a7c4814 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -141,7 +141,9 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
goto error;
v9fs_stat2inode_dotl(st, inode);
+#ifdef CONFIG_9P_FSCACHE
v9fs_cache_inode_get_cookie(inode);
+#endif
retval = v9fs_get_acl(inode, fid);
if (retval)
goto error;
@@ -353,8 +355,10 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
if (err)
goto err_clunk_old_fid;
file->private_data = ofid;
+#ifdef CONFIG_9P_FSCACHE
if (v9ses->cache)
v9fs_cache_inode_set_cookie(inode, file);
+#endif
*opened |= FILE_CREATED;
out:
v9fs_put_acl(dacl, pacl);
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index c770337..585adaf 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -43,12 +43,9 @@ struct adfs_dir_ops;
* ADFS file system superblock data in memory
*/
struct adfs_sb_info {
- union { struct {
- struct adfs_discmap *s_map; /* bh list containing map */
- struct adfs_dir_ops *s_dir; /* directory operations */
- };
- struct rcu_head rcu; /* used only at shutdown time */
- };
+ struct adfs_discmap *s_map; /* bh list containing map */
+ struct adfs_dir_ops *s_dir; /* directory operations */
+
kuid_t s_uid; /* owner uid */
kgid_t s_gid; /* owner gid */
umode_t s_owner_mask; /* ADFS owner perm -> unix perm */
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 7b3003c..0ff4bae 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -123,7 +123,8 @@ static void adfs_put_super(struct super_block *sb)
for (i = 0; i < asb->s_map_size; i++)
brelse(asb->s_map[i].dm_bh);
kfree(asb->s_map);
- kfree_rcu(asb, rcu);
+ kfree(asb);
+ sb->s_fs_info = NULL;
}
static int adfs_show_options(struct seq_file *seq, struct dentry *root)
diff --git a/fs/affs/Changes b/fs/affs/Changes
index b41c2c9..a29409c 100644
--- a/fs/affs/Changes
+++ b/fs/affs/Changes
@@ -91,7 +91,7 @@ more 2.4 fixes: [Roman Zippel]
Version 3.11
------------
-- Converted to use 2.3.x page cache [Dave Jones]
+- Converted to use 2.3.x page cache [Dave Jones <dave@powertweak.com>]
- Corruption in truncate() bugfix [Ken Tyler <kent@werple.net.au>]
Version 3.10
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index ca0a3cf..3c090b7 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -179,7 +179,7 @@ struct afs_cell *afs_cell_create(const char *name, unsigned namesz,
/* put it up for caching (this never returns an error) */
cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index,
&afs_cell_cache_index_def,
- cell, true);
+ cell);
#endif
/* add to the cell lists */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index ce25d75..789bc25 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -259,7 +259,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
#ifdef CONFIG_AFS_FSCACHE
vnode->cache = fscache_acquire_cookie(vnode->volume->cache,
&afs_vnode_cache_index_def,
- vnode, true);
+ vnode);
#endif
ret = afs_inode_map_status(vnode, key);
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index b6df2e8..57bcb15 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -308,8 +308,7 @@ static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
/* see if we have an in-cache copy (will set vl->valid if there is) */
#ifdef CONFIG_AFS_FSCACHE
vl->cache = fscache_acquire_cookie(vl->cell->cache,
- &afs_vlocation_cache_index_def, vl,
- true);
+ &afs_vlocation_cache_index_def, vl);
#endif
if (vl->valid) {
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 2b60725..401eeb2 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -131,7 +131,7 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
#ifdef CONFIG_AFS_FSCACHE
volume->cache = fscache_acquire_cookie(vlocation->cache,
&afs_volume_cache_index_def,
- volume, true);
+ volume);
#endif
afs_get_vlocation(vlocation);
volume->vlocation = vlocation;
diff --git a/fs/aio.c b/fs/aio.c
index 6efb7f6..067e3d3 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -36,10 +36,10 @@
#include <linux/eventfd.h>
#include <linux/blkdev.h>
#include <linux/compat.h>
+#include <linux/anon_inodes.h>
#include <linux/migrate.h>
#include <linux/ramfs.h>
#include <linux/percpu-refcount.h>
-#include <linux/mount.h>
#include <asm/kmap_types.h>
#include <asm/uaccess.h>
@@ -80,8 +80,6 @@ struct kioctx {
struct percpu_ref users;
atomic_t dead;
- struct percpu_ref reqs;
-
unsigned long user_id;
struct __percpu kioctx_cpu *cpu;
@@ -109,6 +107,7 @@ struct kioctx {
struct page **ring_pages;
long nr_pages;
+ struct rcu_head rcu_head;
struct work_struct free_work;
struct {
@@ -153,67 +152,12 @@ unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio request
static struct kmem_cache *kiocb_cachep;
static struct kmem_cache *kioctx_cachep;
-static struct vfsmount *aio_mnt;
-
-static const struct file_operations aio_ring_fops;
-static const struct address_space_operations aio_ctx_aops;
-
-static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
-{
- struct qstr this = QSTR_INIT("[aio]", 5);
- struct file *file;
- struct path path;
- struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
-
- inode->i_mapping->a_ops = &aio_ctx_aops;
- inode->i_mapping->private_data = ctx;
- inode->i_size = PAGE_SIZE * nr_pages;
-
- path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
- if (!path.dentry) {
- iput(inode);
- return ERR_PTR(-ENOMEM);
- }
- path.mnt = mntget(aio_mnt);
-
- d_instantiate(path.dentry, inode);
- file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops);
- if (IS_ERR(file)) {
- path_put(&path);
- return file;
- }
-
- file->f_flags = O_RDWR;
- file->private_data = ctx;
- return file;
-}
-
-static struct dentry *aio_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
-{
- static const struct dentry_operations ops = {
- .d_dname = simple_dname,
- };
- return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1);
-}
-
/* aio_setup
* Creates the slab caches used by the aio routines, panic on
* failure as this is done early during the boot sequence.
*/
static int __init aio_setup(void)
{
- static struct file_system_type aio_fs = {
- .name = "aio",
- .mount = aio_mount,
- .kill_sb = kill_anon_super,
- };
- aio_mnt = kern_mount(&aio_fs);
- if (IS_ERR(aio_mnt))
- panic("Failed to create aio fs mount.");
-
kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
@@ -251,10 +195,8 @@ static void aio_free_ring(struct kioctx *ctx)
put_aio_ring_file(ctx);
- if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
+ if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
kfree(ctx->ring_pages);
- ctx->ring_pages = NULL;
- }
}
static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
@@ -341,12 +283,16 @@ static int aio_setup_ring(struct kioctx *ctx)
if (nr_pages < 0)
return -EINVAL;
- file = aio_private_file(ctx, nr_pages);
+ file = anon_inode_getfile_private("[aio]", &aio_ring_fops, ctx, O_RDWR);
if (IS_ERR(file)) {
ctx->aio_ring_file = NULL;
return -EAGAIN;
}
+ file->f_inode->i_mapping->a_ops = &aio_ctx_aops;
+ file->f_inode->i_mapping->private_data = ctx;
+ file->f_inode->i_size = PAGE_SIZE * (loff_t)nr_pages;
+
for (i = 0; i < nr_pages; i++) {
struct page *page;
page = find_or_create_page(file->f_inode->i_mapping,
@@ -367,10 +313,8 @@ static int aio_setup_ring(struct kioctx *ctx)
if (nr_pages > AIO_RING_PAGES) {
ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
GFP_KERNEL);
- if (!ctx->ring_pages) {
- put_aio_ring_file(ctx);
+ if (!ctx->ring_pages)
return -ENOMEM;
- }
}
ctx->mmap_size = nr_pages * PAGE_SIZE;
@@ -468,34 +412,26 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
return cancel(kiocb);
}
-static void free_ioctx(struct work_struct *work)
+static void free_ioctx_rcu(struct rcu_head *head)
{
- struct kioctx *ctx = container_of(work, struct kioctx, free_work);
+ struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
- pr_debug("freeing %p\n", ctx);
-
- aio_free_ring(ctx);
free_percpu(ctx->cpu);
kmem_cache_free(kioctx_cachep, ctx);
}
-static void free_ioctx_reqs(struct percpu_ref *ref)
-{
- struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
-
- INIT_WORK(&ctx->free_work, free_ioctx);
- schedule_work(&ctx->free_work);
-}
-
/*
* When this function runs, the kioctx has been removed from the "hash table"
* and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
* now it's safe to cancel any that need to be.
*/
-static void free_ioctx_users(struct percpu_ref *ref)
+static void free_ioctx(struct work_struct *work)
{
- struct kioctx *ctx = container_of(ref, struct kioctx, users);
+ struct kioctx *ctx = container_of(work, struct kioctx, free_work);
+ struct aio_ring *ring;
struct kiocb *req;
+ unsigned cpu, avail;
+ DEFINE_WAIT(wait);
spin_lock_irq(&ctx->ctx_lock);
@@ -509,8 +445,54 @@ static void free_ioctx_users(struct percpu_ref *ref)
spin_unlock_irq(&ctx->ctx_lock);
- percpu_ref_kill(&ctx->reqs);
- percpu_ref_put(&ctx->reqs);
+ for_each_possible_cpu(cpu) {
+ struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu);
+
+ atomic_add(kcpu->reqs_available, &ctx->reqs_available);
+ kcpu->reqs_available = 0;
+ }
+
+ while (1) {
+ prepare_to_wait(&ctx->wait, &wait, TASK_UNINTERRUPTIBLE);
+
+ ring = kmap_atomic(ctx->ring_pages[0]);
+ avail = (ring->head <= ring->tail)
+ ? ring->tail - ring->head
+ : ctx->nr_events - ring->head + ring->tail;
+
+ atomic_add(avail, &ctx->reqs_available);
+ ring->head = ring->tail;
+ kunmap_atomic(ring);
+
+ if (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1)
+ break;
+
+ schedule();
+ }
+ finish_wait(&ctx->wait, &wait);
+
+ WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr_events - 1);
+
+ aio_free_ring(ctx);
+
+ pr_debug("freeing %p\n", ctx);
+
+ /*
+ * Here the call_rcu() is between the wait_event() for reqs_active to
+ * hit 0, and freeing the ioctx.
+ *
+ * aio_complete() decrements reqs_active, but it has to touch the ioctx
+ * after to issue a wakeup so we use rcu.
+ */
+ call_rcu(&ctx->rcu_head, free_ioctx_rcu);
+}
+
+static void free_ioctx_ref(struct percpu_ref *ref)
+{
+ struct kioctx *ctx = container_of(ref, struct kioctx, users);
+
+ INIT_WORK(&ctx->free_work, free_ioctx);
+ schedule_work(&ctx->free_work);
}
static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
@@ -569,16 +551,6 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
}
}
-static void aio_nr_sub(unsigned nr)
-{
- spin_lock(&aio_nr_lock);
- if (WARN_ON(aio_nr - nr > aio_nr))
- aio_nr = 0;
- else
- aio_nr -= nr;
- spin_unlock(&aio_nr_lock);
-}
-
/* ioctx_alloc
* Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
*/
@@ -616,11 +588,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
ctx->max_reqs = nr_events;
- if (percpu_ref_init(&ctx->users, free_ioctx_users))
- goto err;
-
- if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
- goto err;
+ if (percpu_ref_init(&ctx->users, free_ioctx_ref))
+ goto out_freectx;
spin_lock_init(&ctx->ctx_lock);
spin_lock_init(&ctx->completion_lock);
@@ -631,10 +600,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
ctx->cpu = alloc_percpu(struct kioctx_cpu);
if (!ctx->cpu)
- goto err;
+ goto out_freeref;
if (aio_setup_ring(ctx) < 0)
- goto err;
+ goto out_freepcpu;
atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
@@ -646,8 +615,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
if (aio_nr + nr_events > (aio_max_nr * 2UL) ||
aio_nr + nr_events < aio_nr) {
spin_unlock(&aio_nr_lock);
- err = -EAGAIN;
- goto err_ctx;
+ goto out_cleanup;
}
aio_nr += ctx->max_reqs;
spin_unlock(&aio_nr_lock);
@@ -656,20 +624,23 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
err = ioctx_add_table(ctx, mm);
if (err)
- goto err_cleanup;
+ goto out_cleanup_put;
pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
ctx, ctx->user_id, mm, ctx->nr_events);
return ctx;
-err_cleanup:
- aio_nr_sub(ctx->max_reqs);
-err_ctx:
+out_cleanup_put:
+ percpu_ref_put(&ctx->users);
+out_cleanup:
+ err = -EAGAIN;
aio_free_ring(ctx);
-err:
+out_freepcpu:
free_percpu(ctx->cpu);
- free_percpu(ctx->reqs.pcpu_count);
+out_freeref:
free_percpu(ctx->users.pcpu_count);
+out_freectx:
+ put_aio_ring_file(ctx);
kmem_cache_free(kioctx_cachep, ctx);
pr_debug("error allocating ioctx %d\n", err);
return ERR_PTR(err);
@@ -704,7 +675,10 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
* -EAGAIN with no ioctxs actually in use (as far as userspace
* could tell).
*/
- aio_nr_sub(ctx->max_reqs);
+ spin_lock(&aio_nr_lock);
+ BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
+ aio_nr -= ctx->max_reqs;
+ spin_unlock(&aio_nr_lock);
if (ctx->mmap_size)
vm_munmap(ctx->mmap_base, ctx->mmap_size);
@@ -836,8 +810,6 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
if (unlikely(!req))
goto out_put;
- percpu_ref_get(&ctx->reqs);
-
req->ki_ctx = ctx;
return req;
out_put:
@@ -907,6 +879,12 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
return;
}
+ /*
+ * Take rcu_read_lock() in case the kioctx is being destroyed, as we
+ * need to issue a wakeup after incrementing reqs_available.
+ */
+ rcu_read_lock();
+
if (iocb->ki_list.next) {
unsigned long flags;
@@ -981,7 +959,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
if (waitqueue_active(&ctx->wait))
wake_up(&ctx->wait);
- percpu_ref_put(&ctx->reqs);
+ rcu_read_unlock();
}
EXPORT_SYMBOL(aio_complete);
@@ -1392,7 +1370,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
return 0;
out_put_req:
put_reqs_available(ctx, 1);
- percpu_ref_put(&ctx->reqs);
kiocb_free(req);
return ret;
}
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 2408473..85c9618 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -24,6 +24,7 @@
static struct vfsmount *anon_inode_mnt __read_mostly;
static struct inode *anon_inode_inode;
+static const struct file_operations anon_inode_fops;
/*
* anon_inodefs_dname() is called from d_path().
@@ -38,6 +39,51 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
.d_dname = anon_inodefs_dname,
};
+/*
+ * nop .set_page_dirty method so that people can use .page_mkwrite on
+ * anon inodes.
+ */
+static int anon_set_page_dirty(struct page *page)
+{
+ return 0;
+};
+
+static const struct address_space_operations anon_aops = {
+ .set_page_dirty = anon_set_page_dirty,
+};
+
+/*
+ * A single inode exists for all anon_inode files. Contrary to pipes,
+ * anon_inode inodes have no associated per-instance data, so we need
+ * only allocate one of them.
+ */
+static struct inode *anon_inode_mkinode(struct super_block *s)
+{
+ struct inode *inode = new_inode_pseudo(s);
+
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ inode->i_ino = get_next_ino();
+ inode->i_fop = &anon_inode_fops;
+
+ inode->i_mapping->a_ops = &anon_aops;
+
+ /*
+ * Mark the inode dirty from the very beginning,
+ * that way it will never be moved to the dirty
+ * list because mark_inode_dirty() will think
+ * that it already _is_ on the dirty list.
+ */
+ inode->i_state = I_DIRTY;
+ inode->i_mode = S_IRUSR | S_IWUSR;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
+ inode->i_flags |= S_PRIVATE;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ return inode;
+}
+
static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
@@ -46,7 +92,7 @@ static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
&anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
if (!IS_ERR(root)) {
struct super_block *s = root->d_sb;
- anon_inode_inode = alloc_anon_inode(s);
+ anon_inode_inode = anon_inode_mkinode(s);
if (IS_ERR(anon_inode_inode)) {
dput(root);
deactivate_locked_super(s);
@@ -63,6 +109,72 @@ static struct file_system_type anon_inode_fs_type = {
};
/**
+ * anon_inode_getfile_private - creates a new file instance by hooking it up to an
+ * anonymous inode, and a dentry that describe the "class"
+ * of the file
+ *
+ * @name: [in] name of the "class" of the new file
+ * @fops: [in] file operations for the new file
+ * @priv: [in] private data for the new file (will be file's private_data)
+ * @flags: [in] flags
+ *
+ *
+ * Similar to anon_inode_getfile, but each file holds a single inode.
+ *
+ */
+struct file *anon_inode_getfile_private(const char *name,
+ const struct file_operations *fops,
+ void *priv, int flags)
+{
+ struct qstr this;
+ struct path path;
+ struct file *file;
+ struct inode *inode;
+
+ if (fops->owner && !try_module_get(fops->owner))
+ return ERR_PTR(-ENOENT);
+
+ inode = anon_inode_mkinode(anon_inode_mnt->mnt_sb);
+ if (IS_ERR(inode)) {
+ file = ERR_PTR(-ENOMEM);
+ goto err_module;
+ }
+
+ /*
+ * Link the inode to a directory entry by creating a unique name
+ * using the inode sequence number.
+ */
+ file = ERR_PTR(-ENOMEM);
+ this.name = name;
+ this.len = strlen(name);
+ this.hash = 0;
+ path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
+ if (!path.dentry)
+ goto err_module;
+
+ path.mnt = mntget(anon_inode_mnt);
+
+ d_instantiate(path.dentry, inode);
+
+ file = alloc_file(&path, OPEN_FMODE(flags), fops);
+ if (IS_ERR(file))
+ goto err_dput;
+
+ file->f_mapping = inode->i_mapping;
+ file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
+ file->private_data = priv;
+
+ return file;
+
+err_dput:
+ path_put(&path);
+err_module:
+ module_put(fops->owner);
+ return file;
+}
+EXPORT_SYMBOL_GPL(anon_inode_getfile_private);
+
+/**
* anon_inode_getfile - creates a new file instance by hooking it up to an
* anonymous inode, and a dentry that describe the "class"
* of the file
diff --git a/fs/attr.c b/fs/attr.c
index 267968d..1449adb 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -167,27 +167,7 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
}
EXPORT_SYMBOL(setattr_copy);
-/**
- * notify_change - modify attributes of a filesytem object
- * @dentry: object affected
- * @iattr: new attributes
- * @delegated_inode: returns inode, if the inode is delegated
- *
- * The caller must hold the i_mutex on the affected object.
- *
- * If notify_change discovers a delegation in need of breaking,
- * it will return -EWOULDBLOCK and return a reference to the inode in
- * delegated_inode. The caller should then break the delegation and
- * retry. Because breaking a delegation may take a long time, the
- * caller should drop the i_mutex before doing so.
- *
- * Alternatively, a caller may pass NULL for delegated_inode. This may
- * be appropriate for callers that expect the underlying filesystem not
- * to be NFS exported. Also, passing NULL is fine for callers holding
- * the file open for write, as there can be no conflicting delegation in
- * that case.
- */
-int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
+int notify_change(struct dentry * dentry, struct iattr * attr)
{
struct inode *inode = dentry->d_inode;
umode_t mode = inode->i_mode;
@@ -263,9 +243,6 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
error = security_inode_setattr(dentry, attr);
if (error)
return error;
- error = try_break_deleg(inode, delegated_inode);
- if (error)
- return error;
if (inode->i_op->setattr)
error = inode->i_op->setattr(dentry, attr);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 4218e26..3f1128b 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -122,7 +122,6 @@ struct autofs_sb_info {
spinlock_t lookup_lock;
struct list_head active_list;
struct list_head expiring_list;
- struct rcu_head rcu;
};
static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
@@ -272,7 +271,7 @@ void autofs4_clean_ino(struct autofs_info *);
static inline int autofs_prepare_pipe(struct file *pipe)
{
- if (!pipe->f_op->write)
+ if (!pipe->f_op || !pipe->f_op->write)
return -EINVAL;
if (!S_ISFIFO(file_inode(pipe)->i_mode))
return -EINVAL;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 1818ce7..0f00da3 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -658,6 +658,12 @@ static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __use
goto out;
}
+ if (!fp->f_op) {
+ err = -ENOTTY;
+ fput(fp);
+ goto out;
+ }
+
sbi = autofs_dev_ioctl_sbi(fp);
if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) {
err = -EINVAL;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 3b9cc9b..b104726 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -56,13 +56,18 @@ void autofs4_kill_sb(struct super_block *sb)
* just call kill_anon_super when we are called from
* deactivate_super.
*/
- if (sbi) /* Free wait queues, close pipe */
- autofs4_catatonic_mode(sbi);
+ if (!sbi)
+ goto out_kill_sb;
+
+ /* Free wait queues, close pipe */
+ autofs4_catatonic_mode(sbi);
+
+ sb->s_fs_info = NULL;
+ kfree(sbi);
+out_kill_sb:
DPRINTK("shutting down");
kill_litter_super(sb);
- if (sbi)
- kfree_rcu(sbi, rcu);
}
static int autofs4_show_options(struct seq_file *m, struct dentry *root)
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index daa15d6..e9c75e2 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -42,7 +42,7 @@ static void befs_destroy_inode(struct inode *inode);
static int befs_init_inodecache(void);
static void befs_destroy_inodecache(void);
static void *befs_follow_link(struct dentry *, struct nameidata *);
-static void *befs_fast_follow_link(struct dentry *, struct nameidata *);
+static void befs_put_link(struct dentry *, struct nameidata *, void *);
static int befs_utf2nls(struct super_block *sb, const char *in, int in_len,
char **out, int *out_len);
static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
@@ -79,15 +79,10 @@ static const struct address_space_operations befs_aops = {
.bmap = befs_bmap,
};
-static const struct inode_operations befs_fast_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = befs_fast_follow_link,
-};
-
static const struct inode_operations befs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = befs_follow_link,
- .put_link = kfree_put_link,
+ .put_link = befs_put_link,
};
/*
@@ -416,10 +411,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
inode->i_op = &befs_dir_inode_operations;
inode->i_fop = &befs_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
- if (befs_ino->i_flags & BEFS_LONG_SYMLINK)
- inode->i_op = &befs_symlink_inode_operations;
- else
- inode->i_op = &befs_fast_symlink_inode_operations;
+ inode->i_op = &befs_symlink_inode_operations;
} else {
befs_error(sb, "Inode %lu is not a regular file, "
"directory or symlink. THAT IS WRONG! BeFS has no "
@@ -485,40 +477,47 @@ befs_destroy_inodecache(void)
static void *
befs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
- struct super_block *sb = dentry->d_sb;
befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
- befs_data_stream *data = &befs_ino->i_data.ds;
- befs_off_t len = data->size;
char *link;
- if (len == 0) {
- befs_error(sb, "Long symlink with illegal length");
- link = ERR_PTR(-EIO);
- } else {
- befs_debug(sb, "Follow long symlink");
+ if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
+ struct super_block *sb = dentry->d_sb;
+ befs_data_stream *data = &befs_ino->i_data.ds;
+ befs_off_t len = data->size;
- link = kmalloc(len, GFP_NOFS);
- if (!link) {
- link = ERR_PTR(-ENOMEM);
- } else if (befs_read_lsymlink(sb, data, link, len) != len) {
- kfree(link);
- befs_error(sb, "Failed to read entire long symlink");
+ if (len == 0) {
+ befs_error(sb, "Long symlink with illegal length");
link = ERR_PTR(-EIO);
} else {
- link[len - 1] = '\0';
+ befs_debug(sb, "Follow long symlink");
+
+ link = kmalloc(len, GFP_NOFS);
+ if (!link) {
+ link = ERR_PTR(-ENOMEM);
+ } else if (befs_read_lsymlink(sb, data, link, len) != len) {
+ kfree(link);
+ befs_error(sb, "Failed to read entire long symlink");
+ link = ERR_PTR(-EIO);
+ } else {
+ link[len - 1] = '\0';
+ }
}
+ } else {
+ link = befs_ino->i_data.symlink;
}
+
nd_set_link(nd, link);
return NULL;
}
-
-static void *
-befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
{
befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
- nd_set_link(nd, befs_ino->i_data.symlink);
- return NULL;
+ if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
+ char *link = nd_get_link(nd);
+ if (!IS_ERR(link))
+ kfree(link);
+ }
}
/*
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index ca0ba15..89dec7f 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -45,6 +45,7 @@ static int load_aout_library(struct file*);
*/
static int aout_core_dump(struct coredump_params *cprm)
{
+ struct file *file = cprm->file;
mm_segment_t fs;
int has_dumped = 0;
void __user *dump_start;
@@ -84,10 +85,10 @@ static int aout_core_dump(struct coredump_params *cprm)
set_fs(KERNEL_DS);
/* struct user */
- if (!dump_emit(cprm, &dump, sizeof(dump)))
+ if (!dump_write(file, &dump, sizeof(dump)))
goto end_coredump;
/* Now dump all of the user data. Include malloced stuff as well */
- if (!dump_skip(cprm, PAGE_SIZE - sizeof(dump)))
+ if (!dump_seek(cprm->file, PAGE_SIZE - sizeof(dump)))
goto end_coredump;
/* now we start writing out the user space info */
set_fs(USER_DS);
@@ -95,14 +96,14 @@ static int aout_core_dump(struct coredump_params *cprm)
if (dump.u_dsize != 0) {
dump_start = START_DATA(dump);
dump_size = dump.u_dsize << PAGE_SHIFT;
- if (!dump_emit(cprm, dump_start, dump_size))
+ if (!dump_write(file, dump_start, dump_size))
goto end_coredump;
}
/* Now prepare to dump the stack area */
if (dump.u_ssize != 0) {
dump_start = START_STACK(dump);
dump_size = dump.u_ssize << PAGE_SHIFT;
- if (!dump_emit(cprm, dump_start, dump_size))
+ if (!dump_write(file, dump_start, dump_size))
goto end_coredump;
}
end_coredump:
@@ -220,7 +221,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
* Requires a mmap handler. This prevents people from using a.out
* as part of an exploit attack against /proc-related vulnerabilities.
*/
- if (!bprm->file->f_op->mmap)
+ if (!bprm->file->f_op || !bprm->file->f_op->mmap)
return -ENOEXEC;
fd_offset = N_TXTOFF(ex);
@@ -373,7 +374,7 @@ static int load_aout_library(struct file *file)
* Requires a mmap handler. This prevents people from using a.out
* as part of an exploit attack against /proc-related vulnerabilities.
*/
- if (!file->f_op->mmap)
+ if (!file->f_op || !file->f_op->mmap)
goto out;
if (N_FLAGS(ex))
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 571a423..4c94a79 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -406,7 +406,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
goto out;
if (!elf_check_arch(interp_elf_ex))
goto out;
- if (!interpreter->f_op->mmap)
+ if (!interpreter->f_op || !interpreter->f_op->mmap)
goto out;
/*
@@ -607,7 +607,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
goto out;
if (!elf_check_arch(&loc->elf_ex))
goto out;
- if (!bprm->file->f_op->mmap)
+ if (!bprm->file->f_op || !bprm->file->f_op->mmap)
goto out;
/* Now read in all of the header information */
@@ -1028,7 +1028,7 @@ static int load_elf_library(struct file *file)
/* First of all, some simple consistency checks */
if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
- !elf_check_arch(&elf_ex) || !file->f_op->mmap)
+ !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
goto out;
/* Now read in all of the header information */
@@ -1225,17 +1225,35 @@ static int notesize(struct memelfnote *en)
return sz;
}
-static int writenote(struct memelfnote *men, struct coredump_params *cprm)
+#define DUMP_WRITE(addr, nr, foffset) \
+ do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
+
+static int alignfile(struct file *file, loff_t *foffset)
+{
+ static const char buf[4] = { 0, };
+ DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
+ return 1;
+}
+
+static int writenote(struct memelfnote *men, struct file *file,
+ loff_t *foffset)
{
struct elf_note en;
en.n_namesz = strlen(men->name) + 1;
en.n_descsz = men->datasz;
en.n_type = men->type;
- return dump_emit(cprm, &en, sizeof(en)) &&
- dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
- dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
+ DUMP_WRITE(&en, sizeof(en), foffset);
+ DUMP_WRITE(men->name, en.n_namesz, foffset);
+ if (!alignfile(file, foffset))
+ return 0;
+ DUMP_WRITE(men->data, men->datasz, foffset);
+ if (!alignfile(file, foffset))
+ return 0;
+
+ return 1;
}
+#undef DUMP_WRITE
static void fill_elf_header(struct elfhdr *elf, int segs,
u16 machine, u32 flags)
@@ -1374,7 +1392,7 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
}
static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
- const siginfo_t *siginfo)
+ siginfo_t *siginfo)
{
mm_segment_t old_fs = get_fs();
set_fs(KERNEL_DS);
@@ -1581,7 +1599,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
static int fill_note_info(struct elfhdr *elf, int phdrs,
struct elf_note_info *info,
- const siginfo_t *siginfo, struct pt_regs *regs)
+ siginfo_t *siginfo, struct pt_regs *regs)
{
struct task_struct *dump_task = current;
const struct user_regset_view *view = task_user_regset_view(dump_task);
@@ -1684,7 +1702,7 @@ static size_t get_note_info_size(struct elf_note_info *info)
* process-wide notes are interleaved after the first thread-specific note.
*/
static int write_note_info(struct elf_note_info *info,
- struct coredump_params *cprm)
+ struct file *file, loff_t *foffset)
{
bool first = 1;
struct elf_thread_core_info *t = info->thread;
@@ -1692,22 +1710,22 @@ static int write_note_info(struct elf_note_info *info,
do {
int i;
- if (!writenote(&t->notes[0], cprm))
+ if (!writenote(&t->notes[0], file, foffset))
return 0;
- if (first && !writenote(&info->psinfo, cprm))
+ if (first && !writenote(&info->psinfo, file, foffset))
return 0;
- if (first && !writenote(&info->signote, cprm))
+ if (first && !writenote(&info->signote, file, foffset))
return 0;
- if (first && !writenote(&info->auxv, cprm))
+ if (first && !writenote(&info->auxv, file, foffset))
return 0;
if (first && info->files.data &&
- !writenote(&info->files, cprm))
+ !writenote(&info->files, file, foffset))
return 0;
for (i = 1; i < info->thread_notes; ++i)
if (t->notes[i].data &&
- !writenote(&t->notes[i], cprm))
+ !writenote(&t->notes[i], file, foffset))
return 0;
first = 0;
@@ -1830,31 +1848,34 @@ static int elf_note_info_init(struct elf_note_info *info)
static int fill_note_info(struct elfhdr *elf, int phdrs,
struct elf_note_info *info,
- const siginfo_t *siginfo, struct pt_regs *regs)
+ siginfo_t *siginfo, struct pt_regs *regs)
{
struct list_head *t;
- struct core_thread *ct;
- struct elf_thread_status *ets;
if (!elf_note_info_init(info))
return 0;
- for (ct = current->mm->core_state->dumper.next;
- ct; ct = ct->next) {
- ets = kzalloc(sizeof(*ets), GFP_KERNEL);
- if (!ets)
- return 0;
+ if (siginfo->si_signo) {
+ struct core_thread *ct;
+ struct elf_thread_status *ets;
- ets->thread = ct->task;
- list_add(&ets->list, &info->thread_list);
- }
+ for (ct = current->mm->core_state->dumper.next;
+ ct; ct = ct->next) {
+ ets = kzalloc(sizeof(*ets), GFP_KERNEL);
+ if (!ets)
+ return 0;
- list_for_each(t, &info->thread_list) {
- int sz;
+ ets->thread = ct->task;
+ list_add(&ets->list, &info->thread_list);
+ }
+
+ list_for_each(t, &info->thread_list) {
+ int sz;
- ets = list_entry(t, struct elf_thread_status, list);
- sz = elf_dump_thread_status(siginfo->si_signo, ets);
- info->thread_status_size += sz;
+ ets = list_entry(t, struct elf_thread_status, list);
+ sz = elf_dump_thread_status(siginfo->si_signo, ets);
+ info->thread_status_size += sz;
+ }
}
/* now collect the dump for the current */
memset(info->prstatus, 0, sizeof(*info->prstatus));
@@ -1914,13 +1935,13 @@ static size_t get_note_info_size(struct elf_note_info *info)
}
static int write_note_info(struct elf_note_info *info,
- struct coredump_params *cprm)
+ struct file *file, loff_t *foffset)
{
int i;
struct list_head *t;
for (i = 0; i < info->numnote; i++)
- if (!writenote(info->notes + i, cprm))
+ if (!writenote(info->notes + i, file, foffset))
return 0;
/* write out the thread status notes section */
@@ -1929,7 +1950,7 @@ static int write_note_info(struct elf_note_info *info,
list_entry(t, struct elf_thread_status, list);
for (i = 0; i < tmp->num_notes; i++)
- if (!writenote(&tmp->notes[i], cprm))
+ if (!writenote(&tmp->notes[i], file, foffset))
return 0;
}
@@ -2025,9 +2046,10 @@ static int elf_core_dump(struct coredump_params *cprm)
int has_dumped = 0;
mm_segment_t fs;
int segs;
+ size_t size = 0;
struct vm_area_struct *vma, *gate_vma;
struct elfhdr *elf = NULL;
- loff_t offset = 0, dataoff;
+ loff_t offset = 0, dataoff, foffset;
struct elf_note_info info = { };
struct elf_phdr *phdr4note = NULL;
struct elf_shdr *shdr4extnum = NULL;
@@ -2083,6 +2105,7 @@ static int elf_core_dump(struct coredump_params *cprm)
offset += sizeof(*elf); /* Elf header */
offset += segs * sizeof(struct elf_phdr); /* Program headers */
+ foffset = offset;
/* Write notes phdr entry */
{
@@ -2113,10 +2136,13 @@ static int elf_core_dump(struct coredump_params *cprm)
offset = dataoff;
- if (!dump_emit(cprm, elf, sizeof(*elf)))
+ size += sizeof(*elf);
+ if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
goto end_coredump;
- if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
+ size += sizeof(*phdr4note);
+ if (size > cprm->limit
+ || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
goto end_coredump;
/* Write program headers for segments dump */
@@ -2138,22 +2164,24 @@ static int elf_core_dump(struct coredump_params *cprm)
phdr.p_flags |= PF_X;
phdr.p_align = ELF_EXEC_PAGESIZE;
- if (!dump_emit(cprm, &phdr, sizeof(phdr)))
+ size += sizeof(phdr);
+ if (size > cprm->limit
+ || !dump_write(cprm->file, &phdr, sizeof(phdr)))
goto end_coredump;
}
- if (!elf_core_write_extra_phdrs(cprm, offset))
+ if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
goto end_coredump;
/* write out the notes section */
- if (!write_note_info(&info, cprm))
+ if (!write_note_info(&info, cprm->file, &foffset))
goto end_coredump;
- if (elf_coredump_extra_notes_write(cprm))
+ if (elf_coredump_extra_notes_write(cprm->file, &foffset))
goto end_coredump;
/* Align to page */
- if (!dump_skip(cprm, dataoff - cprm->written))
+ if (!dump_seek(cprm->file, dataoff - foffset))
goto end_coredump;
for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2170,21 +2198,26 @@ static int elf_core_dump(struct coredump_params *cprm)
page = get_dump_page(addr);
if (page) {
void *kaddr = kmap(page);
- stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
+ stop = ((size += PAGE_SIZE) > cprm->limit) ||
+ !dump_write(cprm->file, kaddr,
+ PAGE_SIZE);
kunmap(page);
page_cache_release(page);
} else
- stop = !dump_skip(cprm, PAGE_SIZE);
+ stop = !dump_seek(cprm->file, PAGE_SIZE);
if (stop)
goto end_coredump;
}
}
- if (!elf_core_write_extra_data(cprm))
+ if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
goto end_coredump;
if (e_phnum == PN_XNUM) {
- if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
+ size += sizeof(*shdr4extnum);
+ if (size > cprm->limit
+ || !dump_write(cprm->file, shdr4extnum,
+ sizeof(*shdr4extnum)))
goto end_coredump;
}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index fe2a643..c166f32 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -111,7 +111,7 @@ static int is_elf_fdpic(struct elfhdr *hdr, struct file *file)
return 0;
if (!elf_check_arch(hdr) || !elf_check_fdpic(hdr))
return 0;
- if (!file->f_op->mmap)
+ if (!file->f_op || !file->f_op->mmap)
return 0;
return 1;
}
@@ -1267,17 +1267,35 @@ static int notesize(struct memelfnote *en)
/* #define DEBUG */
-static int writenote(struct memelfnote *men, struct coredump_params *cprm)
+#define DUMP_WRITE(addr, nr, foffset) \
+ do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
+
+static int alignfile(struct file *file, loff_t *foffset)
+{
+ static const char buf[4] = { 0, };
+ DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
+ return 1;
+}
+
+static int writenote(struct memelfnote *men, struct file *file,
+ loff_t *foffset)
{
struct elf_note en;
en.n_namesz = strlen(men->name) + 1;
en.n_descsz = men->datasz;
en.n_type = men->type;
- return dump_emit(cprm, &en, sizeof(en)) &&
- dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
- dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
+ DUMP_WRITE(&en, sizeof(en), foffset);
+ DUMP_WRITE(men->name, en.n_namesz, foffset);
+ if (!alignfile(file, foffset))
+ return 0;
+ DUMP_WRITE(men->data, men->datasz, foffset);
+ if (!alignfile(file, foffset))
+ return 0;
+
+ return 1;
}
+#undef DUMP_WRITE
static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
{
@@ -1482,40 +1500,66 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
/*
* dump the segments for an MMU process
*/
-static bool elf_fdpic_dump_segments(struct coredump_params *cprm)
+#ifdef CONFIG_MMU
+static int elf_fdpic_dump_segments(struct file *file, size_t *size,
+ unsigned long *limit, unsigned long mm_flags)
{
struct vm_area_struct *vma;
+ int err = 0;
for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
unsigned long addr;
- if (!maydump(vma, cprm->mm_flags))
+ if (!maydump(vma, mm_flags))
continue;
-#ifdef CONFIG_MMU
for (addr = vma->vm_start; addr < vma->vm_end;
addr += PAGE_SIZE) {
- bool res;
struct page *page = get_dump_page(addr);
if (page) {
void *kaddr = kmap(page);
- res = dump_emit(cprm, kaddr, PAGE_SIZE);
+ *size += PAGE_SIZE;
+ if (*size > *limit)
+ err = -EFBIG;
+ else if (!dump_write(file, kaddr, PAGE_SIZE))
+ err = -EIO;
kunmap(page);
page_cache_release(page);
- } else {
- res = dump_skip(cprm, PAGE_SIZE);
- }
- if (!res)
- return false;
+ } else if (!dump_seek(file, PAGE_SIZE))
+ err = -EFBIG;
+ if (err)
+ goto out;
}
-#else
- if (!dump_emit(cprm, (void *) vma->vm_start,
- vma->vm_end - vma->vm_start))
- return false;
+ }
+out:
+ return err;
+}
#endif
+
+/*
+ * dump the segments for a NOMMU process
+ */
+#ifndef CONFIG_MMU
+static int elf_fdpic_dump_segments(struct file *file, size_t *size,
+ unsigned long *limit, unsigned long mm_flags)
+{
+ struct vm_area_struct *vma;
+
+ for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
+ if (!maydump(vma, mm_flags))
+ continue;
+
+ if ((*size += PAGE_SIZE) > *limit)
+ return -EFBIG;
+
+ if (!dump_write(file, (void *) vma->vm_start,
+ vma->vm_end - vma->vm_start))
+ return -EIO;
}
- return true;
+
+ return 0;
}
+#endif
static size_t elf_core_vma_data_size(unsigned long mm_flags)
{
@@ -1541,10 +1585,11 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
int has_dumped = 0;
mm_segment_t fs;
int segs;
+ size_t size = 0;
int i;
struct vm_area_struct *vma;
struct elfhdr *elf = NULL;
- loff_t offset = 0, dataoff;
+ loff_t offset = 0, dataoff, foffset;
int numnote;
struct memelfnote *notes = NULL;
struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
@@ -1561,8 +1606,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
struct elf_shdr *shdr4extnum = NULL;
Elf_Half e_phnum;
elf_addr_t e_shoff;
- struct core_thread *ct;
- struct elf_thread_status *tmp;
/*
* We no longer stop all VM operations.
@@ -1598,23 +1641,28 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
goto cleanup;
#endif
- for (ct = current->mm->core_state->dumper.next;
- ct; ct = ct->next) {
- tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
- if (!tmp)
- goto cleanup;
+ if (cprm->siginfo->si_signo) {
+ struct core_thread *ct;
+ struct elf_thread_status *tmp;
- tmp->thread = ct->task;
- list_add(&tmp->list, &thread_list);
- }
+ for (ct = current->mm->core_state->dumper.next;
+ ct; ct = ct->next) {
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ goto cleanup;
- list_for_each(t, &thread_list) {
- struct elf_thread_status *tmp;
- int sz;
+ tmp->thread = ct->task;
+ list_add(&tmp->list, &thread_list);
+ }
- tmp = list_entry(t, struct elf_thread_status, list);
- sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp);
- thread_status_size += sz;
+ list_for_each(t, &thread_list) {
+ struct elf_thread_status *tmp;
+ int sz;
+
+ tmp = list_entry(t, struct elf_thread_status, list);
+ sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp);
+ thread_status_size += sz;
+ }
}
/* now collect the dump for the current */
@@ -1672,6 +1720,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
offset += sizeof(*elf); /* Elf header */
offset += segs * sizeof(struct elf_phdr); /* Program headers */
+ foffset = offset;
/* Write notes phdr entry */
{
@@ -1706,10 +1755,13 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
offset = dataoff;
- if (!dump_emit(cprm, elf, sizeof(*elf)))
+ size += sizeof(*elf);
+ if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
goto end_coredump;
- if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
+ size += sizeof(*phdr4note);
+ if (size > cprm->limit
+ || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
goto end_coredump;
/* write program headers for segments dump */
@@ -1733,16 +1785,18 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
phdr.p_flags |= PF_X;
phdr.p_align = ELF_EXEC_PAGESIZE;
- if (!dump_emit(cprm, &phdr, sizeof(phdr)))
+ size += sizeof(phdr);
+ if (size > cprm->limit
+ || !dump_write(cprm->file, &phdr, sizeof(phdr)))
goto end_coredump;
}
- if (!elf_core_write_extra_phdrs(cprm, offset))
+ if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
goto end_coredump;
/* write out the notes section */
for (i = 0; i < numnote; i++)
- if (!writenote(notes + i, cprm))
+ if (!writenote(notes + i, cprm->file, &foffset))
goto end_coredump;
/* write out the thread status notes section */
@@ -1751,21 +1805,25 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
list_entry(t, struct elf_thread_status, list);
for (i = 0; i < tmp->num_notes; i++)
- if (!writenote(&tmp->notes[i], cprm))
+ if (!writenote(&tmp->notes[i], cprm->file, &foffset))
goto end_coredump;
}
- if (!dump_skip(cprm, dataoff - cprm->written))
+ if (!dump_seek(cprm->file, dataoff - foffset))
goto end_coredump;
- if (!elf_fdpic_dump_segments(cprm))
+ if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
+ cprm->mm_flags) < 0)
goto end_coredump;
- if (!elf_core_write_extra_data(cprm))
+ if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
goto end_coredump;
if (e_phnum == PN_XNUM) {
- if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
+ size += sizeof(*shdr4extnum);
+ if (size > cprm->limit
+ || !dump_write(cprm->file, shdr4extnum,
+ sizeof(*shdr4extnum)))
goto end_coredump;
}
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index f37b08c..037a3e2 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -38,7 +38,7 @@ static int load_em86(struct linux_binprm *bprm)
/* First of all, some simple consistency checks */
if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) ||
(!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) ||
- !bprm->file->f_op->mmap) {
+ (!bprm->file->f_op || !bprm->file->f_op->mmap)) {
return -ENOEXEC;
}
diff --git a/fs/bio.c b/fs/bio.c
index 33d79a4..ea5035d 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -601,7 +601,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs);
static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
*page, unsigned int len, unsigned int offset,
- unsigned int max_sectors)
+ unsigned short max_sectors)
{
int retried_segments = 0;
struct bio_vec *bvec;
@@ -1805,52 +1805,6 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
EXPORT_SYMBOL(bio_split);
/**
- * bio_trim - trim a bio
- * @bio: bio to trim
- * @offset: number of sectors to trim from the front of @bio
- * @size: size we want to trim @bio to, in sectors
- */
-void bio_trim(struct bio *bio, int offset, int size)
-{
- /* 'bio' is a cloned bio which we need to trim to match
- * the given offset and size.
- * This requires adjusting bi_sector, bi_size, and bi_io_vec
- */
- int i;
- struct bio_vec *bvec;
- int sofar = 0;
-
- size <<= 9;
- if (offset == 0 && size == bio->bi_size)
- return;
-
- clear_bit(BIO_SEG_VALID, &bio->bi_flags);
-
- bio_advance(bio, offset << 9);
-
- bio->bi_size = size;
-
- /* avoid any complications with bi_idx being non-zero*/
- if (bio->bi_idx) {
- memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
- (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
- bio->bi_vcnt -= bio->bi_idx;
- bio->bi_idx = 0;
- }
- /* Make sure vcnt and last bv are not too big */
- bio_for_each_segment(bvec, bio, i) {
- if (sofar + bvec->bv_len > size)
- bvec->bv_len = size - sofar;
- if (bvec->bv_len == 0) {
- bio->bi_vcnt = i;
- break;
- }
- sofar += bvec->bv_len;
- }
-}
-EXPORT_SYMBOL_GPL(bio_trim);
-
-/**
* bio_sector_offset - Find hardware sector offset in bio
* @bio: bio to inspect
* @index: bio_vec index
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index aa976ec..398cbd5 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -9,17 +9,12 @@ config BTRFS_FS
select XOR_BLOCKS
help
- Btrfs is a general purpose copy-on-write filesystem with extents,
- writable snapshotting, support for multiple devices and many more
- features focused on fault tolerance, repair and easy administration.
+ Btrfs is a new filesystem with extents, writable snapshotting,
+ support for multiple devices and many more features.
- The filesystem disk format is no longer unstable, and it's not
- expected to change unless there are strong reasons to do so. If there
- is a format change, file systems with a unchanged format will
- continue to be mountable and usable by newer kernels.
-
- For more information, please see the web pages at
- http://btrfs.wiki.kernel.org.
+ Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET
+ FINALIZED. You should say N here unless you are interested in
+ testing Btrfs with non-critical data.
To compile this file system support as a module, choose M here. The
module will be called btrfs.
@@ -64,8 +59,7 @@ config BTRFS_FS_RUN_SANITY_TESTS
help
This will run some basic sanity tests on the free space cache
code to make sure it is acting as it should. These are mostly
- regression tests and are only really interesting to btrfs
- developers.
+ regression tests and are only really interesting to btrfs devlopers.
If unsure, say N.
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 1a44e42..a91a6a3 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -14,6 +14,4 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
-btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
- tests/extent-buffer-tests.o tests/btrfs-tests.o \
- tests/extent-io-tests.o tests/inode-tests.o
+btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 0890c83..e15d2b0 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -229,7 +229,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
if (ret > 0) {
/* we need an acl */
ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
- } else if (ret < 0) {
+ } else {
cache_no_acl(inode);
}
} else {
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index c1e0b0c..08cc08f 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -262,7 +262,7 @@ static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
struct btrfs_work *work = NULL;
struct list_head *cur = NULL;
- if (!list_empty(prio_head))
+ if(!list_empty(prio_head))
cur = prio_head->next;
smp_mb();
@@ -495,7 +495,6 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
spin_lock_irq(&workers->lock);
if (workers->stopping) {
spin_unlock_irq(&workers->lock);
- ret = -EINVAL;
goto fail_kthread;
}
list_add_tail(&worker->worker_list, &workers->idle_list);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 3775947..0552a59 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -185,9 +185,6 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
{
struct __prelim_ref *ref;
- if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
- return 0;
-
ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
if (!ref)
return -ENOMEM;
@@ -326,7 +323,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
eb = path->nodes[level];
while (!eb) {
- if (WARN_ON(!level)) {
+ if (!level) {
+ WARN_ON(1);
ret = 1;
goto out;
}
@@ -1621,7 +1619,7 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
btrfs_release_path(path);
- item = btrfs_item_nr(slot);
+ item = btrfs_item_nr(eb, slot);
iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index ac0b39d..71f074e 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -19,7 +19,6 @@
#ifndef __BTRFS_I__
#define __BTRFS_I__
-#include <linux/hash.h>
#include "extent_map.h"
#include "extent_io.h"
#include "ordered-data.h"
@@ -180,25 +179,6 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
return container_of(inode, struct btrfs_inode, vfs_inode);
}
-static inline unsigned long btrfs_inode_hash(u64 objectid,
- const struct btrfs_root *root)
-{
- u64 h = objectid ^ (root->objectid * GOLDEN_RATIO_PRIME);
-
-#if BITS_PER_LONG == 32
- h = (h >> 32) ^ (h & 0xffffffff);
-#endif
-
- return (unsigned long)h;
-}
-
-static inline void btrfs_insert_inode_hash(struct inode *inode)
-{
- unsigned long h = btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root);
-
- __insert_inode_hash(inode, h);
-}
-
static inline u64 btrfs_ino(struct inode *inode)
{
u64 ino = BTRFS_I(inode)->location.objectid;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 131d828..1c47be1 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -77,15 +77,6 @@
* the integrity of (super)-block write requests, do not
* enable the config option BTRFS_FS_CHECK_INTEGRITY to
* include and compile the integrity check tool.
- *
- * Expect millions of lines of information in the kernel log with an
- * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the
- * kernel config to at least 26 (which is 64MB). Usually the value is
- * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be
- * changed like this before LOG_BUF_SHIFT can be set to a high value:
- * config LOG_BUF_SHIFT
- * int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
- * range 12 30
*/
#include <linux/sched.h>
@@ -133,7 +124,6 @@
#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400
#define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800
#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000
-#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE 0x00002000
struct btrfsic_dev_state;
struct btrfsic_state;
@@ -333,6 +323,7 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
static int btrfsic_read_block(struct btrfsic_state *state,
struct btrfsic_block_data_ctx *block_ctx);
static void btrfsic_dump_database(struct btrfsic_state *state);
+static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
static int btrfsic_test_for_metadata(struct btrfsic_state *state,
char **datav, unsigned int num_pages);
static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
@@ -1047,7 +1038,7 @@ leaf_item_out_of_bounce_error:
disk_item_offset,
sizeof(struct btrfs_item));
item_offset = btrfs_stack_item_offset(&disk_item);
- item_size = btrfs_stack_item_size(&disk_item);
+ item_size = btrfs_stack_item_offset(&disk_item);
disk_key = &disk_item.key;
type = btrfs_disk_key_type(disk_key);
@@ -1686,6 +1677,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
for (i = 0; i < num_pages;) {
struct bio *bio;
unsigned int j;
+ DECLARE_COMPLETION_ONSTACK(complete);
bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
if (!bio) {
@@ -1696,6 +1688,8 @@ static int btrfsic_read_block(struct btrfsic_state *state,
}
bio->bi_bdev = block_ctx->dev->bdev;
bio->bi_sector = dev_bytenr >> 9;
+ bio->bi_end_io = btrfsic_complete_bio_end_io;
+ bio->bi_private = &complete;
for (j = i; j < num_pages; j++) {
ret = bio_add_page(bio, block_ctx->pagev[j],
@@ -1708,7 +1702,12 @@ static int btrfsic_read_block(struct btrfsic_state *state,
"btrfsic: error, failed to add a single page!\n");
return -1;
}
- if (submit_bio_wait(READ, bio)) {
+ submit_bio(READ, bio);
+
+ /* this will also unplug the queue */
+ wait_for_completion(&complete);
+
+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
printk(KERN_INFO
"btrfsic: read error at logical %llu dev %s!\n",
block_ctx->start, block_ctx->dev->name);
@@ -1731,6 +1730,11 @@ static int btrfsic_read_block(struct btrfsic_state *state,
return block_ctx->len;
}
+static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
+{
+ complete((struct completion *)bio->bi_private);
+}
+
static void btrfsic_dump_database(struct btrfsic_state *state)
{
struct list_head *elem_all;
@@ -1896,9 +1900,7 @@ again:
dev_state,
dev_bytenr);
}
- if (block->logical_bytenr != bytenr &&
- !(!block->is_metadata &&
- block->logical_bytenr == 0))
+ if (block->logical_bytenr != bytenr) {
printk(KERN_INFO
"Written block @%llu (%s/%llu/%d)"
" found in hash table, %c,"
@@ -1908,14 +1910,15 @@ again:
block->mirror_num,
btrfsic_get_block_type(state, block),
block->logical_bytenr);
- else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ block->logical_bytenr = bytenr;
+ } else if (state->print_mask &
+ BTRFSIC_PRINT_MASK_VERBOSE)
printk(KERN_INFO
"Written block @%llu (%s/%llu/%d)"
" found in hash table, %c.\n",
bytenr, dev_state->name, dev_bytenr,
block->mirror_num,
btrfsic_get_block_type(state, block));
- block->logical_bytenr = bytenr;
} else {
if (num_pages * PAGE_CACHE_SIZE <
state->datablock_size) {
@@ -2460,8 +2463,10 @@ static int btrfsic_process_written_superblock(
}
}
- if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)))
+ if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) {
+ WARN_ON(1);
btrfsic_dump_tree(state);
+ }
return 0;
}
@@ -2901,7 +2906,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
btrfsic_release_block_ctx(&block_ctx);
}
- if (WARN_ON(!match)) {
+ if (!match) {
printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
" buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
" phys_bytenr=%llu)!\n",
@@ -2918,6 +2923,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
bytenr, block_ctx.dev->name,
block_ctx.dev_bytenr, mirror_num);
}
+ WARN_ON(1);
}
}
@@ -2994,12 +3000,14 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
return submit_bh(rw, bh);
}
-static void __btrfsic_submit_bio(int rw, struct bio *bio)
+void btrfsic_submit_bio(int rw, struct bio *bio)
{
struct btrfsic_dev_state *dev_state;
- if (!btrfsic_is_initialized)
+ if (!btrfsic_is_initialized) {
+ submit_bio(rw, bio);
return;
+ }
mutex_lock(&btrfsic_mutex);
/* since btrfsic_submit_bio() is also called before
@@ -3009,7 +3017,6 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
(rw & WRITE) && NULL != bio->bi_io_vec) {
unsigned int i;
u64 dev_bytenr;
- u64 cur_bytenr;
int bio_is_patched;
char **mapped_datav;
@@ -3028,7 +3035,6 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
GFP_NOFS);
if (!mapped_datav)
goto leave;
- cur_bytenr = dev_bytenr;
for (i = 0; i < bio->bi_vcnt; i++) {
BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
@@ -3040,13 +3046,16 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
kfree(mapped_datav);
goto leave;
}
- if (dev_state->state->print_mask &
- BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
+ if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
+ BTRFSIC_PRINT_MASK_VERBOSE) ==
+ (dev_state->state->print_mask &
+ (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
+ BTRFSIC_PRINT_MASK_VERBOSE)))
printk(KERN_INFO
- "#%u: bytenr=%llu, len=%u, offset=%u\n",
- i, cur_bytenr, bio->bi_io_vec[i].bv_len,
+ "#%u: page=%p, len=%u, offset=%u\n",
+ i, bio->bi_io_vec[i].bv_page,
+ bio->bi_io_vec[i].bv_len,
bio->bi_io_vec[i].bv_offset);
- cur_bytenr += bio->bi_io_vec[i].bv_len;
}
btrfsic_process_written_block(dev_state, dev_bytenr,
mapped_datav, bio->bi_vcnt,
@@ -3090,20 +3099,10 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
}
leave:
mutex_unlock(&btrfsic_mutex);
-}
-void btrfsic_submit_bio(int rw, struct bio *bio)
-{
- __btrfsic_submit_bio(rw, bio);
submit_bio(rw, bio);
}
-int btrfsic_submit_bio_wait(int rw, struct bio *bio)
-{
- __btrfsic_submit_bio(rw, bio);
- return submit_bio_wait(rw, bio);
-}
-
int btrfsic_mount(struct btrfs_root *root,
struct btrfs_fs_devices *fs_devices,
int including_extent_data, u32 print_mask)
diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h
index 13b8566..8b59175 100644
--- a/fs/btrfs/check-integrity.h
+++ b/fs/btrfs/check-integrity.h
@@ -22,11 +22,9 @@
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
int btrfsic_submit_bh(int rw, struct buffer_head *bh);
void btrfsic_submit_bio(int rw, struct bio *bio);
-int btrfsic_submit_bio_wait(int rw, struct bio *bio);
#else
#define btrfsic_submit_bh submit_bh
#define btrfsic_submit_bio submit_bio
-#define btrfsic_submit_bio_wait submit_bio_wait
#endif
int btrfsic_mount(struct btrfs_root *root,
diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h
new file mode 100644
index 0000000..7c4503e
--- /dev/null
+++ b/fs/btrfs/compat.h
@@ -0,0 +1,7 @@
+#ifndef _COMPAT_H_
+#define _COMPAT_H_
+
+#define btrfs_drop_nlink(inode) drop_nlink(inode)
+#define btrfs_inc_nlink(inode) inc_nlink(inode)
+
+#endif /* _COMPAT_H_ */
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 1499b27..6aad98c 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -32,6 +32,7 @@
#include <linux/writeback.h>
#include <linux/bit_spinlock.h>
#include <linux/slab.h>
+#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -359,7 +360,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
- if (!bio) {
+ if(!bio) {
kfree(cb);
return -ENOMEM;
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 316136b..61b5bcd 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -274,7 +274,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
else
btrfs_set_header_owner(cow, new_root_objectid);
- write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(),
+ write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow),
BTRFS_FSID_SIZE);
WARN_ON(btrfs_header_generation(buf) > trans->transid);
@@ -996,7 +996,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
else
btrfs_set_header_owner(cow, root->root_key.objectid);
- write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(),
+ write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow),
BTRFS_FSID_SIZE);
ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
@@ -1285,10 +1285,11 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
free_extent_buffer(eb_root);
blocksize = btrfs_level_size(root, old_root->level);
old = read_tree_block(root, logical, blocksize, 0);
- if (WARN_ON(!old || !extent_buffer_uptodate(old))) {
+ if (!old || !extent_buffer_uptodate(old)) {
free_extent_buffer(old);
pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
logical);
+ WARN_ON(1);
} else {
eb = btrfs_clone_extent_buffer(old);
free_extent_buffer(old);
@@ -2757,7 +2758,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
int level;
int lowest_unlock = 1;
u8 lowest_level = 0;
- int prev_cmp = -1;
+ int prev_cmp;
lowest_level = p->lowest_level;
WARN_ON(p->nodes[0] != NULL);
@@ -2768,6 +2769,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
}
again:
+ prev_cmp = -1;
b = get_old_root(root, time_seq);
level = btrfs_header_level(b);
p->locks[level] = BTRFS_READ_LOCK;
@@ -2785,11 +2787,6 @@ again:
*/
btrfs_unlock_up_safe(p, level + 1);
- /*
- * Since we can unwind eb's we want to do a real search every
- * time.
- */
- prev_cmp = -1;
ret = key_search(b, key, level, &prev_cmp, &slot);
if (level != 0) {
@@ -3151,7 +3148,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(c, root->root_key.objectid);
- write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(),
+ write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(c),
BTRFS_FSID_SIZE);
write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
@@ -3290,7 +3287,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(split, root->root_key.objectid);
write_extent_buffer(split, root->fs_info->fsid,
- btrfs_header_fsid(), BTRFS_FSID_SIZE);
+ btrfs_header_fsid(split), BTRFS_FSID_SIZE);
write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
btrfs_header_chunk_tree_uuid(split),
BTRFS_UUID_SIZE);
@@ -3340,8 +3337,8 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr)
if (!nr)
return 0;
btrfs_init_map_token(&token);
- start_item = btrfs_item_nr(start);
- end_item = btrfs_item_nr(end);
+ start_item = btrfs_item_nr(l, start);
+ end_item = btrfs_item_nr(l, end);
data_len = btrfs_token_item_offset(l, start_item, &token) +
btrfs_token_item_size(l, start_item, &token);
data_len = data_len - btrfs_token_item_offset(l, end_item, &token);
@@ -3409,7 +3406,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
slot = path->slots[1];
i = left_nritems - 1;
while (i >= nr) {
- item = btrfs_item_nr(i);
+ item = btrfs_item_nr(left, i);
if (!empty && push_items > 0) {
if (path->slots[0] > i)
@@ -3473,7 +3470,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(right, right_nritems);
push_space = BTRFS_LEAF_DATA_SIZE(root);
for (i = 0; i < right_nritems; i++) {
- item = btrfs_item_nr(i);
+ item = btrfs_item_nr(right, i);
push_space -= btrfs_token_item_size(right, item, &token);
btrfs_set_token_item_offset(right, item, push_space, &token);
}
@@ -3615,7 +3612,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
nr = min(right_nritems - 1, max_slot);
for (i = 0; i < nr; i++) {
- item = btrfs_item_nr(i);
+ item = btrfs_item_nr(right, i);
if (!empty && push_items > 0) {
if (path->slots[0] < i)
@@ -3642,7 +3639,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
ret = 1;
goto out;
}
- WARN_ON(!empty && push_items == btrfs_header_nritems(right));
+ if (!empty && push_items == btrfs_header_nritems(right))
+ WARN_ON(1);
/* push data from right to left */
copy_extent_buffer(left, right,
@@ -3665,7 +3663,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
u32 ioff;
- item = btrfs_item_nr(i);
+ item = btrfs_item_nr(left, i);
ioff = btrfs_token_item_offset(left, item, &token);
btrfs_set_token_item_offset(left, item,
@@ -3696,7 +3694,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(right, right_nritems);
push_space = BTRFS_LEAF_DATA_SIZE(root);
for (i = 0; i < right_nritems; i++) {
- item = btrfs_item_nr(i);
+ item = btrfs_item_nr(right, i);
push_space = push_space - btrfs_token_item_size(right,
item, &token);
@@ -3837,7 +3835,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
btrfs_item_end_nr(l, mid);
for (i = 0; i < nritems; i++) {
- struct btrfs_item *item = btrfs_item_nr(i);
+ struct btrfs_item *item = btrfs_item_nr(right, i);
u32 ioff;
ioff = btrfs_token_item_offset(right, item, &token);
@@ -4018,7 +4016,7 @@ again:
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
if (data_size && !tried_avoid_double)
goto push_for_double;
- split = 2;
+ split = 2 ;
}
}
}
@@ -4044,7 +4042,7 @@ again:
btrfs_set_header_owner(right, root->root_key.objectid);
btrfs_set_header_level(right, 0);
write_extent_buffer(right, root->fs_info->fsid,
- btrfs_header_fsid(), BTRFS_FSID_SIZE);
+ btrfs_header_fsid(right), BTRFS_FSID_SIZE);
write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
btrfs_header_chunk_tree_uuid(right),
@@ -4179,7 +4177,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans,
btrfs_set_path_blocking(path);
- item = btrfs_item_nr(path->slots[0]);
+ item = btrfs_item_nr(leaf, path->slots[0]);
orig_offset = btrfs_item_offset(leaf, item);
item_size = btrfs_item_size(leaf, item);
@@ -4202,7 +4200,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans,
btrfs_cpu_key_to_disk(&disk_key, new_key);
btrfs_set_item_key(leaf, &disk_key, slot);
- new_item = btrfs_item_nr(slot);
+ new_item = btrfs_item_nr(leaf, slot);
btrfs_set_item_offset(leaf, new_item, orig_offset);
btrfs_set_item_size(leaf, new_item, item_size - split_offset);
@@ -4341,7 +4339,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
/* first correct the data pointers */
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(i);
+ item = btrfs_item_nr(leaf, i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
@@ -4389,7 +4387,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
fixup_low_keys(root, path, &disk_key, 1);
}
- item = btrfs_item_nr(slot);
+ item = btrfs_item_nr(leaf, slot);
btrfs_set_item_size(leaf, item, new_size);
btrfs_mark_buffer_dirty(leaf);
@@ -4443,7 +4441,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
/* first correct the data pointers */
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(i);
+ item = btrfs_item_nr(leaf, i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
@@ -4457,7 +4455,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
data_end = old_data;
old_size = btrfs_item_size_nr(leaf, slot);
- item = btrfs_item_nr(slot);
+ item = btrfs_item_nr(leaf, slot);
btrfs_set_item_size(leaf, item, old_size + data_size);
btrfs_mark_buffer_dirty(leaf);
@@ -4516,7 +4514,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr( i);
+ item = btrfs_item_nr(leaf, i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
ioff - total_data, &token);
@@ -4537,7 +4535,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
for (i = 0; i < nr; i++) {
btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
btrfs_set_item_key(leaf, &disk_key, slot + i);
- item = btrfs_item_nr(slot + i);
+ item = btrfs_item_nr(leaf, slot + i);
btrfs_set_token_item_offset(leaf, item,
data_end - data_size[i], &token);
data_end -= data_size[i];
@@ -4732,7 +4730,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
for (i = slot + nr; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(i);
+ item = btrfs_item_nr(leaf, i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
ioff + dsize, &token);
@@ -4825,18 +4823,14 @@ static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
- if (key.offset > 0) {
+ if (key.offset > 0)
key.offset--;
- } else if (key.type > 0) {
+ else if (key.type > 0)
key.type--;
- key.offset = (u64)-1;
- } else if (key.objectid > 0) {
+ else if (key.objectid > 0)
key.objectid--;
- key.type = (u8)-1;
- key.offset = (u64)-1;
- } else {
+ else
return 1;
- }
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -4872,6 +4866,7 @@ static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
* was nothing in the tree that matched the search criteria.
*/
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
+ struct btrfs_key *max_key,
struct btrfs_path *path,
u64 min_trans)
{
@@ -4916,8 +4911,10 @@ again:
* If it is too old, old, skip to the next one.
*/
while (slot < nritems) {
+ u64 blockptr;
u64 gen;
+ blockptr = btrfs_node_blockptr(cur, slot);
gen = btrfs_node_ptr_generation(cur, slot);
if (gen < min_trans) {
slot++;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 54ab861..0506f40 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -47,12 +47,6 @@ extern struct kmem_cache *btrfs_path_cachep;
extern struct kmem_cache *btrfs_free_space_cachep;
struct btrfs_ordered_sum;
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-#define STATIC noinline
-#else
-#define STATIC static noinline
-#endif
-
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
#define BTRFS_MAX_MIRRORS 3
@@ -1586,6 +1580,7 @@ struct btrfs_fs_info {
atomic_t scrubs_paused;
atomic_t scrub_cancel_req;
wait_queue_head_t scrub_pause_wait;
+ struct rw_semaphore scrub_super_lock;
int scrub_workers_refcnt;
struct btrfs_workers scrub_workers;
struct btrfs_workers scrub_wr_completion_workers;
@@ -1729,9 +1724,7 @@ struct btrfs_root {
int ref_cows;
int track_dirty;
int in_radix;
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
- int dummy_root;
-#endif
+
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
struct btrfs_key defrag_max;
@@ -2468,7 +2461,8 @@ static inline unsigned long btrfs_item_nr_offset(int nr)
sizeof(struct btrfs_item) * nr;
}
-static inline struct btrfs_item *btrfs_item_nr(int nr)
+static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb,
+ int nr)
{
return (struct btrfs_item *)btrfs_item_nr_offset(nr);
}
@@ -2481,30 +2475,30 @@ static inline u32 btrfs_item_end(struct extent_buffer *eb,
static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
{
- return btrfs_item_end(eb, btrfs_item_nr(nr));
+ return btrfs_item_end(eb, btrfs_item_nr(eb, nr));
}
static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr)
{
- return btrfs_item_offset(eb, btrfs_item_nr(nr));
+ return btrfs_item_offset(eb, btrfs_item_nr(eb, nr));
}
static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr)
{
- return btrfs_item_size(eb, btrfs_item_nr(nr));
+ return btrfs_item_size(eb, btrfs_item_nr(eb, nr));
}
static inline void btrfs_item_key(struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
- struct btrfs_item *item = btrfs_item_nr(nr);
+ struct btrfs_item *item = btrfs_item_nr(eb, nr);
read_eb_member(eb, item, struct btrfs_item, key, disk_key);
}
static inline void btrfs_set_item_key(struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
- struct btrfs_item *item = btrfs_item_nr(nr);
+ struct btrfs_item *item = btrfs_item_nr(eb, nr);
write_eb_member(eb, item, struct btrfs_item, key, disk_key);
}
@@ -2672,7 +2666,7 @@ static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
btrfs_set_header_flags(eb, flags);
}
-static inline unsigned long btrfs_header_fsid(void)
+static inline unsigned long btrfs_header_fsid(struct extent_buffer *eb)
{
return offsetof(struct btrfs_header, fsid);
}
@@ -3111,6 +3105,11 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level)
((unsigned long)(btrfs_leaf_data(leaf) + \
btrfs_item_offset_nr(leaf, slot)))
+static inline struct dentry *fdentry(struct file *file)
+{
+ return file->f_path.dentry;
+}
+
static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
{
return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
@@ -3309,6 +3308,7 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *key, int lowest_level,
u64 min_trans);
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
+ struct btrfs_key *max_key,
struct btrfs_path *path,
u64 min_trans);
enum btrfs_compare_tree_result {
@@ -3613,6 +3613,9 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum *sums);
int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u64 file_start, int contig);
+int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ u64 isize);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
/* inode.c */
@@ -3672,7 +3675,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u32 min_type);
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
-int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput);
+int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info,
+ int delay_iput);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
@@ -3741,6 +3745,9 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
int skip_pinned);
+int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace,
+ u64 start, u64 end, int skip_pinned,
+ int modified);
extern const struct file_operations btrfs_file_operations;
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
@@ -3937,7 +3944,9 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
u64 end, struct btrfs_scrub_progress *progress,
int readonly, int is_dev_replace);
void btrfs_scrub_pause(struct btrfs_root *root);
+void btrfs_scrub_pause_super(struct btrfs_root *root);
void btrfs_scrub_continue(struct btrfs_root *root);
+void btrfs_scrub_continue_super(struct btrfs_root *root);
int btrfs_scrub_cancel(struct btrfs_fs_info *info);
int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
struct btrfs_device *dev);
@@ -4019,9 +4028,5 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
return signal_pending(current);
}
-/* Sanity test specific functions */
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-void btrfs_test_destroy_inode(struct inode *inode);
-#endif
#endif
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 8d292fb..cbd9523 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -108,8 +108,8 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
return node;
}
btrfs_inode->delayed_node = node;
- /* can be accessed and cached in the inode */
- atomic_add(2, &node->refs);
+ atomic_inc(&node->refs); /* can be accessed */
+ atomic_inc(&node->refs); /* cached in the inode */
spin_unlock(&root->inode_lock);
return node;
}
@@ -138,8 +138,8 @@ again:
return ERR_PTR(-ENOMEM);
btrfs_init_delayed_node(node, root, ino);
- /* cached in the btrfs inode and can be accessed */
- atomic_add(2, &node->refs);
+ atomic_inc(&node->refs); /* cached in the btrfs inode */
+ atomic_inc(&node->refs); /* can be accessed */
ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
if (ret) {
@@ -649,13 +649,14 @@ static int btrfs_delayed_inode_reserve_metadata(
goto out;
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
- if (!WARN_ON(ret))
+ if (!ret)
goto out;
/*
* Ok this is a problem, let's just steal from the global rsv
* since this really shouldn't happen that often.
*/
+ WARN_ON(1);
ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
dst_rsv, num_bytes);
goto out;
@@ -770,13 +771,13 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
*/
btrfs_set_path_blocking(path);
- keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
+ keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS);
if (!keys) {
ret = -ENOMEM;
goto out;
}
- data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
+ data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS);
if (!data_size) {
ret = -ENOMEM;
goto error;
@@ -1173,10 +1174,8 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
mutex_unlock(&delayed_node->mutex);
path = btrfs_alloc_path();
- if (!path) {
- btrfs_release_delayed_node(delayed_node);
+ if (!path)
return -ENOMEM;
- }
path->leave_spinning = 1;
block_rsv = trans->block_rsv;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 2cfc3df..9efb94e 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -26,6 +26,7 @@
#include <linux/kthread.h>
#include <linux/math64.h>
#include <asm/div64.h>
+#include "compat.h"
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
@@ -37,6 +38,7 @@
#include "rcu-string.h"
#include "dev-replace.h"
+static u64 btrfs_get_seconds_since_1970(void);
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
int scrub_ret);
static void btrfs_dev_replace_update_device_in_mapping_tree(
@@ -294,6 +296,13 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info)
dev_replace->cursor_left_last_write_of_item;
}
+static u64 btrfs_get_seconds_since_1970(void)
+{
+ struct timespec t = CURRENT_TIME_SEC;
+
+ return t.tv_sec;
+}
+
int btrfs_dev_replace_start(struct btrfs_root *root,
struct btrfs_ioctl_dev_replace_args *args)
{
@@ -366,7 +375,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
dev_replace->tgtdev = tgt_device;
printk_in_rcu(KERN_INFO
- "btrfs: dev_replace from %s (devid %llu) to %s started\n",
+ "btrfs: dev_replace from %s (devid %llu) to %s) started\n",
src_device->missing ? "<missing disk>" :
rcu_str_deref(src_device->name),
src_device->devid,
@@ -381,7 +390,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
* go to the tgtdev as well (refer to btrfs_map_block()).
*/
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
- dev_replace->time_started = get_seconds();
+ dev_replace->time_started = btrfs_get_seconds_since_1970();
dev_replace->cursor_left = 0;
dev_replace->committed_cursor_left = 0;
dev_replace->cursor_left_last_write_of_item = 0;
@@ -391,7 +400,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
btrfs_dev_replace_unlock(dev_replace);
- btrfs_wait_ordered_roots(root->fs_info, -1);
+ btrfs_wait_all_ordered_extents(root->fs_info);
/* force writing the updated state information to disk */
trans = btrfs_start_transaction(root, 0);
@@ -461,12 +470,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
* flush all outstanding I/O and inode extent mappings before the
* copy operation is declared as being finished
*/
- ret = btrfs_start_delalloc_roots(root->fs_info, 0);
+ ret = btrfs_start_all_delalloc_inodes(root->fs_info, 0);
if (ret) {
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return ret;
}
- btrfs_wait_ordered_roots(root->fs_info, -1);
+ btrfs_wait_all_ordered_extents(root->fs_info);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
@@ -484,7 +493,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
: BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
dev_replace->tgtdev = NULL;
dev_replace->srcdev = NULL;
- dev_replace->time_stopped = get_seconds();
+ dev_replace->time_stopped = btrfs_get_seconds_since_1970();
dev_replace->item_needs_writeback = 1;
if (scrub_ret) {
@@ -641,9 +650,6 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
u64 result;
int ret;
- if (fs_info->sb->s_flags & MS_RDONLY)
- return -EROFS;
-
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
btrfs_dev_replace_lock(dev_replace);
switch (dev_replace->replace_state) {
@@ -662,7 +668,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
break;
}
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
- dev_replace->time_stopped = get_seconds();
+ dev_replace->time_stopped = btrfs_get_seconds_since_1970();
dev_replace->item_needs_writeback = 1;
btrfs_dev_replace_unlock(dev_replace);
btrfs_scrub_cancel(fs_info);
@@ -697,7 +703,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
dev_replace->replace_state =
BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
- dev_replace->time_stopped = get_seconds();
+ dev_replace->time_stopped = btrfs_get_seconds_since_1970();
dev_replace->item_needs_writeback = 1;
pr_info("btrfs: suspending dev_replace for unmount\n");
break;
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index c031ea3..79e594e 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -58,7 +58,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
return ERR_PTR(ret);
WARN_ON(ret > 0);
leaf = path->nodes[0];
- item = btrfs_item_nr(path->slots[0]);
+ item = btrfs_item_nr(leaf, path->slots[0]);
ptr = btrfs_item_ptr(leaf, path->slots[0], char);
BUG_ON(data_size > btrfs_item_size(leaf, item));
ptr += btrfs_item_size(leaf, item) - data_size;
@@ -474,10 +474,8 @@ int verify_dir_item(struct btrfs_root *root,
}
/* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
- if ((btrfs_dir_data_len(leaf, dir_item) +
- btrfs_dir_name_len(leaf, dir_item)) > BTRFS_MAX_XATTR_SIZE(root)) {
- printk(KERN_CRIT "btrfs: invalid dir item name + data len: %u + %u\n",
- (unsigned)btrfs_dir_name_len(leaf, dir_item),
+ if (btrfs_dir_data_len(leaf, dir_item) > BTRFS_MAX_XATTR_SIZE(root)) {
+ printk(KERN_CRIT "btrfs: invalid dir item data len: %u\n",
(unsigned)btrfs_dir_data_len(leaf, dir_item));
return 1;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8072cfa..62176ad 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -33,6 +33,7 @@
#include <linux/uuid.h>
#include <linux/semaphore.h>
#include <asm/unaligned.h>
+#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -63,6 +64,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_root *root);
+static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t);
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
static int btrfs_destroy_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages,
@@ -475,8 +477,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
if (page != eb->pages[0])
return 0;
found_start = btrfs_header_bytenr(eb);
- if (WARN_ON(found_start != start || !PageUptodate(page)))
+ if (found_start != start) {
+ WARN_ON(1);
return 0;
+ }
+ if (!PageUptodate(page)) {
+ WARN_ON(1);
+ return 0;
+ }
csum_tree_block(root, eb, 0);
return 0;
}
@@ -488,7 +496,7 @@ static int check_tree_block_fsid(struct btrfs_root *root,
u8 fsid[BTRFS_UUID_SIZE];
int ret = 1;
- read_extent_buffer(eb, fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE);
+ read_extent_buffer(eb, fsid, btrfs_header_fsid(eb), BTRFS_FSID_SIZE);
while (fs_devices) {
if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) {
ret = 0;
@@ -1097,7 +1105,8 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
{
struct inode *btree_inode = root->fs_info->btree_inode;
struct extent_buffer *eb;
- eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr);
+ eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
+ bytenr, blocksize);
return eb;
}
@@ -1220,18 +1229,14 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
atomic_set(&root->refs, 1);
root->log_transid = 0;
root->last_log_commit = 0;
- if (fs_info)
- extent_io_tree_init(&root->dirty_log_pages,
- fs_info->btree_inode->i_mapping);
+ extent_io_tree_init(&root->dirty_log_pages,
+ fs_info->btree_inode->i_mapping);
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
memset(&root->root_kobj, 0, sizeof(root->root_kobj));
- if (fs_info)
- root->defrag_trans_start = fs_info->generation;
- else
- root->defrag_trans_start = 0;
+ root->defrag_trans_start = fs_info->generation;
init_completion(&root->kobj_unregister);
root->defrag_running = 0;
root->root_key.objectid = objectid;
@@ -1248,22 +1253,6 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
return root;
}
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-/* Should only be used by the testing infrastructure */
-struct btrfs_root *btrfs_alloc_dummy_root(void)
-{
- struct btrfs_root *root;
-
- root = btrfs_alloc_root(NULL);
- if (!root)
- return ERR_PTR(-ENOMEM);
- __setup_root(4096, 4096, 4096, 4096, root, NULL, 1);
- root->dummy_root = 1;
-
- return root;
-}
-#endif
-
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
u64 objectid)
@@ -1303,7 +1292,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
btrfs_set_header_owner(leaf, objectid);
root->node = leaf;
- write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(),
+ write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(leaf),
BTRFS_FSID_SIZE);
write_extent_buffer(leaf, fs_info->chunk_tree_uuid,
btrfs_header_chunk_tree_uuid(leaf),
@@ -1390,7 +1379,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
root->node = leaf;
write_extent_buffer(root->node, root->fs_info->fsid,
- btrfs_header_fsid(), BTRFS_FSID_SIZE);
+ btrfs_header_fsid(root->node), BTRFS_FSID_SIZE);
btrfs_mark_buffer_dirty(root->node);
btrfs_tree_unlock(root->node);
return root;
@@ -1791,9 +1780,6 @@ sleep:
wake_up_process(root->fs_info->cleaner_kthread);
mutex_unlock(&root->fs_info->transaction_kthread_mutex);
- if (unlikely(test_bit(BTRFS_FS_STATE_ERROR,
- &root->fs_info->fs_state)))
- btrfs_cleanup_transaction(root);
if (!try_to_freeze()) {
set_current_state(TASK_INTERRUPTIBLE);
if (!kthread_should_stop() &&
@@ -2027,28 +2013,50 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
}
-static void free_root_extent_buffers(struct btrfs_root *root)
-{
- if (root) {
- free_extent_buffer(root->node);
- free_extent_buffer(root->commit_root);
- root->node = NULL;
- root->commit_root = NULL;
- }
-}
-
/* helper to cleanup tree roots */
static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
{
- free_root_extent_buffers(info->tree_root);
-
- free_root_extent_buffers(info->dev_root);
- free_root_extent_buffers(info->extent_root);
- free_root_extent_buffers(info->csum_root);
- free_root_extent_buffers(info->quota_root);
- free_root_extent_buffers(info->uuid_root);
- if (chunk_root)
- free_root_extent_buffers(info->chunk_root);
+ free_extent_buffer(info->tree_root->node);
+ free_extent_buffer(info->tree_root->commit_root);
+ info->tree_root->node = NULL;
+ info->tree_root->commit_root = NULL;
+
+ if (info->dev_root) {
+ free_extent_buffer(info->dev_root->node);
+ free_extent_buffer(info->dev_root->commit_root);
+ info->dev_root->node = NULL;
+ info->dev_root->commit_root = NULL;
+ }
+ if (info->extent_root) {
+ free_extent_buffer(info->extent_root->node);
+ free_extent_buffer(info->extent_root->commit_root);
+ info->extent_root->node = NULL;
+ info->extent_root->commit_root = NULL;
+ }
+ if (info->csum_root) {
+ free_extent_buffer(info->csum_root->node);
+ free_extent_buffer(info->csum_root->commit_root);
+ info->csum_root->node = NULL;
+ info->csum_root->commit_root = NULL;
+ }
+ if (info->quota_root) {
+ free_extent_buffer(info->quota_root->node);
+ free_extent_buffer(info->quota_root->commit_root);
+ info->quota_root->node = NULL;
+ info->quota_root->commit_root = NULL;
+ }
+ if (info->uuid_root) {
+ free_extent_buffer(info->uuid_root->node);
+ free_extent_buffer(info->uuid_root->commit_root);
+ info->uuid_root->node = NULL;
+ info->uuid_root->commit_root = NULL;
+ }
+ if (chunk_root) {
+ free_extent_buffer(info->chunk_root->node);
+ free_extent_buffer(info->chunk_root->commit_root);
+ info->chunk_root->node = NULL;
+ info->chunk_root->commit_root = NULL;
+ }
}
static void del_fs_roots(struct btrfs_fs_info *fs_info)
@@ -2222,6 +2230,7 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->scrubs_paused, 0);
atomic_set(&fs_info->scrub_cancel_req, 0);
init_waitqueue_head(&fs_info->scrub_pause_wait);
+ init_rwsem(&fs_info->scrub_super_lock);
fs_info->scrub_workers_refcnt = 0;
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
fs_info->check_integrity_print_mask = 0;
@@ -2263,7 +2272,7 @@ int open_ctree(struct super_block *sb,
sizeof(struct btrfs_key));
set_bit(BTRFS_INODE_DUMMY,
&BTRFS_I(fs_info->btree_inode)->runtime_flags);
- btrfs_insert_inode_hash(fs_info->btree_inode);
+ insert_inode_hash(fs_info->btree_inode);
spin_lock_init(&fs_info->block_group_cache_lock);
fs_info->block_group_cache_tree = RB_ROOT;
@@ -2661,7 +2670,6 @@ retry_root_backup:
btrfs_set_root_node(&tree_root->root_item, tree_root->node);
tree_root->commit_root = btrfs_root_node(tree_root);
- btrfs_set_root_refs(&tree_root->root_item, 1);
location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
location.type = BTRFS_ROOT_ITEM_KEY;
@@ -3440,7 +3448,10 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int max_mirrors)
{
- return write_all_supers(root, max_mirrors);
+ int ret;
+
+ ret = write_all_supers(root, max_mirrors);
+ return ret;
}
/* Drop a fs root from the radix tree and free it. */
@@ -3517,6 +3528,7 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
int btrfs_commit_super(struct btrfs_root *root)
{
struct btrfs_trans_handle *trans;
+ int ret;
mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(root);
@@ -3530,7 +3542,25 @@ int btrfs_commit_super(struct btrfs_root *root)
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
- return btrfs_commit_transaction(trans, root);
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret)
+ return ret;
+ /* run commit again to drop the original snapshot */
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret)
+ return ret;
+ ret = btrfs_write_and_wait_transaction(NULL, root);
+ if (ret) {
+ btrfs_error(root->fs_info, ret,
+ "Failed to sync btree inode to disk.");
+ return ret;
+ }
+
+ ret = write_ctree_super(NULL, root, 0);
+ return ret;
}
int close_ctree(struct btrfs_root *root)
@@ -3584,12 +3614,12 @@ int close_ctree(struct btrfs_root *root)
percpu_counter_sum(&fs_info->delalloc_bytes));
}
- del_fs_roots(fs_info);
-
btrfs_free_block_groups(fs_info);
btrfs_stop_all_workers(fs_info);
+ del_fs_roots(fs_info);
+
free_root_pointers(fs_info, 1);
iput(fs_info->btree_inode);
@@ -3639,20 +3669,10 @@ int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
{
- struct btrfs_root *root;
+ struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
u64 transid = btrfs_header_generation(buf);
int was_dirty;
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
- /*
- * This is a fast path so only do this check if we have sanity tests
- * enabled. Normal people shouldn't be marking dummy buffers as dirty
- * outside of the sanity tests.
- */
- if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &buf->bflags)))
- return;
-#endif
- root = BTRFS_I(buf->pages[0]->mapping->host)->root;
btrfs_assert_tree_locked(buf);
if (transid != root->fs_info->generation)
WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "
@@ -3782,8 +3802,7 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
while (!list_empty(&splice)) {
root = list_first_entry(&splice, struct btrfs_root,
ordered_root);
- list_move_tail(&root->ordered_root,
- &fs_info->ordered_roots);
+ list_del_init(&root->ordered_root);
btrfs_destroy_ordered_extents(root);
@@ -3861,6 +3880,24 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
return ret;
}
+static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t)
+{
+ struct btrfs_pending_snapshot *snapshot;
+ struct list_head splice;
+
+ INIT_LIST_HEAD(&splice);
+
+ list_splice_init(&t->pending_snapshots, &splice);
+
+ while (!list_empty(&splice)) {
+ snapshot = list_entry(splice.next,
+ struct btrfs_pending_snapshot,
+ list);
+ snapshot->error = -ECANCELED;
+ list_del_init(&snapshot->list);
+ }
+}
+
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
{
struct btrfs_inode *btrfs_inode;
@@ -3990,13 +4027,15 @@ again:
void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
struct btrfs_root *root)
{
- btrfs_destroy_ordered_operations(cur_trans, root);
-
btrfs_destroy_delayed_refs(cur_trans, root);
+ btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
+ cur_trans->dirty_pages.dirty_bytes);
cur_trans->state = TRANS_STATE_COMMIT_START;
wake_up(&root->fs_info->transaction_blocked_wait);
+ btrfs_evict_pending_snapshots(cur_trans);
+
cur_trans->state = TRANS_STATE_UNBLOCKED;
wake_up(&root->fs_info->transaction_wait);
@@ -4020,51 +4059,63 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
static int btrfs_cleanup_transaction(struct btrfs_root *root)
{
struct btrfs_transaction *t;
+ LIST_HEAD(list);
mutex_lock(&root->fs_info->transaction_kthread_mutex);
spin_lock(&root->fs_info->trans_lock);
- while (!list_empty(&root->fs_info->trans_list)) {
- t = list_first_entry(&root->fs_info->trans_list,
- struct btrfs_transaction, list);
- if (t->state >= TRANS_STATE_COMMIT_START) {
- atomic_inc(&t->use_count);
- spin_unlock(&root->fs_info->trans_lock);
- btrfs_wait_for_commit(root, t->transid);
- btrfs_put_transaction(t);
- spin_lock(&root->fs_info->trans_lock);
- continue;
- }
- if (t == root->fs_info->running_transaction) {
- t->state = TRANS_STATE_COMMIT_DOING;
- spin_unlock(&root->fs_info->trans_lock);
- /*
- * We wait for 0 num_writers since we don't hold a trans
- * handle open currently for this transaction.
- */
- wait_event(t->writer_wait,
- atomic_read(&t->num_writers) == 0);
- } else {
- spin_unlock(&root->fs_info->trans_lock);
- }
- btrfs_cleanup_one_transaction(t, root);
+ list_splice_init(&root->fs_info->trans_list, &list);
+ root->fs_info->running_transaction = NULL;
+ spin_unlock(&root->fs_info->trans_lock);
- spin_lock(&root->fs_info->trans_lock);
- if (t == root->fs_info->running_transaction)
- root->fs_info->running_transaction = NULL;
- list_del_init(&t->list);
- spin_unlock(&root->fs_info->trans_lock);
+ while (!list_empty(&list)) {
+ t = list_entry(list.next, struct btrfs_transaction, list);
- btrfs_put_transaction(t);
- trace_btrfs_transaction_commit(root);
- spin_lock(&root->fs_info->trans_lock);
+ btrfs_destroy_ordered_operations(t, root);
+
+ btrfs_destroy_all_ordered_extents(root->fs_info);
+
+ btrfs_destroy_delayed_refs(t, root);
+
+ /*
+ * FIXME: cleanup wait for commit
+ * We needn't acquire the lock here, because we are during
+ * the umount, there is no other task which will change it.
+ */
+ t->state = TRANS_STATE_COMMIT_START;
+ smp_mb();
+ if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
+ wake_up(&root->fs_info->transaction_blocked_wait);
+
+ btrfs_evict_pending_snapshots(t);
+
+ t->state = TRANS_STATE_UNBLOCKED;
+ smp_mb();
+ if (waitqueue_active(&root->fs_info->transaction_wait))
+ wake_up(&root->fs_info->transaction_wait);
+
+ btrfs_destroy_delayed_inodes(root);
+ btrfs_assert_delayed_root_empty(root);
+
+ btrfs_destroy_all_delalloc_inodes(root->fs_info);
+
+ btrfs_destroy_marked_extents(root, &t->dirty_pages,
+ EXTENT_DIRTY);
+
+ btrfs_destroy_pinned_extent(root,
+ root->fs_info->pinned_extents);
+
+ t->state = TRANS_STATE_COMPLETED;
+ smp_mb();
+ if (waitqueue_active(&t->commit_wait))
+ wake_up(&t->commit_wait);
+
+ atomic_set(&t->use_count, 0);
+ list_del_init(&t->list);
+ memset(t, 0, sizeof(*t));
+ kmem_cache_free(btrfs_transaction_cachep, t);
}
- spin_unlock(&root->fs_info->trans_lock);
- btrfs_destroy_all_ordered_extents(root->fs_info);
- btrfs_destroy_delayed_inodes(root);
- btrfs_assert_delayed_root_empty(root);
- btrfs_destroy_pinned_extent(root, root->fs_info->pinned_extents);
- btrfs_destroy_all_delalloc_inodes(root->fs_info);
+
mutex_unlock(&root->fs_info->transaction_kthread_mutex);
return 0;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 53059df..5ce2a7d 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -86,10 +86,6 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
void btrfs_free_fs_root(struct btrfs_root *root);
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-struct btrfs_root *btrfs_alloc_dummy_root(void);
-#endif
-
/*
* This function is used to grab the root, and avoid it is freed when we
* access it. But it doesn't ensure that the tree is not dropped.
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 41422a3..4b86916 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -5,6 +5,7 @@
#include "btrfs_inode.h"
#include "print-tree.h"
#include "export.h"
+#include "compat.h"
#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, \
parent_objectid) / 4)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 45d98d0..d58bef1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -25,6 +25,7 @@
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
+#include "compat.h"
#include "hash.h"
#include "ctree.h"
#include "disk-io.h"
@@ -1550,8 +1551,9 @@ again:
if (ret && !insert) {
err = -ENOENT;
goto out;
- } else if (WARN_ON(ret)) {
+ } else if (ret) {
err = -EIO;
+ WARN_ON(1);
goto out;
}
@@ -1977,6 +1979,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_extent_item *item;
u64 refs;
int ret;
+ int err = 0;
path = btrfs_alloc_path();
if (!path)
@@ -1989,9 +1992,14 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
path, bytenr, num_bytes, parent,
root_objectid, owner, offset,
refs_to_add, extent_op);
- if (ret != -EAGAIN)
+ if (ret == 0)
goto out;
+ if (ret != -EAGAIN) {
+ err = ret;
+ goto out;
+ }
+
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, item);
@@ -2013,7 +2021,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, root, ret);
out:
btrfs_free_path(path);
- return ret;
+ return err;
}
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
@@ -2129,28 +2137,15 @@ again:
}
if (ret > 0) {
if (metadata) {
- if (path->slots[0] > 0) {
- path->slots[0]--;
- btrfs_item_key_to_cpu(path->nodes[0], &key,
- path->slots[0]);
- if (key.objectid == node->bytenr &&
- key.type == BTRFS_EXTENT_ITEM_KEY &&
- key.offset == node->num_bytes)
- ret = 0;
- }
- if (ret > 0) {
- btrfs_release_path(path);
- metadata = 0;
+ btrfs_release_path(path);
+ metadata = 0;
- key.objectid = node->bytenr;
- key.offset = node->num_bytes;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- goto again;
- }
- } else {
- err = -EIO;
- goto out;
+ key.offset = node->num_bytes;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ goto again;
}
+ err = -EIO;
+ goto out;
}
leaf = path->nodes[0];
@@ -2239,12 +2234,8 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
{
int ret = 0;
- if (trans->aborted) {
- if (insert_reserved)
- btrfs_pin_extent(root, node->bytenr,
- node->num_bytes, 1);
+ if (trans->aborted)
return 0;
- }
if (btrfs_delayed_ref_is_head(node)) {
struct btrfs_delayed_ref_head *head;
@@ -2420,14 +2411,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
btrfs_free_delayed_extent_op(extent_op);
if (ret) {
- /*
- * Need to reset must_insert_reserved if
- * there was an error so the abort stuff
- * can cleanup the reserved space
- * properly.
- */
- if (must_insert_reserved)
- locked_ref->must_insert_reserved = 1;
btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
spin_lock(&delayed_refs->lock);
btrfs_delayed_ref_unlock(locked_ref);
@@ -3214,7 +3197,8 @@ again:
if (ret)
goto out_put;
- ret = btrfs_truncate_free_space_cache(root, trans, inode);
+ ret = btrfs_truncate_free_space_cache(root, trans, path,
+ inode);
if (ret)
goto out_put;
}
@@ -3334,9 +3318,10 @@ again:
last = cache->key.objectid + cache->key.offset;
err = write_one_cache_group(trans, root, path, cache);
- btrfs_put_block_group(cache);
if (err) /* File system offline */
goto out;
+
+ btrfs_put_block_group(cache);
}
while (1) {
@@ -3620,9 +3605,10 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
/* make sure bytes are sectorsize aligned */
bytes = ALIGN(bytes, root->sectorsize);
- if (btrfs_is_free_space_inode(inode)) {
+ if (root == root->fs_info->tree_root ||
+ BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
+ alloc_chunk = 0;
committed = 1;
- ASSERT(current->journal_info);
}
data_sinfo = fs_info->data_sinfo;
@@ -3650,16 +3636,6 @@ again:
spin_unlock(&data_sinfo->lock);
alloc:
alloc_target = btrfs_get_alloc_profile(root, 1);
- /*
- * It is ugly that we don't call nolock join
- * transaction for the free space inode case here.
- * But it is safe because we only do the data space
- * reservation for the free space cache in the
- * transaction context, the common join transaction
- * just increase the counter of the current transaction
- * handler, doesn't try to acquire the trans_lock of
- * the fs.
- */
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
@@ -3705,9 +3681,6 @@ commit_trans:
goto again;
}
- trace_btrfs_space_reservation(root->fs_info,
- "space_info:enospc",
- data_sinfo->flags, bytes, 1);
return -ENOSPC;
}
data_sinfo->bytes_may_use += bytes;
@@ -4016,26 +3989,12 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
* the filesystem is readonly(all dirty pages are written to
* the disk).
*/
- btrfs_start_delalloc_roots(root->fs_info, 0);
+ btrfs_start_all_delalloc_inodes(root->fs_info, 0);
if (!current->journal_info)
- btrfs_wait_ordered_roots(root->fs_info, -1);
+ btrfs_wait_all_ordered_extents(root->fs_info);
}
}
-static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
-{
- u64 bytes;
- int nr;
-
- bytes = btrfs_calc_trans_metadata_size(root, 1);
- nr = (int)div64_u64(to_reclaim, bytes);
- if (!nr)
- nr = 1;
- return nr;
-}
-
-#define EXTENT_SIZE_PER_ITEM (256 * 1024)
-
/*
* shrink metadata reservation for delalloc
*/
@@ -4048,30 +4007,24 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
u64 delalloc_bytes;
u64 max_reclaim;
long time_left;
- unsigned long nr_pages;
- int loops;
- int items;
+ unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
+ int loops = 0;
enum btrfs_reserve_flush_enum flush;
- /* Calc the number of the pages we need flush for space reservation */
- items = calc_reclaim_items_nr(root, to_reclaim);
- to_reclaim = items * EXTENT_SIZE_PER_ITEM;
-
trans = (struct btrfs_trans_handle *)current->journal_info;
block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info;
+ smp_mb();
delalloc_bytes = percpu_counter_sum_positive(
&root->fs_info->delalloc_bytes);
if (delalloc_bytes == 0) {
if (trans)
return;
- if (wait_ordered)
- btrfs_wait_ordered_roots(root->fs_info, items);
+ btrfs_wait_all_ordered_extents(root->fs_info);
return;
}
- loops = 0;
while (delalloc_bytes && loops < 3) {
max_reclaim = min(delalloc_bytes, to_reclaim);
nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
@@ -4080,19 +4033,9 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
* We need to wait for the async pages to actually start before
* we do anything.
*/
- max_reclaim = atomic_read(&root->fs_info->async_delalloc_pages);
- if (!max_reclaim)
- goto skip_async;
-
- if (max_reclaim <= nr_pages)
- max_reclaim = 0;
- else
- max_reclaim -= nr_pages;
-
wait_event(root->fs_info->async_submit_wait,
- atomic_read(&root->fs_info->async_delalloc_pages) <=
- (int)max_reclaim);
-skip_async:
+ !atomic_read(&root->fs_info->async_delalloc_pages));
+
if (!trans)
flush = BTRFS_RESERVE_FLUSH_ALL;
else
@@ -4106,12 +4049,13 @@ skip_async:
loops++;
if (wait_ordered && !trans) {
- btrfs_wait_ordered_roots(root->fs_info, items);
+ btrfs_wait_all_ordered_extents(root->fs_info);
} else {
time_left = schedule_timeout_killable(1);
if (time_left)
break;
}
+ smp_mb();
delalloc_bytes = percpu_counter_sum_positive(
&root->fs_info->delalloc_bytes);
}
@@ -4196,11 +4140,16 @@ static int flush_space(struct btrfs_root *root,
switch (state) {
case FLUSH_DELAYED_ITEMS_NR:
case FLUSH_DELAYED_ITEMS:
- if (state == FLUSH_DELAYED_ITEMS_NR)
- nr = calc_reclaim_items_nr(root, num_bytes) * 2;
- else
- nr = -1;
+ if (state == FLUSH_DELAYED_ITEMS_NR) {
+ u64 bytes = btrfs_calc_trans_metadata_size(root, 1);
+ nr = (int)div64_u64(num_bytes, bytes);
+ if (!nr)
+ nr = 1;
+ nr *= 2;
+ } else {
+ nr = -1;
+ }
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -4383,10 +4332,6 @@ out:
!block_rsv_use_bytes(global_rsv, orig_bytes))
ret = 0;
}
- if (ret == -ENOSPC)
- trace_btrfs_space_reservation(root->fs_info,
- "space_info:enospc",
- space_info->flags, orig_bytes, 1);
if (flushing) {
spin_lock(&space_info->lock);
space_info->flush = 0;
@@ -5041,7 +4986,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
if (to_reserve)
- trace_btrfs_space_reservation(root->fs_info, "delalloc",
+ trace_btrfs_space_reservation(root->fs_info,"delalloc",
btrfs_ino(inode), to_reserve, 1);
block_rsv_add_bytes(block_rsv, to_reserve, 1);
@@ -5319,8 +5264,6 @@ static int pin_down_extent(struct btrfs_root *root,
set_extent_dirty(root->fs_info->pinned_extents, bytenr,
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
- if (reserved)
- trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
return 0;
}
@@ -5775,8 +5718,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
extent_slot = path->slots[0];
}
- } else if (WARN_ON(ret == -ENOENT)) {
+ } else if (ret == -ENOENT) {
btrfs_print_leaf(extent_root, path->nodes[0]);
+ WARN_ON(1);
btrfs_err(info,
"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
bytenr, parent, root_objectid, owner_objectid,
@@ -6023,7 +5967,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
btrfs_add_free_space(cache, buf->start, buf->len);
btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
- trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
pin = 0;
}
out:
@@ -6651,6 +6594,8 @@ again:
}
}
+ trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
+
return ret;
}
@@ -6762,7 +6707,6 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
ins->objectid, ins->offset);
BUG();
}
- trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
return ret;
}
@@ -6787,18 +6731,13 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
size += sizeof(*block_info);
path = btrfs_alloc_path();
- if (!path) {
- btrfs_free_and_pin_reserved_extent(root, ins->objectid,
- root->leafsize);
+ if (!path)
return -ENOMEM;
- }
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
ins, size);
if (ret) {
- btrfs_free_and_pin_reserved_extent(root, ins->objectid,
- root->leafsize);
btrfs_free_path(path);
return ret;
}
@@ -6840,8 +6779,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
ins->objectid, ins->offset);
BUG();
}
-
- trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->leafsize);
return ret;
}
@@ -8046,7 +7983,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
spin_lock(&sinfo->lock);
- for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
+ for(i = 0; i < BTRFS_NR_RAID_TYPES; i++)
if (!list_empty(&sinfo->block_groups[i]))
free_bytes += __btrfs_get_ro_block_group_free_space(
&sinfo->block_groups[i]);
@@ -8334,14 +8271,15 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
release_global_block_rsv(info);
- while (!list_empty(&info->space_info)) {
+ while(!list_empty(&info->space_info)) {
space_info = list_entry(info->space_info.next,
struct btrfs_space_info,
list);
if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
- if (WARN_ON(space_info->bytes_pinned > 0 ||
+ if (space_info->bytes_pinned > 0 ||
space_info->bytes_reserved > 0 ||
- space_info->bytes_may_use > 0)) {
+ space_info->bytes_may_use > 0) {
+ WARN_ON(1);
dump_space_info(space_info, 0, 0);
}
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ff43802..51731b7 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -13,13 +13,13 @@
#include <linux/cleancache.h>
#include "extent_io.h"
#include "extent_map.h"
+#include "compat.h"
#include "ctree.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"
#include "locking.h"
#include "rcu-string.h"
-#include "backref.h"
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
@@ -1597,10 +1597,11 @@ done:
*
* 1 is returned if we find something, 0 if nothing was in the tree
*/
-STATIC u64 find_lock_delalloc_range(struct inode *inode,
- struct extent_io_tree *tree,
- struct page *locked_page, u64 *start,
- u64 *end, u64 max_bytes)
+static noinline u64 find_lock_delalloc_range(struct inode *inode,
+ struct extent_io_tree *tree,
+ struct page *locked_page,
+ u64 *start, u64 *end,
+ u64 max_bytes)
{
u64 delalloc_start;
u64 delalloc_end;
@@ -1739,8 +1740,10 @@ u64 count_range_bits(struct extent_io_tree *tree,
u64 last = 0;
int found = 0;
- if (WARN_ON(search_end <= cur_start))
+ if (search_end <= cur_start) {
+ WARN_ON(1);
return 0;
+ }
spin_lock(&tree->lock);
if (cur_start == 0 && bits == EXTENT_DIRTY) {
@@ -1952,6 +1955,11 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec,
return err;
}
+static void repair_io_failure_callback(struct bio *bio, int err)
+{ /* Signal the completion object that the submitter stored in bio->bi_private. */
+ complete(bio->bi_private); /* err is not examined in this function */
+}
+
/*
* this bypasses the standard btrfs submit functions deliberately, as
* the standard behavior is to write all copies in a raid setup. here we only
@@ -1968,13 +1976,13 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
{
struct bio *bio;
struct btrfs_device *dev;
+ DECLARE_COMPLETION_ONSTACK(compl);
u64 map_length = 0;
u64 sector;
struct btrfs_bio *bbio = NULL;
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
int ret;
- ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
BUG_ON(!mirror_num);
/* we can't repair anything in raid56 yet */
@@ -1984,6 +1992,8 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
+ bio->bi_private = &compl;
+ bio->bi_end_io = repair_io_failure_callback;
bio->bi_size = 0;
map_length = length;
@@ -2004,8 +2014,10 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
}
bio->bi_bdev = dev->bdev;
bio_add_page(bio, page, length, start - page_offset(page));
+ btrfsic_submit_bio(WRITE_SYNC, bio);
+ wait_for_completion(&compl);
- if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
/* try to remap that extent elsewhere? */
bio_put(bio);
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
@@ -2027,9 +2039,6 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
int ret = 0;
- if (root->fs_info->sb->s_flags & MS_RDONLY)
- return -EROFS;
-
for (i = 0; i < num_pages; i++) {
struct page *p = extent_buffer_page(eb, i);
ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE,
@@ -2051,12 +2060,12 @@ static int clean_io_failure(u64 start, struct page *page)
u64 private;
u64 private_failure;
struct io_failure_record *failrec;
- struct inode *inode = page->mapping->host;
- struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct btrfs_fs_info *fs_info;
struct extent_state *state;
int num_copies;
int did_repair = 0;
int ret;
+ struct inode *inode = page->mapping->host;
private = 0;
ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
@@ -2079,8 +2088,6 @@ static int clean_io_failure(u64 start, struct page *page)
did_repair = 1;
goto out;
}
- if (fs_info->sb->s_flags & MS_RDONLY)
- goto out;
spin_lock(&BTRFS_I(inode)->io_tree.lock);
state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
@@ -2090,6 +2097,7 @@ static int clean_io_failure(u64 start, struct page *page)
if (state && state->start <= failrec->start &&
state->end >= failrec->start + failrec->len - 1) {
+ fs_info = BTRFS_I(inode)->root->fs_info;
num_copies = btrfs_num_copies(fs_info, failrec->logical,
failrec->len);
if (num_copies > 1) {
@@ -3561,8 +3569,9 @@ retry:
* but no sense in crashing the users box for something
* we can survive anyway.
*/
- if (WARN_ON(!eb)) {
+ if (!eb) {
spin_unlock(&mapping->private_lock);
+ WARN_ON(1);
continue;
}
@@ -4029,7 +4038,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
if (offset >= last)
return NULL;
- while (1) {
+ while(1) {
len = last - offset;
if (len == 0)
break;
@@ -4053,19 +4062,6 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
return NULL;
}
-static noinline int count_ext_ref(u64 inum, u64 offset, u64 root_id, void *ctx)
-{
- unsigned long cnt = *((unsigned long *)ctx);
-
- cnt++;
- *((unsigned long *)ctx) = cnt;
-
- /* Now we're sure that the extent is shared. */
- if (cnt > 1)
- return 1;
- return 0;
-}
-
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
{
@@ -4132,7 +4128,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
last = found_key.offset;
last_for_get_extent = last + 1;
}
- btrfs_release_path(path);
+ btrfs_free_path(path);
/*
* we might have some extents allocated but more delalloc past those
@@ -4202,24 +4198,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags |= (FIEMAP_EXTENT_DELALLOC |
FIEMAP_EXTENT_UNKNOWN);
} else {
- unsigned long ref_cnt = 0;
-
disko = em->block_start + offset_in_extent;
-
- /*
- * As btrfs supports shared space, this information
- * can be exported to userspace tools via
- * flag FIEMAP_EXTENT_SHARED.
- */
- ret = iterate_inodes_from_logical(
- em->block_start,
- BTRFS_I(inode)->root->fs_info,
- path, count_ext_ref, &ref_cnt);
- if (ret < 0 && ret != -ENOENT)
- goto out_free;
-
- if (ref_cnt > 1)
- flags |= FIEMAP_EXTENT_SHARED;
}
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED;
@@ -4251,7 +4230,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
out_free:
free_extent_map(em);
out:
- btrfs_free_path(path);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state, GFP_NOFS);
return ret;
@@ -4477,23 +4455,6 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb)
}
}
-struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
- u64 start)
-{
- struct extent_buffer *eb;
-
- rcu_read_lock();
- eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
- if (eb && atomic_inc_not_zero(&eb->refs)) {
- rcu_read_unlock();
- mark_extent_buffer_accessed(eb);
- return eb;
- }
- rcu_read_unlock();
-
- return NULL;
-}
-
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
u64 start, unsigned long len)
{
@@ -4507,10 +4468,14 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
int uptodate = 1;
int ret;
-
- eb = find_extent_buffer(tree, start);
- if (eb)
+ rcu_read_lock();
+ eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+ if (eb && atomic_inc_not_zero(&eb->refs)) {
+ rcu_read_unlock();
+ mark_extent_buffer_accessed(eb);
return eb;
+ }
+ rcu_read_unlock();
eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
if (!eb)
@@ -4569,17 +4534,24 @@ again:
spin_lock(&tree->buffer_lock);
ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
- spin_unlock(&tree->buffer_lock);
- radix_tree_preload_end();
if (ret == -EEXIST) {
- exists = find_extent_buffer(tree, start);
- if (exists)
- goto free_eb;
- else
+ exists = radix_tree_lookup(&tree->buffer,
+ start >> PAGE_CACHE_SHIFT);
+ if (!atomic_inc_not_zero(&exists->refs)) {
+ spin_unlock(&tree->buffer_lock);
+ radix_tree_preload_end();
+ exists = NULL;
goto again;
+ }
+ spin_unlock(&tree->buffer_lock);
+ radix_tree_preload_end();
+ mark_extent_buffer_accessed(exists);
+ goto free_eb;
}
/* add one reference for the tree */
check_buffer_tree_ref(eb);
+ spin_unlock(&tree->buffer_lock);
+ radix_tree_preload_end();
/*
* there is a race where release page may have
@@ -4610,6 +4582,23 @@ free_eb:
return exists;
}
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+ u64 start, unsigned long len) /* note: len is not referenced in this body */
+{ /* Look up the extent buffer for start in tree->buffer; return it with an extra reference, or NULL. */
+ struct extent_buffer *eb;
+
+ rcu_read_lock();
+ eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); /* radix tree is indexed by start >> PAGE_CACHE_SHIFT */
+ if (eb && atomic_inc_not_zero(&eb->refs)) { /* take a reference only if refs is still non-zero */
+ rcu_read_unlock();
+ mark_extent_buffer_accessed(eb);
+ return eb;
+ }
+ rcu_read_unlock();
+
+ return NULL; /* no entry found, or its refs count was already zero */
+}
+
static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
{
struct extent_buffer *eb =
@@ -5073,6 +5062,23 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
}
}
+static void move_pages(struct page *dst_page, struct page *src_page,
+ unsigned long dst_off, unsigned long src_off,
+ unsigned long len)
+{ /* Copy len bytes from src_page at src_off to dst_page at dst_off. */
+ char *dst_kaddr = page_address(dst_page);
+ if (dst_page == src_page) {
+ memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); /* same page: memmove copes with overlapping ranges */
+ } else {
+ char *src_kaddr = page_address(src_page);
+ char *p = dst_kaddr + dst_off + len; /* p and s start one past the end of each range */
+ char *s = src_kaddr + src_off + len;
+
+ while (len--)
+ *--p = *--s; /* byte-by-byte copy, from the last byte down to the first */
+ }
+}
+
static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
{
unsigned long distance = (src > dst) ? src - dst : dst - src;
@@ -5183,7 +5189,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
cur = min_t(unsigned long, len, src_off_in_page + 1);
cur = min(cur, dst_off_in_page + 1);
- copy_pages(extent_buffer_page(dst, dst_i),
+ move_pages(extent_buffer_page(dst, dst_i),
extent_buffer_page(dst, src_i),
dst_off_in_page - cur + 1,
src_off_in_page - cur + 1, cur);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 19620c5..6dbc645 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -271,7 +271,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len);
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
- u64 start);
+ u64 start, unsigned long len);
void free_extent_buffer(struct extent_buffer *eb);
void free_extent_buffer_stale(struct extent_buffer *eb);
#define WAIT_NONE 0
@@ -345,10 +345,4 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
int mirror_num);
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-noinline u64 find_lock_delalloc_range(struct inode *inode,
- struct extent_io_tree *tree,
- struct page *locked_page, u64 *start,
- u64 *end, u64 max_bytes);
-#endif
#endif
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 93fba71..61adc44 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -3,10 +3,10 @@
#include <linux/rbtree.h>
-#define EXTENT_MAP_LAST_BYTE ((u64)-4)
-#define EXTENT_MAP_HOLE ((u64)-3)
-#define EXTENT_MAP_INLINE ((u64)-2)
-#define EXTENT_MAP_DELALLOC ((u64)-1)
+#define EXTENT_MAP_LAST_BYTE (u64)-4
+#define EXTENT_MAP_HOLE (u64)-3
+#define EXTENT_MAP_INLINE (u64)-2
+#define EXTENT_MAP_DELALLOC (u64)-1
/* bits for the flags field */
#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 6f38488..4f53159 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -329,9 +329,6 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
u64 csum_end;
u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
- ASSERT(start == ALIGN(start, root->sectorsize) &&
- (end + 1) == ALIGN(end + 1, root->sectorsize));
-
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -849,8 +846,10 @@ insert:
path->leave_spinning = 0;
if (ret < 0)
goto fail_unlock;
- if (WARN_ON(ret != 0))
+ if (ret != 0) {
+ WARN_ON(1);
goto fail_unlock;
+ }
leaf = path->nodes[0];
csum:
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 82d0342..72da4df 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -39,6 +39,7 @@
#include "print-tree.h"
#include "tree-log.h"
#include "locking.h"
+#include "compat.h"
#include "volumes.h"
static struct kmem_cache *btrfs_inode_defrag_cachep;
@@ -369,7 +370,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
u64 root_objectid = 0;
atomic_inc(&fs_info->defrag_running);
- while (1) {
+ while(1) {
/* Pause the auto defragger. */
if (test_bit(BTRFS_FS_STATE_REMOUNTING,
&fs_info->fs_state))
@@ -1280,7 +1281,6 @@ again:
}
wait_on_page_writeback(pages[i]);
}
- faili = num_pages - 1;
err = 0;
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
@@ -1299,10 +1299,8 @@ again:
unlock_page(pages[i]);
page_cache_release(pages[i]);
}
- err = btrfs_wait_ordered_range(inode, start_pos,
- last_pos - start_pos);
- if (err)
- goto fail;
+ btrfs_wait_ordered_range(inode, start_pos,
+ last_pos - start_pos);
goto again;
}
if (ordered)
@@ -1811,13 +1809,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
atomic_inc(&root->log_batch);
full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
- if (full_sync) {
- ret = btrfs_wait_ordered_range(inode, start, end - start + 1);
- if (ret) {
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
- }
+ if (full_sync)
+ btrfs_wait_ordered_range(inode, start, end - start + 1);
atomic_inc(&root->log_batch);
/*
@@ -1883,20 +1876,27 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
mutex_unlock(&inode->i_mutex);
if (ret != BTRFS_NO_LOG_SYNC) {
- if (!ret) {
+ if (ret > 0) {
+ /*
+ * If we didn't already wait for ordered extents we need
+ * to do that now.
+ */
+ if (!full_sync)
+ btrfs_wait_ordered_range(inode, start,
+ end - start + 1);
+ ret = btrfs_commit_transaction(trans, root);
+ } else {
ret = btrfs_sync_log(trans, root);
- if (!ret) {
+ if (ret == 0) {
ret = btrfs_end_transaction(trans, root);
- goto out;
+ } else {
+ if (!full_sync)
+ btrfs_wait_ordered_range(inode, start,
+ end -
+ start + 1);
+ ret = btrfs_commit_transaction(trans, root);
}
}
- if (!full_sync) {
- ret = btrfs_wait_ordered_range(inode, start,
- end - start + 1);
- if (ret)
- goto out;
- }
- ret = btrfs_commit_transaction(trans, root);
} else {
ret = btrfs_end_transaction(trans, root);
}
@@ -2067,9 +2067,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
((offset + len - 1) >> PAGE_CACHE_SHIFT));
- ret = btrfs_wait_ordered_range(inode, offset, len);
- if (ret)
- return ret;
+ btrfs_wait_ordered_range(inode, offset, len);
mutex_lock(&inode->i_mutex);
/*
@@ -2138,12 +2136,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
btrfs_put_ordered_extent(ordered);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
lockend, &cached_state, GFP_NOFS);
- ret = btrfs_wait_ordered_range(inode, lockstart,
- lockend - lockstart + 1);
- if (ret) {
- mutex_unlock(&inode->i_mutex);
- return ret;
- }
+ btrfs_wait_ordered_range(inode, lockstart,
+ lockend - lockstart + 1);
}
path = btrfs_alloc_path();
@@ -2314,10 +2308,7 @@ static long btrfs_fallocate(struct file *file, int mode,
* wait for ordered IO before we have any locks. We'll loop again
* below with the locks held.
*/
- ret = btrfs_wait_ordered_range(inode, alloc_start,
- alloc_end - alloc_start);
- if (ret)
- goto out;
+ btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
locked_end = alloc_end - 1;
while (1) {
@@ -2341,10 +2332,8 @@ static long btrfs_fallocate(struct file *file, int mode,
* we can't wait on the range with the transaction
* running or with the extent lock held
*/
- ret = btrfs_wait_ordered_range(inode, alloc_start,
- alloc_end - alloc_start);
- if (ret)
- goto out;
+ btrfs_wait_ordered_range(inode, alloc_start,
+ alloc_end - alloc_start);
} else {
if (ordered)
btrfs_put_ordered_extent(ordered);
@@ -2416,12 +2405,14 @@ out_reserve_fail:
static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_map *em = NULL;
+ struct extent_map *em;
struct extent_state *cached_state = NULL;
u64 lockstart = *offset;
u64 lockend = i_size_read(inode);
u64 start = *offset;
+ u64 orig_start = *offset;
u64 len = i_size_read(inode);
+ u64 last_end = 0;
int ret = 0;
lockend = max_t(u64, root->sectorsize, lockend);
@@ -2438,35 +2429,89 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
&cached_state);
- while (start < inode->i_size) {
+ /*
+ * Delalloc is such a pain. If we have a hole and we have pending
+ * delalloc for a portion of the hole we will get back a hole that
+ * exists for the entire range since it hasn't been actually written
+ * yet. So to take care of this case we need to look for an extent just
+ * before the position we want in case there is outstanding delalloc
+ * going on here.
+ */
+ if (whence == SEEK_HOLE && start != 0) {
+ if (start <= root->sectorsize)
+ em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
+ root->sectorsize, 0);
+ else
+ em = btrfs_get_extent_fiemap(inode, NULL, 0,
+ start - root->sectorsize,
+ root->sectorsize, 0);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out;
+ }
+ last_end = em->start + em->len;
+ if (em->block_start == EXTENT_MAP_DELALLOC)
+ last_end = min_t(u64, last_end, inode->i_size);
+ free_extent_map(em);
+ }
+
+ while (1) {
em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
- em = NULL;
break;
}
- if (whence == SEEK_HOLE &&
- (em->block_start == EXTENT_MAP_HOLE ||
- test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
- break;
- else if (whence == SEEK_DATA &&
- (em->block_start != EXTENT_MAP_HOLE &&
- !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
- break;
+ if (em->block_start == EXTENT_MAP_HOLE) {
+ if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
+ if (last_end <= orig_start) {
+ free_extent_map(em);
+ ret = -ENXIO;
+ break;
+ }
+ }
+
+ if (whence == SEEK_HOLE) {
+ *offset = start;
+ free_extent_map(em);
+ break;
+ }
+ } else {
+ if (whence == SEEK_DATA) {
+ if (em->block_start == EXTENT_MAP_DELALLOC) {
+ if (start >= inode->i_size) {
+ free_extent_map(em);
+ ret = -ENXIO;
+ break;
+ }
+ }
+
+ if (!test_bit(EXTENT_FLAG_PREALLOC,
+ &em->flags)) {
+ *offset = start;
+ free_extent_map(em);
+ break;
+ }
+ }
+ }
start = em->start + em->len;
+ last_end = em->start + em->len;
+
+ if (em->block_start == EXTENT_MAP_DELALLOC)
+ last_end = min_t(u64, last_end, inode->i_size);
+
+ if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
+ free_extent_map(em);
+ ret = -ENXIO;
+ break;
+ }
free_extent_map(em);
- em = NULL;
cond_resched();
}
- free_extent_map(em);
- if (!ret) {
- if (whence == SEEK_DATA && start >= inode->i_size)
- ret = -ENXIO;
- else
- *offset = min_t(loff_t, start, inode->i_size);
- }
+ if (!ret)
+ *offset = min(*offset, inode->i_size);
+out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS);
return ret;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 057be95..b4f9904 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -218,6 +218,7 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
struct inode *inode)
{
int ret = 0;
@@ -1008,13 +1009,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
if (ret)
goto out;
- ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
- if (ret) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
- EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
- GFP_NOFS);
- goto out;
- }
+
+ btrfs_wait_ordered_range(inode, 0, (u64)-1);
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
key.offset = offset;
@@ -2280,7 +2276,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
goto out;
entry = rb_entry(node, struct btrfs_free_space, offset_index);
- while (1) {
+ while(1) {
if (entry->bytes < bytes && entry->bytes > *max_extent_size)
*max_extent_size = entry->bytes;
@@ -2971,15 +2967,19 @@ out:
int btrfs_write_out_ino_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct inode *inode)
+ struct btrfs_path *path)
{
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
+ struct inode *inode;
int ret;
if (!btrfs_test_opt(root, INODE_MAP_CACHE))
return 0;
+ inode = lookup_free_ino_inode(root, path);
+ if (IS_ERR(inode))
+ return 0;
+
ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
if (ret) {
btrfs_delalloc_release_metadata(inode, inode->i_size);
@@ -2990,6 +2990,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
#endif
}
+ iput(inode);
return ret;
}
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 0cf4977..e737f92 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -58,6 +58,7 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
struct inode *inode);
int load_free_space_cache(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group);
@@ -75,8 +76,7 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
int btrfs_write_out_ino_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct inode *inode);
+ struct btrfs_path *path);
void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group);
int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index ec82fae..e0b7034 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -369,7 +369,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
goto out;
leaf = path->nodes[0];
- item = btrfs_item_nr(path->slots[0]);
+ item = btrfs_item_nr(leaf, path->slots[0]);
ptr = (unsigned long)btrfs_item_ptr(leaf, path->slots[0], char);
ptr += btrfs_item_size(leaf, item) - ins_len;
extref = (struct btrfs_inode_extref *)ptr;
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index ab485e5..2c66ddb 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -78,8 +78,10 @@ again:
btrfs_transaction_in_commit(fs_info)) {
leaf = path->nodes[0];
- if (WARN_ON(btrfs_header_nritems(leaf) == 0))
+ if (btrfs_header_nritems(leaf) == 0) {
+ WARN_ON(1);
break;
+ }
/*
* Save the key so we can advances forward
@@ -235,7 +237,7 @@ again:
start_caching(root);
if (objectid <= root->cache_progress ||
- objectid >= root->highest_objectid)
+ objectid > root->highest_objectid)
__btrfs_add_free_space(ctl, objectid, 1);
else
__btrfs_add_free_space(pinned, objectid, 1);
@@ -410,7 +412,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
return 0;
/* Don't save inode cache if we are deleting this root */
- if (btrfs_root_refs(&root->root_item) == 0)
+ if (btrfs_root_refs(&root->root_item) == 0 &&
+ root != root->fs_info->tree_root)
return 0;
if (!btrfs_test_opt(root, INODE_MAP_CACHE))
@@ -464,7 +467,7 @@ again:
}
if (i_size_read(inode) > 0) {
- ret = btrfs_truncate_free_space_cache(root, trans, inode);
+ ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
if (ret) {
if (ret != -ENOSPC)
btrfs_abort_transaction(trans, root, ret);
@@ -501,7 +504,7 @@ again:
}
btrfs_free_reserved_data_space(inode, prealloc);
- ret = btrfs_write_out_ino_cache(root, trans, path, inode);
+ ret = btrfs_write_out_ino_cache(root, trans, path);
out_put:
iput(inode);
out_release:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f1a7744..51e3afa 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -43,6 +43,7 @@
#include <linux/btrfs.h>
#include <linux/blkdev.h>
#include <linux/posix_acl_xattr.h>
+#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -843,10 +844,7 @@ static noinline int cow_file_range(struct inode *inode,
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
- if (btrfs_is_free_space_inode(inode)) {
- WARN_ON_ONCE(1);
- return -EINVAL;
- }
+ BUG_ON(btrfs_is_free_space_inode(inode));
num_bytes = ALIGN(end - start + 1, blocksize);
num_bytes = max(blocksize, num_bytes);
@@ -1180,8 +1178,10 @@ static noinline int run_delalloc_nocow(struct inode *inode,
while (1) {
ret = btrfs_lookup_file_extent(trans, root, path, ino,
cur_offset, 0);
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, root, ret);
goto error;
+ }
if (ret > 0 && path->slots[0] > 0 && check_prev) {
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key,
@@ -1195,8 +1195,10 @@ next_slot:
leaf = path->nodes[0];
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, root, ret);
goto error;
+ }
if (ret > 0)
break;
leaf = path->nodes[0];
@@ -1287,8 +1289,10 @@ out_check:
ret = cow_file_range(inode, locked_page,
cow_start, found_key.offset - 1,
page_started, nr_written, 1);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
goto error;
+ }
cow_start = (u64)-1;
}
@@ -1335,8 +1339,10 @@ out_check:
BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_reloc_clone_csums(inode, cur_offset,
num_bytes);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
goto error;
+ }
}
extent_clear_unlock_delalloc(inode, cur_offset,
@@ -1358,8 +1364,10 @@ out_check:
if (cow_start != (u64)-1) {
ret = cow_file_range(inode, locked_page, cow_start, end,
page_started, nr_written, 1);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
goto error;
+ }
}
error:
@@ -1543,13 +1551,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
spin_unlock(&BTRFS_I(inode)->lock);
}
- /*
- * We don't reserve metadata space for space cache inodes so we
- * don't need to call dellalloc_release_metadata if there is an
- * error.
- */
- if (*bits & EXTENT_DO_ACCOUNTING &&
- root != root->fs_info->tree_root)
+ if (*bits & EXTENT_DO_ACCOUNTING)
btrfs_delalloc_release_metadata(inode, len);
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
@@ -2039,8 +2041,10 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
key.offset = offset;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (WARN_ON(ret < 0))
+ if (ret < 0) {
+ WARN_ON(1);
return ret;
+ }
ret = 0;
while (1) {
@@ -2129,8 +2133,7 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
old->extent_offset, fs_info,
path, record_one_backref,
old);
- if (ret < 0 && ret != -ENOENT)
- return false;
+ BUG_ON(ret < 0 && ret != -ENOENT);
/* no backref to be processed for this extent */
if (!old->count) {
@@ -2364,23 +2367,10 @@ out_unlock:
return ret;
}
-static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
-{
- struct old_sa_defrag_extent *old, *tmp;
-
- if (!new)
- return;
-
- list_for_each_entry_safe(old, tmp, &new->head, list) {
- list_del(&old->list);
- kfree(old);
- }
- kfree(new);
-}
-
static void relink_file_extents(struct new_sa_defrag_extent *new)
{
struct btrfs_path *path;
+ struct old_sa_defrag_extent *old, *tmp;
struct sa_defrag_extent_backref *backref;
struct sa_defrag_extent_backref *prev = NULL;
struct inode *inode;
@@ -2423,11 +2413,16 @@ static void relink_file_extents(struct new_sa_defrag_extent *new)
kfree(prev);
btrfs_free_path(path);
-out:
- free_sa_defrag_extent(new);
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+out:
atomic_dec(&root->fs_info->defrag_running);
wake_up(&root->fs_info->transaction_wait);
+
+ kfree(new);
}
static struct new_sa_defrag_extent *
@@ -2437,7 +2432,7 @@ record_old_file_extents(struct inode *inode,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
struct btrfs_key key;
- struct old_sa_defrag_extent *old;
+ struct old_sa_defrag_extent *old, *tmp;
struct new_sa_defrag_extent *new;
int ret;
@@ -2485,7 +2480,7 @@ record_old_file_extents(struct inode *inode,
if (slot >= btrfs_header_nritems(l)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
- goto out_free_path;
+ goto out_free_list;
else if (ret > 0)
break;
continue;
@@ -2514,7 +2509,7 @@ record_old_file_extents(struct inode *inode,
old = kmalloc(sizeof(*old), GFP_NOFS);
if (!old)
- goto out_free_path;
+ goto out_free_list;
offset = max(new->file_pos, key.offset);
end = min(new->file_pos + new->len, key.offset + num_bytes);
@@ -2536,10 +2531,15 @@ next:
return new;
+out_free_list:
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
out_free_path:
btrfs_free_path(path);
out_kfree:
- free_sa_defrag_extent(new);
+ kfree(new);
return NULL;
}
@@ -2710,14 +2710,8 @@ out:
btrfs_remove_ordered_extent(inode, ordered_extent);
/* for snapshot-aware defrag */
- if (new) {
- if (ret) {
- free_sa_defrag_extent(new);
- atomic_dec(&root->fs_info->defrag_running);
- } else {
- relink_file_extents(new);
- }
- }
+ if (new)
+ relink_file_extents(new);
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
@@ -2975,7 +2969,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
if (insert >= 1) {
ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
if (ret) {
- atomic_dec(&root->orphan_inodes);
if (reserve) {
clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
&BTRFS_I(inode)->runtime_flags);
@@ -3025,15 +3018,13 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
release_rsv = 1;
spin_unlock(&root->orphan_lock);
- if (delete_item) {
- atomic_dec(&root->orphan_inodes);
- if (trans)
- ret = btrfs_del_orphan_item(trans, root,
- btrfs_ino(inode));
- }
+ if (trans && delete_item)
+ ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
- if (release_rsv)
+ if (release_rsv) {
btrfs_orphan_release_metadata(inode);
+ atomic_dec(&root->orphan_inodes);
+ }
return ret;
}
@@ -3181,7 +3172,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
/* if we have links, this was a truncate, lets do that */
if (inode->i_nlink) {
- if (WARN_ON(!S_ISREG(inode->i_mode))) {
+ if (!S_ISREG(inode->i_mode)) {
+ WARN_ON(1);
iput(inode);
continue;
}
@@ -3644,7 +3636,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
int ret;
ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
if (!ret) {
- drop_nlink(inode);
+ btrfs_drop_nlink(inode);
ret = btrfs_update_inode(trans, root, inode);
}
return ret;
@@ -4238,16 +4230,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
while (1) {
struct btrfs_ordered_extent *ordered;
-
+ btrfs_wait_ordered_range(inode, hole_start,
+ block_end - hole_start);
lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
&cached_state);
- ordered = btrfs_lookup_ordered_range(inode, hole_start,
- block_end - hole_start);
+ ordered = btrfs_lookup_ordered_extent(inode, hole_start);
if (!ordered)
break;
unlock_extent_cached(io_tree, hole_start, block_end - 1,
&cached_state, GFP_NOFS);
- btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
}
@@ -4481,10 +4472,8 @@ void btrfs_evict_inode(struct inode *inode)
trace_btrfs_inode_evict(inode);
truncate_inode_pages(&inode->i_data, 0);
- if (inode->i_nlink &&
- ((btrfs_root_refs(&root->root_item) != 0 &&
- root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
- btrfs_is_free_space_inode(inode)))
+ if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
+ btrfs_is_free_space_inode(inode)))
goto no_delete;
if (is_bad_inode(inode)) {
@@ -4501,8 +4490,7 @@ void btrfs_evict_inode(struct inode *inode)
}
if (inode->i_nlink > 0) {
- BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
- root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);
+ BUG_ON(btrfs_root_refs(&root->root_item) != 0);
goto no_delete;
}
@@ -4743,7 +4731,14 @@ static void inode_tree_del(struct inode *inode)
}
spin_unlock(&root->inode_lock);
- if (empty && btrfs_root_refs(&root->root_item) == 0) {
+ /*
+ * Free space cache has inodes in the tree root, but the tree root has a
+ * root_refs of 0, so this could end up dropping the tree root as a
+ * snapshot, so we need the extra !root->fs_info->tree_root check to
+ * make sure we don't drop it.
+ */
+ if (empty && btrfs_root_refs(&root->root_item) == 0 &&
+ root != root->fs_info->tree_root) {
synchronize_srcu(&root->fs_info->subvol_srcu);
spin_lock(&root->inode_lock);
empty = RB_EMPTY_ROOT(&root->inode_tree);
@@ -4836,12 +4831,10 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
{
struct inode *inode;
struct btrfs_iget_args args;
- unsigned long hashval = btrfs_inode_hash(objectid, root);
-
args.ino = objectid;
args.root = root;
- inode = iget5_locked(s, hashval, btrfs_find_actor,
+ inode = iget5_locked(s, objectid, btrfs_find_actor,
btrfs_init_locked_inode,
(void *)&args);
return inode;
@@ -5055,7 +5048,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
continue;
}
- item = btrfs_item_nr(slot);
+ item = btrfs_item_nr(leaf, slot);
btrfs_item_key_to_cpu(leaf, &found_key, slot);
if (found_key.objectid != key.objectid)
@@ -5461,7 +5454,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
BTRFS_INODE_NODATASUM;
}
- btrfs_insert_inode_hash(inode);
+ insert_inode_hash(inode);
inode_tree_add(inode);
trace_btrfs_inode_new(inode);
@@ -5737,7 +5730,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
goto fail;
}
- inc_nlink(inode);
+ btrfs_inc_nlink(inode);
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
ihold(inode);
@@ -5867,7 +5860,7 @@ static noinline int uncompress_inline(struct btrfs_path *path,
compress_type = btrfs_file_extent_compression(leaf, item);
max_size = btrfs_file_extent_ram_bytes(leaf, item);
inline_size = btrfs_file_extent_inline_item_len(leaf,
- btrfs_item_nr(path->slots[0]));
+ btrfs_item_nr(leaf, path->slots[0]));
tmp = kmalloc(inline_size, GFP_NOFS);
if (!tmp)
return -ENOMEM;
@@ -5981,14 +5974,7 @@ again:
found_type = btrfs_key_type(&found_key);
if (found_key.objectid != objectid ||
found_type != BTRFS_EXTENT_DATA_KEY) {
- /*
- * If we backup past the first extent we want to move forward
- * and see if there is an extent in front of us, otherwise we'll
- * say there is a hole for our whole search range which can
- * cause problems.
- */
- extent_end = start;
- goto next;
+ goto not_found;
}
found_type = btrfs_file_extent_type(leaf, item);
@@ -6003,7 +5989,7 @@ again:
size = btrfs_file_extent_inline_len(leaf, item);
extent_end = ALIGN(extent_start + size, root->sectorsize);
}
-next:
+
if (start >= extent_end) {
path->slots[0]++;
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
@@ -6187,7 +6173,8 @@ insert:
write_unlock(&em_tree->lock);
out:
- trace_btrfs_get_extent(root, em);
+ if (em)
+ trace_btrfs_get_extent(root, em);
if (path)
btrfs_free_path(path);
@@ -6262,7 +6249,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
/* adjust the range_start to make sure it doesn't
* go backwards from the start they passed in
*/
- range_start = max(start, range_start);
+ range_start = max(start,range_start);
found = found_end - range_start;
if (found > 0) {
@@ -7066,7 +7053,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
}
} else {
submit_len += bvec->bv_len;
- nr_pages++;
+ nr_pages ++;
bvec++;
}
}
@@ -7235,9 +7222,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
* outstanding dirty pages are on disk.
*/
count = iov_length(iov, nr_segs);
- ret = btrfs_wait_ordered_range(inode, offset, count);
- if (ret)
- return ret;
+ btrfs_wait_ordered_range(inode, offset, count);
if (rw & WRITE) {
/*
@@ -7578,10 +7563,7 @@ static int btrfs_truncate(struct inode *inode)
u64 mask = root->sectorsize - 1;
u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
- ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
- (u64)-1);
- if (ret)
- return ret;
+ btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
/*
* Yes ladies and gentelment, this is indeed ugly. The fact is we have
@@ -7805,14 +7787,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
return inode;
}
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-void btrfs_test_destroy_inode(struct inode *inode)
-{
- btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
- kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
-}
-#endif
-
static void btrfs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
@@ -7883,7 +7857,8 @@ int btrfs_drop_inode(struct inode *inode)
return 1;
/* the snap/subvol tree is on deleting */
- if (btrfs_root_refs(&root->root_item) == 0)
+ if (btrfs_root_refs(&root->root_item) == 0 &&
+ root != root->fs_info->tree_root)
return 1;
else
return generic_drop_inode(inode);
@@ -8020,7 +7995,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (ret == -EEXIST) {
/* we shouldn't get
* eexist without a new_inode */
- if (WARN_ON(!new_inode)) {
+ if (!new_inode) {
+ WARN_ON(1);
return ret;
}
} else {
@@ -8168,24 +8144,18 @@ out_notrans:
static void btrfs_run_delalloc_work(struct btrfs_work *work)
{
struct btrfs_delalloc_work *delalloc_work;
- struct inode *inode;
delalloc_work = container_of(work, struct btrfs_delalloc_work,
work);
- inode = delalloc_work->inode;
- if (delalloc_work->wait) {
- btrfs_wait_ordered_range(inode, 0, (u64)-1);
- } else {
- filemap_flush(inode->i_mapping);
- if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
- &BTRFS_I(inode)->runtime_flags))
- filemap_flush(inode->i_mapping);
- }
+ if (delalloc_work->wait)
+ btrfs_wait_ordered_range(delalloc_work->inode, 0, (u64)-1);
+ else
+ filemap_flush(delalloc_work->inode->i_mapping);
if (delalloc_work->delay_iput)
- btrfs_add_delayed_iput(inode);
+ btrfs_add_delayed_iput(delalloc_work->inode);
else
- iput(inode);
+ iput(delalloc_work->inode);
complete(&delalloc_work->completion);
}
@@ -8306,7 +8276,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
return ret;
}
-int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput)
+int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info,
+ int delay_iput)
{
struct btrfs_root *root;
struct list_head splice;
@@ -8366,14 +8337,14 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
int err;
int drop_inode = 0;
u64 objectid;
- u64 index = 0;
+ u64 index = 0 ;
int name_len;
int datasize;
unsigned long ptr;
struct btrfs_file_extent_item *ei;
struct extent_buffer *leaf;
- name_len = strlen(symname);
+ name_len = strlen(symname) + 1;
if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
return -ENAMETOOLONG;
@@ -8461,7 +8432,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
inode->i_mapping->a_ops = &btrfs_symlink_aops;
inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
inode_set_bytes(inode, name_len);
- btrfs_i_size_write(inode, name_len);
+ btrfs_i_size_write(inode, name_len - 1);
err = btrfs_update_inode(trans, root, inode);
if (err)
drop_inode = 1;
@@ -8520,8 +8491,6 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
ins.offset, 0, 0, 0,
BTRFS_FILE_EXTENT_PREALLOC);
if (ret) {
- btrfs_free_reserved_extent(root, ins.objectid,
- ins.offset);
btrfs_abort_transaction(trans, root, ret);
if (own_trans)
btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a111622..9d46f60 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -44,6 +44,7 @@
#include <linux/uuid.h>
#include <linux/btrfs.h>
#include <linux/uaccess.h>
+#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -320,7 +321,7 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(fdentry(file)->d_sb);
struct btrfs_device *device;
struct request_queue *q;
struct fstrim_range range;
@@ -368,13 +369,9 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
int btrfs_is_empty_uuid(u8 *uuid)
{
- int i;
+ static char empty_uuid[BTRFS_UUID_SIZE] = {0};
- for (i = 0; i < BTRFS_UUID_SIZE; i++) {
- if (uuid[i])
- return 0;
- }
- return 1;
+ return !memcmp(uuid, empty_uuid, BTRFS_UUID_SIZE);
}
static noinline int create_subvol(struct inode *dir,
@@ -439,7 +436,7 @@ static noinline int create_subvol(struct inode *dir,
btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(leaf, objectid);
- write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(),
+ write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(leaf),
BTRFS_FSID_SIZE);
write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
btrfs_header_chunk_tree_uuid(leaf),
@@ -577,7 +574,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (ret)
return ret;
- btrfs_wait_ordered_extents(root, -1);
+ btrfs_wait_ordered_extents(root);
pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
if (!pending_snapshot)
@@ -691,7 +688,7 @@ static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
* nfs_async_unlink().
*/
-static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
+static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
{
int error;
@@ -845,6 +842,7 @@ static int find_new_extents(struct btrfs_root *root,
{
struct btrfs_path *path;
struct btrfs_key min_key;
+ struct btrfs_key max_key;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *extent;
int type;
@@ -859,10 +857,15 @@ static int find_new_extents(struct btrfs_root *root,
min_key.type = BTRFS_EXTENT_DATA_KEY;
min_key.offset = *off;
+ max_key.objectid = ino;
+ max_key.type = (u8)-1;
+ max_key.offset = (u64)-1;
+
path->keep_locks = 1;
- while (1) {
- ret = btrfs_search_forward(root, &min_key, path, newer_than);
+ while(1) {
+ ret = btrfs_search_forward(root, &min_key, &max_key,
+ path, newer_than);
if (ret != 0)
goto none;
if (min_key.objectid != ino)
@@ -1203,7 +1206,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
ra = &file->f_ra;
}
- pages = kmalloc_array(max_cluster, sizeof(struct page *),
+ pages = kmalloc(sizeof(struct page *) * max_cluster,
GFP_NOFS);
if (!pages) {
ret = -ENOMEM;
@@ -1890,6 +1893,7 @@ static noinline int search_ioctl(struct inode *inode,
{
struct btrfs_root *root;
struct btrfs_key key;
+ struct btrfs_key max_key;
struct btrfs_path *path;
struct btrfs_ioctl_search_key *sk = &args->key;
struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
@@ -1921,10 +1925,15 @@ static noinline int search_ioctl(struct inode *inode,
key.type = sk->min_type;
key.offset = sk->min_offset;
+ max_key.objectid = sk->max_objectid;
+ max_key.type = sk->max_type;
+ max_key.offset = sk->max_offset;
+
path->keep_locks = 1;
- while (1) {
- ret = btrfs_search_forward(root, &key, path, sk->min_transid);
+ while(1) {
+ ret = btrfs_search_forward(root, &key, &max_key, path,
+ sk->min_transid);
if (ret != 0) {
if (ret > 0)
ret = 0;
@@ -2009,7 +2018,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
key.type = BTRFS_INODE_REF_KEY;
key.offset = (u64)-1;
- while (1) {
+ while(1) {
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
@@ -2038,7 +2047,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
}
*(ptr + len) = '/';
- read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len);
+ read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len);
if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
break;
@@ -2049,7 +2058,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
dirid = key.objectid;
}
memmove(name, ptr, total_len);
- name[total_len] = '\0';
+ name[total_len]='\0';
ret = 0;
out:
btrfs_free_path(path);
@@ -2089,7 +2098,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
static noinline int btrfs_ioctl_snap_destroy(struct file *file,
void __user *arg)
{
- struct dentry *parent = file->f_path.dentry;
+ struct dentry *parent = fdentry(file);
struct dentry *dentry;
struct inode *dir = parent->d_inode;
struct inode *inode;
@@ -2135,7 +2144,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
inode = dentry->d_inode;
dest = BTRFS_I(inode)->root;
- if (!capable(CAP_SYS_ADMIN)) {
+ if (!capable(CAP_SYS_ADMIN)){
/*
* Regular user. Only allow this with a special mount
* option, when the user has write+exec access to the
@@ -2718,10 +2727,15 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
size = sizeof(tmp) +
tmp.dest_count * sizeof(struct btrfs_ioctl_same_extent_info);
- same = memdup_user((struct btrfs_ioctl_same_args __user *)argp, size);
+ same = kmalloc(size, GFP_NOFS);
+ if (!same) {
+ ret = -EFAULT;
+ goto out;
+ }
- if (IS_ERR(same)) {
- ret = PTR_ERR(same);
+ if (copy_from_user(same,
+ (struct btrfs_ioctl_same_args __user *)argp, size)) {
+ ret = -EFAULT;
goto out;
}
@@ -3105,7 +3119,7 @@ out:
static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
u64 off, u64 olen, u64 destoff)
{
- struct inode *inode = file_inode(file);
+ struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct fd src_file;
struct inode *src;
@@ -3665,10 +3679,9 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
switch (p->cmd) {
case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
- if (root->fs_info->sb->s_flags & MS_RDONLY) {
- ret = -EROFS;
- goto out;
- }
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
if (atomic_xchg(
&root->fs_info->mutually_exclusive_operation_running,
1)) {
@@ -3694,7 +3707,7 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
if (copy_to_user(arg, p, sizeof(*p)))
ret = -EFAULT;
-out:
+
kfree(p);
return ret;
}
@@ -4304,7 +4317,7 @@ static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -4544,15 +4557,9 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_logical_to_ino(root, argp);
case BTRFS_IOC_SPACE_INFO:
return btrfs_ioctl_space_info(root, argp);
- case BTRFS_IOC_SYNC: {
- int ret;
-
- ret = btrfs_start_delalloc_roots(root->fs_info, 0);
- if (ret)
- return ret;
- ret = btrfs_sync_fs(file->f_dentry->d_sb, 1);
- return ret;
- }
+ case BTRFS_IOC_SYNC:
+ btrfs_sync_fs(file->f_dentry->d_sb, 1);
+ return 0;
case BTRFS_IOC_START_SYNC:
return btrfs_ioctl_start_sync(root, argp);
case BTRFS_IOC_WAIT_SYNC:
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 69582d5..c702cb6 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -537,9 +537,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
*/
if (RB_EMPTY_ROOT(&tree->tree) &&
!mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
- spin_lock(&root->fs_info->ordered_root_lock);
list_del_init(&BTRFS_I(inode)->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
}
if (!root->nr_ordered_extents) {
@@ -565,11 +563,10 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
* wait for all the ordered extents in a root. This is done when balancing
* space between drives.
*/
-int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
+void btrfs_wait_ordered_extents(struct btrfs_root *root)
{
struct list_head splice, works;
struct btrfs_ordered_extent *ordered, *next;
- int count = 0;
INIT_LIST_HEAD(&splice);
INIT_LIST_HEAD(&works);
@@ -577,7 +574,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
mutex_lock(&root->fs_info->ordered_operations_mutex);
spin_lock(&root->ordered_extent_lock);
list_splice_init(&root->ordered_extents, &splice);
- while (!list_empty(&splice) && nr) {
+ while (!list_empty(&splice)) {
ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
root_extent_list);
list_move_tail(&ordered->root_extent_list,
@@ -592,11 +589,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
cond_resched();
spin_lock(&root->ordered_extent_lock);
- if (nr != -1)
- nr--;
- count++;
}
- list_splice_tail(&splice, &root->ordered_extents);
spin_unlock(&root->ordered_extent_lock);
list_for_each_entry_safe(ordered, next, &works, work_list) {
@@ -606,21 +599,18 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
cond_resched();
}
mutex_unlock(&root->fs_info->ordered_operations_mutex);
-
- return count;
}
-void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
+void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root;
struct list_head splice;
- int done;
INIT_LIST_HEAD(&splice);
spin_lock(&fs_info->ordered_root_lock);
list_splice_init(&fs_info->ordered_roots, &splice);
- while (!list_empty(&splice) && nr) {
+ while (!list_empty(&splice)) {
root = list_first_entry(&splice, struct btrfs_root,
ordered_root);
root = btrfs_grab_fs_root(root);
@@ -629,16 +619,11 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
&fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock);
- done = btrfs_wait_ordered_extents(root, nr);
+ btrfs_wait_ordered_extents(root);
btrfs_put_fs_root(root);
spin_lock(&fs_info->ordered_root_lock);
- if (nr != -1) {
- nr -= done;
- WARN_ON(nr < 0);
- }
}
- list_splice_tail(&splice, &fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock);
}
@@ -749,9 +734,8 @@ void btrfs_start_ordered_extent(struct inode *inode,
/*
* Used to wait on ordered extents across a large range of bytes.
*/
-int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
+void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
{
- int ret = 0;
u64 end;
u64 orig_end;
struct btrfs_ordered_extent *ordered;
@@ -767,9 +751,8 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
/* start IO across the range first to instantiate any delalloc
* extents
*/
- ret = filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
- if (ret)
- return ret;
+ filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
+
/*
* So with compression we will find and lock a dirty page and clear the
* first one as dirty, setup an async extent, and immediately return
@@ -785,15 +768,10 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
* right and you are wrong.
*/
if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
- &BTRFS_I(inode)->runtime_flags)) {
- ret = filemap_fdatawrite_range(inode->i_mapping, start,
- orig_end);
- if (ret)
- return ret;
- }
- ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
- if (ret)
- return ret;
+ &BTRFS_I(inode)->runtime_flags))
+ filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
+
+ filemap_fdatawait_range(inode->i_mapping, start, orig_end);
end = orig_end;
while (1) {
@@ -804,20 +782,17 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
btrfs_put_ordered_extent(ordered);
break;
}
- if (ordered->file_offset + ordered->len <= start) {
+ if (ordered->file_offset + ordered->len < start) {
btrfs_put_ordered_extent(ordered);
break;
}
btrfs_start_ordered_extent(inode, ordered, 1);
end = ordered->file_offset;
- if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
- ret = -EIO;
btrfs_put_ordered_extent(ordered);
- if (ret || end == 0 || end == start)
+ if (end == 0 || end == start)
break;
end--;
}
- return ret;
}
/*
@@ -1101,7 +1076,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
* if this file hasn't been changed since the last transaction
* commit, we can safely return without doing anything
*/
- if (last_mod <= root->fs_info->last_trans_committed)
+ if (last_mod < root->fs_info->last_trans_committed)
return;
spin_lock(&root->fs_info->ordered_root_lock);
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 9b0450f..0c0b356 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -180,7 +180,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
u64 file_offset);
void btrfs_start_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry, int wait);
-int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
+void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
@@ -195,8 +195,8 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
-int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
-void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
+void btrfs_wait_ordered_extents(struct btrfs_root *root);
+void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info);
void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode);
void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 417053b..0088bed 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -193,7 +193,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d",
btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l));
for (i = 0 ; i < nr ; i++) {
- item = btrfs_item_nr(i);
+ item = btrfs_item_nr(l, i);
btrfs_item_key_to_cpu(l, &key, i);
type = btrfs_key_type(&key);
printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d "
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 24ac218..d0ecfbd 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -33,6 +33,7 @@
#include <linux/raid/xor.h>
#include <linux/vmalloc.h>
#include <asm/div64.h>
+#include "compat.h"
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ce459a7..4a35572 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1383,7 +1383,6 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
{
struct btrfs_root *reloc_root;
struct reloc_control *rc = root->fs_info->reloc_ctl;
- struct btrfs_block_rsv *rsv;
int clear_rsv = 0;
int ret;
@@ -1397,14 +1396,13 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
return 0;
- if (!trans->reloc_reserved) {
- rsv = trans->block_rsv;
+ if (!trans->block_rsv) {
trans->block_rsv = rc->block_rsv;
clear_rsv = 1;
}
reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
if (clear_rsv)
- trans->block_rsv = rsv;
+ trans->block_rsv = NULL;
ret = __add_reloc_root(reloc_root);
BUG_ON(ret < 0);
@@ -1777,7 +1775,8 @@ again:
new_ptr_gen = 0;
}
- if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) {
+ if (new_bytenr > 0 && new_bytenr == old_bytenr) {
+ WARN_ON(1);
ret = level;
break;
}
@@ -2059,7 +2058,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
LIST_HEAD(inode_list);
struct btrfs_key key;
struct btrfs_key next_key;
- struct btrfs_trans_handle *trans = NULL;
+ struct btrfs_trans_handle *trans;
struct btrfs_root *reloc_root;
struct btrfs_root_item *root_item;
struct btrfs_path *path;
@@ -2108,19 +2107,18 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
memset(&next_key, 0, sizeof(next_key));
while (1) {
+ trans = btrfs_start_transaction(root, 0);
+ BUG_ON(IS_ERR(trans));
+ trans->block_rsv = rc->block_rsv;
+
ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
BTRFS_RESERVE_FLUSH_ALL);
if (ret) {
- err = ret;
- goto out;
- }
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
- trans = NULL;
- goto out;
+ BUG_ON(ret != -EAGAIN);
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+ continue;
}
- trans->block_rsv = rc->block_rsv;
replaced = 0;
max_level = level;
@@ -2166,7 +2164,6 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
root_item->drop_level = level;
btrfs_end_transaction_throttle(trans, root);
- trans = NULL;
btrfs_btree_balance_dirty(root);
@@ -2195,8 +2192,7 @@ out:
btrfs_update_reloc_root(trans, root);
}
- if (trans)
- btrfs_end_transaction_throttle(trans, root);
+ btrfs_end_transaction_throttle(trans, root);
btrfs_btree_balance_dirty(root);
@@ -3262,7 +3258,7 @@ static int add_tree_block(struct reloc_control *rc,
struct rb_node *rb_node;
u32 item_size;
int level = -1;
- u64 generation;
+ int generation;
eb = path->nodes[0];
item_size = btrfs_item_size_nr(eb, path->slots[0]);
@@ -3411,6 +3407,7 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
struct inode *inode, u64 ino)
{
struct btrfs_key key;
+ struct btrfs_path *path;
struct btrfs_root *root = fs_info->tree_root;
struct btrfs_trans_handle *trans;
int ret = 0;
@@ -3435,14 +3432,22 @@ truncate:
if (ret)
goto out;
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
+ btrfs_free_path(path);
ret = PTR_ERR(trans);
goto out;
}
- ret = btrfs_truncate_free_space_cache(root, trans, inode);
+ ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
+ btrfs_free_path(path);
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
out:
@@ -3544,8 +3549,10 @@ static int find_data_references(struct reloc_control *rc,
err = ret;
goto out;
}
- if (WARN_ON(ret > 0))
+ if (ret > 0) {
+ WARN_ON(1);
goto out;
+ }
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
@@ -3565,9 +3572,11 @@ static int find_data_references(struct reloc_control *rc,
}
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (WARN_ON(key.objectid != ref_objectid ||
- key.type != BTRFS_EXTENT_DATA_KEY))
+ if (key.objectid != ref_objectid ||
+ key.type != BTRFS_EXTENT_DATA_KEY) {
+ WARN_ON(1);
break;
+ }
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
@@ -3992,6 +4001,16 @@ restart:
}
}
+ ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5);
+ if (ret < 0) {
+ if (ret != -ENOSPC) {
+ err = ret;
+ WARN_ON(1);
+ break;
+ }
+ rc->commit_transaction = 1;
+ }
+
if (rc->commit_transaction) {
rc->commit_transaction = 0;
ret = btrfs_commit_transaction(trans, rc->extent_root);
@@ -4222,12 +4241,12 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
rc->block_group->key.objectid, rc->block_group->flags);
- ret = btrfs_start_delalloc_roots(fs_info, 0);
+ ret = btrfs_start_all_delalloc_inodes(fs_info, 0);
if (ret < 0) {
err = ret;
goto out;
}
- btrfs_wait_ordered_roots(fs_info, -1);
+ btrfs_wait_all_ordered_extents(fs_info);
while (1) {
mutex_lock(&fs_info->cleaner_mutex);
@@ -4245,12 +4264,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
rc->extents_found);
if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
- ret = btrfs_wait_ordered_range(rc->data_inode, 0,
- (u64)-1);
- if (ret) {
- err = ret;
- goto out;
- }
+ btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
invalidate_mapping_pages(rc->data_inode->i_mapping,
0, -1);
rc->stage = UPDATE_DATA_PTRS;
@@ -4467,7 +4481,6 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
u64 disk_bytenr;
- u64 new_bytenr;
LIST_HEAD(list);
ordered = btrfs_lookup_ordered_extent(inode, file_pos);
@@ -4479,24 +4492,13 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
if (ret)
goto out;
+ disk_bytenr = ordered->start;
while (!list_empty(&list)) {
sums = list_entry(list.next, struct btrfs_ordered_sum, list);
list_del_init(&sums->list);
- /*
- * We need to offset the new_bytenr based on where the csum is.
- * We need to do this because we will read in entire prealloc
- * extents but we may have written to say the middle of the
- * prealloc extent, so we need to make sure the csum goes with
- * the right disk offset.
- *
- * We can do this because the data reloc inode refers strictly
- * to the on disk bytes, so we don't have to worry about
- * disk_len vs real len like with real inodes since it's all
- * disk length.
- */
- new_bytenr = ordered->start + (sums->bytenr - disk_bytenr);
- sums->bytenr = new_bytenr;
+ sums->bytenr = disk_bytenr;
+ disk_bytenr += sums->len;
btrfs_add_ordered_sum(inode, ordered, sums);
}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 1fd3f33..a18e0e2 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -208,6 +208,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
int is_metadata, int have_csum,
const u8 *csum, u64 generation,
u16 csum_size);
+static void scrub_complete_bio_end_io(struct bio *bio, int err);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
struct scrub_block *sblock_good,
int force_write);
@@ -937,10 +938,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
BTRFS_DEV_STAT_CORRUPTION_ERRS);
}
- if (sctx->readonly) {
- ASSERT(!sctx->is_dev_replace);
- goto out;
- }
+ if (sctx->readonly && !sctx->is_dev_replace)
+ goto did_not_correct_error;
if (!is_metadata && !have_csum) {
struct scrub_fixup_nodatasum *fixup_nodatasum;
@@ -1293,6 +1292,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
for (page_num = 0; page_num < sblock->page_count; page_num++) {
struct bio *bio;
struct scrub_page *page = sblock->pagev[page_num];
+ DECLARE_COMPLETION_ONSTACK(complete);
if (page->dev->bdev == NULL) {
page->io_error = 1;
@@ -1309,11 +1309,18 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
}
bio->bi_bdev = page->dev->bdev;
bio->bi_sector = page->physical >> 9;
+ bio->bi_end_io = scrub_complete_bio_end_io;
+ bio->bi_private = &complete;
bio_add_page(bio, page->page, PAGE_SIZE, 0);
- if (btrfsic_submit_bio_wait(READ, bio))
- sblock->no_io_error_seen = 0;
+ btrfsic_submit_bio(READ, bio);
+
+ /* this will also unplug the queue */
+ wait_for_completion(&complete);
+ page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+ sblock->no_io_error_seen = 0;
bio_put(bio);
}
@@ -1382,6 +1389,11 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
sblock->checksum_error = 1;
}
+static void scrub_complete_bio_end_io(struct bio *bio, int err)
+{
+ complete((struct completion *)bio->bi_private);
+}
+
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
struct scrub_block *sblock_good,
int force_write)
@@ -1416,6 +1428,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
sblock_bad->checksum_error || page_bad->io_error) {
struct bio *bio;
int ret;
+ DECLARE_COMPLETION_ONSTACK(complete);
if (!page_bad->dev->bdev) {
printk_ratelimited(KERN_WARNING
@@ -1428,14 +1441,19 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
return -EIO;
bio->bi_bdev = page_bad->dev->bdev;
bio->bi_sector = page_bad->physical >> 9;
+ bio->bi_end_io = scrub_complete_bio_end_io;
+ bio->bi_private = &complete;
ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
if (PAGE_SIZE != ret) {
bio_put(bio);
return -EIO;
}
+ btrfsic_submit_bio(WRITE, bio);
- if (btrfsic_submit_bio_wait(WRITE, bio)) {
+ /* this will also unplug the queue */
+ wait_for_completion(&complete);
+ if (!bio_flagged(bio, BIO_UPTODATE)) {
btrfs_dev_stat_inc_and_print(page_bad->dev,
BTRFS_DEV_STAT_WRITE_ERRS);
btrfs_dev_replace_stats_inc(
@@ -2699,6 +2717,8 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
mutex_unlock(&fs_info->scrub_lock);
wake_up(&fs_info->scrub_pause_wait);
+ dev_replace->cursor_left = dev_replace->cursor_right;
+ dev_replace->item_needs_writeback = 1;
btrfs_put_block_group(cache);
if (ret)
break;
@@ -2712,9 +2732,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
break;
}
- dev_replace->cursor_left = dev_replace->cursor_right;
- dev_replace->item_needs_writeback = 1;
-
key.offset = found_key.offset + length;
btrfs_release_path(path);
}
@@ -2766,6 +2783,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
{
int ret = 0;
+ mutex_lock(&fs_info->scrub_lock);
if (fs_info->scrub_workers_refcnt == 0) {
if (is_dev_replace)
btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1,
@@ -2795,17 +2813,21 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
}
++fs_info->scrub_workers_refcnt;
out:
+ mutex_unlock(&fs_info->scrub_lock);
+
return ret;
}
static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
{
+ mutex_lock(&fs_info->scrub_lock);
if (--fs_info->scrub_workers_refcnt == 0) {
btrfs_stop_workers(&fs_info->scrub_workers);
btrfs_stop_workers(&fs_info->scrub_wr_completion_workers);
btrfs_stop_workers(&fs_info->scrub_nocow_workers);
}
WARN_ON(fs_info->scrub_workers_refcnt < 0);
+ mutex_unlock(&fs_info->scrub_lock);
}
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
@@ -2866,18 +2888,23 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return -EINVAL;
}
+ ret = scrub_workers_get(fs_info, is_dev_replace);
+ if (ret)
+ return ret;
mutex_lock(&fs_info->fs_devices->device_list_mutex);
dev = btrfs_find_device(fs_info, devid, NULL, NULL);
if (!dev || (dev->missing && !is_dev_replace)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ scrub_workers_put(fs_info);
return -ENODEV;
}
-
mutex_lock(&fs_info->scrub_lock);
+
if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ scrub_workers_put(fs_info);
return -EIO;
}
@@ -2888,17 +2915,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
btrfs_dev_replace_unlock(&fs_info->dev_replace);
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ scrub_workers_put(fs_info);
return -EINPROGRESS;
}
btrfs_dev_replace_unlock(&fs_info->dev_replace);
-
- ret = scrub_workers_get(fs_info, is_dev_replace);
- if (ret) {
- mutex_unlock(&fs_info->scrub_lock);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- return ret;
- }
-
sctx = scrub_setup_ctx(dev, is_dev_replace);
if (IS_ERR(sctx)) {
mutex_unlock(&fs_info->scrub_lock);
@@ -2911,15 +2931,13 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
atomic_inc(&fs_info->scrubs_running);
mutex_unlock(&fs_info->scrub_lock);
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
if (!is_dev_replace) {
- /*
- * by holding device list mutex, we can
- * kick off writing super in log tree sync.
- */
+ down_read(&fs_info->scrub_super_lock);
ret = scrub_supers(sctx, dev);
+ up_read(&fs_info->scrub_super_lock);
}
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
if (!ret)
ret = scrub_enumerate_chunks(sctx, dev, start, end,
@@ -2936,10 +2954,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
mutex_lock(&fs_info->scrub_lock);
dev->scrub_device = NULL;
- scrub_workers_put(fs_info);
mutex_unlock(&fs_info->scrub_lock);
scrub_free_ctx(sctx);
+ scrub_workers_put(fs_info);
return ret;
}
@@ -2969,6 +2987,16 @@ void btrfs_scrub_continue(struct btrfs_root *root)
wake_up(&fs_info->scrub_pause_wait);
}
+void btrfs_scrub_pause_super(struct btrfs_root *root)
+{
+ down_write(&root->fs_info->scrub_super_lock);
+}
+
+void btrfs_scrub_continue_super(struct btrfs_root *root)
+{
+ up_write(&root->fs_info->scrub_super_lock);
+}
+
int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
{
mutex_lock(&fs_info->scrub_lock);
@@ -3355,6 +3383,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
struct bio *bio;
struct btrfs_device *dev;
int ret;
+ DECLARE_COMPLETION_ONSTACK(compl);
dev = sctx->wr_ctx.tgtdev;
if (!dev)
@@ -3371,6 +3400,8 @@ static int write_page_nocow(struct scrub_ctx *sctx,
spin_unlock(&sctx->stat_lock);
return -ENOMEM;
}
+ bio->bi_private = &compl;
+ bio->bi_end_io = scrub_complete_bio_end_io;
bio->bi_size = 0;
bio->bi_sector = physical_for_dev_replace >> 9;
bio->bi_bdev = dev->bdev;
@@ -3381,8 +3412,10 @@ leave_with_eio:
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
return -EIO;
}
+ btrfsic_submit_bio(WRITE_SYNC, bio);
+ wait_for_completion(&compl);
- if (btrfsic_submit_bio_wait(WRITE_SYNC, bio))
+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
goto leave_with_eio;
bio_put(bio);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 6837fe8..e46e0ed 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -121,6 +121,7 @@ struct send_ctx {
struct list_head name_cache_list;
int name_cache_size;
+ struct file *cur_inode_filp;
char *read_buf;
};
@@ -564,8 +565,10 @@ static int begin_cmd(struct send_ctx *sctx, int cmd)
{
struct btrfs_cmd_header *hdr;
- if (WARN_ON(!sctx->send_buf))
+ if (!sctx->send_buf) {
+ WARN_ON(1);
return -EINVAL;
+ }
BUG_ON(sctx->send_size);
@@ -788,7 +791,7 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
if (found_key->type == BTRFS_INODE_REF_KEY) {
ptr = (unsigned long)btrfs_item_ptr(eb, slot,
struct btrfs_inode_ref);
- item = btrfs_item_nr(slot);
+ item = btrfs_item_nr(eb, slot);
total = btrfs_item_size(eb, item);
elem_size = sizeof(*iref);
} else {
@@ -902,7 +905,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
eb = path->nodes[0];
slot = path->slots[0];
- item = btrfs_item_nr(slot);
+ item = btrfs_item_nr(eb, slot);
di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
cur = 0;
len = 0;
@@ -2117,6 +2120,77 @@ out:
}
/*
+ * Called for regular files when sending extents data. Opens a struct file
+ * to read from the file.
+ */
+static int open_cur_inode_file(struct send_ctx *sctx)
+{
+ int ret = 0;
+ struct btrfs_key key;
+ struct path path;
+ struct inode *inode;
+ struct dentry *dentry;
+ struct file *filp;
+ int new = 0;
+
+ if (sctx->cur_inode_filp)
+ goto out;
+
+ key.objectid = sctx->cur_ino;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ inode = btrfs_iget(sctx->send_root->fs_info->sb, &key, sctx->send_root,
+ &new);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ goto out;
+ }
+
+ dentry = d_obtain_alias(inode);
+ inode = NULL;
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
+ goto out;
+ }
+
+ path.mnt = sctx->mnt;
+ path.dentry = dentry;
+ filp = dentry_open(&path, O_RDONLY | O_LARGEFILE, current_cred());
+ dput(dentry);
+ dentry = NULL;
+ if (IS_ERR(filp)) {
+ ret = PTR_ERR(filp);
+ goto out;
+ }
+ sctx->cur_inode_filp = filp;
+
+out:
+ /*
+ * no xxxput required here as every vfs op
+ * does it by itself on failure
+ */
+ return ret;
+}
+
+/*
+ * Closes the struct file that was created in open_cur_inode_file
+ */
+static int close_cur_inode_file(struct send_ctx *sctx)
+{
+ int ret = 0;
+
+ if (!sctx->cur_inode_filp)
+ goto out;
+
+ ret = filp_close(sctx->cur_inode_filp, NULL);
+ sctx->cur_inode_filp = NULL;
+
+out:
+ return ret;
+}
+
+/*
* Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
*/
static int send_subvol_begin(struct send_ctx *sctx)
@@ -3548,72 +3622,6 @@ out:
return ret;
}
-static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
-{
- struct btrfs_root *root = sctx->send_root;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct inode *inode;
- struct page *page;
- char *addr;
- struct btrfs_key key;
- pgoff_t index = offset >> PAGE_CACHE_SHIFT;
- pgoff_t last_index;
- unsigned pg_offset = offset & ~PAGE_CACHE_MASK;
- ssize_t ret = 0;
-
- key.objectid = sctx->cur_ino;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
-
- inode = btrfs_iget(fs_info->sb, &key, root, NULL);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
-
- if (offset + len > i_size_read(inode)) {
- if (offset > i_size_read(inode))
- len = 0;
- else
- len = offset - i_size_read(inode);
- }
- if (len == 0)
- goto out;
-
- last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT;
- while (index <= last_index) {
- unsigned cur_len = min_t(unsigned, len,
- PAGE_CACHE_SIZE - pg_offset);
- page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
- if (!page) {
- ret = -ENOMEM;
- break;
- }
-
- if (!PageUptodate(page)) {
- btrfs_readpage(NULL, page);
- lock_page(page);
- if (!PageUptodate(page)) {
- unlock_page(page);
- page_cache_release(page);
- ret = -EIO;
- break;
- }
- }
-
- addr = kmap(page);
- memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
- kunmap(page);
- unlock_page(page);
- page_cache_release(page);
- index++;
- pg_offset = 0;
- len -= cur_len;
- ret += cur_len;
- }
-out:
- iput(inode);
- return ret;
-}
-
/*
* Read some bytes from the current inode/file and send a write command to
* user space.
@@ -3622,20 +3630,35 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
{
int ret = 0;
struct fs_path *p;
- ssize_t num_read = 0;
+ loff_t pos = offset;
+ int num_read = 0;
+ mm_segment_t old_fs;
p = fs_path_alloc();
if (!p)
return -ENOMEM;
+ /*
+ * vfs normally only accepts user space buffers for security reasons.
+ * we only read from the file and also only provide the read_buf buffer
+ * to vfs. As this buffer does not come from a user space call, it's
+ * ok to temporary allow kernel space buffers.
+ */
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+
verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
- num_read = fill_read_buf(sctx, offset, len);
- if (num_read <= 0) {
- if (num_read < 0)
- ret = num_read;
+ ret = open_cur_inode_file(sctx);
+ if (ret < 0)
+ goto out;
+
+ ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos);
+ if (ret < 0)
+ goto out;
+ num_read = ret;
+ if (!num_read)
goto out;
- }
ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
if (ret < 0)
@@ -3654,6 +3677,7 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
tlv_put_failure:
out:
fs_path_free(p);
+ set_fs(old_fs);
if (ret < 0)
return ret;
return num_read;
@@ -3902,16 +3926,16 @@ static int is_extent_unchanged(struct send_ctx *sctx,
while (key.offset < ekey->offset + left_len) {
ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
right_type = btrfs_file_extent_type(eb, ei);
- if (right_type != BTRFS_FILE_EXTENT_REG) {
- ret = 0;
- goto out;
- }
-
right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
right_len = btrfs_file_extent_num_bytes(eb, ei);
right_offset = btrfs_file_extent_offset(eb, ei);
right_gen = btrfs_file_extent_generation(eb, ei);
+ if (right_type != BTRFS_FILE_EXTENT_REG) {
+ ret = 0;
+ goto out;
+ }
+
/*
* Are we at extent 8? If yes, we know the extent is changed.
* This may only happen on the first iteration.
@@ -4198,6 +4222,10 @@ static int changed_inode(struct send_ctx *sctx,
u64 left_gen = 0;
u64 right_gen = 0;
+ ret = close_cur_inode_file(sctx);
+ if (ret < 0)
+ goto out;
+
sctx->cur_ino = key->objectid;
sctx->cur_inode_new_gen = 0;
@@ -4658,6 +4686,11 @@ static int send_subvol(struct send_ctx *sctx)
}
out:
+ if (!ret)
+ ret = close_cur_inode_file(sctx);
+ else
+ close_cur_inode_file(sctx);
+
free_recorded_refs(sctx);
return ret;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2d8ac1b..e913328 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -42,6 +42,7 @@
#include <linux/cleancache.h>
#include <linux/ratelimit.h>
#include <linux/btrfs.h>
+#include "compat.h"
#include "delayed-inode.h"
#include "ctree.h"
#include "disk-io.h"
@@ -920,7 +921,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return 0;
}
- btrfs_wait_ordered_roots(fs_info, -1);
+ btrfs_wait_all_ordered_extents(fs_info);
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
@@ -1329,12 +1330,6 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
* this also happens on 'umount -rf' or on shutdown, when
* the filesystem is busy.
*/
-
- /* wait for the uuid_scan task to finish */
- down(&fs_info->uuid_tree_rescan_sem);
- /* avoid complains from lockdep et al. */
- up(&fs_info->uuid_tree_rescan_sem);
-
sb->s_flags |= MS_RDONLY;
btrfs_dev_replace_suspend_for_unmount(fs_info);
@@ -1470,7 +1465,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
nr_devices = fs_info->fs_devices->open_devices;
BUG_ON(!nr_devices);
- devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
+ devices_info = kmalloc(sizeof(*devices_info) * nr_devices,
GFP_NOFS);
if (!devices_info)
return -ENOMEM;
@@ -1794,25 +1789,7 @@ static void btrfs_print_info(void)
static int btrfs_run_sanity_tests(void)
{
- int ret;
-
- ret = btrfs_init_test_fs();
- if (ret)
- return ret;
-
- ret = btrfs_test_free_space_cache();
- if (ret)
- goto out;
- ret = btrfs_test_extent_buffer_operations();
- if (ret)
- goto out;
- ret = btrfs_test_extent_io();
- if (ret)
- goto out;
- ret = btrfs_test_inodes();
-out:
- btrfs_destroy_test_fs();
- return ret;
+ return btrfs_test_free_space_cache();
}
static int __init init_btrfs_fs(void)
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
deleted file mode 100644
index 757ef00..0000000
--- a/fs/btrfs/tests/btrfs-tests.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (C) 2013 Fusion IO. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/fs.h>
-#include <linux/mount.h>
-#include <linux/magic.h>
-#include "btrfs-tests.h"
-#include "../ctree.h"
-
-static struct vfsmount *test_mnt = NULL;
-
-static const struct super_operations btrfs_test_super_ops = {
- .alloc_inode = btrfs_alloc_inode,
- .destroy_inode = btrfs_test_destroy_inode,
-};
-
-static struct dentry *btrfs_test_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *data)
-{
- return mount_pseudo(fs_type, "btrfs_test:", &btrfs_test_super_ops,
- NULL, BTRFS_TEST_MAGIC);
-}
-
-static struct file_system_type test_type = {
- .name = "btrfs_test_fs",
- .mount = btrfs_test_mount,
- .kill_sb = kill_anon_super,
-};
-
-struct inode *btrfs_new_test_inode(void)
-{
- return new_inode(test_mnt->mnt_sb);
-}
-
-int btrfs_init_test_fs(void)
-{
- int ret;
-
- ret = register_filesystem(&test_type);
- if (ret) {
- printk(KERN_ERR "btrfs: cannot register test file system\n");
- return ret;
- }
-
- test_mnt = kern_mount(&test_type);
- if (IS_ERR(test_mnt)) {
- printk(KERN_ERR "btrfs: cannot mount test file system\n");
- unregister_filesystem(&test_type);
- return ret;
- }
- return 0;
-}
-
-void btrfs_destroy_test_fs(void)
-{
- kern_unmount(test_mnt);
- unregister_filesystem(&test_type);
-}
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index b353bc8..5808776 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -24,36 +24,11 @@
#define test_msg(fmt, ...) pr_info("btrfs: selftest: " fmt, ##__VA_ARGS__)
int btrfs_test_free_space_cache(void);
-int btrfs_test_extent_buffer_operations(void);
-int btrfs_test_extent_io(void);
-int btrfs_test_inodes(void);
-int btrfs_init_test_fs(void);
-void btrfs_destroy_test_fs(void);
-struct inode *btrfs_new_test_inode(void);
#else
static inline int btrfs_test_free_space_cache(void)
{
return 0;
}
-static inline int btrfs_test_extent_buffer_operations(void)
-{
- return 0;
-}
-static inline int btrfs_init_test_fs(void)
-{
- return 0;
-}
-static inline void btrfs_destroy_test_fs(void)
-{
-}
-static inline int btrfs_test_extent_io(void)
-{
- return 0;
-}
-static inline int btrfs_test_inodes(void)
-{
- return 0;
-}
#endif
#endif
diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c
deleted file mode 100644
index cc286ce..0000000
--- a/fs/btrfs/tests/extent-buffer-tests.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Copyright (C) 2013 Fusion IO. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/slab.h>
-#include "btrfs-tests.h"
-#include "../ctree.h"
-#include "../extent_io.h"
-#include "../disk-io.h"
-
-static int test_btrfs_split_item(void)
-{
- struct btrfs_path *path;
- struct btrfs_root *root;
- struct extent_buffer *eb;
- struct btrfs_item *item;
- char *value = "mary had a little lamb";
- char *split1 = "mary had a little";
- char *split2 = " lamb";
- char *split3 = "mary";
- char *split4 = " had a little";
- char buf[32];
- struct btrfs_key key;
- u32 value_len = strlen(value);
- int ret = 0;
-
- test_msg("Running btrfs_split_item tests\n");
-
- root = btrfs_alloc_dummy_root();
- if (IS_ERR(root)) {
- test_msg("Could not allocate root\n");
- return PTR_ERR(root);
- }
-
- path = btrfs_alloc_path();
- if (!path) {
- test_msg("Could not allocate path\n");
- kfree(root);
- return -ENOMEM;
- }
-
- path->nodes[0] = eb = alloc_dummy_extent_buffer(0, 4096);
- if (!eb) {
- test_msg("Could not allocate dummy buffer\n");
- ret = -ENOMEM;
- goto out;
- }
- path->slots[0] = 0;
-
- key.objectid = 0;
- key.type = BTRFS_EXTENT_CSUM_KEY;
- key.offset = 0;
-
- setup_items_for_insert(root, path, &key, &value_len, value_len,
- value_len + sizeof(struct btrfs_item), 1);
- item = btrfs_item_nr(0);
- write_extent_buffer(eb, value, btrfs_item_ptr_offset(eb, 0),
- value_len);
-
- key.offset = 3;
-
- /*
- * Passing NULL trans here should be safe because we have plenty of
- * space in this leaf to split the item without having to split the
- * leaf.
- */
- ret = btrfs_split_item(NULL, root, path, &key, 17);
- if (ret) {
- test_msg("Split item failed %d\n", ret);
- goto out;
- }
-
- /*
- * Read the first slot, it should have the original key and contain only
- * 'mary had a little'
- */
- btrfs_item_key_to_cpu(eb, &key, 0);
- if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
- key.offset != 0) {
- test_msg("Invalid key at slot 0\n");
- ret = -EINVAL;
- goto out;
- }
-
- item = btrfs_item_nr(0);
- if (btrfs_item_size(eb, item) != strlen(split1)) {
- test_msg("Invalid len in the first split\n");
- ret = -EINVAL;
- goto out;
- }
-
- read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0),
- strlen(split1));
- if (memcmp(buf, split1, strlen(split1))) {
- test_msg("Data in the buffer doesn't match what it should "
- "in the first split have='%.*s' want '%s'\n",
- (int)strlen(split1), buf, split1);
- ret = -EINVAL;
- goto out;
- }
-
- btrfs_item_key_to_cpu(eb, &key, 1);
- if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
- key.offset != 3) {
- test_msg("Invalid key at slot 1\n");
- ret = -EINVAL;
- goto out;
- }
-
- item = btrfs_item_nr(1);
- if (btrfs_item_size(eb, item) != strlen(split2)) {
- test_msg("Invalid len in the second split\n");
- ret = -EINVAL;
- goto out;
- }
-
- read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1),
- strlen(split2));
- if (memcmp(buf, split2, strlen(split2))) {
- test_msg("Data in the buffer doesn't match what it should "
- "in the second split\n");
- ret = -EINVAL;
- goto out;
- }
-
- key.offset = 1;
- /* Do it again so we test memmoving the other items in the leaf */
- ret = btrfs_split_item(NULL, root, path, &key, 4);
- if (ret) {
- test_msg("Second split item failed %d\n", ret);
- goto out;
- }
-
- btrfs_item_key_to_cpu(eb, &key, 0);
- if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
- key.offset != 0) {
- test_msg("Invalid key at slot 0\n");
- ret = -EINVAL;
- goto out;
- }
-
- item = btrfs_item_nr(0);
- if (btrfs_item_size(eb, item) != strlen(split3)) {
- test_msg("Invalid len in the first split\n");
- ret = -EINVAL;
- goto out;
- }
-
- read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0),
- strlen(split3));
- if (memcmp(buf, split3, strlen(split3))) {
- test_msg("Data in the buffer doesn't match what it should "
- "in the third split");
- ret = -EINVAL;
- goto out;
- }
-
- btrfs_item_key_to_cpu(eb, &key, 1);
- if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
- key.offset != 1) {
- test_msg("Invalid key at slot 1\n");
- ret = -EINVAL;
- goto out;
- }
-
- item = btrfs_item_nr(1);
- if (btrfs_item_size(eb, item) != strlen(split4)) {
- test_msg("Invalid len in the second split\n");
- ret = -EINVAL;
- goto out;
- }
-
- read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1),
- strlen(split4));
- if (memcmp(buf, split4, strlen(split4))) {
- test_msg("Data in the buffer doesn't match what it should "
- "in the fourth split\n");
- ret = -EINVAL;
- goto out;
- }
-
- btrfs_item_key_to_cpu(eb, &key, 2);
- if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
- key.offset != 3) {
- test_msg("Invalid key at slot 2\n");
- ret = -EINVAL;
- goto out;
- }
-
- item = btrfs_item_nr(2);
- if (btrfs_item_size(eb, item) != strlen(split2)) {
- test_msg("Invalid len in the second split\n");
- ret = -EINVAL;
- goto out;
- }
-
- read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 2),
- strlen(split2));
- if (memcmp(buf, split2, strlen(split2))) {
- test_msg("Data in the buffer doesn't match what it should "
- "in the last chunk\n");
- ret = -EINVAL;
- goto out;
- }
-out:
- btrfs_free_path(path);
- kfree(root);
- return ret;
-}
-
-int btrfs_test_extent_buffer_operations(void)
-{
- test_msg("Running extent buffer operation tests");
- return test_btrfs_split_item();
-}
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
deleted file mode 100644
index 7e99c2f..0000000
--- a/fs/btrfs/tests/extent-io-tests.c
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright (C) 2013 Fusion IO. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/pagemap.h>
-#include <linux/sched.h>
-#include "btrfs-tests.h"
-#include "../extent_io.h"
-
-#define PROCESS_UNLOCK (1 << 0)
-#define PROCESS_RELEASE (1 << 1)
-#define PROCESS_TEST_LOCKED (1 << 2)
-
-static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
- unsigned long flags)
-{
- int ret;
- struct page *pages[16];
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = end >> PAGE_CACHE_SHIFT;
- unsigned long nr_pages = end_index - index + 1;
- int i;
- int count = 0;
- int loops = 0;
-
- while (nr_pages > 0) {
- ret = find_get_pages_contig(inode->i_mapping, index,
- min_t(unsigned long, nr_pages,
- ARRAY_SIZE(pages)), pages);
- for (i = 0; i < ret; i++) {
- if (flags & PROCESS_TEST_LOCKED &&
- !PageLocked(pages[i]))
- count++;
- if (flags & PROCESS_UNLOCK && PageLocked(pages[i]))
- unlock_page(pages[i]);
- page_cache_release(pages[i]);
- if (flags & PROCESS_RELEASE)
- page_cache_release(pages[i]);
- }
- nr_pages -= ret;
- index += ret;
- cond_resched();
- loops++;
- if (loops > 100000) {
- printk(KERN_ERR "stuck in a loop, start %Lu, end %Lu, nr_pages %lu, ret %d\n", start, end, nr_pages, ret);
- break;
- }
- }
- return count;
-}
-
-static int test_find_delalloc(void)
-{
- struct inode *inode;
- struct extent_io_tree tmp;
- struct page *page;
- struct page *locked_page = NULL;
- unsigned long index = 0;
- u64 total_dirty = 256 * 1024 * 1024;
- u64 max_bytes = 128 * 1024 * 1024;
- u64 start, end, test_start;
- u64 found;
- int ret = -EINVAL;
-
- inode = btrfs_new_test_inode();
- if (!inode) {
- test_msg("Failed to allocate test inode\n");
- return -ENOMEM;
- }
-
- extent_io_tree_init(&tmp, &inode->i_data);
-
- /*
- * First go through and create and mark all of our pages dirty, we pin
- * everything to make sure our pages don't get evicted and screw up our
- * test.
- */
- for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
- page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
- if (!page) {
- test_msg("Failed to allocate test page\n");
- ret = -ENOMEM;
- goto out;
- }
- SetPageDirty(page);
- if (index) {
- unlock_page(page);
- } else {
- page_cache_get(page);
- locked_page = page;
- }
- }
-
- /* Test this scenario
- * |--- delalloc ---|
- * |--- search ---|
- */
- set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
- start = 0;
- end = 0;
- found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
- &end, max_bytes);
- if (!found) {
- test_msg("Should have found at least one delalloc\n");
- goto out_bits;
- }
- if (start != 0 || end != 4095) {
- test_msg("Expected start 0 end 4095, got start %Lu end %Lu\n",
- start, end);
- goto out_bits;
- }
- unlock_extent(&tmp, start, end);
- unlock_page(locked_page);
- page_cache_release(locked_page);
-
- /*
- * Test this scenario
- *
- * |--- delalloc ---|
- * |--- search ---|
- */
- test_start = 64 * 1024 * 1024;
- locked_page = find_lock_page(inode->i_mapping,
- test_start >> PAGE_CACHE_SHIFT);
- if (!locked_page) {
- test_msg("Couldn't find the locked page\n");
- goto out_bits;
- }
- set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
- start = test_start;
- end = 0;
- found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
- &end, max_bytes);
- if (!found) {
- test_msg("Couldn't find delalloc in our range\n");
- goto out_bits;
- }
- if (start != test_start || end != max_bytes - 1) {
- test_msg("Expected start %Lu end %Lu, got start %Lu, end "
- "%Lu\n", test_start, max_bytes - 1, start, end);
- goto out_bits;
- }
- if (process_page_range(inode, start, end,
- PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) {
- test_msg("There were unlocked pages in the range\n");
- goto out_bits;
- }
- unlock_extent(&tmp, start, end);
- /* locked_page was unlocked above */
- page_cache_release(locked_page);
-
- /*
- * Test this scenario
- * |--- delalloc ---|
- * |--- search ---|
- */
- test_start = max_bytes + 4096;
- locked_page = find_lock_page(inode->i_mapping, test_start >>
- PAGE_CACHE_SHIFT);
- if (!locked_page) {
- test_msg("Could'nt find the locked page\n");
- goto out_bits;
- }
- start = test_start;
- end = 0;
- found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
- &end, max_bytes);
- if (found) {
- test_msg("Found range when we shouldn't have\n");
- goto out_bits;
- }
- if (end != (u64)-1) {
- test_msg("Did not return the proper end offset\n");
- goto out_bits;
- }
-
- /*
- * Test this scenario
- * [------- delalloc -------|
- * [max_bytes]|-- search--|
- *
- * We are re-using our test_start from above since it works out well.
- */
- set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
- start = test_start;
- end = 0;
- found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
- &end, max_bytes);
- if (!found) {
- test_msg("Didn't find our range\n");
- goto out_bits;
- }
- if (start != test_start || end != total_dirty - 1) {
- test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
- test_start, total_dirty - 1, start, end);
- goto out_bits;
- }
- if (process_page_range(inode, start, end,
- PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) {
- test_msg("Pages in range were not all locked\n");
- goto out_bits;
- }
- unlock_extent(&tmp, start, end);
-
- /*
- * Now to test where we run into a page that is no longer dirty in the
- * range we want to find.
- */
- page = find_get_page(inode->i_mapping, (max_bytes + (1 * 1024 * 1024))
- >> PAGE_CACHE_SHIFT);
- if (!page) {
- test_msg("Couldn't find our page\n");
- goto out_bits;
- }
- ClearPageDirty(page);
- page_cache_release(page);
-
- /* We unlocked it in the previous test */
- lock_page(locked_page);
- start = test_start;
- end = 0;
- /*
- * Currently if we fail to find dirty pages in the delalloc range we
- * will adjust max_bytes down to PAGE_CACHE_SIZE and then re-search. If
- * this changes at any point in the future we will need to fix this
- * tests expected behavior.
- */
- found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
- &end, max_bytes);
- if (!found) {
- test_msg("Didn't find our range\n");
- goto out_bits;
- }
- if (start != test_start && end != test_start + PAGE_CACHE_SIZE - 1) {
- test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
- test_start, test_start + PAGE_CACHE_SIZE - 1, start,
- end);
- goto out_bits;
- }
- if (process_page_range(inode, start, end, PROCESS_TEST_LOCKED |
- PROCESS_UNLOCK)) {
- test_msg("Pages in range were not all locked\n");
- goto out_bits;
- }
- ret = 0;
-out_bits:
- clear_extent_bits(&tmp, 0, total_dirty - 1,
- (unsigned long)-1, GFP_NOFS);
-out:
- if (locked_page)
- page_cache_release(locked_page);
- process_page_range(inode, 0, total_dirty - 1,
- PROCESS_UNLOCK | PROCESS_RELEASE);
- iput(inode);
- return ret;
-}
-
-int btrfs_test_extent_io(void)
-{
- test_msg("Running find delalloc tests\n");
- return test_find_delalloc();
-}
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
deleted file mode 100644
index 397d1f9..0000000
--- a/fs/btrfs/tests/inode-tests.c
+++ /dev/null
@@ -1,955 +0,0 @@
-/*
- * Copyright (C) 2013 Fusion IO. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include "btrfs-tests.h"
-#include "../ctree.h"
-#include "../btrfs_inode.h"
-#include "../disk-io.h"
-#include "../extent_io.h"
-#include "../volumes.h"
-
-static struct btrfs_fs_info *alloc_dummy_fs_info(void)
-{
- struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
- GFP_NOFS);
- if (!fs_info)
- return fs_info;
- fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
- GFP_NOFS);
- if (!fs_info->fs_devices) {
- kfree(fs_info);
- return NULL;
- }
- return fs_info;
-}
-static void free_dummy_root(struct btrfs_root *root)
-{
- if (!root)
- return;
- if (root->fs_info) {
- kfree(root->fs_info->fs_devices);
- kfree(root->fs_info);
- }
- if (root->node)
- free_extent_buffer(root->node);
- kfree(root);
-}
-
-static void insert_extent(struct btrfs_root *root, u64 start, u64 len,
- u64 ram_bytes, u64 offset, u64 disk_bytenr,
- u64 disk_len, u32 type, u8 compression, int slot)
-{
- struct btrfs_path path;
- struct btrfs_file_extent_item *fi;
- struct extent_buffer *leaf = root->node;
- struct btrfs_key key;
- u32 value_len = sizeof(struct btrfs_file_extent_item);
-
- if (type == BTRFS_FILE_EXTENT_INLINE)
- value_len += len;
- memset(&path, 0, sizeof(path));
-
- path.nodes[0] = leaf;
- path.slots[0] = slot;
-
- key.objectid = BTRFS_FIRST_FREE_OBJECTID;
- key.type = BTRFS_EXTENT_DATA_KEY;
- key.offset = start;
-
- setup_items_for_insert(root, &path, &key, &value_len, value_len,
- value_len + sizeof(struct btrfs_item), 1);
- fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
- btrfs_set_file_extent_generation(leaf, fi, 1);
- btrfs_set_file_extent_type(leaf, fi, type);
- btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
- btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_len);
- btrfs_set_file_extent_offset(leaf, fi, offset);
- btrfs_set_file_extent_num_bytes(leaf, fi, len);
- btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
- btrfs_set_file_extent_compression(leaf, fi, compression);
- btrfs_set_file_extent_encryption(leaf, fi, 0);
- btrfs_set_file_extent_other_encoding(leaf, fi, 0);
-}
-
-static void insert_inode_item_key(struct btrfs_root *root)
-{
- struct btrfs_path path;
- struct extent_buffer *leaf = root->node;
- struct btrfs_key key;
- u32 value_len = 0;
-
- memset(&path, 0, sizeof(path));
-
- path.nodes[0] = leaf;
- path.slots[0] = 0;
-
- key.objectid = BTRFS_INODE_ITEM_KEY;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
-
- setup_items_for_insert(root, &path, &key, &value_len, value_len,
- value_len + sizeof(struct btrfs_item), 1);
-}
-
-/*
- * Build the most complicated map of extents the earth has ever seen. We want
- * this so we can test all of the corner cases of btrfs_get_extent. Here is a
- * diagram of how the extents will look though this may not be possible we still
- * want to make sure everything acts normally (the last number is not inclusive)
- *
- * [0 - 5][5 - 6][6 - 10][10 - 4096][ 4096 - 8192 ][8192 - 12288]
- * [hole ][inline][ hole ][ regular ][regular1 split][ hole ]
- *
- * [ 12288 - 20480][20480 - 24576][ 24576 - 28672 ][28672 - 36864][36864 - 45056]
- * [regular1 split][ prealloc1 ][prealloc1 written][ prealloc1 ][ compressed ]
- *
- * [45056 - 49152][49152-53248][53248-61440][61440-65536][ 65536+81920 ]
- * [ compressed1 ][ regular ][compressed1][ regular ][ hole but no extent]
- *
- * [81920-86016]
- * [ regular ]
- */
-static void setup_file_extents(struct btrfs_root *root)
-{
- int slot = 0;
- u64 disk_bytenr = 1 * 1024 * 1024;
- u64 offset = 0;
-
- /* First we want a hole */
- insert_extent(root, offset, 5, 5, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0,
- slot);
- slot++;
- offset += 5;
-
- /*
- * Now we want an inline extent, I don't think this is possible but hey
- * why not? Also keep in mind if we have an inline extent it counts as
- * the whole first page. If we were to expand it we would have to cow
- * and we wouldn't have an inline extent anymore.
- */
- insert_extent(root, offset, 1, 1, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0,
- slot);
- slot++;
- offset = 4096;
-
- /* Now another hole */
- insert_extent(root, offset, 4, 4, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0,
- slot);
- slot++;
- offset += 4;
-
- /* Now for a regular extent */
- insert_extent(root, offset, 4095, 4095, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_REG, 0, slot);
- slot++;
- disk_bytenr += 4096;
- offset += 4095;
-
- /*
- * Now for 3 extents that were split from a hole punch so we test
- * offsets properly.
- */
- insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384,
- BTRFS_FILE_EXTENT_REG, 0, slot);
- slot++;
- offset += 4096;
- insert_extent(root, offset, 4096, 4096, 0, 0, 0, BTRFS_FILE_EXTENT_REG,
- 0, slot);
- slot++;
- offset += 4096;
- insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384,
- BTRFS_FILE_EXTENT_REG, 0, slot);
- slot++;
- offset += 8192;
- disk_bytenr += 16384;
-
- /* Now for a unwritten prealloc extent */
- insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
- slot++;
- offset += 4096;
-
- /*
- * We want to jack up disk_bytenr a little more so the em stuff doesn't
- * merge our records.
- */
- disk_bytenr += 8192;
-
- /*
- * Now for a partially written prealloc extent, basically the same as
- * the hole punch example above. Ram_bytes never changes when you mark
- * extents written btw.
- */
- insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384,
- BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
- slot++;
- offset += 4096;
- insert_extent(root, offset, 4096, 16384, 4096, disk_bytenr, 16384,
- BTRFS_FILE_EXTENT_REG, 0, slot);
- slot++;
- offset += 4096;
- insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384,
- BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
- slot++;
- offset += 8192;
- disk_bytenr += 16384;
-
- /* Now a normal compressed extent */
- insert_extent(root, offset, 8192, 8192, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
- slot++;
- offset += 8192;
- /* No merges */
- disk_bytenr += 8192;
-
- /* Now a split compressed extent */
- insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
- slot++;
- offset += 4096;
- insert_extent(root, offset, 4096, 4096, 0, disk_bytenr + 4096, 4096,
- BTRFS_FILE_EXTENT_REG, 0, slot);
- slot++;
- offset += 4096;
- insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
- slot++;
- offset += 8192;
- disk_bytenr += 8192;
-
- /* Now extents that have a hole but no hole extent */
- insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_REG, 0, slot);
- slot++;
- offset += 16384;
- disk_bytenr += 4096;
- insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_REG, 0, slot);
-}
-
-static unsigned long prealloc_only = 0;
-static unsigned long compressed_only = 0;
-static unsigned long vacancy_only = 0;
-
-static noinline int test_btrfs_get_extent(void)
-{
- struct inode *inode = NULL;
- struct btrfs_root *root = NULL;
- struct extent_map *em = NULL;
- u64 orig_start;
- u64 disk_bytenr;
- u64 offset;
- int ret = -ENOMEM;
-
- inode = btrfs_new_test_inode();
- if (!inode) {
- test_msg("Couldn't allocate inode\n");
- return ret;
- }
-
- BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
- BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
- BTRFS_I(inode)->location.offset = 0;
-
- root = btrfs_alloc_dummy_root();
- if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
- goto out;
- }
-
- /*
- * We do this since btrfs_get_extent wants to assign em->bdev to
- * root->fs_info->fs_devices->latest_bdev.
- */
- root->fs_info = alloc_dummy_fs_info();
- if (!root->fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
- goto out;
- }
-
- root->node = alloc_dummy_extent_buffer(0, 4096);
- if (!root->node) {
- test_msg("Couldn't allocate dummy buffer\n");
- goto out;
- }
-
- /*
- * We will just free a dummy node if it's ref count is 2 so we need an
- * extra ref so our searches don't accidently release our page.
- */
- extent_buffer_get(root->node);
- btrfs_set_header_nritems(root->node, 0);
- btrfs_set_header_level(root->node, 0);
- ret = -EINVAL;
-
- /* First with no extents */
- BTRFS_I(inode)->root = root;
- em = btrfs_get_extent(inode, NULL, 0, 0, 4096, 0);
- if (IS_ERR(em)) {
- em = NULL;
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole, got %llu\n", em->block_start);
- goto out;
- }
- if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
- test_msg("Vacancy flag wasn't set properly\n");
- goto out;
- }
- free_extent_map(em);
- btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
-
- /*
- * All of the magic numbers are based on the mapping setup in
- * setup_file_extents, so if you change anything there you need to
- * update the comment and update the expected values below.
- */
- setup_file_extents(root);
-
- em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != 0 || em->len != 5) {
- test_msg("Unexpected extent wanted start 0 len 5, got start "
- "%llu len %llu\n", em->start, em->len);
- goto out;
- }
- if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
- goto out;
- }
- offset = em->start + em->len;
- free_extent_map(em);
-
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start != EXTENT_MAP_INLINE) {
- test_msg("Expected an inline, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != offset || em->len != 4091) {
- test_msg("Unexpected extent wanted start %llu len 1, got start "
- "%llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
- goto out;
- }
- /*
- * We don't test anything else for inline since it doesn't get set
- * unless we have a page for it to write into. Maybe we should change
- * this?
- */
- offset = em->start + em->len;
- free_extent_map(em);
-
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != offset || em->len != 4) {
- test_msg("Unexpected extent wanted start %llu len 4, got start "
- "%llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
- goto out;
- }
- offset = em->start + em->len;
- free_extent_map(em);
-
- /* Regular extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != offset || em->len != 4095) {
- test_msg("Unexpected extent wanted start %llu len 4095, got "
- "start %llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
- goto out;
- }
- if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
- em->orig_start);
- goto out;
- }
- offset = em->start + em->len;
- free_extent_map(em);
-
- /* The next 3 are split extents */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
- goto out;
- }
- if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
- em->orig_start);
- goto out;
- }
- disk_bytenr = em->block_start;
- orig_start = em->start;
- offset = em->start + em->len;
- free_extent_map(em);
-
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
- goto out;
- }
- offset = em->start + em->len;
- free_extent_map(em);
-
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != offset || em->len != 8192) {
- test_msg("Unexpected extent wanted start %llu len 8192, got "
- "start %llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
- goto out;
- }
- if (em->orig_start != orig_start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n",
- orig_start, em->orig_start);
- goto out;
- }
- disk_bytenr += (em->start - orig_start);
- if (em->block_start != disk_bytenr) {
- test_msg("Wrong block start, want %llu, have %llu\n",
- disk_bytenr, em->block_start);
- goto out;
- }
- offset = em->start + em->len;
- free_extent_map(em);
-
- /* Prealloc extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != prealloc_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
- prealloc_only, em->flags);
- goto out;
- }
- if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
- em->orig_start);
- goto out;
- }
- offset = em->start + em->len;
- free_extent_map(em);
-
- /* The next 3 are a half written prealloc extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != prealloc_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
- prealloc_only, em->flags);
- goto out;
- }
- if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
- em->orig_start);
- goto out;
- }
- disk_bytenr = em->block_start;
- orig_start = em->start;
- offset = em->start + em->len;
- free_extent_map(em);
-
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start >= EXTENT_MAP_HOLE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
- goto out;
- }
- if (em->orig_start != orig_start) {
- test_msg("Unexpected orig offset, wanted %llu, have %llu\n",
- orig_start, em->orig_start);
- goto out;
- }
- if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
- test_msg("Unexpected block start, wanted %llu, have %llu\n",
- disk_bytenr + (em->start - em->orig_start),
- em->block_start);
- goto out;
- }
- offset = em->start + em->len;
- free_extent_map(em);
-
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != offset || em->len != 8192) {
- test_msg("Unexpected extent wanted start %llu len 8192, got "
- "start %llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != prealloc_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
- prealloc_only, em->flags);
- goto out;
- }
- if (em->orig_start != orig_start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", orig_start,
- em->orig_start);
- goto out;
- }
- if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
- test_msg("Unexpected block start, wanted %llu, have %llu\n",
- disk_bytenr + (em->start - em->orig_start),
- em->block_start);
- goto out;
- }
- offset = em->start + em->len;
- free_extent_map(em);
-
- /* Now for the compressed extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != offset || em->len != 8192) {
- test_msg("Unexpected extent wanted start %llu len 8192, got "
- "start %llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != compressed_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
- compressed_only, em->flags);
- goto out;
- }
- if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n",
- em->start, em->orig_start);
- goto out;
- }
- if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
- test_msg("Unexpected compress type, wanted %d, got %d\n",
- BTRFS_COMPRESS_ZLIB, em->compress_type);
- goto out;
- }
- offset = em->start + em->len;
- free_extent_map(em);
-
- /* Split compressed extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != compressed_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
- compressed_only, em->flags);
- goto out;
- }
- if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n",
- em->start, em->orig_start);
- goto out;
- }
- if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
- test_msg("Unexpected compress type, wanted %d, got %d\n",
- BTRFS_COMPRESS_ZLIB, em->compress_type);
- goto out;
- }
- disk_bytenr = em->block_start;
- orig_start = em->start;
- offset = em->start + em->len;
- free_extent_map(em);
-
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
- goto out;
- }
- if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
- em->orig_start);
- goto out;
- }
- offset = em->start + em->len;
- free_extent_map(em);
-
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start != disk_bytenr) {
- test_msg("Block start does not match, want %llu got %llu\n",
- disk_bytenr, em->block_start);
- goto out;
- }
- if (em->start != offset || em->len != 8192) {
- test_msg("Unexpected extent wanted start %llu len 8192, got "
- "start %llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != compressed_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
- compressed_only, em->flags);
- goto out;
- }
- if (em->orig_start != orig_start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n",
- em->start, orig_start);
- goto out;
- }
- if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
- test_msg("Unexpected compress type, wanted %d, got %d\n",
- BTRFS_COMPRESS_ZLIB, em->compress_type);
- goto out;
- }
- offset = em->start + em->len;
- free_extent_map(em);
-
- /* A hole between regular extents but no hole extent */
- em = btrfs_get_extent(inode, NULL, 0, offset + 6, 4096, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
- goto out;
- }
- if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
- em->orig_start);
- goto out;
- }
- offset = em->start + em->len;
- free_extent_map(em);
-
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096 * 1024, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole extent, got %llu\n", em->block_start);
- goto out;
- }
- /*
- * Currently we just return a length that we requested rather than the
- * length of the actual hole, if this changes we'll have to change this
- * test.
- */
- if (em->start != offset || em->len != 12288) {
- test_msg("Unexpected extent wanted start %llu len 12288, got "
- "start %llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != vacancy_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
- vacancy_only, em->flags);
- goto out;
- }
- if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
- em->orig_start);
- goto out;
- }
- offset = em->start + em->len;
- free_extent_map(em);
-
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
- goto out;
- }
- if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
- goto out;
- }
- if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
- em->orig_start);
- goto out;
- }
- ret = 0;
-out:
- if (!IS_ERR(em))
- free_extent_map(em);
- iput(inode);
- free_dummy_root(root);
- return ret;
-}
-
-static int test_hole_first(void)
-{
- struct inode *inode = NULL;
- struct btrfs_root *root = NULL;
- struct extent_map *em = NULL;
- int ret = -ENOMEM;
-
- inode = btrfs_new_test_inode();
- if (!inode) {
- test_msg("Couldn't allocate inode\n");
- return ret;
- }
-
- BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
- BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
- BTRFS_I(inode)->location.offset = 0;
-
- root = btrfs_alloc_dummy_root();
- if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
- goto out;
- }
-
- root->fs_info = alloc_dummy_fs_info();
- if (!root->fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
- goto out;
- }
-
- root->node = alloc_dummy_extent_buffer(0, 4096);
- if (!root->node) {
- test_msg("Couldn't allocate dummy buffer\n");
- goto out;
- }
-
- extent_buffer_get(root->node);
- btrfs_set_header_nritems(root->node, 0);
- btrfs_set_header_level(root->node, 0);
- BTRFS_I(inode)->root = root;
- ret = -EINVAL;
-
- /*
- * Need a blank inode item here just so we don't confuse
- * btrfs_get_extent.
- */
- insert_inode_item_key(root);
- insert_extent(root, 4096, 4096, 4096, 0, 4096, 4096,
- BTRFS_FILE_EXTENT_REG, 0, 1);
- em = btrfs_get_extent(inode, NULL, 0, 0, 8192, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != 0 || em->len != 4096) {
- test_msg("Unexpected extent wanted start 0 len 4096, got start "
- "%llu len %llu\n", em->start, em->len);
- goto out;
- }
- if (em->flags != vacancy_only) {
- test_msg("Wrong flags, wanted %lu, have %lu\n", vacancy_only,
- em->flags);
- goto out;
- }
- free_extent_map(em);
-
- em = btrfs_get_extent(inode, NULL, 0, 4096, 8192, 0);
- if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
- goto out;
- }
- if (em->block_start != 4096) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
- goto out;
- }
- if (em->start != 4096 || em->len != 4096) {
- test_msg("Unexpected extent wanted start 4096 len 4096, got "
- "start %llu len %llu\n", em->start, em->len);
- goto out;
- }
- if (em->flags != 0) {
- test_msg("Unexpected flags set, wanted 0 got %lu\n",
- em->flags);
- goto out;
- }
- ret = 0;
-out:
- if (!IS_ERR(em))
- free_extent_map(em);
- iput(inode);
- free_dummy_root(root);
- return ret;
-}
-
-int btrfs_test_inodes(void)
-{
- int ret;
-
- set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only);
- set_bit(EXTENT_FLAG_VACANCY, &vacancy_only);
- set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
-
- test_msg("Running btrfs_get_extent tests\n");
- ret = test_btrfs_get_extent();
- if (ret)
- return ret;
- test_msg("Running hole first btrfs_get_extent test\n");
- return test_hole_first();
-}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c6a872a..8c81bdc 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -57,7 +57,7 @@ static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
__TRANS_JOIN_NOLOCK),
};
-void btrfs_put_transaction(struct btrfs_transaction *transaction)
+static void put_transaction(struct btrfs_transaction *transaction)
{
WARN_ON(atomic_read(&transaction->use_count) == 0);
if (atomic_dec_and_test(&transaction->use_count)) {
@@ -332,7 +332,7 @@ static void wait_current_trans(struct btrfs_root *root)
wait_event(root->fs_info->transaction_wait,
cur_trans->state >= TRANS_STATE_UNBLOCKED ||
cur_trans->aborted);
- btrfs_put_transaction(cur_trans);
+ put_transaction(cur_trans);
} else {
spin_unlock(&root->fs_info->trans_lock);
}
@@ -353,17 +353,6 @@ static int may_wait_transaction(struct btrfs_root *root, int type)
return 0;
}
-static inline bool need_reserve_reloc_root(struct btrfs_root *root)
-{
- if (!root->fs_info->reloc_ctl ||
- !root->ref_cows ||
- root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
- root->reloc_root)
- return false;
-
- return true;
-}
-
static struct btrfs_trans_handle *
start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
enum btrfs_reserve_flush_enum flush)
@@ -371,9 +360,8 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans;
u64 num_bytes = 0;
- u64 qgroup_reserved = 0;
- bool reloc_reserved = false;
int ret;
+ u64 qgroup_reserved = 0;
if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
return ERR_PTR(-EROFS);
@@ -402,14 +390,6 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
}
num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
- /*
- * Do the reservation for the relocation root creation
- */
- if (unlikely(need_reserve_reloc_root(root))) {
- num_bytes += root->nodesize;
- reloc_reserved = true;
- }
-
ret = btrfs_block_rsv_add(root,
&root->fs_info->trans_block_rsv,
num_bytes, flush);
@@ -471,7 +451,6 @@ again:
h->delayed_ref_elem.seq = 0;
h->type = type;
h->allocating_chunk = false;
- h->reloc_reserved = false;
INIT_LIST_HEAD(&h->qgroup_ref_list);
INIT_LIST_HEAD(&h->new_bgs);
@@ -487,7 +466,6 @@ again:
h->transid, num_bytes, 1);
h->block_rsv = &root->fs_info->trans_block_rsv;
h->bytes_reserved = num_bytes;
- h->reloc_reserved = reloc_reserved;
}
h->qgroup_reserved = qgroup_reserved;
@@ -632,7 +610,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
}
wait_for_commit(root, cur_trans);
- btrfs_put_transaction(cur_trans);
+ put_transaction(cur_trans);
out:
return ret;
}
@@ -757,7 +735,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
smp_mb();
if (waitqueue_active(&cur_trans->writer_wait))
wake_up(&cur_trans->writer_wait);
- btrfs_put_transaction(cur_trans);
+ put_transaction(cur_trans);
if (current->journal_info == trans)
current->journal_info = NULL;
@@ -766,10 +744,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_run_delayed_iputs(root);
if (trans->aborted ||
- test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
- wake_up_process(info->transaction_kthread);
+ test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
err = -EIO;
- }
assert_qgroups_uptodate(trans);
kmem_cache_free(btrfs_trans_handle_cachep, trans);
@@ -972,19 +948,16 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
return ret;
ret = btrfs_run_dev_stats(trans, root->fs_info);
- if (ret)
- return ret;
+ WARN_ON(ret);
ret = btrfs_run_dev_replace(trans, root->fs_info);
- if (ret)
- return ret;
+ WARN_ON(ret);
+
ret = btrfs_run_qgroups(trans, root->fs_info);
- if (ret)
- return ret;
+ BUG_ON(ret);
/* run_qgroups might have added some more refs */
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
- if (ret)
- return ret;
+ BUG_ON(ret);
while (!list_empty(&fs_info->dirty_cowonly_roots)) {
next = fs_info->dirty_cowonly_roots.next;
@@ -1480,7 +1453,7 @@ static void do_async_commit(struct work_struct *work)
* We've got freeze protection passed with the transaction.
* Tell lockdep about it.
*/
- if (ac->newtrans->type & __TRANS_FREEZABLE)
+ if (ac->newtrans->type < TRANS_JOIN_NOLOCK)
rwsem_acquire_read(
&ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
0, 1, _THIS_IP_);
@@ -1521,7 +1494,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
* Tell lockdep we've released the freeze rwsem, since the
* async commit thread will be the one to unlock it.
*/
- if (ac->newtrans->type & __TRANS_FREEZABLE)
+ if (trans->type < TRANS_JOIN_NOLOCK)
rwsem_release(
&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
1, _THIS_IP_);
@@ -1537,7 +1510,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
if (current->journal_info == trans)
current->journal_info = NULL;
- btrfs_put_transaction(cur_trans);
+ put_transaction(cur_trans);
return 0;
}
@@ -1579,10 +1552,8 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
root->fs_info->running_transaction = NULL;
spin_unlock(&root->fs_info->trans_lock);
- if (trans->type & __TRANS_FREEZABLE)
- sb_end_intwrite(root->fs_info->sb);
- btrfs_put_transaction(cur_trans);
- btrfs_put_transaction(cur_trans);
+ put_transaction(cur_trans);
+ put_transaction(cur_trans);
trace_btrfs_transaction_commit(root);
@@ -1600,19 +1571,15 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
int ret;
ret = btrfs_run_delayed_items(trans, root);
+ if (ret)
+ return ret;
+
/*
* running the delayed items may have added new refs. account
* them now so that they hinder processing of more delayed refs
* as little as possible.
*/
- if (ret) {
- btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
- return ret;
- }
-
- ret = btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
- if (ret)
- return ret;
+ btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
/*
* rename don't use btrfs_join_transaction, so, once we
@@ -1629,14 +1596,14 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
{
if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
- return btrfs_start_delalloc_roots(fs_info, 1);
+ return btrfs_start_all_delalloc_inodes(fs_info, 1);
return 0;
}
static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
{
if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
- btrfs_wait_ordered_roots(fs_info, -1);
+ btrfs_wait_all_ordered_extents(fs_info);
}
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -1702,7 +1669,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
wait_for_commit(root, cur_trans);
- btrfs_put_transaction(cur_trans);
+ put_transaction(cur_trans);
return ret;
}
@@ -1719,7 +1686,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
wait_for_commit(root, prev_trans);
- btrfs_put_transaction(prev_trans);
+ put_transaction(prev_trans);
} else {
spin_unlock(&root->fs_info->trans_lock);
}
@@ -1918,8 +1885,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
list_del_init(&cur_trans->list);
spin_unlock(&root->fs_info->trans_lock);
- btrfs_put_transaction(cur_trans);
- btrfs_put_transaction(cur_trans);
+ put_transaction(cur_trans);
+ put_transaction(cur_trans);
if (trans->type & __TRANS_FREEZABLE)
sb_end_intwrite(root->fs_info->sb);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 7657d11..5c2af84 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -92,7 +92,6 @@ struct btrfs_trans_handle {
short aborted;
short adding_csums;
bool allocating_chunk;
- bool reloc_reserved;
unsigned int type;
/*
* this root is only needed to validate that the root passed to
@@ -167,5 +166,4 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages, int mark);
int btrfs_transaction_blocked(struct btrfs_fs_info *info);
int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
-void btrfs_put_transaction(struct btrfs_transaction *transaction);
#endif
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index 76928ca..94e05c1 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -37,6 +37,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
int ret = 0;
int wret;
int level;
+ int is_extent = 0;
int next_key_ret = 0;
u64 last_ret = 0;
u64 min_trans = 0;
@@ -49,7 +50,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
goto out;
}
- if (root->ref_cows == 0)
+ if (root->ref_cows == 0 && !is_extent)
goto out;
if (btrfs_test_opt(root, SSD))
@@ -84,7 +85,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
path->keep_locks = 1;
- ret = btrfs_search_forward(root, &key, path, min_trans);
+ ret = btrfs_search_forward(root, &key, NULL, path, min_trans);
if (ret < 0)
goto out;
if (ret > 0) {
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9f7fc51..79f057c 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -26,6 +26,7 @@
#include "locking.h"
#include "print-tree.h"
#include "backref.h"
+#include "compat.h"
#include "tree-log.h"
#include "hash.h"
@@ -935,7 +936,7 @@ again:
parent_objectid,
victim_name,
victim_name_len)) {
- inc_nlink(inode);
+ btrfs_inc_nlink(inode);
btrfs_release_path(path);
ret = btrfs_unlink_inode(trans, root, dir,
@@ -1005,7 +1006,7 @@ again:
victim_parent = read_one_inode(root,
parent_objectid);
if (victim_parent) {
- inc_nlink(inode);
+ btrfs_inc_nlink(inode);
btrfs_release_path(path);
ret = btrfs_unlink_inode(trans, root,
@@ -1112,11 +1113,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, int slot,
struct btrfs_key *key)
{
- struct inode *dir = NULL;
- struct inode *inode = NULL;
+ struct inode *dir;
+ struct inode *inode;
unsigned long ref_ptr;
unsigned long ref_end;
- char *name = NULL;
+ char *name;
int namelen;
int ret;
int search_done = 0;
@@ -1149,15 +1150,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
* care of the rest
*/
dir = read_one_inode(root, parent_objectid);
- if (!dir) {
- ret = -ENOENT;
- goto out;
- }
+ if (!dir)
+ return -ENOENT;
inode = read_one_inode(root, inode_objectid);
if (!inode) {
- ret = -EIO;
- goto out;
+ iput(dir);
+ return -EIO;
}
while (ref_ptr < ref_end) {
@@ -1170,16 +1169,14 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
*/
if (!dir)
dir = read_one_inode(root, parent_objectid);
- if (!dir) {
- ret = -ENOENT;
- goto out;
- }
+ if (!dir)
+ return -ENOENT;
} else {
ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
&ref_index);
}
if (ret)
- goto out;
+ return ret;
/* if we already have a perfect match, we're done */
if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
@@ -1199,11 +1196,12 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
parent_objectid,
ref_index, name, namelen,
&search_done);
- if (ret) {
- if (ret == 1)
- ret = 0;
+ if (ret == 1) {
+ ret = 0;
goto out;
}
+ if (ret)
+ goto out;
}
/* insert our name */
@@ -1217,7 +1215,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
kfree(name);
- name = NULL;
if (log_ref_ver) {
iput(dir);
dir = NULL;
@@ -1228,7 +1225,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
ret = overwrite_item(trans, root, path, eb, slot, key);
out:
btrfs_release_path(path);
- kfree(name);
iput(dir);
iput(inode);
return ret;
@@ -1311,7 +1307,6 @@ static int count_inode_refs(struct btrfs_root *root,
break;
path->slots[0]--;
}
-process_slot:
btrfs_item_key_to_cpu(path->nodes[0], &key,
path->slots[0]);
if (key.objectid != ino ||
@@ -1332,10 +1327,6 @@ process_slot:
if (key.offset == 0)
break;
- if (path->slots[0] > 0) {
- path->slots[0]--;
- goto process_slot;
- }
key.offset--;
btrfs_release_path(path);
}
@@ -1489,7 +1480,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
if (!inode->i_nlink)
set_nlink(inode, 1);
else
- inc_nlink(inode);
+ btrfs_inc_nlink(inode);
ret = btrfs_update_inode(trans, root, inode);
} else if (ret == -EEXIST) {
ret = 0;
@@ -1832,7 +1823,7 @@ again:
dir_key->offset,
name, name_len, 0);
}
- if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) {
+ if (IS_ERR_OR_NULL(log_di)) {
btrfs_dir_item_key_to_cpu(eb, di, &location);
btrfs_release_path(path);
btrfs_release_path(log_path);
@@ -1850,7 +1841,7 @@ again:
goto out;
}
- inc_nlink(inode);
+ btrfs_inc_nlink(inode);
ret = btrfs_unlink_inode(trans, root, dir, inode,
name, name_len);
if (!ret)
@@ -1869,9 +1860,6 @@ again:
goto again;
ret = 0;
goto out;
- } else if (IS_ERR(log_di)) {
- kfree(name);
- return PTR_ERR(log_di);
}
btrfs_release_path(log_path);
kfree(name);
@@ -2130,7 +2118,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
WARN_ON(*level >= BTRFS_MAX_LEVEL);
cur = path->nodes[*level];
- WARN_ON(btrfs_header_level(cur) != *level);
+ if (btrfs_header_level(cur) != *level)
+ WARN_ON(1);
if (path->slots[*level] >=
btrfs_header_nritems(cur))
@@ -2162,13 +2151,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
return ret;
}
- if (trans) {
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
- clean_tree_block(trans, root, next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
- }
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, root, next);
+ btrfs_wait_tree_block_writeback(next);
+ btrfs_tree_unlock(next);
WARN_ON(root_owner !=
BTRFS_TREE_LOG_OBJECTID);
@@ -2240,13 +2227,11 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
next = path->nodes[*level];
- if (trans) {
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
- clean_tree_block(trans, root, next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
- }
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, root, next);
+ btrfs_wait_tree_block_writeback(next);
+ btrfs_tree_unlock(next);
WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
ret = btrfs_free_and_pin_reserved_extent(root,
@@ -2316,13 +2301,11 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
next = path->nodes[orig_level];
- if (trans) {
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
- clean_tree_block(trans, log, next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
- }
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, log, next);
+ btrfs_wait_tree_block_writeback(next);
+ btrfs_tree_unlock(next);
WARN_ON(log->root_key.objectid !=
BTRFS_TREE_LOG_OBJECTID);
@@ -2588,7 +2571,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* the running transaction open, so a full commit can't hop
* in and cause problems either.
*/
+ btrfs_scrub_pause_super(root);
ret = write_ctree_super(trans, root->fs_info->tree_root, 1);
+ btrfs_scrub_continue_super(root);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out_wake_log_root;
@@ -2623,10 +2608,13 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
.process_func = process_one_buffer
};
- ret = walk_log_tree(trans, log, &wc);
- /* I don't think this can happen but just in case */
- if (ret)
- btrfs_abort_transaction(trans, log, ret);
+ if (trans) {
+ ret = walk_log_tree(trans, log, &wc);
+
+ /* I don't think this can happen but just in case */
+ if (ret)
+ btrfs_abort_transaction(trans, log, ret);
+ }
while (1) {
ret = find_first_extent_bit(&log->dirty_log_pages,
@@ -2879,6 +2867,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
u64 min_offset, u64 *last_offset_ret)
{
struct btrfs_key min_key;
+ struct btrfs_key max_key;
struct btrfs_root *log = root->log_root;
struct extent_buffer *src;
int err = 0;
@@ -2890,6 +2879,9 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
u64 ino = btrfs_ino(inode);
log = root->log_root;
+ max_key.objectid = ino;
+ max_key.offset = (u64)-1;
+ max_key.type = key_type;
min_key.objectid = ino;
min_key.type = key_type;
@@ -2897,7 +2889,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
path->keep_locks = 1;
- ret = btrfs_search_forward(root, &min_key, path, trans->transid);
+ ret = btrfs_search_forward(root, &min_key, &max_key,
+ path, trans->transid);
/*
* we didn't find anything from this transaction, see if there
@@ -2950,8 +2943,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
/* find the first key from this transaction again */
ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
- if (WARN_ON(ret != 0))
+ if (ret != 0) {
+ WARN_ON(1);
goto done;
+ }
/*
* we have a block from this transaction, log every item in it
@@ -3177,10 +3172,11 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
struct inode *inode)
{
struct btrfs_inode_item *inode_item;
+ struct btrfs_key key;
int ret;
- ret = btrfs_insert_empty_item(trans, log, path,
- &BTRFS_I(inode)->location,
+ memcpy(&key, &BTRFS_I(inode)->location, sizeof(key));
+ ret = btrfs_insert_empty_item(trans, log, path, &key,
sizeof(*inode_item));
if (ret && ret != -EEXIST)
return ret;
@@ -3379,7 +3375,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
btrfs_set_token_file_extent_type(leaf, fi,
BTRFS_FILE_EXTENT_REG,
&token);
- if (em->block_start == EXTENT_MAP_HOLE)
+ if (em->block_start == 0)
skip_csum = true;
}
@@ -3421,6 +3417,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
if (skip_csum)
return 0;
+ if (em->compress_type) {
+ csum_offset = 0;
+ csum_len = block_len;
+ }
+
/*
* First check and see if our csums are on our outstanding ordered
* extents.
@@ -3504,13 +3505,8 @@ unlocked:
if (!mod_len || ret)
return ret;
- if (em->compress_type) {
- csum_offset = 0;
- csum_len = block_len;
- } else {
- csum_offset = mod_start - em->start;
- csum_len = mod_len;
- }
+ csum_offset = mod_start - em->start;
+ csum_len = mod_len;
/* block start is already adjusted for the file extent offset. */
ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
@@ -3697,8 +3693,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
ret = btrfs_truncate_inode_items(trans, log,
inode, 0, 0);
} else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
- &BTRFS_I(inode)->runtime_flags) ||
- inode_only == LOG_INODE_EXISTS) {
+ &BTRFS_I(inode)->runtime_flags)) {
if (inode_only == LOG_INODE_ALL)
fast_search = true;
max_key.type = BTRFS_XATTR_ITEM_KEY;
@@ -3724,7 +3719,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
while (1) {
ins_nr = 0;
- ret = btrfs_search_forward(root, &min_key,
+ ret = btrfs_search_forward(root, &min_key, &max_key,
path, trans->transid);
if (ret != 0)
break;
@@ -3774,14 +3769,14 @@ next_slot:
}
btrfs_release_path(path);
- if (min_key.offset < (u64)-1) {
+ if (min_key.offset < (u64)-1)
min_key.offset++;
- } else if (min_key.type < max_key.type) {
+ else if (min_key.type < (u8)-1)
min_key.type++;
- min_key.offset = 0;
- } else {
+ else if (min_key.objectid < (u64)-1)
+ min_key.objectid++;
+ else
break;
- }
}
if (ins_nr) {
ret = copy_items(trans, inode, dst_path, src, ins_start_slot,
@@ -3802,7 +3797,7 @@ log_extents:
err = ret;
goto out_unlock;
}
- } else if (inode_only == LOG_INODE_ALL) {
+ } else {
struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
struct extent_map *em, *n;
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index fbda900..dd0dea3 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -260,6 +260,7 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
{
struct btrfs_root *root = fs_info->uuid_root;
struct btrfs_key key;
+ struct btrfs_key max_key;
struct btrfs_path *path;
int ret = 0;
struct extent_buffer *leaf;
@@ -276,10 +277,13 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
key.objectid = 0;
key.type = 0;
key.offset = 0;
+ max_key.objectid = (u64)-1;
+ max_key.type = (u8)-1;
+ max_key.offset = (u64)-1;
again_search_slot:
path->keep_locks = 1;
- ret = btrfs_search_forward(root, &key, path, 0);
+ ret = btrfs_search_forward(root, &key, &max_key, path, 0);
if (ret) {
if (ret > 0)
ret = 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 92303f4..043b215 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -28,6 +28,7 @@
#include <linux/raid/pq.h>
#include <linux/semaphore.h>
#include <asm/div64.h>
+#include "compat.h"
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
@@ -665,8 +666,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
if (device->bdev)
fs_devices->open_devices--;
- if (device->writeable &&
- device->devid != BTRFS_DEV_REPLACE_DEVID) {
+ if (device->writeable && !device->is_tgtdev_for_dev_replace) {
list_del_init(&device->dev_alloc_list);
fs_devices->rw_devices--;
}
@@ -2041,7 +2041,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
device->in_fs_metadata = 1;
device->is_tgtdev_for_dev_replace = 0;
device->mode = FMODE_EXCL;
- device->dev_stats_valid = 1;
set_blocksize(device->bdev, 4096);
if (seeding_dev) {
@@ -2209,7 +2208,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
device->in_fs_metadata = 1;
device->is_tgtdev_for_dev_replace = 1;
device->mode = FMODE_EXCL;
- device->dev_stats_valid = 1;
set_blocksize(device->bdev, 4096);
device->fs_devices = fs_info->fs_devices;
list_add(&device->dev_list, &fs_info->fs_devices->devices);
@@ -2552,7 +2550,8 @@ again:
failed = 0;
retried = true;
goto again;
- } else if (WARN_ON(failed && retried)) {
+ } else if (failed && retried) {
+ WARN_ON(1);
ret = -ENOSPC;
}
error:
@@ -3424,9 +3423,6 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
{
- if (fs_info->sb->s_flags & MS_RDONLY)
- return -EROFS;
-
mutex_lock(&fs_info->balance_mutex);
if (!fs_info->balance_ctl) {
mutex_unlock(&fs_info->balance_mutex);
@@ -3492,7 +3488,7 @@ static int btrfs_uuid_scan_kthread(void *data)
path->keep_locks = 1;
while (1) {
- ret = btrfs_search_forward(root, &key, path, 0);
+ ret = btrfs_search_forward(root, &key, &max_key, path, 0);
if (ret) {
if (ret > 0)
ret = 0;
@@ -4492,7 +4488,6 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got "
"%Lu-%Lu\n", logical, logical+len, em->start,
em->start + em->len);
- free_extent_map(em);
return 1;
}
@@ -4673,7 +4668,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, "
"found %Lu-%Lu\n", logical, em->start,
em->start + em->len);
- free_extent_map(em);
return -EINVAL;
}
@@ -4901,7 +4895,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
num_stripes = map->num_stripes;
max_errors = nr_parity_stripes(map);
- raid_map = kmalloc_array(num_stripes, sizeof(u64),
+ raid_map = kmalloc(sizeof(u64) * num_stripes,
GFP_NOFS);
if (!raid_map) {
ret = -ENOMEM;
@@ -5394,15 +5388,17 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio,
{
struct bio_vec *prev;
struct request_queue *q = bdev_get_queue(bdev);
- unsigned int max_sectors = queue_max_sectors(q);
+ unsigned short max_sectors = queue_max_sectors(q);
struct bvec_merge_data bvm = {
.bi_bdev = bdev,
.bi_sector = sector,
.bi_rw = bio->bi_rw,
};
- if (WARN_ON(bio->bi_vcnt == 0))
+ if (bio->bi_vcnt == 0) {
+ WARN_ON(1);
return 1;
+ }
prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (bio_sectors(bio) > max_sectors)
@@ -5635,8 +5631,10 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
struct btrfs_device *dev;
u64 tmp;
- if (WARN_ON(!devid && !fs_info))
+ if (!devid && !fs_info) {
+ WARN_ON(1);
return ERR_PTR(-EINVAL);
+ }
dev = __alloc_device();
if (IS_ERR(dev))
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 8b3cd14..b72f540 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -43,8 +43,9 @@ struct btrfs_device {
/* WRITE_SYNC bios */
struct btrfs_pending_bios pending_sync_bios;
- u64 generation;
int running_pending;
+ u64 generation;
+
int writeable;
int in_fs_metadata;
int missing;
@@ -52,11 +53,11 @@ struct btrfs_device {
int is_tgtdev_for_dev_replace;
spinlock_t io_lock;
- /* the mode sent to blkdev_get */
- fmode_t mode;
struct block_device *bdev;
+ /* the mode sent to blkdev_get */
+ fmode_t mode;
struct rcu_string *name;
@@ -77,21 +78,16 @@ struct btrfs_device {
/* optimal io width for this device */
u32 io_width;
- /* type and info about this device */
- u64 type;
/* minimal io size for this device */
u32 sector_size;
+ /* type and info about this device */
+ u64 type;
/* physical drive uuid (or lvm uuid) */
u8 uuid[BTRFS_UUID_SIZE];
- /* for sending down flush barriers */
- int nobarriers;
- struct bio *flush_bio;
- struct completion flush_wait;
-
/* per-device scrub information */
struct scrub_ctx *scrub_device;
@@ -107,6 +103,10 @@ struct btrfs_device {
struct radix_tree_root reada_zones;
struct radix_tree_root reada_extents;
+ /* for sending down flush barriers */
+ struct bio *flush_bio;
+ struct completion flush_wait;
+ int nobarriers;
/* disk I/O failure stats. For detailed description refer to
* enum btrfs_dev_stat_values in ioctl.h */
@@ -132,9 +132,7 @@ struct btrfs_fs_devices {
/* all of the devices in the FS, protected by a mutex
* so we can safely walk it to write out the supers without
- * worrying about add/remove by the multi-device code.
- * Scrubbing super can kick off supers writing by holding
- * this mutex lock.
+ * worrying about add/remove by the multi-device code
*/
struct mutex device_list_mutex;
struct list_head devices;
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 57e17fe..43eb559 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -270,7 +270,7 @@ static void cachefiles_drop_object(struct fscache_object *_object)
#endif
/* delete retired objects */
- if (test_bit(FSCACHE_OBJECT_RETIRED, &object->fscache.flags) &&
+ if (test_bit(FSCACHE_COOKIE_RETIRED, &object->fscache.cookie->flags) &&
_object != cache->cache.fsdef
) {
_debug("- retire object OBJ%x", object->fscache.debug_id);
@@ -449,14 +449,14 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
_debug("discard tail %llx", oi_size);
newattrs.ia_valid = ATTR_SIZE;
newattrs.ia_size = oi_size & PAGE_MASK;
- ret = notify_change(object->backer, &newattrs, NULL);
+ ret = notify_change(object->backer, &newattrs);
if (ret < 0)
goto truncate_failed;
}
newattrs.ia_valid = ATTR_SIZE;
newattrs.ia_size = ni_size;
- ret = notify_change(object->backer, &newattrs, NULL);
+ ret = notify_change(object->backer, &newattrs);
truncate_failed:
mutex_unlock(&object->backer->d_inode->i_mutex);
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index ca65f39..f4a08d7 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -294,7 +294,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
if (ret < 0) {
cachefiles_io_error(cache, "Unlink security error");
} else {
- ret = vfs_unlink(dir->d_inode, rep, NULL);
+ ret = vfs_unlink(dir->d_inode, rep);
if (preemptive)
cachefiles_mark_object_buried(cache, rep);
@@ -396,7 +396,7 @@ try_again:
cachefiles_io_error(cache, "Rename security error %d", ret);
} else {
ret = vfs_rename(dir->d_inode, rep,
- cache->graveyard->d_inode, grave, NULL);
+ cache->graveyard->d_inode, grave);
if (ret != 0 && ret != -ENOMEM)
cachefiles_io_error(cache,
"Rename failed with error %d", ret);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 1e561c0..6df8bd4 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -216,7 +216,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
}
SetPageUptodate(page);
- if (err >= 0)
+ if (err == 0)
ceph_readpage_to_fscache(inode, page);
out:
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 8c44fdd..6bfe65e 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -68,7 +68,7 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
{
fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
&ceph_fscache_fsid_object_def,
- fsc, true);
+ fsc);
if (fsc->fscache == NULL) {
pr_err("Unable to resgister fsid: %p fscache cookie", fsc);
@@ -204,7 +204,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
ci->fscache = fscache_acquire_cookie(fsc->fscache,
&ceph_fscache_inode_object_def,
- ci, true);
+ ci);
done:
mutex_unlock(&inode->i_mutex);
@@ -324,9 +324,6 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
{
struct ceph_inode_info *ci = ceph_inode(inode);
- if (!PageFsCache(page))
- return;
-
fscache_wait_on_page_write(ci->fscache, page);
fscache_uncache_page(ci->fscache, page);
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 3c0a4bd..13976c3 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -897,7 +897,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
* caller should hold i_ceph_lock.
* caller will not hold session s_mutex if called from destroy_inode.
*/
-void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
+void __ceph_remove_cap(struct ceph_cap *cap)
{
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
@@ -909,16 +909,6 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
/* remove from session list */
spin_lock(&session->s_cap_lock);
- /*
- * s_cap_reconnect is protected by s_cap_lock. no one changes
- * s_cap_gen while session is in the reconnect state.
- */
- if (queue_release &&
- (!session->s_cap_reconnect ||
- cap->cap_gen == session->s_cap_gen))
- __queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
- cap->mseq, cap->issue_seq);
-
if (session->s_cap_iterator == cap) {
/* not yet, we are iterating over this very cap */
dout("__ceph_remove_cap delaying %p removal from session %p\n",
@@ -1033,6 +1023,7 @@ void __queue_cap_release(struct ceph_mds_session *session,
struct ceph_mds_cap_release *head;
struct ceph_mds_cap_item *item;
+ spin_lock(&session->s_cap_lock);
BUG_ON(!session->s_num_cap_releases);
msg = list_first_entry(&session->s_cap_releases,
struct ceph_msg, list_head);
@@ -1061,6 +1052,7 @@ void __queue_cap_release(struct ceph_mds_session *session,
(int)CEPH_CAPS_PER_RELEASE,
(int)msg->front.iov_len);
}
+ spin_unlock(&session->s_cap_lock);
}
/*
@@ -1075,8 +1067,12 @@ void ceph_queue_caps_release(struct inode *inode)
p = rb_first(&ci->i_caps);
while (p) {
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
+ struct ceph_mds_session *session = cap->session;
+
+ __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
+ cap->mseq, cap->issue_seq);
p = rb_next(p);
- __ceph_remove_cap(cap, true);
+ __ceph_remove_cap(cap);
}
}
@@ -2795,7 +2791,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
}
spin_unlock(&mdsc->cap_dirty_lock);
}
- __ceph_remove_cap(cap, false);
+ __ceph_remove_cap(cap);
}
/* else, we already released it */
@@ -2935,12 +2931,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
if (!inode) {
dout(" i don't have ino %llx\n", vino.ino);
- if (op == CEPH_CAP_OP_IMPORT) {
- spin_lock(&session->s_cap_lock);
+ if (op == CEPH_CAP_OP_IMPORT)
__queue_cap_release(session, vino.ino, cap_id,
mseq, seq);
- spin_unlock(&session->s_cap_lock);
- }
goto flush_cap_releases;
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 2a0bcae..868b61d 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -352,18 +352,8 @@ more:
}
/* note next offset and last dentry name */
- rinfo = &req->r_reply_info;
- if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
- frag = le32_to_cpu(rinfo->dir_dir->frag);
- if (ceph_frag_is_leftmost(frag))
- fi->next_offset = 2;
- else
- fi->next_offset = 0;
- off = fi->next_offset;
- }
fi->offset = fi->next_offset;
fi->last_readdir = req;
- fi->frag = frag;
if (req->r_reply_info.dir_end) {
kfree(fi->last_name);
@@ -373,6 +363,7 @@ more:
else
fi->next_offset = 0;
} else {
+ rinfo = &req->r_reply_info;
err = note_last_dentry(fi,
rinfo->dir_dname[rinfo->dir_nr-1],
rinfo->dir_dname_len[rinfo->dir_nr-1]);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 9a8e396..8549a48 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -577,8 +577,6 @@ static int fill_inode(struct inode *inode,
int issued = 0, implemented;
struct timespec mtime, atime, ctime;
u32 nsplits;
- struct ceph_inode_frag *frag;
- struct rb_node *rb_node;
struct ceph_buffer *xattr_blob = NULL;
int err = 0;
int queue_trunc = 0;
@@ -753,38 +751,15 @@ no_change:
/* FIXME: move me up, if/when version reflects fragtree changes */
nsplits = le32_to_cpu(info->fragtree.nsplits);
mutex_lock(&ci->i_fragtree_mutex);
- rb_node = rb_first(&ci->i_fragtree);
for (i = 0; i < nsplits; i++) {
u32 id = le32_to_cpu(info->fragtree.splits[i].frag);
- frag = NULL;
- while (rb_node) {
- frag = rb_entry(rb_node, struct ceph_inode_frag, node);
- if (ceph_frag_compare(frag->frag, id) >= 0) {
- if (frag->frag != id)
- frag = NULL;
- else
- rb_node = rb_next(rb_node);
- break;
- }
- rb_node = rb_next(rb_node);
- rb_erase(&frag->node, &ci->i_fragtree);
- kfree(frag);
- frag = NULL;
- }
- if (!frag) {
- frag = __get_or_create_frag(ci, id);
- if (IS_ERR(frag))
- continue;
- }
+ struct ceph_inode_frag *frag = __get_or_create_frag(ci, id);
+
+ if (IS_ERR(frag))
+ continue;
frag->split_by = le32_to_cpu(info->fragtree.splits[i].by);
dout(" frag %x split by %d\n", frag->frag, frag->split_by);
}
- while (rb_node) {
- frag = rb_entry(rb_node, struct ceph_inode_frag, node);
- rb_node = rb_next(rb_node);
- rb_erase(&frag->node, &ci->i_fragtree);
- kfree(frag);
- }
mutex_unlock(&ci->i_fragtree_mutex);
/* were we issued a capability? */
@@ -1275,20 +1250,8 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
int err = 0, i;
struct inode *snapdir = NULL;
struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
+ u64 frag = le32_to_cpu(rhead->args.readdir.frag);
struct ceph_dentry_info *di;
- u64 r_readdir_offset = req->r_readdir_offset;
- u32 frag = le32_to_cpu(rhead->args.readdir.frag);
-
- if (rinfo->dir_dir &&
- le32_to_cpu(rinfo->dir_dir->frag) != frag) {
- dout("readdir_prepopulate got new frag %x -> %x\n",
- frag, le32_to_cpu(rinfo->dir_dir->frag));
- frag = le32_to_cpu(rinfo->dir_dir->frag);
- if (ceph_frag_is_leftmost(frag))
- r_readdir_offset = 2;
- else
- r_readdir_offset = 0;
- }
if (req->r_aborted)
return readdir_prepopulate_inodes_only(req, session);
@@ -1352,7 +1315,7 @@ retry_lookup:
}
di = dn->d_fsdata;
- di->offset = ceph_make_fpos(frag, i + r_readdir_offset);
+ di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset);
/* inode */
if (dn->d_inode) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index d90861f..b7bda5d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -43,7 +43,6 @@
*/
struct ceph_reconnect_state {
- int nr_caps;
struct ceph_pagelist *pagelist;
bool flock;
};
@@ -444,7 +443,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
INIT_LIST_HEAD(&s->s_waiting);
INIT_LIST_HEAD(&s->s_unsafe);
s->s_num_cap_releases = 0;
- s->s_cap_reconnect = 0;
s->s_cap_iterator = NULL;
INIT_LIST_HEAD(&s->s_cap_releases);
INIT_LIST_HEAD(&s->s_cap_releases_done);
@@ -644,8 +642,6 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
req->r_unsafe_dir = NULL;
}
- complete_all(&req->r_safe_completion);
-
ceph_mdsc_put_request(req);
}
@@ -990,7 +986,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
dout("removing cap %p, ci is %p, inode is %p\n",
cap, ci, &ci->vfs_inode);
spin_lock(&ci->i_ceph_lock);
- __ceph_remove_cap(cap, false);
+ __ceph_remove_cap(cap);
if (!__ceph_is_any_real_caps(ci)) {
struct ceph_mds_client *mdsc =
ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -1235,7 +1231,9 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
session->s_trim_caps--;
if (oissued) {
/* we aren't the only cap.. just remove us */
- __ceph_remove_cap(cap, true);
+ __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
+ cap->mseq, cap->issue_seq);
+ __ceph_remove_cap(cap);
} else {
/* try to drop referring dentries */
spin_unlock(&ci->i_ceph_lock);
@@ -1418,6 +1416,7 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
unsigned num;
dout("discard_cap_releases mds%d\n", session->s_mds);
+ spin_lock(&session->s_cap_lock);
/* zero out the in-progress message */
msg = list_first_entry(&session->s_cap_releases,
@@ -1444,6 +1443,8 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
msg->front.iov_len = sizeof(*head);
list_add(&msg->list_head, &session->s_cap_releases);
}
+
+ spin_unlock(&session->s_cap_lock);
}
/*
@@ -1874,11 +1875,8 @@ static int __do_request(struct ceph_mds_client *mdsc,
int mds = -1;
int err = -EAGAIN;
- if (req->r_err || req->r_got_result) {
- if (req->r_aborted)
- __unregister_request(mdsc, req);
+ if (req->r_err || req->r_got_result)
goto out;
- }
if (req->r_timeout &&
time_after_eq(jiffies, req->r_started + req->r_timeout)) {
@@ -2188,6 +2186,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (head->safe) {
req->r_got_safe = true;
__unregister_request(mdsc, req);
+ complete_all(&req->r_safe_completion);
if (req->r_got_unsafe) {
/*
@@ -2239,7 +2238,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
if (err == 0) {
if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
- req->r_op == CEPH_MDS_OP_LSSNAP))
+ req->r_op == CEPH_MDS_OP_LSSNAP) &&
+ rinfo->dir_nr)
ceph_readdir_prepopulate(req, req->r_session);
ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
}
@@ -2490,7 +2490,6 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
cap->seq = 0; /* reset cap seq */
cap->issue_seq = 0; /* and issue_seq */
cap->mseq = 0; /* and migrate_seq */
- cap->cap_gen = cap->session->s_cap_gen;
if (recon_state->flock) {
rec.v2.cap_id = cpu_to_le64(cap->cap_id);
@@ -2553,8 +2552,6 @@ encode_again:
} else {
err = ceph_pagelist_append(pagelist, &rec, reclen);
}
-
- recon_state->nr_caps++;
out_free:
kfree(path);
out_dput:
@@ -2582,7 +2579,6 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
struct rb_node *p;
int mds = session->s_mds;
int err = -ENOMEM;
- int s_nr_caps;
struct ceph_pagelist *pagelist;
struct ceph_reconnect_state recon_state;
@@ -2614,38 +2610,20 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
dout("session %p state %s\n", session,
session_state_name(session->s_state));
- spin_lock(&session->s_gen_ttl_lock);
- session->s_cap_gen++;
- spin_unlock(&session->s_gen_ttl_lock);
-
- spin_lock(&session->s_cap_lock);
- /*
- * notify __ceph_remove_cap() that we are composing cap reconnect.
- * If a cap get released before being added to the cap reconnect,
- * __ceph_remove_cap() should skip queuing cap release.
- */
- session->s_cap_reconnect = 1;
/* drop old cap expires; we're about to reestablish that state */
discard_cap_releases(mdsc, session);
- spin_unlock(&session->s_cap_lock);
/* traverse this session's caps */
- s_nr_caps = session->s_nr_caps;
- err = ceph_pagelist_encode_32(pagelist, s_nr_caps);
+ err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps);
if (err)
goto fail;
- recon_state.nr_caps = 0;
recon_state.pagelist = pagelist;
recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK;
err = iterate_session_caps(session, encode_caps_cb, &recon_state);
if (err < 0)
goto fail;
- spin_lock(&session->s_cap_lock);
- session->s_cap_reconnect = 0;
- spin_unlock(&session->s_cap_lock);
-
/*
* snaprealms. we provide mds with the ino, seq (version), and
* parent for all of our realms. If the mds has any newer info,
@@ -2668,18 +2646,11 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
if (recon_state.flock)
reply->hdr.version = cpu_to_le16(2);
-
- /* raced with cap release? */
- if (s_nr_caps != recon_state.nr_caps) {
- struct page *page = list_first_entry(&pagelist->head,
- struct page, lru);
- __le32 *addr = kmap_atomic(page);
- *addr = cpu_to_le32(recon_state.nr_caps);
- kunmap_atomic(addr);
+ if (pagelist->length) {
+ /* set up outbound data if we have any */
+ reply->hdr.data_len = cpu_to_le32(pagelist->length);
+ ceph_msg_data_add_pagelist(reply, pagelist);
}
-
- reply->hdr.data_len = cpu_to_le32(pagelist->length);
- ceph_msg_data_add_pagelist(reply, pagelist);
ceph_con_send(&session->s_con, reply);
mutex_unlock(&session->s_mutex);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 4c053d0..c2a19fb 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -132,7 +132,6 @@ struct ceph_mds_session {
struct list_head s_caps; /* all caps issued by this session */
int s_nr_caps, s_trim_caps;
int s_num_cap_releases;
- int s_cap_reconnect;
struct list_head s_cap_releases; /* waiting cap_release messages */
struct list_head s_cap_releases_done; /* ready to send */
struct ceph_cap *s_cap_iterator;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index ef4ac38..6014b0a 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -741,7 +741,13 @@ extern int ceph_add_cap(struct inode *inode,
int fmode, unsigned issued, unsigned wanted,
unsigned cap, unsigned seq, u64 realmino, int flags,
struct ceph_cap_reservation *caps_reservation);
-extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
+extern void __ceph_remove_cap(struct ceph_cap *cap);
+static inline void ceph_remove_cap(struct ceph_cap *cap)
+{
+ spin_lock(&cap->ci->i_ceph_lock);
+ __ceph_remove_cap(cap);
+ spin_unlock(&cap->ci->i_ceph_lock);
+}
extern void ceph_put_cap(struct ceph_mds_client *mdsc,
struct ceph_cap *cap);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index f77f770..afc2bb6 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -368,7 +368,6 @@ void cdev_put(struct cdev *p)
*/
static int chrdev_open(struct inode *inode, struct file *filp)
{
- const struct file_operations *fops;
struct cdev *p;
struct cdev *new = NULL;
int ret = 0;
@@ -401,11 +400,10 @@ static int chrdev_open(struct inode *inode, struct file *filp)
return ret;
ret = -ENXIO;
- fops = fops_get(p->ops);
- if (!fops)
+ filp->f_op = fops_get(p->ops);
+ if (!filp->f_op)
goto out_cdev_put;
- replace_fops(filp, fops);
if (filp->f_op->open) {
ret = filp->f_op->open(inode, filp);
if (ret)
@@ -576,8 +574,7 @@ static struct kobject *base_probe(dev_t dev, int *part, void *data)
void __init chrdev_init(void)
{
cdev_map = kobj_map_init(base_probe, &chrdevs_lock);
- if (bdi_init(&directly_mappable_cdev_bdi))
- panic("Failed to init directly mappable cdev bdi");
+ bdi_init(&directly_mappable_cdev_bdi);
}
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 9409fa1..37e4a72 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -65,6 +65,5 @@ struct cifs_sb_info {
char *mountdata; /* options received at mount time or via DFS refs */
struct backing_dev_info bdi;
struct delayed_work prune_tlinks;
- struct rcu_head rcu;
};
#endif /* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 4934347..fc6f4f3 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -548,13 +548,7 @@ static int
CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
{
int rc;
- struct ntlmv2_resp *ntlmv2 = (struct ntlmv2_resp *)
- (ses->auth_key.response + CIFS_SESS_KEY_SIZE);
- unsigned int hash_len;
-
- /* The MD5 hash starts at challenge_key.key */
- hash_len = ses->auth_key.len - (CIFS_SESS_KEY_SIZE +
- offsetof(struct ntlmv2_resp, challenge.key[0]));
+ unsigned int offset = CIFS_SESS_KEY_SIZE + 8;
if (!ses->server->secmech.sdeschmacmd5) {
cifs_dbg(VFS, "%s: can't generate ntlmv2 hash\n", __func__);
@@ -562,7 +556,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
}
rc = crypto_shash_setkey(ses->server->secmech.hmacmd5,
- ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
+ ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
if (rc) {
cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n",
__func__);
@@ -576,21 +570,20 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
}
if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED)
- memcpy(ntlmv2->challenge.key,
- ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
+ memcpy(ses->auth_key.response + offset,
+ ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
else
- memcpy(ntlmv2->challenge.key,
- ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
+ memcpy(ses->auth_key.response + offset,
+ ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
- ntlmv2->challenge.key, hash_len);
+ ses->auth_key.response + offset, ses->auth_key.len - offset);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
return rc;
}
- /* Note that the MD5 digest over writes anon.challenge_key.key */
rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
- ntlmv2->ntlmv2_hash);
+ ses->auth_key.response + CIFS_SESS_KEY_SIZE);
if (rc)
cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
@@ -634,7 +627,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
int rc;
int baselen;
unsigned int tilen;
- struct ntlmv2_resp *ntlmv2;
+ struct ntlmv2_resp *buf;
char ntlmv2_hash[16];
unsigned char *tiblob = NULL; /* target info blob */
@@ -667,14 +660,13 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
}
ses->auth_key.len += baselen;
- ntlmv2 = (struct ntlmv2_resp *)
+ buf = (struct ntlmv2_resp *)
(ses->auth_key.response + CIFS_SESS_KEY_SIZE);
- ntlmv2->blob_signature = cpu_to_le32(0x00000101);
- ntlmv2->reserved = 0;
- /* Must be within 5 minutes of the server */
- ntlmv2->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
- get_random_bytes(&ntlmv2->client_chal, sizeof(ntlmv2->client_chal));
- ntlmv2->reserved2 = 0;
+ buf->blob_signature = cpu_to_le32(0x00000101);
+ buf->reserved = 0;
+ buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
+ get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
+ buf->reserved2 = 0;
memcpy(ses->auth_key.response + baselen, tiblob, tilen);
@@ -714,7 +706,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
}
rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
- ntlmv2->ntlmv2_hash,
+ ses->auth_key.response + CIFS_SESS_KEY_SIZE,
CIFS_HMAC_MD5_HASH_SIZE);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 849f613..77fc5e1 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -862,7 +862,7 @@ const struct inode_operations cifs_file_inode_ops = {
const struct inode_operations cifs_symlink_inode_ops = {
.readlink = generic_readlink,
.follow_link = cifs_follow_link,
- .put_link = kfree_put_link,
+ .put_link = cifs_put_link,
.permission = cifs_permission,
/* BB add the following two eventually */
/* revalidate: cifs_revalidate,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 26a754f..6d0b072 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -115,6 +115,8 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
/* Functions related to symlinks */
extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd);
+extern void cifs_put_link(struct dentry *direntry,
+ struct nameidata *nd, void *);
extern int cifs_readlink(struct dentry *direntry, char __user *buffer,
int buflen);
extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index f918a99..52b6f6c 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -261,7 +261,7 @@ struct smb_version_operations {
/* query path data from the server */
int (*query_path_info)(const unsigned int, struct cifs_tcon *,
struct cifs_sb_info *, const char *,
- FILE_ALL_INFO *, bool *, bool *);
+ FILE_ALL_INFO *, bool *);
/* query file data from the server */
int (*query_file_info)(const unsigned int, struct cifs_tcon *,
struct cifs_fid *, FILE_ALL_INFO *);
@@ -278,8 +278,6 @@ struct smb_version_operations {
/* set attributes */
int (*set_file_info)(struct inode *, const char *, FILE_BASIC_INFO *,
const unsigned int);
- int (*set_compression)(const unsigned int, struct cifs_tcon *,
- struct cifsFileInfo *);
/* check if we can send an echo or nor */
bool (*can_echo)(struct TCP_Server_Info *);
/* send echo request */
@@ -381,10 +379,6 @@ struct smb_version_operations {
char * (*create_lease_buf)(u8 *, u8);
/* parse lease context buffer and return oplock/epoch info */
__u8 (*parse_lease_buf)(void *, unsigned int *);
- int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file,
- struct cifsFileInfo *target_file, u64 src_off, u64 len,
- u64 dest_off);
- int (*validate_negotiate)(const unsigned int, struct cifs_tcon *);
};
struct smb_version_values {
@@ -626,32 +620,9 @@ set_credits(struct TCP_Server_Info *server, const int val)
}
static inline __u64
-get_next_mid64(struct TCP_Server_Info *server)
-{
- return server->ops->get_next_mid(server);
-}
-
-static inline __le16
get_next_mid(struct TCP_Server_Info *server)
{
- __u16 mid = get_next_mid64(server);
- /*
- * The value in the SMB header should be little endian for easy
- * on-the-wire decoding.
- */
- return cpu_to_le16(mid);
-}
-
-static inline __u16
-get_mid(const struct smb_hdr *smb)
-{
- return le16_to_cpu(smb->Mid);
-}
-
-static inline bool
-compare_mid(__u16 mid, const struct smb_hdr *smb)
-{
- return mid == le16_to_cpu(smb->Mid);
+ return server->ops->get_next_mid(server);
}
/*
@@ -857,11 +828,6 @@ struct cifs_tcon {
__u32 maximal_access;
__u32 vol_serial_number;
__le64 vol_create_time;
- __u32 ss_flags; /* sector size flags */
- __u32 perf_sector_size; /* best sector size for perf */
- __u32 max_chunks;
- __u32 max_bytes_chunk;
- __u32 max_bytes_copy;
#endif /* CONFIG_CIFS_SMB2 */
#ifdef CONFIG_CIFS_FSCACHE
u64 resource_id; /* server resource id */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 33df36e..08f9dfb 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -428,7 +428,7 @@ struct smb_hdr {
__u16 Tid;
__le16 Pid;
__u16 Uid;
- __le16 Mid;
+ __u16 Mid;
__u8 WordCount;
} __attribute__((packed));
@@ -697,13 +697,7 @@ struct ntlmssp2_name {
} __attribute__((packed));
struct ntlmv2_resp {
- union {
- char ntlmv2_hash[CIFS_ENCPWD_SIZE];
- struct {
- __u8 reserved[8];
- __u8 key[CIFS_SERVER_CHALLENGE_SIZE];
- } __attribute__((packed)) challenge;
- } __attribute__((packed));
+ char ntlmv2_hash[CIFS_ENCPWD_SIZE];
__le32 blob_signature;
__u32 reserved;
__le64 time;
@@ -1358,35 +1352,6 @@ typedef struct smb_com_transaction_ioctl_req {
__u8 Data[1];
} __attribute__((packed)) TRANSACT_IOCTL_REQ;
-typedef struct smb_com_transaction_compr_ioctl_req {
- struct smb_hdr hdr; /* wct = 23 */
- __u8 MaxSetupCount;
- __u16 Reserved;
- __le32 TotalParameterCount;
- __le32 TotalDataCount;
- __le32 MaxParameterCount;
- __le32 MaxDataCount;
- __le32 ParameterCount;
- __le32 ParameterOffset;
- __le32 DataCount;
- __le32 DataOffset;
- __u8 SetupCount; /* four setup words follow subcommand */
- /* SNIA spec incorrectly included spurious pad here */
- __le16 SubCommand; /* 2 = IOCTL/FSCTL */
- __le32 FunctionCode;
- __u16 Fid;
- __u8 IsFsctl; /* 1 = File System Control 0 = device control (IOCTL) */
- __u8 IsRootFlag; /* 1 = apply command to root of share (must be DFS) */
- __le16 ByteCount;
- __u8 Pad[3];
- __le16 compression_state; /* See below for valid flags */
-} __attribute__((packed)) TRANSACT_COMPR_IOCTL_REQ;
-
-/* compression state flags */
-#define COMPRESSION_FORMAT_NONE 0x0000
-#define COMPRESSION_FORMAT_DEFAULT 0x0001
-#define COMPRESSION_FORMAT_LZNT1 0x0002
-
typedef struct smb_com_transaction_ioctl_rsp {
struct smb_hdr hdr; /* wct = 19 */
__u8 Reserved[3];
@@ -2250,9 +2215,6 @@ typedef struct {
__le32 DeviceCharacteristics;
} __attribute__((packed)) FILE_SYSTEM_DEVICE_INFO; /* device info level 0x104 */
-/* minimum includes first three fields, and empty FS Name */
-#define MIN_FS_ATTR_INFO_SIZE 12
-
typedef struct {
__le32 Attributes;
__le32 MaxPathNameComponentLength;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index aa33976..b5ec2a2 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -360,8 +360,6 @@ extern int CIFSSMBUnixQuerySymLink(const unsigned int xid,
extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
__u16 fid, char **symlinkinfo,
const struct nls_table *nls_codepage);
-extern int CIFSSMB_set_compression(const unsigned int xid,
- struct cifs_tcon *tcon, __u16 fid);
extern int CIFSSMBOpen(const unsigned int xid, struct cifs_tcon *tcon,
const char *fileName, const int disposition,
const int access_flags, const int omode,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 124aa02..ccd31ab 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -3199,60 +3199,6 @@ qreparse_out:
return rc;
}
-int
-CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
- __u16 fid)
-{
- int rc = 0;
- int bytes_returned;
- struct smb_com_transaction_compr_ioctl_req *pSMB;
- struct smb_com_transaction_ioctl_rsp *pSMBr;
-
- cifs_dbg(FYI, "Set compression for %u\n", fid);
- rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
- (void **) &pSMBr);
- if (rc)
- return rc;
-
- pSMB->compression_state = cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
-
- pSMB->TotalParameterCount = 0;
- pSMB->TotalDataCount = __constant_cpu_to_le32(2);
- pSMB->MaxParameterCount = 0;
- pSMB->MaxDataCount = 0;
- pSMB->MaxSetupCount = 4;
- pSMB->Reserved = 0;
- pSMB->ParameterOffset = 0;
- pSMB->DataCount = __constant_cpu_to_le32(2);
- pSMB->DataOffset =
- cpu_to_le32(offsetof(struct smb_com_transaction_compr_ioctl_req,
- compression_state) - 4); /* 84 */
- pSMB->SetupCount = 4;
- pSMB->SubCommand = __constant_cpu_to_le16(NT_TRANSACT_IOCTL);
- pSMB->ParameterCount = 0;
- pSMB->FunctionCode = __constant_cpu_to_le32(FSCTL_SET_COMPRESSION);
- pSMB->IsFsctl = 1; /* FSCTL */
- pSMB->IsRootFlag = 0;
- pSMB->Fid = fid; /* file handle always le */
- /* 3 byte pad, followed by 2 byte compress state */
- pSMB->ByteCount = __constant_cpu_to_le16(5);
- inc_rfc1001_len(pSMB, 5);
-
- rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
- (struct smb_hdr *) pSMBr, &bytes_returned, 0);
- if (rc)
- cifs_dbg(FYI, "Send error in SetCompression = %d\n", rc);
-
- cifs_buf_release(pSMB);
-
- /*
- * Note: On -EAGAIN error only caller can retry on handle based calls
- * since file handle passed in no longer valid.
- */
- return rc;
-}
-
-
#ifdef CONFIG_CIFS_POSIX
/*Convert an Access Control Entry from wire format to local POSIX xattr format*/
@@ -3369,13 +3315,11 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
return 0;
}
cifs_acl->version = cpu_to_le16(1);
- if (acl_type == ACL_TYPE_ACCESS) {
+ if (acl_type == ACL_TYPE_ACCESS)
cifs_acl->access_entry_count = cpu_to_le16(count);
- cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF);
- } else if (acl_type == ACL_TYPE_DEFAULT) {
+ else if (acl_type == ACL_TYPE_DEFAULT)
cifs_acl->default_entry_count = cpu_to_le16(count);
- cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF);
- } else {
+ else {
cifs_dbg(FYI, "unknown ACL type %d\n", acl_type);
return 0;
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8813ff7..a279ffc 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2242,8 +2242,6 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
- if (ses->status == CifsExiting)
- continue;
if (!match_session(ses, vol))
continue;
++ses->ses_count;
@@ -2257,37 +2255,24 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
static void
cifs_put_smb_ses(struct cifs_ses *ses)
{
- unsigned int rc, xid;
+ unsigned int xid;
struct TCP_Server_Info *server = ses->server;
cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
-
spin_lock(&cifs_tcp_ses_lock);
- if (ses->status == CifsExiting) {
- spin_unlock(&cifs_tcp_ses_lock);
- return;
- }
if (--ses->ses_count > 0) {
spin_unlock(&cifs_tcp_ses_lock);
return;
}
- if (ses->status == CifsGood)
- ses->status = CifsExiting;
+
+ list_del_init(&ses->smb_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
- if (ses->status == CifsExiting && server->ops->logoff) {
+ if (ses->status == CifsGood && server->ops->logoff) {
xid = get_xid();
- rc = server->ops->logoff(xid, ses);
- if (rc)
- cifs_dbg(VFS, "%s: Session Logoff failure rc=%d\n",
- __func__, rc);
+ server->ops->logoff(xid, ses);
_free_xid(xid);
}
-
- spin_lock(&cifs_tcp_ses_lock);
- list_del_init(&ses->smb_ses_list);
- spin_unlock(&cifs_tcp_ses_lock);
-
sesInfoFree(ses);
cifs_put_tcp_session(server);
}
@@ -3770,13 +3755,6 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
return rc;
}
-static void delayed_free(struct rcu_head *p)
-{
- struct cifs_sb_info *sbi = container_of(p, struct cifs_sb_info, rcu);
- unload_nls(sbi->local_nls);
- kfree(sbi);
-}
-
void
cifs_umount(struct cifs_sb_info *cifs_sb)
{
@@ -3801,7 +3779,8 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
bdi_destroy(&cifs_sb->bdi);
kfree(cifs_sb->mountdata);
- call_rcu(&cifs_sb->rcu, delayed_free);
+ unload_nls(cifs_sb->local_nls);
+ kfree(cifs_sb);
}
int
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 11ff5f1..5384c2a 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -756,7 +756,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
/* if it was once a directory (but how can we tell?) we could do
shrink_dcache_parent(direntry); */
} else if (rc != -EACCES) {
- cifs_dbg(FYI, "Unexpected lookup error %d\n", rc);
+ cifs_dbg(VFS, "Unexpected lookup error %d\n", rc);
/* We special case check for Access Denied - since that
is a common return code */
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5a5a872..7ddddf2 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3663,27 +3663,6 @@ void cifs_oplock_break(struct work_struct *work)
}
}
-/*
- * The presence of cifs_direct_io() in the address space ops vector
- * allowes open() O_DIRECT flags which would have failed otherwise.
- *
- * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
- * so this method should never be called.
- *
- * Direct IO is not yet supported in the cached mode.
- */
-static ssize_t
-cifs_direct_io(int rw, struct kiocb *iocb, const struct iovec *iov,
- loff_t pos, unsigned long nr_segs)
-{
- /*
- * FIXME
- * Eventually need to support direct IO for non forcedirectio mounts
- */
- return -EINVAL;
-}
-
-
const struct address_space_operations cifs_addr_ops = {
.readpage = cifs_readpage,
.readpages = cifs_readpages,
@@ -3693,7 +3672,6 @@ const struct address_space_operations cifs_addr_ops = {
.write_end = cifs_write_end,
.set_page_dirty = __set_page_dirty_nobuffers,
.releasepage = cifs_release_page,
- .direct_IO = cifs_direct_io,
.invalidatepage = cifs_invalidate_page,
.launder_page = cifs_launder_page,
};
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 8d4b7bc..b3258f3 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -27,7 +27,7 @@ void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
{
server->fscache =
fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
- &cifs_fscache_server_index_def, server, true);
+ &cifs_fscache_server_index_def, server);
cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
__func__, server, server->fscache);
}
@@ -46,7 +46,7 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
tcon->fscache =
fscache_acquire_cookie(server->fscache,
- &cifs_fscache_super_index_def, tcon, true);
+ &cifs_fscache_super_index_def, tcon);
cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
__func__, server->fscache, tcon->fscache);
}
@@ -69,7 +69,7 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode)
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) {
cifsi->fscache = fscache_acquire_cookie(tcon->fscache,
- &cifs_fscache_inode_object_def, cifsi, true);
+ &cifs_fscache_inode_object_def, cifsi);
cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n",
__func__, tcon->fscache, cifsi->fscache);
}
@@ -119,7 +119,7 @@ void cifs_fscache_reset_inode_cookie(struct inode *inode)
cifsi->fscache = fscache_acquire_cookie(
cifs_sb_master_tcon(cifs_sb)->fscache,
&cifs_fscache_inode_object_def,
- cifsi, true);
+ cifsi);
cifs_dbg(FYI, "%s: new cookie 0x%p oldcookie 0x%p\n",
__func__, cifsi->fscache, old);
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 36f9ebb..867b7cd 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -542,8 +542,7 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
/* Fill a cifs_fattr struct with info from FILE_ALL_INFO */
static void
cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
- struct cifs_sb_info *cifs_sb, bool adjust_tz,
- bool symlink)
+ struct cifs_sb_info *cifs_sb, bool adjust_tz)
{
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
@@ -570,11 +569,7 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
fattr->cf_createtime = le64_to_cpu(info->CreationTime);
fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
-
- if (symlink) {
- fattr->cf_mode = S_IFLNK;
- fattr->cf_dtype = DT_LNK;
- } else if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
+ if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
fattr->cf_dtype = DT_DIR;
/*
@@ -583,6 +578,10 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
*/
if (!tcon->unix_ext)
fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
+ } else if (fattr->cf_cifsattrs & ATTR_REPARSE) {
+ fattr->cf_mode = S_IFLNK;
+ fattr->cf_dtype = DT_LNK;
+ fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
} else {
fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
fattr->cf_dtype = DT_REG;
@@ -627,8 +626,7 @@ cifs_get_file_info(struct file *filp)
rc = server->ops->query_file_info(xid, tcon, &cfile->fid, &find_data);
switch (rc) {
case 0:
- cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false,
- false);
+ cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false);
break;
case -EREMOTE:
cifs_create_dfs_fattr(&fattr, inode->i_sb);
@@ -675,7 +673,6 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
bool adjust_tz = false;
struct cifs_fattr fattr;
struct cifs_search_info *srchinf = NULL;
- bool symlink = false;
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
@@ -705,12 +702,12 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
}
data = (FILE_ALL_INFO *)buf;
rc = server->ops->query_path_info(xid, tcon, cifs_sb, full_path,
- data, &adjust_tz, &symlink);
+ data, &adjust_tz);
}
if (!rc) {
- cifs_all_info_to_fattr(&fattr, data, cifs_sb, adjust_tz,
- symlink);
+ cifs_all_info_to_fattr(&fattr, (FILE_ALL_INFO *)data, cifs_sb,
+ adjust_tz);
} else if (rc == -EREMOTE) {
cifs_create_dfs_fattr(&fattr, sb);
rc = 0;
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 7749230..3e08455 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -3,7 +3,7 @@
*
* vfs operations that deal with io control
*
- * Copyright (C) International Business Machines Corp., 2005,2013
+ * Copyright (C) International Business Machines Corp., 2005,2007
* Author(s): Steve French (sfrench@us.ibm.com)
*
* This library is free software; you can redistribute it and/or modify
@@ -22,132 +22,25 @@
*/
#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/mount.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_debug.h"
#include "cifsfs.h"
-#define CIFS_IOCTL_MAGIC 0xCF
-#define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int)
-
-static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
- unsigned long srcfd, u64 off, u64 len, u64 destoff)
-{
- int rc;
- struct cifsFileInfo *smb_file_target = dst_file->private_data;
- struct inode *target_inode = file_inode(dst_file);
- struct cifs_tcon *target_tcon;
- struct fd src_file;
- struct cifsFileInfo *smb_file_src;
- struct inode *src_inode;
- struct cifs_tcon *src_tcon;
-
- cifs_dbg(FYI, "ioctl clone range\n");
- /* the destination must be opened for writing */
- if (!(dst_file->f_mode & FMODE_WRITE)) {
- cifs_dbg(FYI, "file target not open for write\n");
- return -EINVAL;
- }
-
- /* check if target volume is readonly and take reference */
- rc = mnt_want_write_file(dst_file);
- if (rc) {
- cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc);
- return rc;
- }
-
- src_file = fdget(srcfd);
- if (!src_file.file) {
- rc = -EBADF;
- goto out_drop_write;
- }
-
- if ((!src_file.file->private_data) || (!dst_file->private_data)) {
- rc = -EBADF;
- cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
- goto out_fput;
- }
-
- rc = -EXDEV;
- smb_file_target = dst_file->private_data;
- smb_file_src = src_file.file->private_data;
- src_tcon = tlink_tcon(smb_file_src->tlink);
- target_tcon = tlink_tcon(smb_file_target->tlink);
-
- /* check if source and target are on same tree connection */
- if (src_tcon != target_tcon) {
- cifs_dbg(VFS, "file copy src and target on different volume\n");
- goto out_fput;
- }
-
- src_inode = src_file.file->f_dentry->d_inode;
-
- /*
- * Note: cifs case is easier than btrfs since server responsible for
- * checks for proper open modes and file type and if it wants
- * server could even support copy of range where source = target
- */
-
- /* so we do not deadlock racing two ioctls on same files */
- if (target_inode < src_inode) {
- mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD);
- } else {
- mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_CHILD);
- }
-
- /* determine range to clone */
- rc = -EINVAL;
- if (off + len > src_inode->i_size || off + len < off)
- goto out_unlock;
- if (len == 0)
- len = src_inode->i_size - off;
-
- cifs_dbg(FYI, "about to flush pages\n");
- /* should we flush first and last page first */
- truncate_inode_pages_range(&target_inode->i_data, destoff,
- PAGE_CACHE_ALIGN(destoff + len)-1);
-
- if (target_tcon->ses->server->ops->clone_range)
- rc = target_tcon->ses->server->ops->clone_range(xid,
- smb_file_src, smb_file_target, off, len, destoff);
-
- /* force revalidate of size and timestamps of target file now
- that target is updated on the server */
- CIFS_I(target_inode)->time = 0;
-out_unlock:
- /* although unlocking in the reverse order from locking is not
- strictly necessary here it is a little cleaner to be consistent */
- if (target_inode < src_inode) {
- mutex_unlock(&src_inode->i_mutex);
- mutex_unlock(&target_inode->i_mutex);
- } else {
- mutex_unlock(&target_inode->i_mutex);
- mutex_unlock(&src_inode->i_mutex);
- }
-out_fput:
- fdput(src_file);
-out_drop_write:
- mnt_drop_write_file(dst_file);
- return rc;
-}
-
long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
{
struct inode *inode = file_inode(filep);
int rc = -ENOTTY; /* strange error - but the precedent */
unsigned int xid;
struct cifs_sb_info *cifs_sb;
+#ifdef CONFIG_CIFS_POSIX
struct cifsFileInfo *pSMBFile = filep->private_data;
struct cifs_tcon *tcon;
__u64 ExtAttrBits = 0;
+ __u64 ExtAttrMask = 0;
__u64 caps;
+#endif /* CONFIG_CIFS_POSIX */
xid = get_xid();
@@ -156,14 +49,13 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
cifs_sb = CIFS_SB(inode->i_sb);
switch (command) {
+#ifdef CONFIG_CIFS_POSIX
case FS_IOC_GETFLAGS:
if (pSMBFile == NULL)
break;
tcon = tlink_tcon(pSMBFile->tlink);
caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
-#ifdef CONFIG_CIFS_POSIX
if (CIFS_UNIX_EXTATTR_CAP & caps) {
- __u64 ExtAttrMask = 0;
rc = CIFSGetExtAttr(xid, tcon,
pSMBFile->fid.netfid,
&ExtAttrBits, &ExtAttrMask);
@@ -171,53 +63,29 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
rc = put_user(ExtAttrBits &
FS_FL_USER_VISIBLE,
(int __user *)arg);
- if (rc != EOPNOTSUPP)
- break;
- }
-#endif /* CONFIG_CIFS_POSIX */
- rc = 0;
- if (CIFS_I(inode)->cifsAttrs & ATTR_COMPRESSED) {
- /* add in the compressed bit */
- ExtAttrBits = FS_COMPR_FL;
- rc = put_user(ExtAttrBits & FS_FL_USER_VISIBLE,
- (int __user *)arg);
}
break;
+
case FS_IOC_SETFLAGS:
if (pSMBFile == NULL)
break;
tcon = tlink_tcon(pSMBFile->tlink);
caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
-
- if (get_user(ExtAttrBits, (int __user *)arg)) {
- rc = -EFAULT;
- break;
- }
-
- /*
- * if (CIFS_UNIX_EXTATTR_CAP & caps)
- * rc = CIFSSetExtAttr(xid, tcon,
- * pSMBFile->fid.netfid,
- * extAttrBits,
- * &ExtAttrMask);
- * if (rc != EOPNOTSUPP)
- * break;
- */
-
- /* Currently only flag we can set is compressed flag */
- if ((ExtAttrBits & FS_COMPR_FL) == 0)
- break;
-
- /* Try to set compress flag */
- if (tcon->ses->server->ops->set_compression) {
- rc = tcon->ses->server->ops->set_compression(
- xid, tcon, pSMBFile);
- cifs_dbg(FYI, "set compress flag rc %d\n", rc);
+ if (CIFS_UNIX_EXTATTR_CAP & caps) {
+ if (get_user(ExtAttrBits, (int __user *)arg)) {
+ rc = -EFAULT;
+ break;
+ }
+ /*
+ * rc = CIFSGetExtAttr(xid, tcon,
+ * pSMBFile->fid.netfid,
+ * extAttrBits,
+ * &ExtAttrMask);
+ */
}
+ cifs_dbg(FYI, "set flags not implemented yet\n");
break;
- case CIFS_IOC_COPYCHUNK_FILE:
- rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0);
- break;
+#endif /* CONFIG_CIFS_POSIX */
default:
cifs_dbg(FYI, "unsupported ioctl\n");
break;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index cc02347..7e36ceb 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -621,3 +621,10 @@ symlink_exit:
free_xid(xid);
return rc;
}
+
+void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie)
+{
+ char *p = nd_get_link(nd);
+ if (!IS_ERR(p))
+ kfree(p);
+}
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 2f9f379..138a011 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -278,7 +278,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
}
static int
-check_smb_hdr(struct smb_hdr *smb)
+check_smb_hdr(struct smb_hdr *smb, __u16 mid)
{
/* does it have the right SMB "signature" ? */
if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) {
@@ -287,6 +287,13 @@ check_smb_hdr(struct smb_hdr *smb)
return 1;
}
+ /* Make sure that message ids match */
+ if (mid != smb->Mid) {
+ cifs_dbg(VFS, "Mids do not match. received=%u expected=%u\n",
+ smb->Mid, mid);
+ return 1;
+ }
+
/* if it's a response then accept */
if (smb->Flags & SMBFLG_RESPONSE)
return 0;
@@ -295,8 +302,7 @@ check_smb_hdr(struct smb_hdr *smb)
if (smb->Command == SMB_COM_LOCKING_ANDX)
return 0;
- cifs_dbg(VFS, "Server sent request, not response. mid=%u\n",
- get_mid(smb));
+ cifs_dbg(VFS, "Server sent request, not response. mid=%u\n", smb->Mid);
return 1;
}
@@ -304,6 +310,7 @@ int
checkSMB(char *buf, unsigned int total_read)
{
struct smb_hdr *smb = (struct smb_hdr *)buf;
+ __u16 mid = smb->Mid;
__u32 rfclen = be32_to_cpu(smb->smb_buf_length);
__u32 clc_len; /* calculated length */
cifs_dbg(FYI, "checkSMB Length: 0x%x, smb_buf_length: 0x%x\n",
@@ -341,7 +348,7 @@ checkSMB(char *buf, unsigned int total_read)
}
/* otherwise, there is enough to get to the BCC */
- if (check_smb_hdr(smb))
+ if (check_smb_hdr(smb, mid))
return -EIO;
clc_len = smbCalcSize(smb);
@@ -352,7 +359,6 @@ checkSMB(char *buf, unsigned int total_read)
}
if (4 + rfclen != clc_len) {
- __u16 mid = get_mid(smb);
/* check if bcc wrapped around for large read responses */
if ((rfclen > 64 * 1024) && (rfclen > clc_len)) {
/* check if lengths match mod 64K */
@@ -360,11 +366,11 @@ checkSMB(char *buf, unsigned int total_read)
return 0; /* bcc wrapped */
}
cifs_dbg(FYI, "Calculated size %u vs length %u mismatch for mid=%u\n",
- clc_len, 4 + rfclen, mid);
+ clc_len, 4 + rfclen, smb->Mid);
if (4 + rfclen < clc_len) {
cifs_dbg(VFS, "RFC1001 size %u smaller than SMB for mid=%u\n",
- rfclen, mid);
+ rfclen, smb->Mid);
return -EIO;
} else if (rfclen > clc_len + 512) {
/*
@@ -377,7 +383,7 @@ checkSMB(char *buf, unsigned int total_read)
* data to 512 bytes.
*/
cifs_dbg(VFS, "RFC1001 size %u more than 512 bytes larger than SMB for mid=%u\n",
- rfclen, mid);
+ rfclen, smb->Mid);
return -EIO;
}
}
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 0498845..651a527 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -51,7 +51,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = {
{ERRnoaccess, -EACCES},
{ERRbadfid, -EBADF},
{ERRbadmcb, -EIO},
- {ERRnomem, -EREMOTEIO},
+ {ERRnomem, -ENOMEM},
{ERRbadmem, -EFAULT},
{ERRbadenv, -EFAULT},
{ERRbadformat, -EINVAL},
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 5940eca..53a75f3 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -134,6 +134,22 @@ out:
dput(dentry);
}
+/*
+ * Is it possible that this directory might turn out to be a DFS referral
+ * once we go to try and use it?
+ */
+static bool
+cifs_dfs_is_possible(struct cifs_sb_info *cifs_sb)
+{
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+
+ if (tcon->Flags & SMB_SHARE_IS_IN_DFS)
+ return true;
+#endif
+ return false;
+}
+
static void
cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
{
@@ -143,19 +159,27 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
fattr->cf_dtype = DT_DIR;
+ /*
+ * Windows CIFS servers generally make DFS referrals look
+ * like directories in FIND_* responses with the reparse
+ * attribute flag also set (since DFS junctions are
+ * reparse points). We must revalidate at least these
+ * directory inodes before trying to use them (if
+ * they are DFS we will get PATH_NOT_COVERED back
+ * when queried directly and can then try to connect
+ * to the DFS target)
+ */
+ if (cifs_dfs_is_possible(cifs_sb) &&
+ (fattr->cf_cifsattrs & ATTR_REPARSE))
+ fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
+ } else if (fattr->cf_cifsattrs & ATTR_REPARSE) {
+ fattr->cf_mode = S_IFLNK;
+ fattr->cf_dtype = DT_LNK;
} else {
fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
fattr->cf_dtype = DT_REG;
}
- /*
- * We need to revalidate it further to make a decision about whether it
- * is a symbolic link, DFS referral or a reparse point with a direct
- * access like junctions, deduplicated files, NFS symlinks.
- */
- if (fattr->cf_cifsattrs & ATTR_REPARSE)
- fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
-
/* non-unix readdir doesn't provide nlink */
fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 5f5ba0d..8233b17 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -67,7 +67,7 @@ send_nt_cancel(struct TCP_Server_Info *server, void *buf,
mutex_unlock(&server->srv_mutex);
cifs_dbg(FYI, "issued NT_CANCEL for mid %u, rc = %d\n",
- get_mid(in_buf), rc);
+ in_buf->Mid, rc);
return rc;
}
@@ -101,7 +101,7 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer)
spin_lock(&GlobalMid_Lock);
list_for_each_entry(mid, &server->pending_mid_q, qhead) {
- if (compare_mid(mid->mid, buf) &&
+ if (mid->mid == buf->Mid &&
mid->mid_state == MID_REQUEST_SUBMITTED &&
le16_to_cpu(mid->command) == buf->Command) {
spin_unlock(&GlobalMid_Lock);
@@ -534,12 +534,10 @@ cifs_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
static int
cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb, const char *full_path,
- FILE_ALL_INFO *data, bool *adjustTZ, bool *symlink)
+ FILE_ALL_INFO *data, bool *adjustTZ)
{
int rc;
- *symlink = false;
-
/* could do find first instead but this returns more info */
rc = CIFSSMBQPathInfo(xid, tcon, full_path, data, 0 /* not legacy */,
cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
@@ -556,23 +554,6 @@ cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
CIFS_MOUNT_MAP_SPECIAL_CHR);
*adjustTZ = true;
}
-
- if (!rc && (le32_to_cpu(data->Attributes) & ATTR_REPARSE)) {
- int tmprc;
- int oplock = 0;
- __u16 netfid;
-
- /* Need to check if this is a symbolic link or not */
- tmprc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN,
- FILE_READ_ATTRIBUTES, 0, &netfid, &oplock,
- NULL, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (tmprc == -EOPNOTSUPP)
- *symlink = true;
- else
- CIFSSMBClose(xid, tcon, netfid);
- }
-
return rc;
}
@@ -826,13 +807,6 @@ out:
}
static int
-cifs_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
- struct cifsFileInfo *cfile)
-{
- return CIFSSMB_set_compression(xid, tcon, cfile->fid.netfid);
-}
-
-static int
cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
const char *path, struct cifs_sb_info *cifs_sb,
struct cifs_fid *fid, __u16 search_flags,
@@ -982,7 +956,6 @@ struct smb_version_operations smb1_operations = {
.set_path_size = CIFSSMBSetEOF,
.set_file_size = CIFSSMBSetFileSize,
.set_file_info = smb_set_file_info,
- .set_compression = cifs_set_compression,
.echo = CIFSSMBEcho,
.mkdir = CIFSSMBMkDir,
.mkdir_setinfo = cifs_mkdir_setinfo,
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 84c012a..78ff88c 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -123,13 +123,12 @@ move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src)
int
smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb, const char *full_path,
- FILE_ALL_INFO *data, bool *adjust_tz, bool *symlink)
+ FILE_ALL_INFO *data, bool *adjust_tz)
{
int rc;
struct smb2_file_all_info *smb2_data;
*adjust_tz = false;
- *symlink = false;
smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
GFP_KERNEL);
@@ -137,16 +136,9 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
return -ENOMEM;
rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path,
- FILE_READ_ATTRIBUTES, FILE_OPEN, 0,
- smb2_data, SMB2_OP_QUERY_INFO);
- if (rc == -EOPNOTSUPP) {
- *symlink = true;
- /* Failed on a symbolic link - query a reparse point info */
- rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path,
- FILE_READ_ATTRIBUTES, FILE_OPEN,
- OPEN_REPARSE_POINT, smb2_data,
- SMB2_OP_QUERY_INFO);
- }
+ FILE_READ_ATTRIBUTES, FILE_OPEN,
+ OPEN_REPARSE_POINT, smb2_data,
+ SMB2_OP_QUERY_INFO);
if (rc)
goto out;
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 94bd4fb..7c2f45c 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -306,7 +306,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_NONEXISTENT_SECTOR, -EIO, "STATUS_NONEXISTENT_SECTOR"},
{STATUS_MORE_PROCESSING_REQUIRED, -EIO,
"STATUS_MORE_PROCESSING_REQUIRED"},
- {STATUS_NO_MEMORY, -EREMOTEIO, "STATUS_NO_MEMORY"},
+ {STATUS_NO_MEMORY, -ENOMEM, "STATUS_NO_MEMORY"},
{STATUS_CONFLICTING_ADDRESSES, -EADDRINUSE,
"STATUS_CONFLICTING_ADDRESSES"},
{STATUS_NOT_MAPPED_VIEW, -EIO, "STATUS_NOT_MAPPED_VIEW"},
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 757da3e..861b332 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -209,94 +209,6 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
return rsize;
}
-#ifdef CONFIG_CIFS_STATS2
-static int
-SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
-{
- int rc;
- unsigned int ret_data_len = 0;
- struct network_interface_info_ioctl_rsp *out_buf;
-
- rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
- FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */,
- NULL /* no data input */, 0 /* no data input */,
- (char **)&out_buf, &ret_data_len);
-
- if ((rc == 0) && (ret_data_len > 0)) {
- /* Dump info on first interface */
- cifs_dbg(FYI, "Adapter Capability 0x%x\t",
- le32_to_cpu(out_buf->Capability));
- cifs_dbg(FYI, "Link Speed %lld\n",
- le64_to_cpu(out_buf->LinkSpeed));
- } else
- cifs_dbg(VFS, "error %d on ioctl to get interface list\n", rc);
-
- return rc;
-}
-#endif /* STATS2 */
-
-static void
-smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
-{
- int rc;
- __le16 srch_path = 0; /* Null - open root of share */
- u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
- struct cifs_open_parms oparms;
- struct cifs_fid fid;
-
- oparms.tcon = tcon;
- oparms.desired_access = FILE_READ_ATTRIBUTES;
- oparms.disposition = FILE_OPEN;
- oparms.create_options = 0;
- oparms.fid = &fid;
- oparms.reconnect = false;
-
- rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL);
- if (rc)
- return;
-
-#ifdef CONFIG_CIFS_STATS2
- SMB3_request_interfaces(xid, tcon);
-#endif /* STATS2 */
-
- SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
- FS_ATTRIBUTE_INFORMATION);
- SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
- FS_DEVICE_INFORMATION);
- SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
- FS_SECTOR_SIZE_INFORMATION); /* SMB3 specific */
- SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
- return;
-}
-
-static void
-smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
-{
- int rc;
- __le16 srch_path = 0; /* Null - open root of share */
- u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
- struct cifs_open_parms oparms;
- struct cifs_fid fid;
-
- oparms.tcon = tcon;
- oparms.desired_access = FILE_READ_ATTRIBUTES;
- oparms.disposition = FILE_OPEN;
- oparms.create_options = 0;
- oparms.fid = &fid;
- oparms.reconnect = false;
-
- rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL);
- if (rc)
- return;
-
- SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
- FS_ATTRIBUTE_INFORMATION);
- SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
- FS_DEVICE_INFORMATION);
- SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
- return;
-}
-
static int
smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb, const char *full_path)
@@ -392,19 +304,7 @@ smb2_dump_share_caps(struct seq_file *m, struct cifs_tcon *tcon)
seq_puts(m, " ASYMMETRIC,");
if (tcon->capabilities == 0)
seq_puts(m, " None");
- if (tcon->ss_flags & SSINFO_FLAGS_ALIGNED_DEVICE)
- seq_puts(m, " Aligned,");
- if (tcon->ss_flags & SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE)
- seq_puts(m, " Partition Aligned,");
- if (tcon->ss_flags & SSINFO_FLAGS_NO_SEEK_PENALTY)
- seq_puts(m, " SSD,");
- if (tcon->ss_flags & SSINFO_FLAGS_TRIM_ENABLED)
- seq_puts(m, " TRIM-support,");
-
seq_printf(m, "\tShare Flags: 0x%x", tcon->share_flags);
- if (tcon->perf_sector_size)
- seq_printf(m, "\tOptimal sector size: 0x%x",
- tcon->perf_sector_size);
}
static void
@@ -494,157 +394,6 @@ smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon,
}
static int
-SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid,
- struct copychunk_ioctl *pcchunk)
-{
- int rc;
- unsigned int ret_data_len;
- struct resume_key_req *res_key;
-
- rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
- FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */,
- NULL, 0 /* no input */,
- (char **)&res_key, &ret_data_len);
-
- if (rc) {
- cifs_dbg(VFS, "refcpy ioctl error %d getting resume key\n", rc);
- goto req_res_key_exit;
- }
- if (ret_data_len < sizeof(struct resume_key_req)) {
- cifs_dbg(VFS, "Invalid refcopy resume key length\n");
- rc = -EINVAL;
- goto req_res_key_exit;
- }
- memcpy(pcchunk->SourceKey, res_key->ResumeKey, COPY_CHUNK_RES_KEY_SIZE);
-
-req_res_key_exit:
- kfree(res_key);
- return rc;
-}
-
-static int
-smb2_clone_range(const unsigned int xid,
- struct cifsFileInfo *srcfile,
- struct cifsFileInfo *trgtfile, u64 src_off,
- u64 len, u64 dest_off)
-{
- int rc;
- unsigned int ret_data_len;
- struct copychunk_ioctl *pcchunk;
- struct copychunk_ioctl_rsp *retbuf = NULL;
- struct cifs_tcon *tcon;
- int chunks_copied = 0;
- bool chunk_sizes_updated = false;
-
- pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL);
-
- if (pcchunk == NULL)
- return -ENOMEM;
-
- cifs_dbg(FYI, "in smb2_clone_range - about to call request res key\n");
- /* Request a key from the server to identify the source of the copy */
- rc = SMB2_request_res_key(xid, tlink_tcon(srcfile->tlink),
- srcfile->fid.persistent_fid,
- srcfile->fid.volatile_fid, pcchunk);
-
- /* Note: request_res_key sets res_key null only if rc !=0 */
- if (rc)
- goto cchunk_out;
-
- /* For now array only one chunk long, will make more flexible later */
- pcchunk->ChunkCount = __constant_cpu_to_le32(1);
- pcchunk->Reserved = 0;
- pcchunk->Reserved2 = 0;
-
- tcon = tlink_tcon(trgtfile->tlink);
-
- while (len > 0) {
- pcchunk->SourceOffset = cpu_to_le64(src_off);
- pcchunk->TargetOffset = cpu_to_le64(dest_off);
- pcchunk->Length =
- cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
-
- /* Request server copy to target from src identified by key */
- rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
- trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
- true /* is_fsctl */, (char *)pcchunk,
- sizeof(struct copychunk_ioctl), (char **)&retbuf,
- &ret_data_len);
- if (rc == 0) {
- if (ret_data_len !=
- sizeof(struct copychunk_ioctl_rsp)) {
- cifs_dbg(VFS, "invalid cchunk response size\n");
- rc = -EIO;
- goto cchunk_out;
- }
- if (retbuf->TotalBytesWritten == 0) {
- cifs_dbg(FYI, "no bytes copied\n");
- rc = -EIO;
- goto cchunk_out;
- }
- /*
- * Check if server claimed to write more than we asked
- */
- if (le32_to_cpu(retbuf->TotalBytesWritten) >
- le32_to_cpu(pcchunk->Length)) {
- cifs_dbg(VFS, "invalid copy chunk response\n");
- rc = -EIO;
- goto cchunk_out;
- }
- if (le32_to_cpu(retbuf->ChunksWritten) != 1) {
- cifs_dbg(VFS, "invalid num chunks written\n");
- rc = -EIO;
- goto cchunk_out;
- }
- chunks_copied++;
-
- src_off += le32_to_cpu(retbuf->TotalBytesWritten);
- dest_off += le32_to_cpu(retbuf->TotalBytesWritten);
- len -= le32_to_cpu(retbuf->TotalBytesWritten);
-
- cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %d\n",
- le32_to_cpu(retbuf->ChunksWritten),
- le32_to_cpu(retbuf->ChunkBytesWritten),
- le32_to_cpu(retbuf->TotalBytesWritten));
- } else if (rc == -EINVAL) {
- if (ret_data_len != sizeof(struct copychunk_ioctl_rsp))
- goto cchunk_out;
-
- cifs_dbg(FYI, "MaxChunks %d BytesChunk %d MaxCopy %d\n",
- le32_to_cpu(retbuf->ChunksWritten),
- le32_to_cpu(retbuf->ChunkBytesWritten),
- le32_to_cpu(retbuf->TotalBytesWritten));
-
- /*
- * Check if this is the first request using these sizes,
- * (ie check if copy succeed once with original sizes
- * and check if the server gave us different sizes after
- * we already updated max sizes on previous request).
- * if not then why is the server returning an error now
- */
- if ((chunks_copied != 0) || chunk_sizes_updated)
- goto cchunk_out;
-
- /* Check that server is not asking us to grow size */
- if (le32_to_cpu(retbuf->ChunkBytesWritten) <
- tcon->max_bytes_chunk)
- tcon->max_bytes_chunk =
- le32_to_cpu(retbuf->ChunkBytesWritten);
- else
- goto cchunk_out; /* server gave us bogus size */
-
- /* No need to change MaxChunks since already set to 1 */
- chunk_sizes_updated = true;
- }
- }
-
-cchunk_out:
- kfree(pcchunk);
- return rc;
-}
-
-static int
smb2_flush_file(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *fid)
{
@@ -697,14 +446,6 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
}
static int
-smb2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
- struct cifsFileInfo *cfile)
-{
- return SMB2_set_compression(xid, tcon, cfile->fid.persistent_fid,
- cfile->fid.volatile_fid);
-}
-
-static int
smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
const char *path, struct cifs_sb_info *cifs_sb,
struct cifs_fid *fid, __u16 search_flags,
@@ -1124,7 +865,6 @@ struct smb_version_operations smb20_operations = {
.logoff = SMB2_logoff,
.tree_connect = SMB2_tcon,
.tree_disconnect = SMB2_tdis,
- .qfs_tcon = smb2_qfs_tcon,
.is_path_accessible = smb2_is_path_accessible,
.can_echo = smb2_can_echo,
.echo = SMB2_echo,
@@ -1134,7 +874,6 @@ struct smb_version_operations smb20_operations = {
.set_path_size = smb2_set_path_size,
.set_file_size = smb2_set_file_size,
.set_file_info = smb2_set_file_info,
- .set_compression = smb2_set_compression,
.mkdir = smb2_mkdir,
.mkdir_setinfo = smb2_mkdir_setinfo,
.rmdir = smb2_rmdir,
@@ -1168,7 +907,6 @@ struct smb_version_operations smb20_operations = {
.set_oplock_level = smb2_set_oplock_level,
.create_lease_buf = smb2_create_lease_buf,
.parse_lease_buf = smb2_parse_lease_buf,
- .clone_range = smb2_clone_range,
};
struct smb_version_operations smb21_operations = {
@@ -1198,7 +936,6 @@ struct smb_version_operations smb21_operations = {
.logoff = SMB2_logoff,
.tree_connect = SMB2_tcon,
.tree_disconnect = SMB2_tdis,
- .qfs_tcon = smb2_qfs_tcon,
.is_path_accessible = smb2_is_path_accessible,
.can_echo = smb2_can_echo,
.echo = SMB2_echo,
@@ -1208,7 +945,6 @@ struct smb_version_operations smb21_operations = {
.set_path_size = smb2_set_path_size,
.set_file_size = smb2_set_file_size,
.set_file_info = smb2_set_file_info,
- .set_compression = smb2_set_compression,
.mkdir = smb2_mkdir,
.mkdir_setinfo = smb2_mkdir_setinfo,
.rmdir = smb2_rmdir,
@@ -1242,7 +978,6 @@ struct smb_version_operations smb21_operations = {
.set_oplock_level = smb21_set_oplock_level,
.create_lease_buf = smb2_create_lease_buf,
.parse_lease_buf = smb2_parse_lease_buf,
- .clone_range = smb2_clone_range,
};
struct smb_version_operations smb30_operations = {
@@ -1273,7 +1008,6 @@ struct smb_version_operations smb30_operations = {
.logoff = SMB2_logoff,
.tree_connect = SMB2_tcon,
.tree_disconnect = SMB2_tdis,
- .qfs_tcon = smb3_qfs_tcon,
.is_path_accessible = smb2_is_path_accessible,
.can_echo = smb2_can_echo,
.echo = SMB2_echo,
@@ -1283,7 +1017,6 @@ struct smb_version_operations smb30_operations = {
.set_path_size = smb2_set_path_size,
.set_file_size = smb2_set_file_size,
.set_file_info = smb2_set_file_info,
- .set_compression = smb2_set_compression,
.mkdir = smb2_mkdir,
.mkdir_setinfo = smb2_mkdir_setinfo,
.rmdir = smb2_rmdir,
@@ -1318,8 +1051,6 @@ struct smb_version_operations smb30_operations = {
.set_oplock_level = smb3_set_oplock_level,
.create_lease_buf = smb3_create_lease_buf,
.parse_lease_buf = smb3_parse_lease_buf,
- .clone_range = smb2_clone_range,
- .validate_negotiate = smb3_validate_negotiate,
};
struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 2013234..edccb52 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -454,81 +454,6 @@ neg_exit:
return rc;
}
-int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
-{
- int rc = 0;
- struct validate_negotiate_info_req vneg_inbuf;
- struct validate_negotiate_info_rsp *pneg_rsp;
- u32 rsplen;
-
- cifs_dbg(FYI, "validate negotiate\n");
-
- /*
- * validation ioctl must be signed, so no point sending this if we
- * can not sign it. We could eventually change this to selectively
- * sign just this, the first and only signed request on a connection.
- * This is good enough for now since a user who wants better security
- * would also enable signing on the mount. Having validation of
- * negotiate info for signed connections helps reduce attack vectors
- */
- if (tcon->ses->server->sign == false)
- return 0; /* validation requires signing */
-
- vneg_inbuf.Capabilities =
- cpu_to_le32(tcon->ses->server->vals->req_capabilities);
- memcpy(vneg_inbuf.Guid, cifs_client_guid, SMB2_CLIENT_GUID_SIZE);
-
- if (tcon->ses->sign)
- vneg_inbuf.SecurityMode =
- cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED);
- else if (global_secflags & CIFSSEC_MAY_SIGN)
- vneg_inbuf.SecurityMode =
- cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED);
- else
- vneg_inbuf.SecurityMode = 0;
-
- vneg_inbuf.DialectCount = cpu_to_le16(1);
- vneg_inbuf.Dialects[0] =
- cpu_to_le16(tcon->ses->server->vals->protocol_id);
-
- rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
- FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
- (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req),
- (char **)&pneg_rsp, &rsplen);
-
- if (rc != 0) {
- cifs_dbg(VFS, "validate protocol negotiate failed: %d\n", rc);
- return -EIO;
- }
-
- if (rsplen != sizeof(struct validate_negotiate_info_rsp)) {
- cifs_dbg(VFS, "invalid size of protocol negotiate response\n");
- return -EIO;
- }
-
- /* check validate negotiate info response matches what we got earlier */
- if (pneg_rsp->Dialect !=
- cpu_to_le16(tcon->ses->server->vals->protocol_id))
- goto vneg_out;
-
- if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode))
- goto vneg_out;
-
- /* do not validate server guid because not saved at negprot time yet */
-
- if ((le32_to_cpu(pneg_rsp->Capabilities) | SMB2_NT_FIND |
- SMB2_LARGE_FILES) != tcon->ses->server->capabilities)
- goto vneg_out;
-
- /* validate negotiate successful */
- cifs_dbg(FYI, "validate negotiate info successful\n");
- return 0;
-
-vneg_out:
- cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n");
- return -EIO;
-}
-
int
SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
const struct nls_table *nls_cp)
@@ -705,8 +630,6 @@ ssetup_ntlmssp_authenticate:
goto ssetup_exit;
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
- if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
- cifs_dbg(VFS, "SMB3 encryption not supported yet\n");
ssetup_exit:
free_rsp_buf(resp_buftype, rsp);
@@ -794,14 +717,6 @@ static inline void cifs_stats_fail_inc(struct cifs_tcon *tcon, uint16_t code)
#define MAX_SHARENAME_LENGTH (255 /* server */ + 80 /* share */ + 1 /* NULL */)
-/* These are similar values to what Windows uses */
-static inline void init_copy_chunk_defaults(struct cifs_tcon *tcon)
-{
- tcon->max_chunks = 256;
- tcon->max_bytes_chunk = 1048576;
- tcon->max_bytes_copy = 16777216;
-}
-
int
SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
struct cifs_tcon *tcon, const struct nls_table *cp)
@@ -903,9 +818,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) &&
((tcon->share_flags & SHI1005_FLAGS_DFS) == 0))
cifs_dbg(VFS, "DFS capability contradicts DFS flag\n");
- init_copy_chunk_defaults(tcon);
- if (tcon->ses->server->ops->validate_negotiate)
- rc = tcon->ses->server->ops->validate_negotiate(xid, tcon);
+
tcon_exit:
free_rsp_buf(resp_buftype, rsp);
kfree(unc_path);
@@ -1224,7 +1137,6 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
cifs_dbg(FYI, "SMB2 IOCTL\n");
- *out_data = NULL;
/* zero out returned data len, in case of error */
if (plen)
*plen = 0;
@@ -1270,38 +1182,19 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
req->Flags = 0;
iov[0].iov_base = (char *)req;
+ /* 4 for rfc1002 length field */
+ iov[0].iov_len = get_rfc1002_length(req) + 4;
- /*
- * If no input data, the size of ioctl struct in
- * protocol spec still includes a 1 byte data buffer,
- * but if input data passed to ioctl, we do not
- * want to double count this, so we do not send
- * the dummy one byte of data in iovec[0] if sending
- * input data (in iovec[1]). We also must add 4 bytes
- * in first iovec to allow for rfc1002 length field.
- */
-
- if (indatalen) {
- iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
- inc_rfc1001_len(req, indatalen - 1);
- } else
- iov[0].iov_len = get_rfc1002_length(req) + 4;
-
+ if (indatalen)
+ inc_rfc1001_len(req, indatalen);
rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0);
rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base;
- if ((rc != 0) && (rc != -EINVAL)) {
+ if (rc != 0) {
if (tcon)
cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
goto ioctl_exit;
- } else if (rc == -EINVAL) {
- if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) &&
- (opcode != FSCTL_SRV_COPYCHUNK)) {
- if (tcon)
- cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
- goto ioctl_exit;
- }
}
/* check if caller wants to look at return data or just return rc */
@@ -1341,33 +1234,6 @@ ioctl_exit:
return rc;
}
-/*
- * Individual callers to ioctl worker function follow
- */
-
-int
-SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid)
-{
- int rc;
- char *res_key = NULL;
- struct compress_ioctl fsctl_input;
- char *ret_data = NULL;
-
- fsctl_input.CompressionState =
- __constant_cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
-
- rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
- FSCTL_SET_COMPRESSION, true /* is_fsctl */,
- (char *)&fsctl_input /* data input */,
- 2 /* in data len */, &ret_data /* out data */, NULL);
-
- cifs_dbg(FYI, "set compression rc %d\n", rc);
- kfree(res_key);
-
- return rc;
-}
-
int
SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid)
@@ -2238,9 +2104,11 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
rc = SendReceive2(xid, ses, iov, num, &resp_buftype, 0);
rsp = (struct smb2_set_info_rsp *)iov[0].iov_base;
- if (rc != 0)
+ if (rc != 0) {
cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE);
-
+ goto out;
+ }
+out:
free_rsp_buf(resp_buftype, rsp);
kfree(iov);
return rc;
@@ -2431,7 +2299,7 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, 0);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
- goto qfsinf_exit;
+ goto qinf_exit;
}
rsp = (struct smb2_query_info_rsp *)iov.iov_base;
@@ -2443,70 +2311,7 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
if (!rc)
copy_fs_info_to_kstatfs(info, fsdata);
-qfsinf_exit:
- free_rsp_buf(resp_buftype, iov.iov_base);
- return rc;
-}
-
-int
-SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid, int level)
-{
- struct smb2_query_info_rsp *rsp = NULL;
- struct kvec iov;
- int rc = 0;
- int resp_buftype, max_len, min_len;
- struct cifs_ses *ses = tcon->ses;
- unsigned int rsp_len, offset;
-
- if (level == FS_DEVICE_INFORMATION) {
- max_len = sizeof(FILE_SYSTEM_DEVICE_INFO);
- min_len = sizeof(FILE_SYSTEM_DEVICE_INFO);
- } else if (level == FS_ATTRIBUTE_INFORMATION) {
- max_len = sizeof(FILE_SYSTEM_ATTRIBUTE_INFO);
- min_len = MIN_FS_ATTR_INFO_SIZE;
- } else if (level == FS_SECTOR_SIZE_INFORMATION) {
- max_len = sizeof(struct smb3_fs_ss_info);
- min_len = sizeof(struct smb3_fs_ss_info);
- } else {
- cifs_dbg(FYI, "Invalid qfsinfo level %d\n", level);
- return -EINVAL;
- }
-
- rc = build_qfs_info_req(&iov, tcon, level, max_len,
- persistent_fid, volatile_fid);
- if (rc)
- return rc;
-
- rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, 0);
- if (rc) {
- cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
- goto qfsattr_exit;
- }
- rsp = (struct smb2_query_info_rsp *)iov.iov_base;
-
- rsp_len = le32_to_cpu(rsp->OutputBufferLength);
- offset = le16_to_cpu(rsp->OutputBufferOffset);
- rc = validate_buf(offset, rsp_len, &rsp->hdr, min_len);
- if (rc)
- goto qfsattr_exit;
-
- if (level == FS_ATTRIBUTE_INFORMATION)
- memcpy(&tcon->fsAttrInfo, 4 /* RFC1001 len */ + offset
- + (char *)&rsp->hdr, min_t(unsigned int,
- rsp_len, max_len));
- else if (level == FS_DEVICE_INFORMATION)
- memcpy(&tcon->fsDevInfo, 4 /* RFC1001 len */ + offset
- + (char *)&rsp->hdr, sizeof(FILE_SYSTEM_DEVICE_INFO));
- else if (level == FS_SECTOR_SIZE_INFORMATION) {
- struct smb3_fs_ss_info *ss_info = (struct smb3_fs_ss_info *)
- (4 /* RFC1001 len */ + offset + (char *)&rsp->hdr);
- tcon->ss_flags = le32_to_cpu(ss_info->Flags);
- tcon->perf_sector_size =
- le32_to_cpu(ss_info->PhysicalBytesPerSectorForPerf);
- }
-
-qfsattr_exit:
+qinf_exit:
free_rsp_buf(resp_buftype, iov.iov_base);
return rc;
}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 2022c54..b83d011 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -122,23 +122,6 @@ struct smb2_pdu {
__le16 StructureSize2; /* size of wct area (varies, request specific) */
} __packed;
-struct smb2_transform_hdr {
- __be32 smb2_buf_length; /* big endian on wire */
- /* length is only two or three bytes - with
- one or two byte type preceding it that MBZ */
- __u8 ProtocolId[4]; /* 0xFD 'S' 'M' 'B' */
- __u8 Signature[16];
- __u8 Nonce[11];
- __u8 Reserved[5];
- __le32 OriginalMessageSize;
- __u16 Reserved1;
- __le16 EncryptionAlgorithm;
- __u64 SessionId;
-} __packed;
-
-/* Encryption Algorithms */
-#define SMB2_ENCRYPTION_AES128_CCM __constant_cpu_to_le16(0x0001)
-
/*
* SMB2 flag definitions
*/
@@ -254,7 +237,6 @@ struct smb2_sess_setup_req {
/* Currently defined SessionFlags */
#define SMB2_SESSION_FLAG_IS_GUEST 0x0001
#define SMB2_SESSION_FLAG_IS_NULL 0x0002
-#define SMB2_SESSION_FLAG_ENCRYPT_DATA 0x0004
struct smb2_sess_setup_rsp {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 9 */
@@ -552,16 +534,9 @@ struct create_durable {
} Data;
} __packed;
-#define COPY_CHUNK_RES_KEY_SIZE 24
-struct resume_key_req {
- char ResumeKey[COPY_CHUNK_RES_KEY_SIZE];
- __le32 ContextLength; /* MBZ */
- char Context[0]; /* ignored, Windows sets to 4 bytes of zero */
-} __packed;
-
/* this goes in the ioctl buffer when doing a copychunk request */
struct copychunk_ioctl {
- char SourceKey[COPY_CHUNK_RES_KEY_SIZE];
+ char SourceKey[24];
__le32 ChunkCount; /* we are only sending 1 */
__le32 Reserved;
/* array will only be one chunk long for us */
@@ -571,25 +546,13 @@ struct copychunk_ioctl {
__u32 Reserved2;
} __packed;
-struct copychunk_ioctl_rsp {
- __le32 ChunksWritten;
- __le32 ChunkBytesWritten;
- __le32 TotalBytesWritten;
-} __packed;
-
-struct validate_negotiate_info_req {
+/* Response and Request are the same format */
+struct validate_negotiate_info {
__le32 Capabilities;
__u8 Guid[SMB2_CLIENT_GUID_SIZE];
__le16 SecurityMode;
__le16 DialectCount;
- __le16 Dialects[1]; /* dialect (someday maybe list) client asked for */
-} __packed;
-
-struct validate_negotiate_info_rsp {
- __le32 Capabilities;
- __u8 Guid[SMB2_CLIENT_GUID_SIZE];
- __le16 SecurityMode;
- __le16 Dialect; /* Dialect in use for the connection */
+ __le16 Dialect[1];
} __packed;
#define RSS_CAPABLE 0x00000001
@@ -606,10 +569,6 @@ struct network_interface_info_ioctl_rsp {
#define NO_FILE_ID 0xFFFFFFFFFFFFFFFFULL /* general ioctls to srv not to file */
-struct compress_ioctl {
- __le16 CompressionState; /* See cifspdu.h for possible flag values */
-} __packed;
-
struct smb2_ioctl_req {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 57 */
@@ -625,7 +584,7 @@ struct smb2_ioctl_req {
__le32 MaxOutputResponse;
__le32 Flags;
__u32 Reserved2;
- __u8 Buffer[0];
+ char Buffer[0];
} __packed;
struct smb2_ioctl_rsp {
@@ -911,16 +870,14 @@ struct smb2_lease_ack {
/* File System Information Classes */
#define FS_VOLUME_INFORMATION 1 /* Query */
-#define FS_LABEL_INFORMATION 2 /* Local only */
+#define FS_LABEL_INFORMATION 2 /* Set */
#define FS_SIZE_INFORMATION 3 /* Query */
#define FS_DEVICE_INFORMATION 4 /* Query */
#define FS_ATTRIBUTE_INFORMATION 5 /* Query */
#define FS_CONTROL_INFORMATION 6 /* Query, Set */
#define FS_FULL_SIZE_INFORMATION 7 /* Query */
#define FS_OBJECT_ID_INFORMATION 8 /* Query, Set */
-#define FS_DRIVER_PATH_INFORMATION 9 /* Local only */
-#define FS_VOLUME_FLAGS_INFORMATION 10 /* Local only */
-#define FS_SECTOR_SIZE_INFORMATION 11 /* SMB3 or later. Query */
+#define FS_DRIVER_PATH_INFORMATION 9 /* Query */
struct smb2_fs_full_size_info {
__le64 TotalAllocationUnits;
@@ -930,22 +887,6 @@ struct smb2_fs_full_size_info {
__le32 BytesPerSector;
} __packed;
-#define SSINFO_FLAGS_ALIGNED_DEVICE 0x00000001
-#define SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE 0x00000002
-#define SSINFO_FLAGS_NO_SEEK_PENALTY 0x00000004
-#define SSINFO_FLAGS_TRIM_ENABLED 0x00000008
-
-/* sector size info struct */
-struct smb3_fs_ss_info {
- __le32 LogicalBytesPerSector;
- __le32 PhysicalBytesPerSectorForAtomicity;
- __le32 PhysicalBytesPerSectorForPerf;
- __le32 FileSystemEffectivePhysicalBytesPerSectorForAtomicity;
- __le32 Flags;
- __le32 ByteOffsetForSectorAlignment;
- __le32 ByteOffsetForPartitionAlignment;
-} __packed;
-
/* partial list of QUERY INFO levels */
#define FILE_DIRECTORY_INFORMATION 1
#define FILE_FULL_DIRECTORY_INFORMATION 2
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 93adc64..e3fb480 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -61,7 +61,7 @@ extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst,
extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb,
const char *full_path, FILE_ALL_INFO *data,
- bool *adjust_tz, bool *symlink);
+ bool *adjust_tz);
extern int smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
const char *full_path, __u64 size,
struct cifs_sb_info *cifs_sb, bool set_alloc);
@@ -142,16 +142,12 @@ extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon,
extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid,
FILE_BASIC_INFO *buf);
-extern int SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid);
extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
const u64 persistent_fid, const u64 volatile_fid,
const __u8 oplock_level);
extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id,
struct kstatfs *FSData);
-extern int SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_file_id, u64 volatile_file_id, int lvl);
extern int SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon,
const __u64 persist_fid, const __u64 volatile_fid,
const __u32 pid, const __u64 length, const __u64 offset,
@@ -162,6 +158,5 @@ extern int smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
struct smb2_lock_element *buf);
extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
__u8 *lease_key, const __le32 lease_state);
-extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *);
#endif /* _SMB2PROTO_H */
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 59c748c..340abca 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -466,7 +466,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
static inline void
smb2_seq_num_into_buf(struct TCP_Server_Info *server, struct smb2_hdr *hdr)
{
- hdr->MessageId = get_next_mid64(server);
+ hdr->MessageId = get_next_mid(server);
}
static struct mid_q_entry *
@@ -516,19 +516,13 @@ smb2_get_mid_entry(struct cifs_ses *ses, struct smb2_hdr *buf,
return -EAGAIN;
}
- if (ses->status == CifsNew) {
+ if (ses->status != CifsGood) {
+ /* check if SMB2 session is bad because we are setting it up */
if ((buf->Command != SMB2_SESSION_SETUP) &&
(buf->Command != SMB2_NEGOTIATE))
return -EAGAIN;
/* else ok - we are setting up session */
}
-
- if (ses->status == CifsExiting) {
- if (buf->Command != SMB2_LOGOFF)
- return -EAGAIN;
- /* else ok - we are shutting down the session */
- }
-
*mid = smb2_mid_entry_alloc(buf, ses->server);
if (*mid == NULL)
return -ENOMEM;
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h
index 0e538b5..a4b2391f 100644
--- a/fs/cifs/smbfsctl.h
+++ b/fs/cifs/smbfsctl.h
@@ -90,7 +90,7 @@
#define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */
#define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */
#define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */
-#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204
+#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 /* BB add struct */
/* Perform server-side data movement */
#define FSCTL_SRV_COPYCHUNK 0x001440F2
#define FSCTL_SRV_COPYCHUNK_WRITE 0x001480F2
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index b375709..800b938 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -58,7 +58,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
return temp;
else {
memset(temp, 0, sizeof(struct mid_q_entry));
- temp->mid = get_mid(smb_buffer);
+ temp->mid = smb_buffer->Mid; /* always LE */
temp->pid = current->pid;
temp->command = cpu_to_le16(smb_buffer->Command);
cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command);
@@ -431,20 +431,13 @@ static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
return -EAGAIN;
}
- if (ses->status == CifsNew) {
+ if (ses->status != CifsGood) {
+ /* check if SMB session is bad because we are setting it up */
if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) &&
(in_buf->Command != SMB_COM_NEGOTIATE))
return -EAGAIN;
/* else ok - we are setting up session */
}
-
- if (ses->status == CifsExiting) {
- /* check if SMB session is bad because we are setting it up */
- if (in_buf->Command != SMB_COM_LOGOFF_ANDX)
- return -EAGAIN;
- /* else ok - we are shutting down session */
- }
-
*ppmidQ = AllocMidQEntry(in_buf, ses->server);
if (*ppmidQ == NULL)
return -ENOMEM;
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index e7550cb..cc0ea9f 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -40,7 +40,7 @@ extern const struct file_operations coda_ioctl_operations;
int coda_open(struct inode *i, struct file *f);
int coda_release(struct inode *i, struct file *f);
int coda_permission(struct inode *inode, int mask);
-int coda_revalidate_inode(struct inode *);
+int coda_revalidate_inode(struct dentry *);
int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
int coda_setattr(struct dentry *, struct iattr *);
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 5efbb5e..190effc 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -387,6 +387,9 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
host_file = cfi->cfi_container;
+ if (!host_file->f_op)
+ return -ENOTDIR;
+
if (host_file->f_op->iterate) {
struct inode *host_inode = file_inode(host_file);
mutex_lock(&host_inode->i_mutex);
@@ -563,12 +566,13 @@ static int coda_dentry_delete(const struct dentry * dentry)
* cache manager Venus issues a downcall to the kernel when this
* happens
*/
-int coda_revalidate_inode(struct inode *inode)
+int coda_revalidate_inode(struct dentry *dentry)
{
struct coda_vattr attr;
int error;
int old_mode;
ino_t old_ino;
+ struct inode *inode = dentry->d_inode;
struct coda_inode_info *cii = ITOC(inode);
if (!cii->c_flags)
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 9e83b77..380b798 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -36,7 +36,7 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
host_file = cfi->cfi_container;
- if (!host_file->f_op->read)
+ if (!host_file->f_op || !host_file->f_op->read)
return -EINVAL;
return host_file->f_op->read(host_file, buf, count, ppos);
@@ -75,7 +75,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
host_file = cfi->cfi_container;
- if (!host_file->f_op->write)
+ if (!host_file->f_op || !host_file->f_op->write)
return -EINVAL;
host_inode = file_inode(host_file);
@@ -105,7 +105,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
host_file = cfi->cfi_container;
- if (!host_file->f_op->mmap)
+ if (!host_file->f_op || !host_file->f_op->mmap)
return -ENODEV;
coda_inode = file_inode(coda_file);
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 506de34..4dcc0d8 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -257,7 +257,7 @@ static void coda_evict_inode(struct inode *inode)
int coda_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
- int err = coda_revalidate_inode(dentry->d_inode);
+ int err = coda_revalidate_inode(dentry);
if (!err)
generic_fillattr(dentry->d_inode, stat);
return err;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index dc52e13..5d19acf 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1583,13 +1583,13 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
/*FALL THROUGH*/
default:
- if (f.file->f_op->compat_ioctl) {
+ if (f.file->f_op && f.file->f_op->compat_ioctl) {
error = f.file->f_op->compat_ioctl(f.file, cmd, arg);
if (error != -ENOIOCTLCMD)
goto out_fput;
}
- if (!f.file->f_op->unlocked_ioctl)
+ if (!f.file->f_op || !f.file->f_op->unlocked_ioctl)
goto do_ioctl;
break;
}
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index e081acb..277bd1b 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -56,28 +56,29 @@ static void configfs_d_iput(struct dentry * dentry,
struct configfs_dirent *sd = dentry->d_fsdata;
if (sd) {
+ BUG_ON(sd->s_dentry != dentry);
/* Coordinate with configfs_readdir */
spin_lock(&configfs_dirent_lock);
- /* Coordinate with configfs_attach_attr where will increase
- * sd->s_count and update sd->s_dentry to new allocated one.
- * Only set sd->dentry to null when this dentry is the only
- * sd owner.
- * If not do so, configfs_d_iput may run just after
- * configfs_attach_attr and set sd->s_dentry to null
- * even it's still in use.
- */
- if (atomic_read(&sd->s_count) <= 2)
- sd->s_dentry = NULL;
-
+ sd->s_dentry = NULL;
spin_unlock(&configfs_dirent_lock);
configfs_put(sd);
}
iput(inode);
}
+/*
+ * We _must_ delete our dentries on last dput, as the chain-to-parent
+ * behavior is required to clear the parents of default_groups.
+ */
+static int configfs_d_delete(const struct dentry *dentry)
+{
+ return 1;
+}
+
const struct dentry_operations configfs_dentry_ops = {
.d_iput = configfs_d_iput,
- .d_delete = always_delete_dentry,
+ /* simple_delete_dentry() isn't exported */
+ .d_delete = configfs_d_delete,
};
#ifdef CONFIG_LOCKDEP
@@ -425,11 +426,8 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
struct configfs_attribute * attr = sd->s_element;
int error;
- spin_lock(&configfs_dirent_lock);
dentry->d_fsdata = configfs_get(sd);
sd->s_dentry = dentry;
- spin_unlock(&configfs_dirent_lock);
-
error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG,
configfs_init_file);
if (error) {
diff --git a/fs/coredump.c b/fs/coredump.c
index bc3fbcd..9bdeca1 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -485,7 +485,7 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
return err;
}
-void do_coredump(const siginfo_t *siginfo)
+void do_coredump(siginfo_t *siginfo)
{
struct core_state core_state;
struct core_name cn;
@@ -645,7 +645,7 @@ void do_coredump(const siginfo_t *siginfo)
*/
if (!uid_eq(inode->i_uid, current_fsuid()))
goto close_fail;
- if (!cprm.file->f_op->write)
+ if (!cprm.file->f_op || !cprm.file->f_op->write)
goto close_fail;
if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
goto close_fail;
@@ -685,55 +685,40 @@ fail:
* do on a core-file: use only these functions to write out all the
* necessary info.
*/
-int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
+int dump_write(struct file *file, const void *addr, int nr)
{
- struct file *file = cprm->file;
- loff_t pos = file->f_pos;
- ssize_t n;
- if (cprm->written + nr > cprm->limit)
- return 0;
- while (nr) {
- if (dump_interrupted())
- return 0;
- n = __kernel_write(file, addr, nr, &pos);
- if (n <= 0)
- return 0;
- file->f_pos = pos;
- cprm->written += n;
- nr -= n;
- }
- return 1;
+ return !dump_interrupted() &&
+ access_ok(VERIFY_READ, addr, nr) &&
+ file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}
-EXPORT_SYMBOL(dump_emit);
+EXPORT_SYMBOL(dump_write);
-int dump_skip(struct coredump_params *cprm, size_t nr)
+int dump_seek(struct file *file, loff_t off)
{
- static char zeroes[PAGE_SIZE];
- struct file *file = cprm->file;
+ int ret = 1;
+
if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
- if (cprm->written + nr > cprm->limit)
- return 0;
if (dump_interrupted() ||
- file->f_op->llseek(file, nr, SEEK_CUR) < 0)
+ file->f_op->llseek(file, off, SEEK_CUR) < 0)
return 0;
- cprm->written += nr;
- return 1;
} else {
- while (nr > PAGE_SIZE) {
- if (!dump_emit(cprm, zeroes, PAGE_SIZE))
- return 0;
- nr -= PAGE_SIZE;
+ char *buf = (char *)get_zeroed_page(GFP_KERNEL);
+
+ if (!buf)
+ return 0;
+ while (off > 0) {
+ unsigned long n = off;
+
+ if (n > PAGE_SIZE)
+ n = PAGE_SIZE;
+ if (!dump_write(file, buf, n)) {
+ ret = 0;
+ break;
+ }
+ off -= n;
}
- return dump_emit(cprm, zeroes, nr);
+ free_page((unsigned long)buf);
}
+ return ret;
}
-EXPORT_SYMBOL(dump_skip);
-
-int dump_align(struct coredump_params *cprm, int align)
-{
- unsigned mod = cprm->written & (align - 1);
- if (align & (align - 1))
- return 0;
- return mod ? dump_skip(cprm, align - mod) : 1;
-}
-EXPORT_SYMBOL(dump_align);
+EXPORT_SYMBOL(dump_seek);
diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig
index 11b29d4..cd06466 100644
--- a/fs/cramfs/Kconfig
+++ b/fs/cramfs/Kconfig
@@ -1,5 +1,5 @@
config CRAMFS
- tristate "Compressed ROM file system support (cramfs) (OBSOLETE)"
+ tristate "Compressed ROM file system support (cramfs)"
depends on BLOCK
select ZLIB_INFLATE
help
@@ -16,7 +16,4 @@ config CRAMFS
cramfs. Note that the root file system (the one containing the
directory /) cannot be compiled as a module.
- This filesystem is obsoleted by SquashFS, which is much better
- in terms of performance and features.
-
If unsure, say N.
diff --git a/fs/dcache.c b/fs/dcache.c
index 4bdb300..ae6ebb8 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -88,6 +88,35 @@ EXPORT_SYMBOL(rename_lock);
static struct kmem_cache *dentry_cache __read_mostly;
+/**
+ * read_seqbegin_or_lock - begin a sequence number check or locking block
+ * @lock: sequence lock
+ * @seq : sequence number to be checked
+ *
+ * First try it once optimistically without taking the lock. If that fails,
+ * take the lock. The sequence number is also used as a marker for deciding
+ * whether to be a reader (even) or writer (odd).
+ * N.B. seq must be initialized to an even number to begin with.
+ */
+static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
+{
+ if (!(*seq & 1)) /* Even */
+ *seq = read_seqbegin(lock);
+ else /* Odd */
+ read_seqlock_excl(lock);
+}
+
+static inline int need_seqretry(seqlock_t *lock, int seq)
+{
+ return !(seq & 1) && read_seqretry(lock, seq);
+}
+
+static inline void done_seqretry(seqlock_t *lock, int seq)
+{
+ if (seq & 1)
+ read_sequnlock_excl(lock);
+}
+
/*
* This is the single most critical data structure when it comes
* to the dcache: the hashtable for lookups. Somebody should try
@@ -96,6 +125,8 @@ static struct kmem_cache *dentry_cache __read_mostly;
* This hash-function tries to avoid losing too many bits of hash
* information, yet avoid using a prime hash-size or similar.
*/
+#define D_HASHBITS d_hash_shift
+#define D_HASHMASK d_hash_mask
static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
@@ -106,8 +137,8 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
unsigned int hash)
{
hash += (unsigned long) parent / L1_CACHE_BYTES;
- hash = hash + (hash >> d_hash_shift);
- return dentry_hashtable + (hash & d_hash_mask);
+ hash = hash + (hash >> D_HASHBITS);
+ return dentry_hashtable + (hash & D_HASHMASK);
}
/* Statistics gathering. */
@@ -312,7 +343,6 @@ static void dentry_unlink_inode(struct dentry * dentry)
__releases(dentry->d_inode->i_lock)
{
struct inode *inode = dentry->d_inode;
- __d_clear_type(dentry);
dentry->d_inode = NULL;
hlist_del_init(&dentry->d_alias);
dentry_rcuwalk_barrier(dentry);
@@ -438,7 +468,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
{
list_del(&dentry->d_u.d_child);
/*
- * Inform d_walk() that we are no longer attached to the
+ * Inform try_to_ascend() that we are no longer attached to the
* dentry tree
*/
dentry->d_flags |= DCACHE_DENTRY_KILLED;
@@ -453,6 +483,27 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
return parent;
}
+/*
+ * Unhash a dentry without inserting an RCU walk barrier or checking that
+ * dentry->d_lock is locked. The caller must take care of that, if
+ * appropriate.
+ */
+static void __d_shrink(struct dentry *dentry)
+{
+ if (!d_unhashed(dentry)) {
+ struct hlist_bl_head *b;
+ if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
+ b = &dentry->d_sb->s_anon;
+ else
+ b = d_hash(dentry->d_parent, dentry->d_name.hash);
+
+ hlist_bl_lock(b);
+ __hlist_bl_del(&dentry->d_hash);
+ dentry->d_hash.pprev = NULL;
+ hlist_bl_unlock(b);
+ }
+}
+
/**
* d_drop - drop a dentry
* @dentry: dentry to drop
@@ -471,21 +522,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
void __d_drop(struct dentry *dentry)
{
if (!d_unhashed(dentry)) {
- struct hlist_bl_head *b;
- /*
- * Hashed dentries are normally on the dentry hashtable,
- * with the exception of those newly allocated by
- * d_obtain_alias, which are always IS_ROOT:
- */
- if (unlikely(IS_ROOT(dentry)))
- b = &dentry->d_sb->s_anon;
- else
- b = d_hash(dentry->d_parent, dentry->d_name.hash);
-
- hlist_bl_lock(b);
- __hlist_bl_del(&dentry->d_hash);
- dentry->d_hash.pprev = NULL;
- hlist_bl_unlock(b);
+ __d_shrink(dentry);
dentry_rcuwalk_barrier(dentry);
}
}
@@ -1038,6 +1075,144 @@ void shrink_dcache_sb(struct super_block *sb)
}
EXPORT_SYMBOL(shrink_dcache_sb);
+/*
+ * destroy a single subtree of dentries for unmount
+ * - see the comments on shrink_dcache_for_umount() for a description of the
+ * locking
+ */
+static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
+{
+ struct dentry *parent;
+
+ BUG_ON(!IS_ROOT(dentry));
+
+ for (;;) {
+ /* descend to the first leaf in the current subtree */
+ while (!list_empty(&dentry->d_subdirs))
+ dentry = list_entry(dentry->d_subdirs.next,
+ struct dentry, d_u.d_child);
+
+ /* consume the dentries from this leaf up through its parents
+ * until we find one with children or run out altogether */
+ do {
+ struct inode *inode;
+
+ /*
+ * inform the fs that this dentry is about to be
+ * unhashed and destroyed.
+ */
+ if ((dentry->d_flags & DCACHE_OP_PRUNE) &&
+ !d_unhashed(dentry))
+ dentry->d_op->d_prune(dentry);
+
+ dentry_lru_del(dentry);
+ __d_shrink(dentry);
+
+ if (dentry->d_lockref.count != 0) {
+ printk(KERN_ERR
+ "BUG: Dentry %p{i=%lx,n=%s}"
+ " still in use (%d)"
+ " [unmount of %s %s]\n",
+ dentry,
+ dentry->d_inode ?
+ dentry->d_inode->i_ino : 0UL,
+ dentry->d_name.name,
+ dentry->d_lockref.count,
+ dentry->d_sb->s_type->name,
+ dentry->d_sb->s_id);
+ BUG();
+ }
+
+ if (IS_ROOT(dentry)) {
+ parent = NULL;
+ list_del(&dentry->d_u.d_child);
+ } else {
+ parent = dentry->d_parent;
+ parent->d_lockref.count--;
+ list_del(&dentry->d_u.d_child);
+ }
+
+ inode = dentry->d_inode;
+ if (inode) {
+ dentry->d_inode = NULL;
+ hlist_del_init(&dentry->d_alias);
+ if (dentry->d_op && dentry->d_op->d_iput)
+ dentry->d_op->d_iput(dentry, inode);
+ else
+ iput(inode);
+ }
+
+ d_free(dentry);
+
+ /* finished when we fall off the top of the tree,
+ * otherwise we ascend to the parent and move to the
+ * next sibling if there is one */
+ if (!parent)
+ return;
+ dentry = parent;
+ } while (list_empty(&dentry->d_subdirs));
+
+ dentry = list_entry(dentry->d_subdirs.next,
+ struct dentry, d_u.d_child);
+ }
+}
+
+/*
+ * destroy the dentries attached to a superblock on unmounting
+ * - we don't need to use dentry->d_lock because:
+ * - the superblock is detached from all mountings and open files, so the
+ * dentry trees will not be rearranged by the VFS
+ * - s_umount is write-locked, so the memory pressure shrinker will ignore
+ * any dentries belonging to this superblock that it comes across
+ * - the filesystem itself is no longer permitted to rearrange the dentries
+ * in this superblock
+ */
+void shrink_dcache_for_umount(struct super_block *sb)
+{
+ struct dentry *dentry;
+
+ if (down_read_trylock(&sb->s_umount))
+ BUG();
+
+ dentry = sb->s_root;
+ sb->s_root = NULL;
+ dentry->d_lockref.count--;
+ shrink_dcache_for_umount_subtree(dentry);
+
+ while (!hlist_bl_empty(&sb->s_anon)) {
+ dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
+ shrink_dcache_for_umount_subtree(dentry);
+ }
+}
+
+/*
+ * This tries to ascend one level of parenthood, but
+ * we can race with renaming, so we need to re-check
+ * the parenthood after dropping the lock and check
+ * that the sequence number still matches.
+ */
+static struct dentry *try_to_ascend(struct dentry *old, unsigned seq)
+{
+ struct dentry *new = old->d_parent;
+
+ rcu_read_lock();
+ spin_unlock(&old->d_lock);
+ spin_lock(&new->d_lock);
+
+ /*
+ * might go back up the wrong parent if we have had a rename
+ * or deletion
+ */
+ if (new != old->d_parent ||
+ (old->d_flags & DCACHE_DENTRY_KILLED) ||
+ need_seqretry(&rename_lock, seq)) {
+ spin_unlock(&new->d_lock);
+ new = NULL;
+ }
+ rcu_read_unlock();
+ return new;
+}
+
/**
* enum d_walk_ret - action to talke during tree walk
* @D_WALK_CONTINUE: contrinue walk
@@ -1126,24 +1301,9 @@ resume:
*/
if (this_parent != parent) {
struct dentry *child = this_parent;
- this_parent = child->d_parent;
-
- rcu_read_lock();
- spin_unlock(&child->d_lock);
- spin_lock(&this_parent->d_lock);
-
- /*
- * might go back up the wrong parent if we have had a rename
- * or deletion
- */
- if (this_parent != child->d_parent ||
- (child->d_flags & DCACHE_DENTRY_KILLED) ||
- need_seqretry(&rename_lock, seq)) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ this_parent = try_to_ascend(this_parent, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
@@ -1318,91 +1478,6 @@ void shrink_dcache_parent(struct dentry *parent)
}
EXPORT_SYMBOL(shrink_dcache_parent);
-static enum d_walk_ret umount_collect(void *_data, struct dentry *dentry)
-{
- struct select_data *data = _data;
- enum d_walk_ret ret = D_WALK_CONTINUE;
-
- if (dentry->d_lockref.count) {
- dentry_lru_del(dentry);
- if (likely(!list_empty(&dentry->d_subdirs)))
- goto out;
- if (dentry == data->start && dentry->d_lockref.count == 1)
- goto out;
- printk(KERN_ERR
- "BUG: Dentry %p{i=%lx,n=%s}"
- " still in use (%d)"
- " [unmount of %s %s]\n",
- dentry,
- dentry->d_inode ?
- dentry->d_inode->i_ino : 0UL,
- dentry->d_name.name,
- dentry->d_lockref.count,
- dentry->d_sb->s_type->name,
- dentry->d_sb->s_id);
- BUG();
- } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
- /*
- * We can't use d_lru_shrink_move() because we
- * need to get the global LRU lock and do the
- * LRU accounting.
- */
- if (dentry->d_flags & DCACHE_LRU_LIST)
- d_lru_del(dentry);
- d_shrink_add(dentry, &data->dispose);
- data->found++;
- ret = D_WALK_NORETRY;
- }
-out:
- if (data->found && need_resched())
- ret = D_WALK_QUIT;
- return ret;
-}
-
-/*
- * destroy the dentries attached to a superblock on unmounting
- */
-void shrink_dcache_for_umount(struct super_block *sb)
-{
- struct dentry *dentry;
-
- if (down_read_trylock(&sb->s_umount))
- BUG();
-
- dentry = sb->s_root;
- sb->s_root = NULL;
- for (;;) {
- struct select_data data;
-
- INIT_LIST_HEAD(&data.dispose);
- data.start = dentry;
- data.found = 0;
-
- d_walk(dentry, &data, umount_collect, NULL);
- if (!data.found)
- break;
-
- shrink_dentry_list(&data.dispose);
- cond_resched();
- }
- d_drop(dentry);
- dput(dentry);
-
- while (!hlist_bl_empty(&sb->s_anon)) {
- struct select_data data;
- dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
-
- INIT_LIST_HEAD(&data.dispose);
- data.start = NULL;
- data.found = 0;
-
- d_walk(dentry, &data, umount_collect, NULL);
- if (data.found)
- shrink_dentry_list(&data.dispose);
- cond_resched();
- }
-}
-
static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry)
{
struct select_data *data = _data;
@@ -1563,17 +1638,12 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
}
EXPORT_SYMBOL(d_alloc);
-/**
- * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
- * @sb: the superblock
- * @name: qstr of the name
- *
- * For a filesystem that just pins its dentries in memory and never
- * performs lookups at all, return an unhashed IS_ROOT dentry.
- */
struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
{
- return __d_alloc(sb, name);
+ struct dentry *dentry = __d_alloc(sb, name);
+ if (dentry)
+ dentry->d_flags |= DCACHE_DISCONNECTED;
+ return dentry;
}
EXPORT_SYMBOL(d_alloc_pseudo);
@@ -1615,42 +1685,14 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
}
EXPORT_SYMBOL(d_set_d_op);
-static unsigned d_flags_for_inode(struct inode *inode)
-{
- unsigned add_flags = DCACHE_FILE_TYPE;
-
- if (!inode)
- return DCACHE_MISS_TYPE;
-
- if (S_ISDIR(inode->i_mode)) {
- add_flags = DCACHE_DIRECTORY_TYPE;
- if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) {
- if (unlikely(!inode->i_op->lookup))
- add_flags = DCACHE_AUTODIR_TYPE;
- else
- inode->i_opflags |= IOP_LOOKUP;
- }
- } else if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
- if (unlikely(inode->i_op->follow_link))
- add_flags = DCACHE_SYMLINK_TYPE;
- else
- inode->i_opflags |= IOP_NOFOLLOW;
- }
-
- if (unlikely(IS_AUTOMOUNT(inode)))
- add_flags |= DCACHE_NEED_AUTOMOUNT;
- return add_flags;
-}
-
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
- unsigned add_flags = d_flags_for_inode(inode);
-
spin_lock(&dentry->d_lock);
- dentry->d_flags &= ~DCACHE_ENTRY_TYPE;
- dentry->d_flags |= add_flags;
- if (inode)
+ if (inode) {
+ if (unlikely(IS_AUTOMOUNT(inode)))
+ dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
hlist_add_head(&dentry->d_alias, &inode->i_dentry);
+ }
dentry->d_inode = inode;
dentry_rcuwalk_barrier(dentry);
spin_unlock(&dentry->d_lock);
@@ -1759,33 +1801,6 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
EXPORT_SYMBOL(d_instantiate_unique);
-/**
- * d_instantiate_no_diralias - instantiate a non-aliased dentry
- * @entry: dentry to complete
- * @inode: inode to attach to this dentry
- *
- * Fill in inode information in the entry. If a directory alias is found, then
- * return an error (and drop inode). Together with d_materialise_unique() this
- * guarantees that a directory inode may never have more than one alias.
- */
-int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode)
-{
- BUG_ON(!hlist_unhashed(&entry->d_alias));
-
- spin_lock(&inode->i_lock);
- if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
- spin_unlock(&inode->i_lock);
- iput(inode);
- return -EBUSY;
- }
- __d_instantiate(entry, inode);
- spin_unlock(&inode->i_lock);
- security_d_instantiate(entry, inode);
-
- return 0;
-}
-EXPORT_SYMBOL(d_instantiate_no_diralias);
-
struct dentry *d_make_root(struct inode *root_inode)
{
struct dentry *res = NULL;
@@ -1855,7 +1870,6 @@ struct dentry *d_obtain_alias(struct inode *inode)
static const struct qstr anonstring = QSTR_INIT("/", 1);
struct dentry *tmp;
struct dentry *res;
- unsigned add_flags;
if (!inode)
return ERR_PTR(-ESTALE);
@@ -1881,11 +1895,9 @@ struct dentry *d_obtain_alias(struct inode *inode)
}
/* attach a disconnected dentry */
- add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED;
-
spin_lock(&tmp->d_lock);
tmp->d_inode = inode;
- tmp->d_flags |= add_flags;
+ tmp->d_flags |= DCACHE_DISCONNECTED;
hlist_add_head(&tmp->d_alias, &inode->i_dentry);
hlist_bl_lock(&tmp->d_sb->s_anon);
hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
@@ -2562,7 +2574,7 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
dentry_lock_for_move(dentry, target);
write_seqcount_begin(&dentry->d_seq);
- write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
+ write_seqcount_begin(&target->d_seq);
/* __d_drop does write_seqcount_barrier, but they're OK to nest. */
@@ -2694,7 +2706,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
dentry_lock_for_move(anon, dentry);
write_seqcount_begin(&dentry->d_seq);
- write_seqcount_begin_nested(&anon->d_seq, DENTRY_D_LOCK_NESTED);
+ write_seqcount_begin(&anon->d_seq);
dparent = dentry->d_parent;
@@ -2713,6 +2725,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
spin_unlock(&dentry->d_lock);
/* anon->d_lock still locked, returns locked */
+ anon->d_flags &= ~DCACHE_DISCONNECTED;
}
/**
@@ -2868,36 +2881,27 @@ static int prepend_path(const struct path *path,
const struct path *root,
char **buffer, int *buflen)
{
- struct dentry *dentry;
- struct vfsmount *vfsmnt;
- struct mount *mnt;
+ struct dentry *dentry = path->dentry;
+ struct vfsmount *vfsmnt = path->mnt;
+ struct mount *mnt = real_mount(vfsmnt);
int error = 0;
- unsigned seq, m_seq = 0;
+ unsigned seq = 0;
char *bptr;
int blen;
rcu_read_lock();
-restart_mnt:
- read_seqbegin_or_lock(&mount_lock, &m_seq);
- seq = 0;
- rcu_read_lock();
restart:
bptr = *buffer;
blen = *buflen;
- error = 0;
- dentry = path->dentry;
- vfsmnt = path->mnt;
- mnt = real_mount(vfsmnt);
read_seqbegin_or_lock(&rename_lock, &seq);
while (dentry != root->dentry || vfsmnt != root->mnt) {
struct dentry * parent;
if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
- struct mount *parent = ACCESS_ONCE(mnt->mnt_parent);
/* Global root? */
- if (mnt != parent) {
- dentry = ACCESS_ONCE(mnt->mnt_mountpoint);
- mnt = parent;
+ if (mnt_has_parent(mnt)) {
+ dentry = mnt->mnt_mountpoint;
+ mnt = mnt->mnt_parent;
vfsmnt = &mnt->mnt;
continue;
}
@@ -2932,14 +2936,6 @@ restart:
}
done_seqretry(&rename_lock, seq);
- if (!(m_seq & 1))
- rcu_read_unlock();
- if (need_seqretry(&mount_lock, m_seq)) {
- m_seq = 1;
- goto restart_mnt;
- }
- done_seqretry(&mount_lock, m_seq);
-
if (error >= 0 && bptr == *buffer) {
if (--blen < 0)
error = -ENAMETOOLONG;
@@ -2975,7 +2971,9 @@ char *__d_path(const struct path *path,
int error;
prepend(&res, &buflen, "\0", 1);
+ br_read_lock(&vfsmount_lock);
error = prepend_path(path, root, &res, &buflen);
+ br_read_unlock(&vfsmount_lock);
if (error < 0)
return ERR_PTR(error);
@@ -2992,7 +2990,9 @@ char *d_absolute_path(const struct path *path,
int error;
prepend(&res, &buflen, "\0", 1);
+ br_read_lock(&vfsmount_lock);
error = prepend_path(path, &root, &res, &buflen);
+ br_read_unlock(&vfsmount_lock);
if (error > 1)
error = -EINVAL;
@@ -3067,7 +3067,9 @@ char *d_path(const struct path *path, char *buf, int buflen)
rcu_read_lock();
get_fs_root_rcu(current->fs, &root);
+ br_read_lock(&vfsmount_lock);
error = path_with_deleted(path, &root, &res, &buflen);
+ br_read_unlock(&vfsmount_lock);
rcu_read_unlock();
if (error < 0)
@@ -3222,6 +3224,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
error = -ENOENT;
+ br_read_lock(&vfsmount_lock);
if (!d_unlinked(pwd.dentry)) {
unsigned long len;
char *cwd = page + PATH_MAX;
@@ -3229,6 +3232,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
prepend(&cwd, &buflen, "\0", 1);
error = prepend_path(&pwd, &root, &cwd, &buflen);
+ br_read_unlock(&vfsmount_lock);
rcu_read_unlock();
if (error < 0)
@@ -3249,6 +3253,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
error = -EFAULT;
}
} else {
+ br_read_unlock(&vfsmount_lock);
rcu_read_unlock();
}
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 9c0444c..c7c83ff 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -566,7 +566,8 @@ void debugfs_remove_recursive(struct dentry *dentry)
mutex_lock(&parent->d_inode->i_mutex);
if (child != dentry) {
- next = list_next_entry(child, d_u.d_child);
+ next = list_entry(child->d_u.d_child.next, struct dentry,
+ d_u.d_child);
goto up;
}
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index a726b9f..073d30b 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -498,7 +498,6 @@ static void devpts_kill_sb(struct super_block *sb)
{
struct pts_fs_info *fsi = DEVPTS_SB(sb);
- ida_destroy(&fsi->allocated_ptys);
kfree(fsi);
kill_litter_super(sb);
}
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index d5abafd..88556dc 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -706,7 +706,9 @@ static int lkb_idr_is_local(int id, void *p, void *data)
{
struct dlm_lkb *lkb = p;
- return lkb->lkb_nodeid == 0 && lkb->lkb_grmode != DLM_LOCK_IV;
+ if (!lkb->lkb_nodeid)
+ return 1;
+ return 0;
}
static int lkb_idr_is_any(int id, void *p, void *data)
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index e7cfbaf..60a3278 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -74,16 +74,14 @@ static int user_cmd(struct sk_buff *skb, struct genl_info *info)
return 0;
}
-static struct genl_ops dlm_nl_ops[] = {
- {
- .cmd = DLM_CMD_HELLO,
- .doit = user_cmd,
- },
+static struct genl_ops dlm_nl_ops = {
+ .cmd = DLM_CMD_HELLO,
+ .doit = user_cmd,
};
int __init dlm_netlink_init(void)
{
- return genl_register_family_with_ops(&family, dlm_nl_ops);
+ return genl_register_family_with_ops(&family, &dlm_nl_ops, 1);
}
void dlm_netlink_exit(void)
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 2f6735d..000eae2 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -392,7 +392,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
wait_for_completion(&ecr->completion);
rc = ecr->rc;
- reinit_completion(&ecr->completion);
+ INIT_COMPLETION(ecr->completion);
}
out:
ablkcipher_request_free(req);
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 4000f6b..bf12ba5 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -44,15 +44,15 @@
*/
static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
{
- struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
- int rc;
-
- if (!(lower_dentry->d_flags & DCACHE_OP_REVALIDATE))
- return 1;
+ struct dentry *lower_dentry;
+ int rc = 1;
if (flags & LOOKUP_RCU)
return -ECHILD;
+ lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
+ goto out;
rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
if (dentry->d_inode) {
struct inode *lower_inode =
@@ -60,17 +60,12 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
fsstack_copy_attr_all(dentry->d_inode, lower_inode);
}
+out:
return rc;
}
struct kmem_cache *ecryptfs_dentry_info_cache;
-static void ecryptfs_dentry_free_rcu(struct rcu_head *head)
-{
- kmem_cache_free(ecryptfs_dentry_info_cache,
- container_of(head, struct ecryptfs_dentry_info, rcu));
-}
-
/**
* ecryptfs_d_release
* @dentry: The ecryptfs dentry
@@ -79,11 +74,15 @@ static void ecryptfs_dentry_free_rcu(struct rcu_head *head)
*/
static void ecryptfs_d_release(struct dentry *dentry)
{
- struct ecryptfs_dentry_info *p = dentry->d_fsdata;
- if (p) {
- path_put(&p->lower_path);
- call_rcu(&p->rcu, ecryptfs_dentry_free_rcu);
+ if (ecryptfs_dentry_to_private(dentry)) {
+ if (ecryptfs_dentry_to_lower(dentry)) {
+ dput(ecryptfs_dentry_to_lower(dentry));
+ mntput(ecryptfs_dentry_to_lower_mnt(dentry));
+ }
+ kmem_cache_free(ecryptfs_dentry_info_cache,
+ ecryptfs_dentry_to_private(dentry));
}
+ return;
}
const struct dentry_operations ecryptfs_dops = {
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 90d1882..df19d34 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -261,10 +261,7 @@ struct ecryptfs_inode_info {
* vfsmount too. */
struct ecryptfs_dentry_info {
struct path lower_path;
- union {
- struct ecryptfs_crypt_stat *crypt_stat;
- struct rcu_head rcu;
- };
+ struct ecryptfs_crypt_stat *crypt_stat;
};
/**
@@ -515,6 +512,13 @@ ecryptfs_dentry_to_lower(struct dentry *dentry)
return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry;
}
+static inline void
+ecryptfs_set_dentry_lower(struct dentry *dentry, struct dentry *lower_dentry)
+{
+ ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry =
+ lower_dentry;
+}
+
static inline struct vfsmount *
ecryptfs_dentry_to_lower_mnt(struct dentry *dentry)
{
@@ -527,6 +531,13 @@ ecryptfs_dentry_to_lower_path(struct dentry *dentry)
return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path;
}
+static inline void
+ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
+{
+ ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.mnt =
+ lower_mnt;
+}
+
#define ecryptfs_printk(type, fmt, arg...) \
__ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
__printf(1, 2)
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index b1eaa7a..992cf95 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -271,7 +271,7 @@ static int ecryptfs_flush(struct file *file, fl_owner_t td)
{
struct file *lower_file = ecryptfs_file_to_lower(file);
- if (lower_file->f_op->flush) {
+ if (lower_file->f_op && lower_file->f_op->flush) {
filemap_write_and_wait(file->f_mapping);
return lower_file->f_op->flush(lower_file, td);
}
@@ -305,7 +305,7 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
struct file *lower_file = NULL;
lower_file = ecryptfs_file_to_lower(file);
- if (lower_file->f_op->fasync)
+ if (lower_file->f_op && lower_file->f_op->fasync)
rc = lower_file->f_op->fasync(fd, lower_file, flag);
return rc;
}
@@ -313,10 +313,12 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
static long
ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
- struct file *lower_file = ecryptfs_file_to_lower(file);
+ struct file *lower_file = NULL;
long rc = -ENOTTY;
- if (lower_file->f_op->unlocked_ioctl)
+ if (ecryptfs_file_to_private(file))
+ lower_file = ecryptfs_file_to_lower(file);
+ if (lower_file && lower_file->f_op && lower_file->f_op->unlocked_ioctl)
rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
return rc;
}
@@ -325,10 +327,12 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
static long
ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
- struct file *lower_file = ecryptfs_file_to_lower(file);
+ struct file *lower_file = NULL;
long rc = -ENOIOCTLCMD;
- if (lower_file->f_op && lower_file->f_op->compat_ioctl)
+ if (ecryptfs_file_to_private(file))
+ lower_file = ecryptfs_file_to_lower(file);
+ if (lower_file && lower_file->f_op && lower_file->f_op->compat_ioctl)
rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
return rc;
}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index c36c448..67e9b63 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -153,7 +153,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
dget(lower_dentry);
lower_dir_dentry = lock_parent(lower_dentry);
- rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
+ rc = vfs_unlink(lower_dir_inode, lower_dentry);
if (rc) {
printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
goto out_unlock;
@@ -208,7 +208,7 @@ ecryptfs_do_create(struct inode *directory_inode,
inode = __ecryptfs_get_inode(lower_dentry->d_inode,
directory_inode->i_sb);
if (IS_ERR(inode)) {
- vfs_unlink(lower_dir_dentry->d_inode, lower_dentry, NULL);
+ vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
goto out_lock;
}
fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode);
@@ -361,8 +361,8 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
BUG_ON(!d_count(lower_dentry));
ecryptfs_set_dentry_private(dentry, dentry_info);
- dentry_info->lower_path.mnt = lower_mnt;
- dentry_info->lower_path.dentry = lower_dentry;
+ ecryptfs_set_dentry_lower(dentry, lower_dentry);
+ ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt);
if (!lower_dentry->d_inode) {
/* We want to add because we couldn't find in lower */
@@ -475,7 +475,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
dget(lower_new_dentry);
lower_dir_dentry = lock_parent(lower_new_dentry);
rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
- lower_new_dentry, NULL);
+ lower_new_dentry);
if (rc || !lower_new_dentry->d_inode)
goto out_lock;
rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb);
@@ -640,8 +640,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out_lock;
}
rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
- lower_new_dir_dentry->d_inode, lower_new_dentry,
- NULL);
+ lower_new_dir_dentry->d_inode, lower_new_dentry);
if (rc)
goto out_lock;
if (target_inode)
@@ -704,6 +703,16 @@ out:
return NULL;
}
+static void
+ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
+{
+ char *buf = nd_get_link(nd);
+ if (!IS_ERR(buf)) {
+ /* Free the char* */
+ kfree(buf);
+ }
+}
+
/**
* upper_size_to_lower_size
* @crypt_stat: Crypt_stat associated with file
@@ -882,7 +891,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
mutex_lock(&lower_dentry->d_inode->i_mutex);
- rc = notify_change(lower_dentry, &lower_ia, NULL);
+ rc = notify_change(lower_dentry, &lower_ia);
mutex_unlock(&lower_dentry->d_inode->i_mutex);
}
return rc;
@@ -983,7 +992,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
lower_ia.ia_valid &= ~ATTR_MODE;
mutex_lock(&lower_dentry->d_inode->i_mutex);
- rc = notify_change(lower_dentry, &lower_ia, NULL);
+ rc = notify_change(lower_dentry, &lower_ia);
mutex_unlock(&lower_dentry->d_inode->i_mutex);
out:
fsstack_copy_attr_all(inode, lower_inode);
@@ -1112,7 +1121,7 @@ out:
const struct inode_operations ecryptfs_symlink_iops = {
.readlink = generic_readlink,
.follow_link = ecryptfs_follow_link,
- .put_link = kfree_put_link,
+ .put_link = ecryptfs_put_link,
.permission = ecryptfs_permission,
.setattr = ecryptfs_setattr,
.getattr = ecryptfs_getattr_link,
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 1b119d3..eb1c597 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -585,7 +585,8 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
/* ->kill_sb() will take care of root_info */
ecryptfs_set_dentry_private(s->s_root, root_info);
- root_info->lower_path = path;
+ ecryptfs_set_dentry_lower(s->s_root, path.dentry);
+ ecryptfs_set_dentry_lower_mnt(s->s_root, path.mnt);
s->s_flags |= MS_ACTIVE;
return dget(s->s_root);
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index becc725..a8766b8 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -83,10 +83,19 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr)
return 0;
}
+/*
+ * Retaining negative dentries for an in-memory filesystem just wastes
+ * memory and lookup time: arrange for them to be deleted immediately.
+ */
+static int efivarfs_delete_dentry(const struct dentry *dentry)
+{
+ return 1;
+}
+
static struct dentry_operations efivarfs_d_ops = {
.d_compare = efivarfs_d_compare,
.d_hash = efivarfs_d_hash,
- .d_delete = always_delete_dentry,
+ .d_delete = efivarfs_delete_dentry,
};
static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8b5e258..810c28f 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -41,7 +41,6 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/compat.h>
-#include <linux/rculist.h>
/*
* LOCKING:
@@ -134,12 +133,8 @@ struct nested_calls {
* of these on a server and we do not want this to take another cache line.
*/
struct epitem {
- union {
- /* RB tree node links this structure to the eventpoll RB tree */
- struct rb_node rbn;
- /* Used to free the struct epitem */
- struct rcu_head rcu;
- };
+ /* RB tree node used to link this structure to the eventpoll RB tree */
+ struct rb_node rbn;
/* List header used to link this structure to the eventpoll ready list */
struct list_head rdllink;
@@ -585,14 +580,14 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
* @sproc: Pointer to the scan callback.
* @priv: Private opaque data passed to the @sproc callback.
* @depth: The current depth of recursive f_op->poll calls.
- * @ep_locked: caller already holds ep->mtx
*
* Returns: The same integer error code returned by the @sproc callback.
*/
static int ep_scan_ready_list(struct eventpoll *ep,
int (*sproc)(struct eventpoll *,
struct list_head *, void *),
- void *priv, int depth, bool ep_locked)
+ void *priv,
+ int depth)
{
int error, pwake = 0;
unsigned long flags;
@@ -603,9 +598,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
* We need to lock this because we could be hit by
* eventpoll_release_file() and epoll_ctl().
*/
-
- if (!ep_locked)
- mutex_lock_nested(&ep->mtx, depth);
+ mutex_lock_nested(&ep->mtx, depth);
/*
* Steal the ready list, and re-init the original one to the
@@ -669,8 +662,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
}
spin_unlock_irqrestore(&ep->lock, flags);
- if (!ep_locked)
- mutex_unlock(&ep->mtx);
+ mutex_unlock(&ep->mtx);
/* We have to call this outside the lock */
if (pwake)
@@ -679,12 +671,6 @@ static int ep_scan_ready_list(struct eventpoll *ep,
return error;
}
-static void epi_rcu_free(struct rcu_head *head)
-{
- struct epitem *epi = container_of(head, struct epitem, rcu);
- kmem_cache_free(epi_cache, epi);
-}
-
/*
* Removes a "struct epitem" from the eventpoll RB tree and deallocates
* all the associated resources. Must be called with "mtx" held.
@@ -706,7 +692,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
/* Remove the current item from the list of epoll hooks */
spin_lock(&file->f_lock);
- list_del_rcu(&epi->fllink);
+ if (ep_is_linked(&epi->fllink))
+ list_del_init(&epi->fllink);
spin_unlock(&file->f_lock);
rb_erase(&epi->rbn, &ep->rbr);
@@ -717,14 +704,9 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
spin_unlock_irqrestore(&ep->lock, flags);
wakeup_source_unregister(ep_wakeup_source(epi));
- /*
- * At this point it is safe to free the eventpoll item. Use the union
- * field epi->rcu, since we are trying to minimize the size of
- * 'struct epitem'. The 'rbn' field is no longer in use. Protected by
- * ep->mtx. The rcu read side, reverse_path_check_proc(), does not make
- * use of the rbn field.
- */
- call_rcu(&epi->rcu, epi_rcu_free);
+
+ /* At this point it is safe to free the eventpoll item */
+ kmem_cache_free(epi_cache, epi);
atomic_long_dec(&ep->user->epoll_watches);
@@ -825,34 +807,15 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
return 0;
}
-static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
- poll_table *pt);
-
-struct readyevents_arg {
- struct eventpoll *ep;
- bool locked;
-};
-
static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
{
- struct readyevents_arg *arg = priv;
-
- return ep_scan_ready_list(arg->ep, ep_read_events_proc, NULL,
- call_nests + 1, arg->locked);
+ return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
}
static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
{
int pollflags;
struct eventpoll *ep = file->private_data;
- struct readyevents_arg arg;
-
- /*
- * During ep_insert() we already hold the ep->mtx for the tfile.
- * Prevent re-aquisition.
- */
- arg.locked = wait && (wait->_qproc == ep_ptable_queue_proc);
- arg.ep = ep;
/* Insert inside our poll wait queue */
poll_wait(file, &ep->poll_wait, wait);
@@ -864,7 +827,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
* could re-enter here.
*/
pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
- ep_poll_readyevents_proc, &arg, ep, current);
+ ep_poll_readyevents_proc, ep, ep, current);
return pollflags != -1 ? pollflags : 0;
}
@@ -909,6 +872,7 @@ static const struct file_operations eventpoll_fops = {
*/
void eventpoll_release_file(struct file *file)
{
+ struct list_head *lsthead = &file->f_ep_links;
struct eventpoll *ep;
struct epitem *epi;
@@ -926,12 +890,17 @@ void eventpoll_release_file(struct file *file)
* Besides, ep_remove() acquires the lock, so we can't hold it here.
*/
mutex_lock(&epmutex);
- list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) {
+
+ while (!list_empty(lsthead)) {
+ epi = list_first_entry(lsthead, struct epitem, fllink);
+
ep = epi->ep;
+ list_del_init(&epi->fllink);
mutex_lock_nested(&ep->mtx, 0);
ep_remove(ep, epi);
mutex_unlock(&ep->mtx);
}
+
mutex_unlock(&epmutex);
}
@@ -1169,9 +1138,7 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
struct file *child_file;
struct epitem *epi;
- /* CTL_DEL can remove links here, but that can't increase our count */
- rcu_read_lock();
- list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) {
+ list_for_each_entry(epi, &file->f_ep_links, fllink) {
child_file = epi->ep->file;
if (is_file_epoll(child_file)) {
if (list_empty(&child_file->f_ep_links)) {
@@ -1193,7 +1160,6 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
"file is not an ep!\n");
}
}
- rcu_read_unlock();
return error;
}
@@ -1265,7 +1231,7 @@ static noinline void ep_destroy_wakeup_source(struct epitem *epi)
* Must be called with "mtx" held.
*/
static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
- struct file *tfile, int fd, int full_check)
+ struct file *tfile, int fd)
{
int error, revents, pwake = 0;
unsigned long flags;
@@ -1320,7 +1286,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
/* Add the current item to the list of active epoll hook for this file */
spin_lock(&tfile->f_lock);
- list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
+ list_add_tail(&epi->fllink, &tfile->f_ep_links);
spin_unlock(&tfile->f_lock);
/*
@@ -1331,7 +1297,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
/* now check if we've created too many backpaths */
error = -EINVAL;
- if (full_check && reverse_path_check())
+ if (reverse_path_check())
goto error_remove_epi;
/* We have to drop the new item inside our item list to keep track of it */
@@ -1361,7 +1327,8 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
error_remove_epi:
spin_lock(&tfile->f_lock);
- list_del_rcu(&epi->fllink);
+ if (ep_is_linked(&epi->fllink))
+ list_del_init(&epi->fllink);
spin_unlock(&tfile->f_lock);
rb_erase(&epi->rbn, &ep->rbr);
@@ -1554,7 +1521,7 @@ static int ep_send_events(struct eventpoll *ep,
esed.maxevents = maxevents;
esed.events = events;
- return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0, false);
+ return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
}
static inline struct timespec ep_set_mstimeout(long ms)
@@ -1824,12 +1791,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
struct epoll_event __user *, event)
{
int error;
- int full_check = 0;
+ int did_lock_epmutex = 0;
struct fd f, tf;
struct eventpoll *ep;
struct epitem *epi;
struct epoll_event epds;
- struct eventpoll *tep = NULL;
error = -EFAULT;
if (ep_op_has_event(op) &&
@@ -1848,11 +1814,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
/* The target file descriptor must support poll */
error = -EPERM;
- if (!tf.file->f_op->poll)
+ if (!tf.file->f_op || !tf.file->f_op->poll)
goto error_tgt_fput;
/* Check if EPOLLWAKEUP is allowed */
- ep_take_care_of_epollwakeup(&epds);
+ if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND))
+ epds.events &= ~EPOLLWAKEUP;
/*
* We have to check that the file structure underneath the file descriptor
@@ -1877,41 +1844,27 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
* and hang them on the tfile_check_list, so we can check that we
* haven't created too many possible wakeup paths.
*
- * We do not need to take the global 'epumutex' on EPOLL_CTL_ADD when
- * the epoll file descriptor is attaching directly to a wakeup source,
- * unless the epoll file descriptor is nested. The purpose of taking the
- * 'epmutex' on add is to prevent complex toplogies such as loops and
- * deep wakeup paths from forming in parallel through multiple
- * EPOLL_CTL_ADD operations.
+ * We need to hold the epmutex across both ep_insert and ep_remove
+ * b/c we want to make sure we are looking at a coherent view of
+ * epoll network.
*/
- mutex_lock_nested(&ep->mtx, 0);
+ if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
+ mutex_lock(&epmutex);
+ did_lock_epmutex = 1;
+ }
if (op == EPOLL_CTL_ADD) {
- if (!list_empty(&f.file->f_ep_links) ||
- is_file_epoll(tf.file)) {
- full_check = 1;
- mutex_unlock(&ep->mtx);
- mutex_lock(&epmutex);
- if (is_file_epoll(tf.file)) {
- error = -ELOOP;
- if (ep_loop_check(ep, tf.file) != 0) {
- clear_tfile_check_list();
- goto error_tgt_fput;
- }
- } else
- list_add(&tf.file->f_tfile_llink,
- &tfile_check_list);
- mutex_lock_nested(&ep->mtx, 0);
- if (is_file_epoll(tf.file)) {
- tep = tf.file->private_data;
- mutex_lock_nested(&tep->mtx, 1);
+ if (is_file_epoll(tf.file)) {
+ error = -ELOOP;
+ if (ep_loop_check(ep, tf.file) != 0) {
+ clear_tfile_check_list();
+ goto error_tgt_fput;
}
- }
- }
- if (op == EPOLL_CTL_DEL && is_file_epoll(tf.file)) {
- tep = tf.file->private_data;
- mutex_lock_nested(&tep->mtx, 1);
+ } else
+ list_add(&tf.file->f_tfile_llink, &tfile_check_list);
}
+ mutex_lock_nested(&ep->mtx, 0);
+
/*
* Try to lookup the file inside our RB tree, Since we grabbed "mtx"
* above, we can be sure to be able to use the item looked up by
@@ -1924,11 +1877,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
case EPOLL_CTL_ADD:
if (!epi) {
epds.events |= POLLERR | POLLHUP;
- error = ep_insert(ep, &epds, tf.file, fd, full_check);
+ error = ep_insert(ep, &epds, tf.file, fd);
} else
error = -EEXIST;
- if (full_check)
- clear_tfile_check_list();
+ clear_tfile_check_list();
break;
case EPOLL_CTL_DEL:
if (epi)
@@ -1944,12 +1896,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
error = -ENOENT;
break;
}
- if (tep != NULL)
- mutex_unlock(&tep->mtx);
mutex_unlock(&ep->mtx);
error_tgt_fput:
- if (full_check)
+ if (did_lock_epmutex)
mutex_unlock(&epmutex);
fdput(tf);
diff --git a/fs/exec.c b/fs/exec.c
index 7ea097f..8875dd1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -106,7 +106,6 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
*/
SYSCALL_DEFINE1(uselib, const char __user *, library)
{
- struct linux_binfmt *fmt;
struct file *file;
struct filename *tmp = getname(library);
int error = PTR_ERR(tmp);
@@ -137,21 +136,24 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
fsnotify_open(file);
error = -ENOEXEC;
+ if(file->f_op) {
+ struct linux_binfmt * fmt;
- read_lock(&binfmt_lock);
- list_for_each_entry(fmt, &formats, lh) {
- if (!fmt->load_shlib)
- continue;
- if (!try_module_get(fmt->module))
- continue;
- read_unlock(&binfmt_lock);
- error = fmt->load_shlib(file);
read_lock(&binfmt_lock);
- put_binfmt(fmt);
- if (error != -ENOEXEC)
- break;
+ list_for_each_entry(fmt, &formats, lh) {
+ if (!fmt->load_shlib)
+ continue;
+ if (!try_module_get(fmt->module))
+ continue;
+ read_unlock(&binfmt_lock);
+ error = fmt->load_shlib(file);
+ read_lock(&binfmt_lock);
+ put_binfmt(fmt);
+ if (error != -ENOEXEC)
+ break;
+ }
+ read_unlock(&binfmt_lock);
}
- read_unlock(&binfmt_lock);
exit:
fput(file);
out:
@@ -1275,10 +1277,13 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
*/
int prepare_binprm(struct linux_binprm *bprm)
{
- struct inode *inode = file_inode(bprm->file);
- umode_t mode = inode->i_mode;
+ umode_t mode;
+ struct inode * inode = file_inode(bprm->file);
int retval;
+ mode = inode->i_mode;
+ if (bprm->file->f_op == NULL)
+ return -EACCES;
/* clear any previous set[ug]id data from a previous binary */
bprm->cred->euid = current_euid();
@@ -1380,6 +1385,10 @@ int search_binary_handler(struct linux_binprm *bprm)
if (retval)
return retval;
+ retval = audit_bprm(bprm);
+ if (retval)
+ return retval;
+
retval = -ENOENT;
retry:
read_lock(&binfmt_lock);
@@ -1427,7 +1436,6 @@ static int exec_binprm(struct linux_binprm *bprm)
ret = search_binary_handler(bprm);
if (ret >= 0) {
- audit_bprm(bprm);
trace_sched_process_exec(current, old_pid, bprm);
ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
current->did_exec = 1;
@@ -1539,7 +1547,6 @@ static int do_execve_common(const char *filename,
current->fs->in_exec = 0;
current->in_execve = 0;
acct_update_integrals(current);
- task_numa_free(current);
free_bprm(bprm);
if (displaced)
put_files_struct(displaced);
@@ -1661,12 +1668,6 @@ int __get_dumpable(unsigned long mm_flags)
return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret;
}
-/*
- * This returns the actual value of the suid_dumpable flag. For things
- * that are using this for checking for privilege transitions, it must
- * test against SUID_DUMP_USER rather than treating it as a boolean
- * value.
- */
int get_dumpable(struct mm_struct *mm)
{
return __get_dumpable(mm->flags);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 48a359d..a235f00 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -69,162 +69,145 @@ find_acceptable_alias(struct dentry *result,
return NULL;
}
-static bool dentry_connected(struct dentry *dentry)
+/*
+ * Find root of a disconnected subtree and return a reference to it.
+ */
+static struct dentry *
+find_disconnected_root(struct dentry *dentry)
{
dget(dentry);
- while (dentry->d_flags & DCACHE_DISCONNECTED) {
+ while (!IS_ROOT(dentry)) {
struct dentry *parent = dget_parent(dentry);
- dput(dentry);
- if (IS_ROOT(dentry)) {
+ if (!(parent->d_flags & DCACHE_DISCONNECTED)) {
dput(parent);
- return false;
+ break;
}
- dentry = parent;
- }
- dput(dentry);
- return true;
-}
-
-static void clear_disconnected(struct dentry *dentry)
-{
- dget(dentry);
- while (dentry->d_flags & DCACHE_DISCONNECTED) {
- struct dentry *parent = dget_parent(dentry);
-
- WARN_ON_ONCE(IS_ROOT(dentry));
-
- spin_lock(&dentry->d_lock);
- dentry->d_flags &= ~DCACHE_DISCONNECTED;
- spin_unlock(&dentry->d_lock);
dput(dentry);
dentry = parent;
}
- dput(dentry);
-}
-
-/*
- * Reconnect a directory dentry with its parent.
- *
- * This can return a dentry, or NULL, or an error.
- *
- * In the first case the returned dentry is the parent of the given
- * dentry, and may itself need to be reconnected to its parent.
- *
- * In the NULL case, a concurrent VFS operation has either renamed or
- * removed this directory. The concurrent operation has reconnected our
- * dentry, so we no longer need to.
- */
-static struct dentry *reconnect_one(struct vfsmount *mnt,
- struct dentry *dentry, char *nbuf)
-{
- struct dentry *parent;
- struct dentry *tmp;
- int err;
-
- parent = ERR_PTR(-EACCES);
- mutex_lock(&dentry->d_inode->i_mutex);
- if (mnt->mnt_sb->s_export_op->get_parent)
- parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
- mutex_unlock(&dentry->d_inode->i_mutex);
-
- if (IS_ERR(parent)) {
- dprintk("%s: get_parent of %ld failed, err %d\n",
- __func__, dentry->d_inode->i_ino, PTR_ERR(parent));
- return parent;
- }
-
- dprintk("%s: find name of %lu in %lu\n", __func__,
- dentry->d_inode->i_ino, parent->d_inode->i_ino);
- err = exportfs_get_name(mnt, parent, nbuf, dentry);
- if (err == -ENOENT)
- goto out_reconnected;
- if (err)
- goto out_err;
- dprintk("%s: found name: %s\n", __func__, nbuf);
- mutex_lock(&parent->d_inode->i_mutex);
- tmp = lookup_one_len(nbuf, parent, strlen(nbuf));
- mutex_unlock(&parent->d_inode->i_mutex);
- if (IS_ERR(tmp)) {
- dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
- goto out_err;
- }
- if (tmp != dentry) {
- dput(tmp);
- goto out_reconnected;
- }
- dput(tmp);
- if (IS_ROOT(dentry)) {
- err = -ESTALE;
- goto out_err;
- }
- return parent;
-
-out_err:
- dput(parent);
- return ERR_PTR(err);
-out_reconnected:
- dput(parent);
- /*
- * Someone must have renamed our entry into another parent, in
- * which case it has been reconnected by the rename.
- *
- * Or someone removed it entirely, in which case filehandle
- * lookup will succeed but the directory is now IS_DEAD and
- * subsequent operations on it will fail.
- *
- * Alternatively, maybe there was no race at all, and the
- * filesystem is just corrupt and gave us a parent that doesn't
- * actually contain any entry pointing to this inode. So,
- * double check that this worked and return -ESTALE if not:
- */
- if (!dentry_connected(dentry))
- return ERR_PTR(-ESTALE);
- return NULL;
+ return dentry;
}
/*
* Make sure target_dir is fully connected to the dentry tree.
*
- * On successful return, DCACHE_DISCONNECTED will be cleared on
- * target_dir, and target_dir->d_parent->...->d_parent will reach the
- * root of the filesystem.
- *
- * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected.
- * But the converse is not true: target_dir may have DCACHE_DISCONNECTED
- * set but already be connected. In that case we'll verify the
- * connection to root and then clear the flag.
- *
- * Note that target_dir could be removed by a concurrent operation. In
- * that case reconnect_path may still succeed with target_dir fully
- * connected, but further operations using the filehandle will fail when
- * necessary (due to S_DEAD being set on the directory).
+ * It may already be, as the flag isn't always updated when connection happens.
*/
static int
reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
{
- struct dentry *dentry, *parent;
+ int noprogress = 0;
+ int err = -ESTALE;
- dentry = dget(target_dir);
-
- while (dentry->d_flags & DCACHE_DISCONNECTED) {
- BUG_ON(dentry == mnt->mnt_sb->s_root);
+ /*
+ * It is possible that a confused file system might not let us complete
+ * the path to the root. For example, if get_parent returns a directory
+ * in which we cannot find a name for the child. While this implies a
+ * very sick filesystem we don't want it to cause knfsd to spin. Hence
+ * the noprogress counter. If we go through the loop 10 times (2 is
+ * probably enough) without getting anywhere, we just give up
+ */
+ while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) {
+ struct dentry *pd = find_disconnected_root(target_dir);
+
+ if (!IS_ROOT(pd)) {
+ /* must have found a connected parent - great */
+ spin_lock(&pd->d_lock);
+ pd->d_flags &= ~DCACHE_DISCONNECTED;
+ spin_unlock(&pd->d_lock);
+ noprogress = 0;
+ } else if (pd == mnt->mnt_sb->s_root) {
+ printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n");
+ spin_lock(&pd->d_lock);
+ pd->d_flags &= ~DCACHE_DISCONNECTED;
+ spin_unlock(&pd->d_lock);
+ noprogress = 0;
+ } else {
+ /*
+ * We have hit the top of a disconnected path, try to
+ * find parent and connect.
+ *
+ * Racing with some other process renaming a directory
+ * isn't much of a problem here. If someone renames
+ * the directory, it will end up properly connected,
+ * which is what we want
+ *
+ * Getting the parent can't be supported generically,
+ * the locking is too icky.
+ *
+ * Instead we just return EACCES. If server reboots
+ * or inodes get flushed, you lose
+ */
+ struct dentry *ppd = ERR_PTR(-EACCES);
+ struct dentry *npd;
+
+ mutex_lock(&pd->d_inode->i_mutex);
+ if (mnt->mnt_sb->s_export_op->get_parent)
+ ppd = mnt->mnt_sb->s_export_op->get_parent(pd);
+ mutex_unlock(&pd->d_inode->i_mutex);
+
+ if (IS_ERR(ppd)) {
+ err = PTR_ERR(ppd);
+ dprintk("%s: get_parent of %ld failed, err %d\n",
+ __func__, pd->d_inode->i_ino, err);
+ dput(pd);
+ break;
+ }
- if (IS_ROOT(dentry))
- parent = reconnect_one(mnt, dentry, nbuf);
- else
- parent = dget_parent(dentry);
+ dprintk("%s: find name of %lu in %lu\n", __func__,
+ pd->d_inode->i_ino, ppd->d_inode->i_ino);
+ err = exportfs_get_name(mnt, ppd, nbuf, pd);
+ if (err) {
+ dput(ppd);
+ dput(pd);
+ if (err == -ENOENT)
+ /* some race between get_parent and
+ * get_name? just try again
+ */
+ continue;
+ break;
+ }
+ dprintk("%s: found name: %s\n", __func__, nbuf);
+ mutex_lock(&ppd->d_inode->i_mutex);
+ npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
+ mutex_unlock(&ppd->d_inode->i_mutex);
+ if (IS_ERR(npd)) {
+ err = PTR_ERR(npd);
+ dprintk("%s: lookup failed: %d\n",
+ __func__, err);
+ dput(ppd);
+ dput(pd);
+ break;
+ }
+ /* we didn't really want npd, we really wanted
+ * a side-effect of the lookup.
+ * hopefully, npd == pd, though it isn't really
+ * a problem if it isn't
+ */
+ if (npd == pd)
+ noprogress = 0;
+ else
+ printk("%s: npd != pd\n", __func__);
+ dput(npd);
+ dput(ppd);
+ if (IS_ROOT(pd)) {
+ /* something went wrong, we have to give up */
+ dput(pd);
+ break;
+ }
+ }
+ dput(pd);
+ }
- if (!parent)
- break;
- dput(dentry);
- if (IS_ERR(parent))
- return PTR_ERR(parent);
- dentry = parent;
+ if (target_dir->d_flags & DCACHE_DISCONNECTED) {
+ /* something went wrong - oh-well */
+ if (!err)
+ err = -ESTALE;
+ return err;
}
- dput(dentry);
- clear_disconnected(target_dir);
+
return 0;
}
@@ -232,7 +215,7 @@ struct getdents_callback {
struct dir_context ctx;
char *name; /* name that was found. It already points to a
buffer NAME_MAX+1 is size */
- u64 ino; /* the inum we are looking for */
+ unsigned long ino; /* the inum we are looking for */
int found; /* inode matched? */
int sequence; /* sequence counter */
};
@@ -272,14 +255,10 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
struct inode *dir = path->dentry->d_inode;
int error;
struct file *file;
- struct kstat stat;
- struct path child_path = {
- .mnt = path->mnt,
- .dentry = child,
- };
struct getdents_callback buffer = {
.ctx.actor = filldir_one,
.name = name,
+ .ino = child->d_inode->i_ino
};
error = -ENOTDIR;
@@ -289,16 +268,6 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
if (!dir->i_fop)
goto out;
/*
- * inode->i_ino is unsigned long, kstat->ino is u64, so the
- * former would be insufficient on 32-bit hosts when the
- * filesystem supports 64-bit inode numbers. So we need to
- * actually call ->getattr, not just read i_ino:
- */
- error = vfs_getattr_nosec(&child_path, &stat);
- if (error)
- return error;
- buffer.ino = stat.ino;
- /*
* Open the directory ...
*/
file = dentry_open(path, O_RDONLY, cred);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 8a33764..c260de6 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -632,8 +632,6 @@ static int ext2_get_blocks(struct inode *inode,
int count = 0;
ext2_fsblk_t first_block = 0;
- BUG_ON(maxblocks == 0);
-
depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
if (depth == 0)
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index e98171a..1c33128 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -35,7 +35,6 @@ __ext2_get_block(struct inode *inode, pgoff_t pgoff, int create,
int rc;
memset(&tmp, 0, sizeof(struct buffer_head));
- tmp.b_size = 1 << inode->i_blkbits;
rc = ext2_get_block(inode, pgoff, &tmp, create);
*result = tmp.b_blocknr;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 37fd31e..c50c761 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2825,10 +2825,6 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
* bitmap, and an inode table.
*/
overhead += ngroups * (2 + sbi->s_itb_per_group);
-
- /* Add the journal blocks as well */
- overhead += sbi->s_journal->j_maxlen;
-
sbi->s_overhead_last = overhead;
smp_wmb();
sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 6ea7b14..dc5d572 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -640,7 +640,6 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
struct ext4_group_desc *gdp;
ext4_group_t i;
ext4_group_t ngroups = ext4_get_groups_count(sb);
- struct ext4_group_info *grp;
#ifdef EXT4FS_DEBUG
struct ext4_super_block *es;
ext4_fsblk_t bitmap_count;
@@ -656,11 +655,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
- grp = NULL;
- if (EXT4_SB(sb)->s_group_info)
- grp = ext4_get_group_info(sb, i);
- if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
- desc_count += ext4_free_group_clusters(sb, gdp);
+ desc_count += ext4_free_group_clusters(sb, gdp);
brelse(bitmap_bh);
bitmap_bh = ext4_read_block_bitmap(sb, i);
if (bitmap_bh == NULL)
@@ -684,11 +679,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
- grp = NULL;
- if (EXT4_SB(sb)->s_group_info)
- grp = ext4_get_group_info(sb, i);
- if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
- desc_count += ext4_free_group_clusters(sb, gdp);
+ desc_count += ext4_free_group_clusters(sb, gdp);
}
return desc_count;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e618503..af815ea 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -29,7 +29,6 @@
#include <linux/wait.h>
#include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h>
-#include <linux/ratelimit.h>
#include <crypto/hash.h>
#ifdef __KERNEL__
#include <linux/compat.h>
@@ -1315,11 +1314,6 @@ struct ext4_sb_info {
unsigned long s_es_last_sorted;
struct percpu_counter s_extent_cache_cnt;
spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
-
- /* Ratelimit ext4 messages. */
- struct ratelimit_state s_err_ratelimit_state;
- struct ratelimit_state s_warning_ratelimit_state;
- struct ratelimit_state s_msg_ratelimit_state;
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1402,18 +1396,7 @@ static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \
clear_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
}
-/* Add these declarations here only so that these functions can be
- * found by name. Otherwise, they are very hard to locate. */
-static inline int ext4_test_inode_flag(struct inode *inode, int bit);
-static inline void ext4_set_inode_flag(struct inode *inode, int bit);
-static inline void ext4_clear_inode_flag(struct inode *inode, int bit);
EXT4_INODE_BIT_FNS(flag, flags, 0)
-
-/* Add these declarations here only so that these functions can be
- * found by name. Otherwise, they are very hard to locate. */
-static inline int ext4_test_inode_state(struct inode *inode, int bit);
-static inline void ext4_set_inode_state(struct inode *inode, int bit);
-static inline void ext4_clear_inode_state(struct inode *inode, int bit);
#if (BITS_PER_LONG < 64)
EXT4_INODE_BIT_FNS(state, state_flags, 0)
@@ -2751,6 +2734,8 @@ extern void ext4_double_down_write_data_sem(struct inode *first,
struct inode *second);
extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
struct inode *donor_inode);
+void ext4_inode_double_lock(struct inode *inode1, struct inode *inode2);
+void ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2);
extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
__u64 start_orig, __u64 start_donor,
__u64 len, __u64 *moved_len);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 35f65cf..54d52af 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1666,7 +1666,7 @@ int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
struct ext4_extent *ex2)
{
- unsigned short ext1_ee_len, ext2_ee_len;
+ unsigned short ext1_ee_len, ext2_ee_len, max_len;
/*
* Make sure that both extents are initialized. We don't merge
@@ -1677,6 +1677,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2))
return 0;
+ if (ext4_ext_is_uninitialized(ex1))
+ max_len = EXT_UNINIT_MAX_LEN;
+ else
+ max_len = EXT_INIT_MAX_LEN;
+
ext1_ee_len = ext4_ext_get_actual_len(ex1);
ext2_ee_len = ext4_ext_get_actual_len(ex2);
@@ -1689,7 +1694,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
* as an RO_COMPAT feature, refuse to merge to extents if
* this can result in the top bit of ee_len being set.
*/
- if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
+ if (ext1_ee_len + ext2_ee_len > max_len)
return 0;
#ifdef AGGRESSIVE_TEST
if (ext1_ee_len >= 4)
@@ -1715,6 +1720,7 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
struct ext4_extent_header *eh;
unsigned int depth, len;
int merge_done = 0;
+ int uninitialized = 0;
depth = ext_depth(inode);
BUG_ON(path[depth].p_hdr == NULL);
@@ -1724,8 +1730,12 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
break;
/* merge with next extent! */
+ if (ext4_ext_is_uninitialized(ex))
+ uninitialized = 1;
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+ ext4_ext_get_actual_len(ex + 1));
+ if (uninitialized)
+ ext4_ext_mark_uninitialized(ex);
if (ex + 1 < EXT_LAST_EXTENT(eh)) {
len = (EXT_LAST_EXTENT(eh) - ex - 1)
@@ -1880,6 +1890,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
struct ext4_ext_path *npath = NULL;
int depth, len, err;
ext4_lblk_t next;
+ unsigned uninitialized = 0;
int mb_flags = 0;
if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
@@ -1931,8 +1942,18 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
if (err)
return err;
+ /*
+ * ext4_can_extents_be_merged should have checked
+ * that either both extents are uninitialized, or
+ * both aren't. Thus we need to check only one of
+ * them here.
+ */
+ if (ext4_ext_is_uninitialized(ex))
+ uninitialized = 1;
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+ ext4_ext_get_actual_len(newext));
+ if (uninitialized)
+ ext4_ext_mark_uninitialized(ex);
eh = path[depth].p_hdr;
nearex = ex;
goto merge;
@@ -1955,10 +1976,20 @@ prepend:
if (err)
return err;
+ /*
+ * ext4_can_extents_be_merged should have checked
+ * that either both extents are uninitialized, or
+ * both aren't. Thus we need to check only one of
+ * them here.
+ */
+ if (ext4_ext_is_uninitialized(ex))
+ uninitialized = 1;
ex->ee_block = newext->ee_block;
ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+ ext4_ext_get_actual_len(newext));
+ if (uninitialized)
+ ext4_ext_mark_uninitialized(ex);
eh = path[depth].p_hdr;
nearex = ex;
goto merge;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 0ee59a6..137193f 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -432,7 +432,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
grp = hinfo.hash;
} else
- grp = prandom_u32();
+ get_random_bytes(&grp, sizeof(grp));
parent_group = (unsigned)grp % ngroups;
for (i = 0; i < ngroups; i++) {
g = (parent_group + i) % ngroups;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index bae9875..d9ecbf1 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -994,9 +994,11 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
struct inode *dir = dentry->d_parent->d_inode;
const char *name = dentry->d_name.name;
int namelen = dentry->d_name.len;
+ unsigned short reclen;
int err;
struct ext4_dir_entry_2 *de;
+ reclen = EXT4_DIR_REC_LEN(namelen);
err = ext4_find_dest_de(dir, inode, iloc->bh,
inline_start, inline_size,
name, namelen, &de);
@@ -1440,7 +1442,6 @@ int ext4_read_inline_dir(struct file *file,
if (ret < 0)
goto out;
- ret = 0;
sb = inode->i_sb;
parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode);
offset = ctx->pos;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0757634..e274e9c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2178,9 +2178,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
*
* @handle - handle for journal operations
* @mpd - extent to map
- * @give_up_on_write - we set this to true iff there is a fatal error and there
- * is no hope of writing the data. The caller should discard
- * dirty pages to avoid infinite loops.
*
* The function maps extent starting at mpd->lblk of length mpd->len. If it is
* delayed, blocks are allocated, if it is unwritten, we may need to convert
@@ -2298,7 +2295,6 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
struct address_space *mapping = mpd->inode->i_mapping;
struct pagevec pvec;
unsigned int nr_pages;
- long left = mpd->wbc->nr_to_write;
pgoff_t index = mpd->first_page;
pgoff_t end = mpd->last_page;
int tag;
@@ -2334,17 +2330,6 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
if (page->index > end)
goto out;
- /*
- * Accumulated enough dirty pages? This doesn't apply
- * to WB_SYNC_ALL mode. For integrity sync we have to
- * keep going because someone may be concurrently
- * dirtying pages, and we might have synced a lot of
- * newly appeared dirty pages, but have not synced all
- * of the old dirty pages.
- */
- if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0)
- goto out;
-
/* If we can't merge this page, we are done. */
if (mpd->map.m_len > 0 && mpd->next_page != page->index)
goto out;
@@ -2379,7 +2364,19 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
if (err <= 0)
goto out;
err = 0;
- left--;
+
+ /*
+ * Accumulated enough dirty pages? This doesn't apply
+ * to WB_SYNC_ALL mode. For integrity sync we have to
+ * keep going because someone may be concurrently
+ * dirtying pages, and we might have synced a lot of
+ * newly appeared dirty pages, but have not synced all
+ * of the old dirty pages.
+ */
+ if (mpd->wbc->sync_mode == WB_SYNC_NONE &&
+ mpd->next_page - mpd->first_page >=
+ mpd->wbc->nr_to_write)
+ goto out;
}
pagevec_release(&pvec);
cond_resched();
@@ -2423,15 +2420,16 @@ static int ext4_writepages(struct address_space *mapping,
* because that could violate lock ordering on umount
*/
if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
- goto out_writepages;
+ return 0;
if (ext4_should_journal_data(inode)) {
struct blk_plug plug;
+ int ret;
blk_start_plug(&plug);
ret = write_cache_pages(mapping, wbc, __writepage, mapping);
blk_finish_plug(&plug);
- goto out_writepages;
+ return ret;
}
/*
@@ -2444,10 +2442,8 @@ static int ext4_writepages(struct address_space *mapping,
* *never* be called, so if that ever happens, we would want
* the stack trace.
*/
- if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
- ret = -EROFS;
- goto out_writepages;
- }
+ if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
+ return -EROFS;
if (ext4_should_dioread_nolock(inode)) {
/*
@@ -4694,15 +4690,6 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
generic_fillattr(inode, stat);
/*
- * If there is inline data in the inode, the inode will normally not
- * have data blocks allocated (it may have an external xattr block).
- * Report at least one sector for such files, so tools like tar, rsync,
- * others doen't incorrectly think the file is completely sparse.
- */
- if (unlikely(ext4_has_inline_data(inode)))
- stat->blocks += (stat->size + 511) >> 9;
-
- /*
* We can't update i_blocks if the block allocation is delayed
* otherwise in the case of system crash before the real block
* allocation is done, we will have i_blocks inconsistent with
@@ -4713,8 +4700,9 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
* blocks for this file.
*/
delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb),
- EXT4_I(inode)->i_reserved_data_blocks);
- stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits - 9);
+ EXT4_I(inode)->i_reserved_data_blocks);
+
+ stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9);
return 0;
}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 60589b6..a569d33 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -130,7 +130,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
/* Protect orig inodes against a truncate and make sure,
* that only 1 swap_inode_boot_loader is running. */
- lock_two_nondirectories(inode, inode_bl);
+ ext4_inode_double_lock(inode, inode_bl);
truncate_inode_pages(&inode->i_data, 0);
truncate_inode_pages(&inode_bl->i_data, 0);
@@ -205,7 +205,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
ext4_inode_resume_unlocked_dio(inode);
ext4_inode_resume_unlocked_dio(inode_bl);
- unlock_two_nondirectories(inode, inode_bl);
+ ext4_inode_double_unlock(inode, inode_bl);
iput(inode_bl);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4d113ef..a41e3ba 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4794,8 +4794,8 @@ do_more:
" group:%d block:%d count:%lu failed"
" with %d", block_group, bit, count,
err);
- } else
- EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
+ }
+
ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 04434ad..214461e 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -259,7 +259,7 @@ static unsigned int mmp_new_seq(void)
u32 new_seq;
do {
- new_seq = prandom_u32();
+ get_random_bytes(&new_seq, sizeof(u32));
} while (new_seq > EXT4_MMP_SEQ_MAX);
return new_seq;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 773b503..7fa4d85 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -1203,6 +1203,42 @@ mext_check_arguments(struct inode *orig_inode,
}
/**
+ * ext4_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
+ *
+ * @inode1: the inode structure
+ * @inode2: the inode structure
+ *
+ * Lock two inodes' i_mutex
+ */
+void
+ext4_inode_double_lock(struct inode *inode1, struct inode *inode2)
+{
+ BUG_ON(inode1 == inode2);
+ if (inode1 < inode2) {
+ mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+ } else {
+ mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
+ }
+}
+
+/**
+ * ext4_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
+ *
+ * @inode1: the inode that is released first
+ * @inode2: the inode that is released second
+ *
+ */
+
+void
+ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2)
+{
+ mutex_unlock(&inode1->i_mutex);
+ mutex_unlock(&inode2->i_mutex);
+}
+
+/**
* ext4_move_extents - Exchange the specified range of a file
*
* @o_filp: file structure of the original file
@@ -1291,7 +1327,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
return -EINVAL;
}
/* Protect orig and donor inodes against a truncate */
- lock_two_nondirectories(orig_inode, donor_inode);
+ ext4_inode_double_lock(orig_inode, donor_inode);
/* Wait for all existing dio workers */
ext4_inode_block_unlocked_dio(orig_inode);
@@ -1499,7 +1535,7 @@ out:
ext4_double_up_write_data_sem(orig_inode, donor_inode);
ext4_inode_resume_unlocked_dio(orig_inode);
ext4_inode_resume_unlocked_dio(donor_inode);
- unlock_two_nondirectories(orig_inode, donor_inode);
+ ext4_inode_double_unlock(orig_inode, donor_inode);
return ret;
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index d488f80..d7d0c7b 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -197,15 +197,14 @@ static void dump_completed_IO(struct inode *inode, struct list_head *head)
static void ext4_add_complete_io(ext4_io_end_t *io_end)
{
struct ext4_inode_info *ei = EXT4_I(io_end->inode);
- struct ext4_sb_info *sbi = EXT4_SB(io_end->inode->i_sb);
struct workqueue_struct *wq;
unsigned long flags;
/* Only reserved conversions from writeback should enter here */
WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
- WARN_ON(!io_end->handle && sbi->s_journal);
+ WARN_ON(!io_end->handle);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- wq = sbi->rsv_conversion_wq;
+ wq = EXT4_SB(io_end->inode->i_sb)->rsv_conversion_wq;
if (list_empty(&ei->i_rsv_conversion_list))
queue_work(wq, &ei->i_rsv_conversion_work);
list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c977f4e..2c2e6cb 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -411,26 +411,20 @@ static void ext4_handle_error(struct super_block *sb)
sb->s_id);
}
-#define ext4_error_ratelimit(sb) \
- ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \
- "EXT4-fs error")
-
void __ext4_error(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
- if (ext4_error_ratelimit(sb)) {
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- printk(KERN_CRIT
- "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
- sb->s_id, function, line, current->comm, &vaf);
- va_end(args);
- }
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
+ sb->s_id, function, line, current->comm, &vaf);
+ va_end(args);
save_error_info(sb, function, line);
+
ext4_handle_error(sb);
}
@@ -444,23 +438,22 @@ void __ext4_error_inode(struct inode *inode, const char *function,
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
es->s_last_error_block = cpu_to_le64(block);
- if (ext4_error_ratelimit(inode->i_sb)) {
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- if (block)
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
- "inode #%lu: block %llu: comm %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- block, current->comm, &vaf);
- else
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
- "inode #%lu: comm %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- current->comm, &vaf);
- va_end(args);
- }
save_error_info(inode->i_sb, function, line);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ if (block)
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
+ "inode #%lu: block %llu: comm %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ block, current->comm, &vaf);
+ else
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
+ "inode #%lu: comm %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ current->comm, &vaf);
+ va_end(args);
+
ext4_handle_error(inode->i_sb);
}
@@ -476,28 +469,27 @@ void __ext4_error_file(struct file *file, const char *function,
es = EXT4_SB(inode->i_sb)->s_es;
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
- if (ext4_error_ratelimit(inode->i_sb)) {
- path = d_path(&(file->f_path), pathname, sizeof(pathname));
- if (IS_ERR(path))
- path = "(unknown)";
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- if (block)
- printk(KERN_CRIT
- "EXT4-fs error (device %s): %s:%d: inode #%lu: "
- "block %llu: comm %s: path %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- block, current->comm, path, &vaf);
- else
- printk(KERN_CRIT
- "EXT4-fs error (device %s): %s:%d: inode #%lu: "
- "comm %s: path %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- current->comm, path, &vaf);
- va_end(args);
- }
save_error_info(inode->i_sb, function, line);
+ path = d_path(&(file->f_path), pathname, sizeof(pathname));
+ if (IS_ERR(path))
+ path = "(unknown)";
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ if (block)
+ printk(KERN_CRIT
+ "EXT4-fs error (device %s): %s:%d: inode #%lu: "
+ "block %llu: comm %s: path %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ block, current->comm, path, &vaf);
+ else
+ printk(KERN_CRIT
+ "EXT4-fs error (device %s): %s:%d: inode #%lu: "
+ "comm %s: path %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ current->comm, path, &vaf);
+ va_end(args);
+
ext4_handle_error(inode->i_sb);
}
@@ -551,13 +543,11 @@ void __ext4_std_error(struct super_block *sb, const char *function,
(sb->s_flags & MS_RDONLY))
return;
- if (ext4_error_ratelimit(sb)) {
- errstr = ext4_decode_error(sb, errno, nbuf);
- printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
- sb->s_id, function, line, errstr);
- }
-
+ errstr = ext4_decode_error(sb, errno, nbuf);
+ printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
+ sb->s_id, function, line, errstr);
save_error_info(sb, function, line);
+
ext4_handle_error(sb);
}
@@ -607,9 +597,6 @@ void __ext4_msg(struct super_block *sb,
struct va_format vaf;
va_list args;
- if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
- return;
-
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
@@ -623,10 +610,6 @@ void __ext4_warning(struct super_block *sb, const char *function,
struct va_format vaf;
va_list args;
- if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
- "EXT4-fs warning"))
- return;
-
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
@@ -650,20 +633,18 @@ __acquires(bitlock)
es->s_last_error_block = cpu_to_le64(block);
__save_error_info(sb, function, line);
- if (ext4_error_ratelimit(sb)) {
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
- sb->s_id, function, line, grp);
- if (ino)
- printk(KERN_CONT "inode %lu: ", ino);
- if (block)
- printk(KERN_CONT "block %llu:",
- (unsigned long long) block);
- printk(KERN_CONT "%pV\n", &vaf);
- va_end(args);
- }
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
+ sb->s_id, function, line, grp);
+ if (ino)
+ printk(KERN_CONT "inode %lu: ", ino);
+ if (block)
+ printk(KERN_CONT "block %llu:", (unsigned long long) block);
+ printk(KERN_CONT "%pV\n", &vaf);
+ va_end(args);
if (test_opt(sb, ERRORS_CONT)) {
ext4_commit_super(sb, 0);
@@ -2625,12 +2606,6 @@ EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
-EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
-EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
-EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
-EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
-EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
-EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
static struct attribute *ext4_attrs[] = {
ATTR_LIST(delayed_allocation_blocks),
@@ -2648,12 +2623,6 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(max_writeback_mb_bump),
ATTR_LIST(extent_max_zeroout_kb),
ATTR_LIST(trigger_fs_error),
- ATTR_LIST(err_ratelimit_interval_ms),
- ATTR_LIST(err_ratelimit_burst),
- ATTR_LIST(warning_ratelimit_interval_ms),
- ATTR_LIST(warning_ratelimit_burst),
- ATTR_LIST(msg_ratelimit_interval_ms),
- ATTR_LIST(msg_ratelimit_burst),
NULL,
};
@@ -3068,6 +3037,7 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_li_request *elr;
+ unsigned long rnd;
elr = kzalloc(sizeof(*elr), GFP_KERNEL);
if (!elr)
@@ -3082,8 +3052,10 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
* spread the inode table initialization requests
* better.
*/
- elr->lr_next_sched = jiffies + (prandom_u32() %
- (EXT4_DEF_LI_MAX_START_DELAY * HZ));
+ get_random_bytes(&rnd, sizeof(rnd));
+ elr->lr_next_sched = jiffies + (unsigned long)rnd %
+ (EXT4_DEF_LI_MAX_START_DELAY * HZ);
+
return elr;
}
@@ -4146,11 +4118,6 @@ no_journal:
if (es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
- /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
- ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
- ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
- ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
-
kfree(orig_data);
return 0;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 1423c48..03e9beb 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1352,7 +1352,6 @@ retry:
new_extra_isize = s_min_extra_isize;
kfree(is); is = NULL;
kfree(bs); bs = NULL;
- brelse(bh);
goto retry;
}
error = -1;
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 214fe10..e06e099 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -63,11 +63,3 @@ config F2FS_FS_SECURITY
the extended attribute support in advance.
If you are not using a security module, say N.
-
-config F2FS_CHECK_FS
- bool "F2FS consistency checking feature"
- depends on F2FS_FS
- help
- Enables BUG_ONs which check the file system consistency in runtime.
-
- If you want to improve the performance, say N.
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index d0fc287..b7826ec 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -205,8 +205,7 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
return acl;
}
-static int f2fs_set_acl(struct inode *inode, int type,
- struct posix_acl *acl, struct page *ipage)
+static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -251,7 +250,7 @@ static int f2fs_set_acl(struct inode *inode, int type,
}
}
- error = f2fs_setxattr(inode, name_index, "", value, size, ipage);
+ error = f2fs_setxattr(inode, name_index, "", value, size, NULL);
kfree(value);
if (!error)
@@ -261,10 +260,10 @@ static int f2fs_set_acl(struct inode *inode, int type,
return error;
}
-int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage)
+int f2fs_init_acl(struct inode *inode, struct inode *dir)
{
- struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct posix_acl *acl = NULL;
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
int error = 0;
if (!S_ISLNK(inode->i_mode)) {
@@ -277,19 +276,19 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage)
inode->i_mode &= ~current_umask();
}
- if (!test_opt(sbi, POSIX_ACL) || !acl)
- goto cleanup;
+ if (test_opt(sbi, POSIX_ACL) && acl) {
- if (S_ISDIR(inode->i_mode)) {
- error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl, ipage);
- if (error)
- goto cleanup;
+ if (S_ISDIR(inode->i_mode)) {
+ error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
+ if (error)
+ goto cleanup;
+ }
+ error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
+ if (error < 0)
+ return error;
+ if (error > 0)
+ error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl);
}
- error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
- if (error < 0)
- return error;
- if (error > 0)
- error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, ipage);
cleanup:
posix_acl_release(acl);
return error;
@@ -314,8 +313,7 @@ int f2fs_acl_chmod(struct inode *inode)
error = posix_acl_chmod(&acl, GFP_KERNEL, mode);
if (error)
return error;
-
- error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, NULL);
+ error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl);
posix_acl_release(acl);
return error;
}
@@ -390,7 +388,7 @@ static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name,
acl = NULL;
}
- error = f2fs_set_acl(inode, type, acl, NULL);
+ error = f2fs_set_acl(inode, type, acl);
release_and_out:
posix_acl_release(acl);
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index 4963313..80f4306 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -36,9 +36,9 @@ struct f2fs_acl_header {
#ifdef CONFIG_F2FS_FS_POSIX_ACL
-extern struct posix_acl *f2fs_get_acl(struct inode *, int);
-extern int f2fs_acl_chmod(struct inode *);
-extern int f2fs_init_acl(struct inode *, struct inode *, struct page *);
+extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type);
+extern int f2fs_acl_chmod(struct inode *inode);
+extern int f2fs_init_acl(struct inode *inode, struct inode *dir);
#else
#define f2fs_check_acl NULL
#define f2fs_get_acl NULL
@@ -49,8 +49,7 @@ static inline int f2fs_acl_chmod(struct inode *inode)
return 0;
}
-static inline int f2fs_init_acl(struct inode *inode, struct inode *dir,
- struct page *page)
+static inline int f2fs_init_acl(struct inode *inode, struct inode *dir)
{
return 0;
}
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 5716e5e..bb31220 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -81,7 +81,7 @@ static int f2fs_write_meta_page(struct page *page,
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
/* Should not write any meta pages, if any IO error was occurred */
- if (wbc->for_reclaim || sbi->por_doing ||
+ if (wbc->for_reclaim ||
is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) {
dec_page_count(sbi, F2FS_DIRTY_META);
wbc->pages_skipped++;
@@ -142,8 +142,8 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
lock_page(page);
- f2fs_bug_on(page->mapping != mapping);
- f2fs_bug_on(!PageDirty(page));
+ BUG_ON(page->mapping != mapping);
+ BUG_ON(!PageDirty(page));
clear_page_dirty_for_io(page);
if (f2fs_write_meta_page(page, &wbc)) {
unlock_page(page);
@@ -167,8 +167,6 @@ static int f2fs_set_meta_page_dirty(struct page *page)
struct address_space *mapping = page->mapping;
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
- trace_f2fs_set_page_dirty(page, META);
-
SetPageUptodate(page);
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
@@ -208,7 +206,6 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
void release_orphan_inode(struct f2fs_sb_info *sbi)
{
mutex_lock(&sbi->orphan_inode_mutex);
- f2fs_bug_on(sbi->n_orphans == 0);
sbi->n_orphans--;
mutex_unlock(&sbi->orphan_inode_mutex);
}
@@ -228,8 +225,12 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
break;
orphan = NULL;
}
-
- new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
+retry:
+ new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
+ if (!new) {
+ cond_resched();
+ goto retry;
+ }
new->ino = ino;
/* add new_oentry into list which is sorted by inode number */
@@ -252,7 +253,6 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
if (orphan->ino == ino) {
list_del(&orphan->list);
kmem_cache_free(orphan_entry_slab, orphan);
- f2fs_bug_on(sbi->n_orphans == 0);
sbi->n_orphans--;
break;
}
@@ -263,7 +263,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
struct inode *inode = f2fs_iget(sbi->sb, ino);
- f2fs_bug_on(IS_ERR(inode));
+ BUG_ON(IS_ERR(inode));
clear_nlink(inode);
/* truncate all the data during iput */
@@ -277,7 +277,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
return 0;
- sbi->por_doing = true;
+ sbi->por_doing = 1;
start_blk = __start_cp_addr(sbi) + 1;
orphan_blkaddr = __start_sum_addr(sbi) - 1;
@@ -294,7 +294,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
}
/* clear Orphan Flag */
clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
- sbi->por_doing = false;
+ sbi->por_doing = 0;
return 0;
}
@@ -469,7 +469,9 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
return -EEXIST;
}
list_add_tail(&new->list, head);
- stat_inc_dirty_dir(sbi);
+#ifdef CONFIG_F2FS_STAT_FS
+ sbi->n_dirty_dirs++;
+#endif
return 0;
}
@@ -480,8 +482,12 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
if (!S_ISDIR(inode->i_mode))
return;
-
- new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
+retry:
+ new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
+ if (!new) {
+ cond_resched();
+ goto retry;
+ }
new->inode = inode;
INIT_LIST_HEAD(&new->list);
@@ -498,9 +504,13 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
void add_dirty_dir_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct dir_inode_entry *new =
- f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
-
+ struct dir_inode_entry *new;
+retry:
+ new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
+ if (!new) {
+ cond_resched();
+ goto retry;
+ }
new->inode = inode;
INIT_LIST_HEAD(&new->list);
@@ -531,7 +541,9 @@ void remove_dirty_dir_inode(struct inode *inode)
if (entry->inode == inode) {
list_del(&entry->list);
kmem_cache_free(inode_entry_slab, entry);
- stat_dec_dirty_dir(sbi);
+#ifdef CONFIG_F2FS_STAT_FS
+ sbi->n_dirty_dirs--;
+#endif
break;
}
}
@@ -605,10 +617,11 @@ static void block_operations(struct f2fs_sb_info *sbi)
blk_start_plug(&plug);
retry_flush_dents:
- f2fs_lock_all(sbi);
+ mutex_lock_all(sbi);
+
/* write all the dirty dentry pages */
if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
- f2fs_unlock_all(sbi);
+ mutex_unlock_all(sbi);
sync_dirty_dir_inodes(sbi);
goto retry_flush_dents;
}
@@ -631,22 +644,7 @@ retry_flush_nodes:
static void unblock_operations(struct f2fs_sb_info *sbi)
{
mutex_unlock(&sbi->node_write);
- f2fs_unlock_all(sbi);
-}
-
-static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
-{
- DEFINE_WAIT(wait);
-
- for (;;) {
- prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
-
- if (!get_pages(sbi, F2FS_WRITEBACK))
- break;
-
- io_schedule();
- }
- finish_wait(&sbi->cp_wait, &wait);
+ mutex_unlock_all(sbi);
}
static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
@@ -758,7 +756,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
f2fs_put_page(cp_page, 1);
/* wait for previous submitted node/meta pages writeback */
- wait_on_all_pages_writeback(sbi);
+ while (get_pages(sbi, F2FS_WRITEBACK))
+ congestion_wait(BLK_RW_ASYNC, HZ / 50);
filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index aa3438c..941f9b9 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -68,6 +68,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
struct buffer_head *bh_result)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
+#ifdef CONFIG_F2FS_STAT_FS
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+#endif
pgoff_t start_fofs, end_fofs;
block_t start_blkaddr;
@@ -77,8 +80,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
return 0;
}
- stat_inc_total_hit(inode->i_sb);
-
+#ifdef CONFIG_F2FS_STAT_FS
+ sbi->total_hit_ext++;
+#endif
start_fofs = fi->ext.fofs;
end_fofs = fi->ext.fofs + fi->ext.len - 1;
start_blkaddr = fi->ext.blk_addr;
@@ -96,7 +100,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
else
bh_result->b_size = UINT_MAX;
- stat_inc_read_hit(inode->i_sb);
+#ifdef CONFIG_F2FS_STAT_FS
+ sbi->read_hit_ext++;
+#endif
read_unlock(&fi->ext.ext_lock);
return 1;
}
@@ -110,7 +116,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
pgoff_t fofs, start_fofs, end_fofs;
block_t start_blkaddr, end_blkaddr;
- f2fs_bug_on(blk_addr == NEW_ADDR);
+ BUG_ON(blk_addr == NEW_ADDR);
fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
dn->ofs_in_node;
@@ -436,7 +442,7 @@ static int get_data_block_ro(struct inode *inode, sector_t iblock,
}
/* It does not support data allocation */
- f2fs_bug_on(create);
+ BUG_ON(create);
if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) {
int i;
@@ -554,9 +560,9 @@ write:
inode_dec_dirty_dents(inode);
err = do_write_data_page(page);
} else {
- f2fs_lock_op(sbi);
+ int ilock = mutex_lock_op(sbi);
err = do_write_data_page(page);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
need_balance_fs = true;
}
if (err == -ENOENT)
@@ -635,6 +641,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
struct dnode_of_data dn;
int err = 0;
+ int ilock;
f2fs_balance_fs(sbi);
repeat:
@@ -643,7 +650,7 @@ repeat:
return -ENOMEM;
*pagep = page;
- f2fs_lock_op(sbi);
+ ilock = mutex_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, index, ALLOC_NODE);
@@ -657,7 +664,7 @@ repeat:
if (err)
goto err;
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
return 0;
@@ -693,7 +700,7 @@ out:
return 0;
err:
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
f2fs_put_page(page, 1);
return err;
}
@@ -756,8 +763,6 @@ static int f2fs_set_data_page_dirty(struct page *page)
struct address_space *mapping = page->mapping;
struct inode *inode = mapping->host;
- trace_f2fs_set_page_dirty(page, DATA);
-
SetPageUptodate(page);
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 594fc1b..384c6da 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -139,7 +139,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
bool room = false;
int max_slots = 0;
- f2fs_bug_on(level > MAX_DIR_HASH_DEPTH);
+ BUG_ON(level > MAX_DIR_HASH_DEPTH);
nbucket = dir_buckets(level);
nblock = bucket_blocks(level);
@@ -346,7 +346,7 @@ static struct page *init_inode_metadata(struct inode *inode,
goto error;
}
- err = f2fs_init_acl(inode, dir, page);
+ err = f2fs_init_acl(inode, dir);
if (err)
goto error;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 89dc750..608f0df 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -18,13 +18,6 @@
#include <linux/crc32.h>
#include <linux/magic.h>
#include <linux/kobject.h>
-#include <linux/sched.h>
-
-#ifdef CONFIG_F2FS_CHECK_FS
-#define f2fs_bug_on(condition) BUG_ON(condition)
-#else
-#define f2fs_bug_on(condition)
-#endif
/*
* For mount options
@@ -305,9 +298,6 @@ struct f2fs_sm_info {
unsigned int main_segments; /* # of segments in main area */
unsigned int reserved_segments; /* # of reserved segments */
unsigned int ovp_segments; /* # of overprovision segments */
-
- /* a threshold to reclaim prefree segments */
- unsigned int rec_prefree_segments;
};
/*
@@ -328,6 +318,14 @@ enum count_type {
};
/*
+ * Uses as sbi->fs_lock[NR_GLOBAL_LOCKS].
+ * The checkpoint procedure blocks all the locks in this fs_lock array.
+ * Some FS operations grab free locks, and if there is no free lock,
+ * then wait to grab a lock in a round-robin manner.
+ */
+#define NR_GLOBAL_LOCKS 8
+
+/*
* The below are the page types of bios used in submti_bio().
* The available types are:
* DATA User data pages. It operates as async mode.
@@ -367,12 +365,12 @@ struct f2fs_sb_info {
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
struct inode *meta_inode; /* cache meta blocks */
struct mutex cp_mutex; /* checkpoint procedure lock */
- struct rw_semaphore cp_rwsem; /* blocking FS operations */
+ struct mutex fs_lock[NR_GLOBAL_LOCKS]; /* blocking FS operations */
struct mutex node_write; /* locking node writes */
struct mutex writepages; /* mutex for writepages() */
- bool por_doing; /* recovery is doing or not */
- bool on_build_free_nids; /* build_free_nids is doing */
- wait_queue_head_t cp_wait;
+ unsigned char next_lock_num; /* round-robin global locks */
+ int por_doing; /* recovery is doing or not */
+ int on_build_free_nids; /* build_free_nids is doing */
/* for orphan inode management */
struct list_head orphan_inode_list; /* orphan inode list */
@@ -522,24 +520,48 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
cp->ckpt_flags = cpu_to_le32(ckpt_flags);
}
-static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
+static inline void mutex_lock_all(struct f2fs_sb_info *sbi)
{
- down_read(&sbi->cp_rwsem);
+ int i;
+
+ for (i = 0; i < NR_GLOBAL_LOCKS; i++) {
+ /*
+ * This is the only time we take multiple fs_lock[]
+ * instances; the order is immaterial since we
+ * always hold cp_mutex, which serializes multiple
+ * such operations.
+ */
+ mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex);
+ }
}
-static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
+static inline void mutex_unlock_all(struct f2fs_sb_info *sbi)
{
- up_read(&sbi->cp_rwsem);
+ int i = 0;
+ for (; i < NR_GLOBAL_LOCKS; i++)
+ mutex_unlock(&sbi->fs_lock[i]);
}
-static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
+static inline int mutex_lock_op(struct f2fs_sb_info *sbi)
{
- down_write_nest_lock(&sbi->cp_rwsem, &sbi->cp_mutex);
+ unsigned char next_lock = sbi->next_lock_num % NR_GLOBAL_LOCKS;
+ int i = 0;
+
+ for (; i < NR_GLOBAL_LOCKS; i++)
+ if (mutex_trylock(&sbi->fs_lock[i]))
+ return i;
+
+ mutex_lock(&sbi->fs_lock[next_lock]);
+ sbi->next_lock_num++;
+ return next_lock;
}
-static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
+static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock)
{
- up_write(&sbi->cp_rwsem);
+ if (ilock < 0)
+ return;
+ BUG_ON(ilock >= NR_GLOBAL_LOCKS);
+ mutex_unlock(&sbi->fs_lock[ilock]);
}
/*
@@ -590,8 +612,8 @@ static inline int dec_valid_block_count(struct f2fs_sb_info *sbi,
blkcnt_t count)
{
spin_lock(&sbi->stat_lock);
- f2fs_bug_on(sbi->total_valid_block_count < (block_t) count);
- f2fs_bug_on(inode->i_blocks < count);
+ BUG_ON(sbi->total_valid_block_count < (block_t) count);
+ BUG_ON(inode->i_blocks < count);
inode->i_blocks -= count;
sbi->total_valid_block_count -= (block_t)count;
spin_unlock(&sbi->stat_lock);
@@ -723,9 +745,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
{
spin_lock(&sbi->stat_lock);
- f2fs_bug_on(sbi->total_valid_block_count < count);
- f2fs_bug_on(sbi->total_valid_node_count < count);
- f2fs_bug_on(inode->i_blocks < count);
+ BUG_ON(sbi->total_valid_block_count < count);
+ BUG_ON(sbi->total_valid_node_count < count);
+ BUG_ON(inode->i_blocks < count);
inode->i_blocks -= count;
sbi->total_valid_node_count -= count;
@@ -746,7 +768,7 @@ static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
{
spin_lock(&sbi->stat_lock);
- f2fs_bug_on(sbi->total_valid_inode_count == sbi->total_node_count);
+ BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count);
sbi->total_valid_inode_count++;
spin_unlock(&sbi->stat_lock);
}
@@ -754,7 +776,7 @@ static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi)
{
spin_lock(&sbi->stat_lock);
- f2fs_bug_on(!sbi->total_valid_inode_count);
+ BUG_ON(!sbi->total_valid_inode_count);
sbi->total_valid_inode_count--;
spin_unlock(&sbi->stat_lock);
return 0;
@@ -775,7 +797,7 @@ static inline void f2fs_put_page(struct page *page, int unlock)
return;
if (unlock) {
- f2fs_bug_on(!PageLocked(page));
+ BUG_ON(!PageLocked(page));
unlock_page(page);
}
page_cache_release(page);
@@ -797,20 +819,6 @@ static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor);
}
-static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
- gfp_t flags)
-{
- void *entry;
-retry:
- entry = kmem_cache_alloc(cachep, flags);
- if (!entry) {
- cond_resched();
- goto retry;
- }
-
- return entry;
-}
-
#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino)
static inline bool IS_INODE(struct page *page)
@@ -971,7 +979,6 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
*/
void f2fs_set_inode_flags(struct inode *);
struct inode *f2fs_iget(struct super_block *, unsigned long);
-int try_to_free_nats(struct f2fs_sb_info *, int);
void update_inode(struct inode *, struct page *);
int update_inode_page(struct inode *);
int f2fs_write_inode(struct inode *, struct writeback_control *);
@@ -1026,7 +1033,6 @@ void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
int truncate_inode_blocks(struct inode *, pgoff_t);
int truncate_xattr_node(struct inode *, struct page *);
-int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
int remove_inode_page(struct inode *);
struct page *new_inode_page(struct inode *, const struct qstr *);
struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
@@ -1053,7 +1059,6 @@ void destroy_node_manager_caches(void);
* segment.c
*/
void f2fs_balance_fs(struct f2fs_sb_info *);
-void f2fs_balance_fs_bg(struct f2fs_sb_info *);
void invalidate_blocks(struct f2fs_sb_info *, block_t);
void clear_prefree_segments(struct f2fs_sb_info *);
int npages_for_summary_flush(struct f2fs_sb_info *);
@@ -1167,16 +1172,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
return (struct f2fs_stat_info*)sbi->stat_info;
}
-#define stat_inc_call_count(si) ((si)->call_count++)
-#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
-#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
-#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--)
-#define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++)
-#define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++)
-#define stat_inc_seg_type(sbi, curseg) \
- ((sbi)->segment_count[(curseg)->alloc_type]++)
-#define stat_inc_block_count(sbi, curseg) \
- ((sbi)->block_count[(curseg)->alloc_type]++)
+#define stat_inc_call_count(si) ((si)->call_count++)
#define stat_inc_seg_count(sbi, type) \
do { \
@@ -1211,13 +1207,6 @@ void __init f2fs_create_root_stats(void);
void f2fs_destroy_root_stats(void);
#else
#define stat_inc_call_count(si)
-#define stat_inc_bggc_count(si)
-#define stat_inc_dirty_dir(sbi)
-#define stat_dec_dirty_dir(sbi)
-#define stat_inc_total_hit(sb)
-#define stat_inc_read_hit(sb)
-#define stat_inc_seg_type(sbi, curseg)
-#define stat_inc_block_count(sbi, curseg)
#define stat_inc_seg_count(si, type)
#define stat_inc_tot_blk_count(si, blks)
#define stat_inc_data_blk_count(si, blks)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 7d714f4..02c9069 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -35,18 +35,18 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
block_t old_blk_addr;
struct dnode_of_data dn;
- int err;
+ int err, ilock;
f2fs_balance_fs(sbi);
sb_start_pagefault(inode->i_sb);
/* block allocation */
- f2fs_lock_op(sbi);
+ ilock = mutex_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, page->index, ALLOC_NODE);
if (err) {
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
goto out;
}
@@ -56,12 +56,12 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
err = reserve_new_block(&dn);
if (err) {
f2fs_put_dnode(&dn);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
goto out;
}
}
f2fs_put_dnode(&dn);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
file_update_time(vma->vm_file);
lock_page(page);
@@ -88,7 +88,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
set_page_dirty(page);
SetPageUptodate(page);
- trace_f2fs_vm_page_mkwrite(page, DATA);
mapped:
/* fill the page */
wait_on_page_writeback(page);
@@ -189,9 +188,8 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (ret)
goto out;
}
- ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
- if (ret)
- goto out;
+ filemap_fdatawait_range(sbi->node_inode->i_mapping,
+ 0, LONG_MAX);
ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
}
out:
@@ -272,7 +270,7 @@ static int truncate_blocks(struct inode *inode, u64 from)
unsigned int blocksize = inode->i_sb->s_blocksize;
struct dnode_of_data dn;
pgoff_t free_from;
- int count = 0;
+ int count = 0, ilock = -1;
int err;
trace_f2fs_truncate_blocks_enter(inode, from);
@@ -280,13 +278,13 @@ static int truncate_blocks(struct inode *inode, u64 from)
free_from = (pgoff_t)
((from + blocksize - 1) >> (sbi->log_blocksize));
- f2fs_lock_op(sbi);
+ ilock = mutex_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
if (err) {
if (err == -ENOENT)
goto free_next;
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
trace_f2fs_truncate_blocks_exit(inode, err);
return err;
}
@@ -297,7 +295,7 @@ static int truncate_blocks(struct inode *inode, u64 from)
count = ADDRS_PER_BLOCK;
count -= dn.ofs_in_node;
- f2fs_bug_on(count < 0);
+ BUG_ON(count < 0);
if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
truncate_data_blocks_range(&dn, count);
@@ -307,7 +305,7 @@ static int truncate_blocks(struct inode *inode, u64 from)
f2fs_put_dnode(&dn);
free_next:
err = truncate_inode_blocks(inode, free_from);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
/* lastly zero out the first data page */
truncate_partial_data_page(inode, from);
@@ -418,15 +416,16 @@ static void fill_zero(struct inode *inode, pgoff_t index,
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct page *page;
+ int ilock;
if (!len)
return;
f2fs_balance_fs(sbi);
- f2fs_lock_op(sbi);
+ ilock = mutex_lock_op(sbi);
page = get_new_data_page(inode, NULL, index, false);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
if (!IS_ERR(page)) {
wait_on_page_writeback(page);
@@ -485,6 +484,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
struct address_space *mapping = inode->i_mapping;
loff_t blk_start, blk_end;
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ int ilock;
f2fs_balance_fs(sbi);
@@ -493,9 +493,9 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
truncate_inode_pages_range(mapping, blk_start,
blk_end - 1);
- f2fs_lock_op(sbi);
+ ilock = mutex_lock_op(sbi);
ret = truncate_hole(inode, pg_start, pg_end);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
}
}
@@ -529,12 +529,13 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
for (index = pg_start; index <= pg_end; index++) {
struct dnode_of_data dn;
+ int ilock;
- f2fs_lock_op(sbi);
+ ilock = mutex_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
ret = get_dnode_of_data(&dn, index, ALLOC_NODE);
if (ret) {
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
break;
}
@@ -542,12 +543,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
ret = reserve_new_block(&dn);
if (ret) {
f2fs_put_dnode(&dn);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
break;
}
}
f2fs_put_dnode(&dn);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
if (pg_start == pg_end)
new_size = offset + len;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index b7ad1ec..2f157e8 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -77,15 +77,13 @@ static int gc_thread_func(void *data)
else
wait_ms = increase_sleep_time(gc_th, wait_ms);
- stat_inc_bggc_count(sbi);
+#ifdef CONFIG_F2FS_STAT_FS
+ sbi->bg_gc++;
+#endif
/* if return value is not zero, no victim was selected */
if (f2fs_gc(sbi))
wait_ms = gc_th->no_gc_sleep_time;
-
- /* balancing f2fs's metadata periodically */
- f2fs_balance_fs_bg(sbi);
-
} while (!kthread_should_stop());
return 0;
}
@@ -238,8 +236,8 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}
-static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
- unsigned int segno, struct victim_sel_policy *p)
+static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno,
+ struct victim_sel_policy *p)
{
if (p->alloc_mode == SSR)
return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
@@ -295,11 +293,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
}
break;
}
-
- p.offset = segno + p.ofs_unit;
- if (p.ofs_unit > 1)
- p.offset -= segno % p.ofs_unit;
-
+ p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit;
secno = GET_SECNO(sbi, segno);
if (sec_usage_check(sbi, secno))
@@ -312,10 +306,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
if (p.min_cost > cost) {
p.min_segno = segno;
p.min_cost = cost;
- } else if (unlikely(cost == max_cost)) {
- continue;
}
+ if (cost == max_cost)
+ continue;
+
if (nsearched++ >= p.max_search) {
sbi->last_victim[p.gc_mode] = segno;
break;
@@ -363,8 +358,12 @@ static void add_gc_inode(struct inode *inode, struct list_head *ilist)
iput(inode);
return;
}
-
- new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS);
+repeat:
+ new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS);
+ if (!new_ie) {
+ cond_resched();
+ goto repeat;
+ }
new_ie->inode = inode;
list_add_tail(&new_ie->list, ilist);
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index d0eaa9f..9339cd2 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -37,31 +37,6 @@ void f2fs_set_inode_flags(struct inode *inode)
inode->i_flags |= S_DIRSYNC;
}
-static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
-{
- if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
- S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
- if (ri->i_addr[0])
- inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0]));
- else
- inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1]));
- }
-}
-
-static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
-{
- if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
- if (old_valid_dev(inode->i_rdev)) {
- ri->i_addr[0] = cpu_to_le32(old_encode_dev(inode->i_rdev));
- ri->i_addr[1] = 0;
- } else {
- ri->i_addr[0] = 0;
- ri->i_addr[1] = cpu_to_le32(new_encode_dev(inode->i_rdev));
- ri->i_addr[2] = 0;
- }
- }
-}
-
static int do_read_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
@@ -98,6 +73,10 @@ static int do_read_inode(struct inode *inode)
inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
inode->i_generation = le32_to_cpu(ri->i_generation);
+ if (ri->i_addr[0])
+ inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0]));
+ else
+ inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1]));
fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
@@ -105,13 +84,8 @@ static int do_read_inode(struct inode *inode)
fi->flags = 0;
fi->i_advise = ri->i_advise;
fi->i_pino = le32_to_cpu(ri->i_pino);
-
get_extent_info(&fi->ext, ri->i_ext);
get_inline_info(fi, ri);
-
- /* get rdev by using inline_info */
- __get_inode_rdev(inode, ri);
-
f2fs_put_page(node_page, 1);
return 0;
}
@@ -205,10 +179,21 @@ void update_inode(struct inode *inode, struct page *node_page)
ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
ri->i_generation = cpu_to_le32(inode->i_generation);
- __set_inode_rdev(inode, ri);
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+ if (old_valid_dev(inode->i_rdev)) {
+ ri->i_addr[0] =
+ cpu_to_le32(old_encode_dev(inode->i_rdev));
+ ri->i_addr[1] = 0;
+ } else {
+ ri->i_addr[0] = 0;
+ ri->i_addr[1] =
+ cpu_to_le32(new_encode_dev(inode->i_rdev));
+ ri->i_addr[2] = 0;
+ }
+ }
+
set_cold_node(inode, node_page);
set_page_dirty(node_page);
-
clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
}
@@ -229,7 +214,7 @@ int update_inode_page(struct inode *inode)
int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- int ret;
+ int ret, ilock;
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
inode->i_ino == F2FS_META_INO(sbi))
@@ -242,9 +227,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
* We need to lock here to prevent from producing dirty node pages
* during the urgent cleaning time when runing out of free sections.
*/
- f2fs_lock_op(sbi);
+ ilock = mutex_lock_op(sbi);
ret = update_inode_page(inode);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
if (wbc)
f2fs_balance_fs(sbi);
@@ -258,6 +243,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
void f2fs_evict_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ int ilock;
trace_f2fs_evict_inode(inode);
truncate_inode_pages(&inode->i_data, 0);
@@ -266,7 +252,7 @@ void f2fs_evict_inode(struct inode *inode)
inode->i_ino == F2FS_META_INO(sbi))
goto no_delete;
- f2fs_bug_on(atomic_read(&F2FS_I(inode)->dirty_dents));
+ BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents));
remove_dirty_dir_inode(inode);
if (inode->i_nlink || is_bad_inode(inode))
@@ -279,9 +265,9 @@ void f2fs_evict_inode(struct inode *inode)
if (F2FS_HAS_BLOCKS(inode))
f2fs_truncate(inode);
- f2fs_lock_op(sbi);
+ ilock = mutex_lock_op(sbi);
remove_inode_page(inode);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
sb_end_intwrite(inode->i_sb);
no_delete:
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 575adac..2a5359c 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -27,19 +27,19 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
nid_t ino;
struct inode *inode;
bool nid_free = false;
- int err;
+ int err, ilock;
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
- f2fs_lock_op(sbi);
+ ilock = mutex_lock_op(sbi);
if (!alloc_nid(sbi, &ino)) {
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
err = -ENOSPC;
goto fail;
}
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
inode->i_uid = current_fsuid();
@@ -115,7 +115,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct inode *inode;
nid_t ino = 0;
- int err;
+ int err, ilock;
f2fs_balance_fs(sbi);
@@ -131,9 +131,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
inode->i_mapping->a_ops = &f2fs_dblock_aops;
ino = inode->i_ino;
- f2fs_lock_op(sbi);
+ ilock = mutex_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
if (err)
goto out;
@@ -157,7 +157,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
struct inode *inode = old_dentry->d_inode;
struct super_block *sb = dir->i_sb;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
- int err;
+ int err, ilock;
f2fs_balance_fs(sbi);
@@ -165,9 +165,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
ihold(inode);
set_inode_flag(F2FS_I(inode), FI_INC_LINK);
- f2fs_lock_op(sbi);
+ ilock = mutex_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
if (err)
goto out;
@@ -220,6 +220,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
struct f2fs_dir_entry *de;
struct page *page;
int err = -ENOENT;
+ int ilock;
trace_f2fs_unlink_enter(dir, dentry);
f2fs_balance_fs(sbi);
@@ -228,16 +229,16 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
if (!de)
goto fail;
- f2fs_lock_op(sbi);
err = acquire_orphan_inode(sbi);
if (err) {
- f2fs_unlock_op(sbi);
kunmap(page);
f2fs_put_page(page, 0);
goto fail;
}
+
+ ilock = mutex_lock_op(sbi);
f2fs_delete_entry(de, page, inode);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
/* In order to evict this inode, we set it dirty */
mark_inode_dirty(inode);
@@ -253,7 +254,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct inode *inode;
size_t symlen = strlen(symname) + 1;
- int err;
+ int err, ilock;
f2fs_balance_fs(sbi);
@@ -264,9 +265,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
inode->i_op = &f2fs_symlink_inode_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
- f2fs_lock_op(sbi);
+ ilock = mutex_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
if (err)
goto out;
@@ -289,7 +290,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct inode *inode;
- int err;
+ int err, ilock;
f2fs_balance_fs(sbi);
@@ -303,9 +304,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
set_inode_flag(F2FS_I(inode), FI_INC_LINK);
- f2fs_lock_op(sbi);
+ ilock = mutex_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
if (err)
goto out_fail;
@@ -341,6 +342,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct inode *inode;
int err = 0;
+ int ilock;
if (!new_valid_dev(rdev))
return -EINVAL;
@@ -354,9 +356,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &f2fs_special_inode_operations;
- f2fs_lock_op(sbi);
+ ilock = mutex_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
if (err)
goto out;
@@ -385,7 +387,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct f2fs_dir_entry *old_dir_entry = NULL;
struct f2fs_dir_entry *old_entry;
struct f2fs_dir_entry *new_entry;
- int err = -ENOENT;
+ int err = -ENOENT, ilock = -1;
f2fs_balance_fs(sbi);
@@ -400,7 +402,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out_old;
}
- f2fs_lock_op(sbi);
+ ilock = mutex_lock_op(sbi);
if (new_inode) {
@@ -465,7 +467,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
update_inode_page(old_dir);
}
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
return 0;
put_out_dir:
@@ -475,7 +477,7 @@ out_dir:
kunmap(old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
out_old:
kunmap(old_page);
f2fs_put_page(old_page, 0);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4ac4150..51ef278 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -204,7 +204,7 @@ retry:
}
e->ni = *ni;
e->checkpointed = true;
- f2fs_bug_on(ni->blk_addr == NEW_ADDR);
+ BUG_ON(ni->blk_addr == NEW_ADDR);
} else if (new_blkaddr == NEW_ADDR) {
/*
* when nid is reallocated,
@@ -212,19 +212,19 @@ retry:
* So, reinitialize it with new information.
*/
e->ni = *ni;
- f2fs_bug_on(ni->blk_addr != NULL_ADDR);
+ BUG_ON(ni->blk_addr != NULL_ADDR);
}
if (new_blkaddr == NEW_ADDR)
e->checkpointed = false;
/* sanity check */
- f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr);
- f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR &&
+ BUG_ON(nat_get_blkaddr(e) != ni->blk_addr);
+ BUG_ON(nat_get_blkaddr(e) == NULL_ADDR &&
new_blkaddr == NULL_ADDR);
- f2fs_bug_on(nat_get_blkaddr(e) == NEW_ADDR &&
+ BUG_ON(nat_get_blkaddr(e) == NEW_ADDR &&
new_blkaddr == NEW_ADDR);
- f2fs_bug_on(nat_get_blkaddr(e) != NEW_ADDR &&
+ BUG_ON(nat_get_blkaddr(e) != NEW_ADDR &&
nat_get_blkaddr(e) != NULL_ADDR &&
new_blkaddr == NEW_ADDR);
@@ -240,7 +240,7 @@ retry:
write_unlock(&nm_i->nat_tree_lock);
}
-int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
+static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -495,10 +495,10 @@ static void truncate_node(struct dnode_of_data *dn)
get_node_info(sbi, dn->nid, &ni);
if (dn->inode->i_blocks == 0) {
- f2fs_bug_on(ni.blk_addr != NULL_ADDR);
+ BUG_ON(ni.blk_addr != NULL_ADDR);
goto invalidate;
}
- f2fs_bug_on(ni.blk_addr == NULL_ADDR);
+ BUG_ON(ni.blk_addr == NULL_ADDR);
/* Deallocate node address */
invalidate_blocks(sbi, ni.blk_addr);
@@ -822,7 +822,7 @@ int remove_inode_page(struct inode *inode)
}
/* 0 is possible, after f2fs_new_inode() is failed */
- f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1);
+ BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1);
set_new_dnode(&dn, inode, page, page, ino);
truncate_node(&dn);
return 0;
@@ -863,7 +863,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
get_node_info(sbi, dn->nid, &old_ni);
/* Reinitialize old_ni with new node page */
- f2fs_bug_on(old_ni.blk_addr != NULL_ADDR);
+ BUG_ON(old_ni.blk_addr != NULL_ADDR);
new_ni = old_ni;
new_ni.ino = dn->inode->i_ino;
set_node_addr(sbi, &new_ni, NEW_ADDR);
@@ -969,7 +969,7 @@ repeat:
goto repeat;
}
got_it:
- f2fs_bug_on(nid != nid_of_node(page));
+ BUG_ON(nid != nid_of_node(page));
mark_page_accessed(page);
return page;
}
@@ -1148,47 +1148,6 @@ continue_unlock:
return nwritten;
}
-int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
-{
- struct address_space *mapping = sbi->node_inode->i_mapping;
- pgoff_t index = 0, end = LONG_MAX;
- struct pagevec pvec;
- int nr_pages;
- int ret2 = 0, ret = 0;
-
- pagevec_init(&pvec, 0);
- while ((index <= end) &&
- (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_WRITEBACK,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
- unsigned i;
-
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- /* until radix tree lookup accepts end_index */
- if (page->index > end)
- continue;
-
- if (ino && ino_of_node(page) == ino) {
- wait_on_page_writeback(page);
- if (TestClearPageError(page))
- ret = -EIO;
- }
- }
- pagevec_release(&pvec);
- cond_resched();
- }
-
- if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
- ret2 = -ENOSPC;
- if (test_and_clear_bit(AS_EIO, &mapping->flags))
- ret2 = -EIO;
- if (!ret)
- ret = ret2;
- return ret;
-}
-
static int f2fs_write_node_page(struct page *page,
struct writeback_control *wbc)
{
@@ -1197,14 +1156,11 @@ static int f2fs_write_node_page(struct page *page,
block_t new_addr;
struct node_info ni;
- if (sbi->por_doing)
- goto redirty_out;
-
wait_on_page_writeback(page);
/* get old block addr of this node page */
nid = nid_of_node(page);
- f2fs_bug_on(page->index != nid);
+ BUG_ON(page->index != nid);
get_node_info(sbi, nid, &ni);
@@ -1215,8 +1171,12 @@ static int f2fs_write_node_page(struct page *page,
return 0;
}
- if (wbc->for_reclaim)
- goto redirty_out;
+ if (wbc->for_reclaim) {
+ dec_page_count(sbi, F2FS_DIRTY_NODES);
+ wbc->pages_skipped++;
+ set_page_dirty(page);
+ return AOP_WRITEPAGE_ACTIVATE;
+ }
mutex_lock(&sbi->node_write);
set_page_writeback(page);
@@ -1226,12 +1186,6 @@ static int f2fs_write_node_page(struct page *page,
mutex_unlock(&sbi->node_write);
unlock_page(page);
return 0;
-
-redirty_out:
- dec_page_count(sbi, F2FS_DIRTY_NODES);
- wbc->pages_skipped++;
- set_page_dirty(page);
- return AOP_WRITEPAGE_ACTIVATE;
}
/*
@@ -1246,8 +1200,11 @@ static int f2fs_write_node_pages(struct address_space *mapping,
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
long nr_to_write = wbc->nr_to_write;
- /* balancing f2fs's metadata in background */
- f2fs_balance_fs_bg(sbi);
+ /* First check balancing cached NAT entries */
+ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) {
+ f2fs_sync_fs(sbi->sb, true);
+ return 0;
+ }
/* collect a number of dirty node pages and write together */
if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES)
@@ -1266,8 +1223,6 @@ static int f2fs_set_node_page_dirty(struct page *page)
struct address_space *mapping = page->mapping;
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
- trace_f2fs_set_page_dirty(page, NODE);
-
SetPageUptodate(page);
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
@@ -1336,18 +1291,23 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
if (nid == 0)
return 0;
- if (build) {
- /* do not add allocated nids */
- read_lock(&nm_i->nat_tree_lock);
- ne = __lookup_nat_cache(nm_i, nid);
- if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
- allocated = true;
- read_unlock(&nm_i->nat_tree_lock);
- if (allocated)
- return 0;
- }
+ if (!build)
+ goto retry;
- i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
+ /* do not add allocated nids */
+ read_lock(&nm_i->nat_tree_lock);
+ ne = __lookup_nat_cache(nm_i, nid);
+ if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
+ allocated = true;
+ read_unlock(&nm_i->nat_tree_lock);
+ if (allocated)
+ return 0;
+retry:
+ i = kmem_cache_alloc(free_nid_slab, GFP_NOFS);
+ if (!i) {
+ cond_resched();
+ goto retry;
+ }
i->nid = nid;
i->state = NID_NEW;
@@ -1390,7 +1350,7 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i,
break;
blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
- f2fs_bug_on(blk_addr == NEW_ADDR);
+ BUG_ON(blk_addr == NEW_ADDR);
if (blk_addr == NULL_ADDR) {
if (add_free_nid(nm_i, start_nid, true) < 0)
break;
@@ -1461,14 +1421,14 @@ retry:
/* We should not use stale free nids created by build_free_nids */
if (nm_i->fcnt && !sbi->on_build_free_nids) {
- f2fs_bug_on(list_empty(&nm_i->free_nid_list));
+ BUG_ON(list_empty(&nm_i->free_nid_list));
list_for_each(this, &nm_i->free_nid_list) {
i = list_entry(this, struct free_nid, list);
if (i->state == NID_NEW)
break;
}
- f2fs_bug_on(i->state != NID_NEW);
+ BUG_ON(i->state != NID_NEW);
*nid = i->nid;
i->state = NID_ALLOC;
nm_i->fcnt--;
@@ -1479,9 +1439,9 @@ retry:
/* Let's scan nat pages and its caches to get free nids */
mutex_lock(&nm_i->build_lock);
- sbi->on_build_free_nids = true;
+ sbi->on_build_free_nids = 1;
build_free_nids(sbi);
- sbi->on_build_free_nids = false;
+ sbi->on_build_free_nids = 0;
mutex_unlock(&nm_i->build_lock);
goto retry;
}
@@ -1496,7 +1456,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
spin_lock(&nm_i->free_nid_list_lock);
i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
- f2fs_bug_on(!i || i->state != NID_ALLOC);
+ BUG_ON(!i || i->state != NID_ALLOC);
__del_from_free_nid_list(i);
spin_unlock(&nm_i->free_nid_list_lock);
}
@@ -1514,7 +1474,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
spin_lock(&nm_i->free_nid_list_lock);
i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
- f2fs_bug_on(!i || i->state != NID_ALLOC);
+ BUG_ON(!i || i->state != NID_ALLOC);
if (nm_i->fcnt > 2 * MAX_FREE_NIDS) {
__del_from_free_nid_list(i);
} else {
@@ -1717,7 +1677,7 @@ to_nat_page:
nat_blk = page_address(page);
}
- f2fs_bug_on(!nat_blk);
+ BUG_ON(!nat_blk);
raw_ne = nat_blk->entries[nid - start_nid];
flush_now:
new_blkaddr = nat_get_blkaddr(ne);
@@ -1821,11 +1781,11 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
/* destroy free nid list */
spin_lock(&nm_i->free_nid_list_lock);
list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
- f2fs_bug_on(i->state == NID_ALLOC);
+ BUG_ON(i->state == NID_ALLOC);
__del_from_free_nid_list(i);
nm_i->fcnt--;
}
- f2fs_bug_on(nm_i->fcnt);
+ BUG_ON(nm_i->fcnt);
spin_unlock(&nm_i->free_nid_list_lock);
/* destroy nat cache */
@@ -1839,7 +1799,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
__del_from_nat_cache(nm_i, e);
}
}
- f2fs_bug_on(nm_i->nat_cnt);
+ BUG_ON(nm_i->nat_cnt);
write_unlock(&nm_i->nat_tree_lock);
kfree(nm_i->nat_bitmap);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index fdc8116..51ef5ee 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -64,31 +64,24 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
name.name = raw_inode->i_name;
retry:
de = f2fs_find_entry(dir, &name, &page);
- if (de && inode->i_ino == le32_to_cpu(de->ino))
- goto out_unmap_put;
+ if (de && inode->i_ino == le32_to_cpu(de->ino)) {
+ kunmap(page);
+ f2fs_put_page(page, 0);
+ goto out;
+ }
if (de) {
einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
if (IS_ERR(einode)) {
WARN_ON(1);
if (PTR_ERR(einode) == -ENOENT)
err = -EEXIST;
- goto out_unmap_put;
- }
- err = acquire_orphan_inode(F2FS_SB(inode->i_sb));
- if (err) {
- iput(einode);
- goto out_unmap_put;
+ goto out;
}
f2fs_delete_entry(de, page, einode);
iput(einode);
goto retry;
}
err = __f2fs_add_link(dir, &name, inode);
- goto out;
-
-out_unmap_put:
- kunmap(page);
- f2fs_put_page(page, 0);
out:
f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: "
"ino = %x, name = %s, dir = %lx, err = %d",
@@ -292,6 +285,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
struct f2fs_summary sum;
struct node_info ni;
int err = 0, recovered = 0;
+ int ilock;
start = start_bidx_of_node(ofs_of_node(page), fi);
if (IS_INODE(page))
@@ -299,20 +293,20 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
else
end = start + ADDRS_PER_BLOCK;
- f2fs_lock_op(sbi);
+ ilock = mutex_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, start, ALLOC_NODE);
if (err) {
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
return err;
}
wait_on_page_writeback(dn.node_page);
get_node_info(sbi, dn.nid, &ni);
- f2fs_bug_on(ni.ino != ino_of_node(page));
- f2fs_bug_on(ofs_of_node(dn.node_page) != ofs_of_node(page));
+ BUG_ON(ni.ino != ino_of_node(page));
+ BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page));
for (; start < end; start++) {
block_t src, dest;
@@ -322,9 +316,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) {
if (src == NULL_ADDR) {
- err = reserve_new_block(&dn);
+ int err = reserve_new_block(&dn);
/* We should not get -ENOSPC */
- f2fs_bug_on(err);
+ BUG_ON(err);
}
/* Check the previous node page having this index */
@@ -355,7 +349,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);
err:
f2fs_put_dnode(&dn);
- f2fs_unlock_op(sbi);
+ mutex_unlock_op(sbi, ilock);
f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, "
"recovered_data = %d blocks, err = %d",
@@ -425,7 +419,6 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
{
struct list_head inode_list;
int err;
- bool need_writecp = false;
fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
sizeof(struct fsync_inode_entry), NULL);
@@ -435,7 +428,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&inode_list);
/* step #1: find fsynced inode numbers */
- sbi->por_doing = true;
+ sbi->por_doing = 1;
err = find_fsync_dnodes(sbi, &inode_list);
if (err)
goto out;
@@ -443,16 +436,14 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
if (list_empty(&inode_list))
goto out;
- need_writecp = true;
-
/* step #2: recover data */
err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
- f2fs_bug_on(!list_empty(&inode_list));
+ BUG_ON(!list_empty(&inode_list));
out:
destroy_fsync_dnodes(&inode_list);
kmem_cache_destroy(fsync_entry_slab);
- sbi->por_doing = false;
- if (!err && need_writecp)
+ sbi->por_doing = 0;
+ if (!err)
write_checkpoint(sbi, false);
return err;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index fa284d3..09af9c7 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -36,14 +36,6 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)
}
}
-void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
-{
- /* check the # of cached NAT entries and prefree segments */
- if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
- excess_prefree_segs(sbi))
- f2fs_sync_fs(sbi->sb, true);
-}
-
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
enum dirty_type dirty_type)
{
@@ -58,10 +50,20 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
if (dirty_type == DIRTY) {
struct seg_entry *sentry = get_seg_entry(sbi, segno);
- enum dirty_type t = sentry->type;
+ enum dirty_type t = DIRTY_HOT_DATA;
+
+ dirty_type = sentry->type;
+
+ if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
+ dirty_i->nr_dirty[dirty_type]++;
- if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
- dirty_i->nr_dirty[t]++;
+ /* Only one bitmap should be set */
+ for (; t <= DIRTY_COLD_NODE; t++) {
+ if (t == dirty_type)
+ continue;
+ if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
+ dirty_i->nr_dirty[t]--;
+ }
}
}
@@ -74,11 +76,12 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
dirty_i->nr_dirty[dirty_type]--;
if (dirty_type == DIRTY) {
- struct seg_entry *sentry = get_seg_entry(sbi, segno);
- enum dirty_type t = sentry->type;
+ enum dirty_type t = DIRTY_HOT_DATA;
- if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
- dirty_i->nr_dirty[t]--;
+ /* clear all the bitmaps */
+ for (; t <= DIRTY_COLD_NODE; t++)
+ if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
+ dirty_i->nr_dirty[t]--;
if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
clear_bit(GET_SECNO(sbi, segno),
@@ -139,33 +142,27 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
void clear_prefree_segments(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
+ unsigned int segno = -1;
unsigned int total_segs = TOTAL_SEGS(sbi);
- unsigned int start = 0, end = -1;
mutex_lock(&dirty_i->seglist_lock);
-
while (1) {
- int i;
- start = find_next_bit(prefree_map, total_segs, end + 1);
- if (start >= total_segs)
+ segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
+ segno + 1);
+ if (segno >= total_segs)
break;
- end = find_next_zero_bit(prefree_map, total_segs, start + 1);
-
- for (i = start; i < end; i++)
- clear_bit(i, prefree_map);
- dirty_i->nr_dirty[PRE] -= end - start;
-
- if (!test_opt(sbi, DISCARD))
- continue;
-
- blkdev_issue_discard(sbi->sb->s_bdev,
- START_BLOCK(sbi, start) <<
- sbi->log_sectors_per_block,
- (1 << (sbi->log_sectors_per_block +
- sbi->log_blocks_per_seg)) * (end - start),
- GFP_NOFS, 0);
+ if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE]))
+ dirty_i->nr_dirty[PRE]--;
+
+ /* Let's use trim */
+ if (test_opt(sbi, DISCARD))
+ blkdev_issue_discard(sbi->sb->s_bdev,
+ START_BLOCK(sbi, segno) <<
+ sbi->log_sectors_per_block,
+ 1 << (sbi->log_sectors_per_block +
+ sbi->log_blocks_per_seg),
+ GFP_NOFS, 0);
}
mutex_unlock(&dirty_i->seglist_lock);
}
@@ -198,7 +195,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
new_vblocks = se->valid_blocks + del;
offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1);
- f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) ||
+ BUG_ON((new_vblocks >> (sizeof(unsigned short) << 3) ||
(new_vblocks > sbi->blocks_per_seg)));
se->valid_blocks = new_vblocks;
@@ -238,7 +235,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
unsigned int segno = GET_SEGNO(sbi, addr);
struct sit_info *sit_i = SIT_I(sbi);
- f2fs_bug_on(addr == NULL_ADDR);
+ BUG_ON(addr == NULL_ADDR);
if (addr == NEW_ADDR)
return;
@@ -270,8 +267,9 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
*/
int npages_for_summary_flush(struct f2fs_sb_info *sbi)
{
+ int total_size_bytes = 0;
int valid_sum_count = 0;
- int i, sum_in_page;
+ int i, sum_space;
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
if (sbi->ckpt->alloc_type[i] == SSR)
@@ -280,12 +278,13 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi)
valid_sum_count += curseg_blkoff(sbi, i);
}
- sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
- SUM_FOOTER_SIZE) / SUMMARY_SIZE;
- if (valid_sum_count <= sum_in_page)
+ total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1)
+ + sizeof(struct nat_journal) + 2
+ + sizeof(struct sit_journal) + 2;
+ sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE;
+ if (total_size_bytes < sum_space)
return 1;
- else if ((valid_sum_count - sum_in_page) <=
- (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
+ else if (total_size_bytes < 2 * sum_space)
return 2;
return 3;
}
@@ -351,7 +350,7 @@ find_other_zone:
if (dir == ALLOC_RIGHT) {
secno = find_next_zero_bit(free_i->free_secmap,
TOTAL_SECS(sbi), 0);
- f2fs_bug_on(secno >= TOTAL_SECS(sbi));
+ BUG_ON(secno >= TOTAL_SECS(sbi));
} else {
go_left = 1;
left_start = hint - 1;
@@ -367,7 +366,7 @@ find_other_zone:
}
left_start = find_next_zero_bit(free_i->free_secmap,
TOTAL_SECS(sbi), 0);
- f2fs_bug_on(left_start >= TOTAL_SECS(sbi));
+ BUG_ON(left_start >= TOTAL_SECS(sbi));
break;
}
secno = left_start;
@@ -406,7 +405,7 @@ skip_left:
}
got_it:
/* set it as dirty segment in free segmap */
- f2fs_bug_on(test_bit(segno, free_i->free_segmap));
+ BUG_ON(test_bit(segno, free_i->free_segmap));
__set_inuse(sbi, segno);
*newseg = segno;
write_unlock(&free_i->segmap_lock);
@@ -551,8 +550,9 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
change_curseg(sbi, type, true);
else
new_curseg(sbi, type, false);
-
- stat_inc_seg_type(sbi, curseg);
+#ifdef CONFIG_F2FS_STAT_FS
+ sbi->segment_count[curseg->alloc_type]++;
+#endif
}
void allocate_new_segments(struct f2fs_sb_info *sbi)
@@ -597,11 +597,6 @@ static void f2fs_end_io_write(struct bio *bio, int err)
if (p->is_sync)
complete(p->wait);
-
- if (!get_pages(p->sbi, F2FS_WRITEBACK) &&
- !list_empty(&p->sbi->cp_wait.task_list))
- wake_up(&p->sbi->cp_wait);
-
kfree(p);
bio_put(bio);
}
@@ -662,7 +657,6 @@ static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
block_t blk_addr, enum page_type type)
{
struct block_device *bdev = sbi->sb->s_bdev;
- int bio_blocks;
verify_block_addr(sbi, blk_addr);
@@ -682,8 +676,7 @@ retry:
goto retry;
}
- bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
- sbi->bio[type] = f2fs_bio_alloc(bdev, bio_blocks);
+ sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi));
sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
sbi->bio[type]->bi_private = priv;
/*
@@ -778,7 +771,7 @@ static int __get_segment_type(struct page *page, enum page_type p_type)
return __get_segment_type_4(page, p_type);
}
/* NR_CURSEG_TYPE(6) logs by default */
- f2fs_bug_on(sbi->active_logs != NR_CURSEG_TYPE);
+ BUG_ON(sbi->active_logs != NR_CURSEG_TYPE);
return __get_segment_type_6(page, p_type);
}
@@ -808,8 +801,9 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
mutex_lock(&sit_i->sentry_lock);
__refresh_next_blkoff(sbi, curseg);
-
- stat_inc_block_count(sbi, curseg);
+#ifdef CONFIG_F2FS_STAT_FS
+ sbi->block_count[curseg->alloc_type]++;
+#endif
/*
* SIT information should be updated before segment allocation,
@@ -855,7 +849,7 @@ void write_data_page(struct inode *inode, struct page *page,
struct f2fs_summary sum;
struct node_info ni;
- f2fs_bug_on(old_blkaddr == NULL_ADDR);
+ BUG_ON(old_blkaddr == NULL_ADDR);
get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
@@ -1128,6 +1122,8 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
SUM_JOURNAL_SIZE);
written_size += SUM_JOURNAL_SIZE;
+ set_page_dirty(page);
+
/* Step 3: write summary entries */
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
unsigned short blkoff;
@@ -1146,20 +1142,18 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
summary = (struct f2fs_summary *)(kaddr + written_size);
*summary = seg_i->sum_blk->entries[j];
written_size += SUMMARY_SIZE;
+ set_page_dirty(page);
if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
SUM_FOOTER_SIZE)
continue;
- set_page_dirty(page);
f2fs_put_page(page, 1);
page = NULL;
}
}
- if (page) {
- set_page_dirty(page);
+ if (page)
f2fs_put_page(page, 1);
- }
}
static void write_normal_summaries(struct f2fs_sb_info *sbi,
@@ -1245,7 +1239,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
/* get current sit block page without lock */
src_page = get_meta_page(sbi, src_off);
dst_page = grab_meta_page(sbi, dst_off);
- f2fs_bug_on(PageDirty(src_page));
+ BUG_ON(PageDirty(src_page));
src_addr = page_address(src_page);
dst_addr = page_address(dst_page);
@@ -1277,9 +1271,9 @@ static bool flush_sits_in_journal(struct f2fs_sb_info *sbi)
__mark_sit_entry_dirty(sbi, segno);
}
update_sits_in_cursum(sum, -sits_in_cursum(sum));
- return true;
+ return 1;
}
- return false;
+ return 0;
}
/*
@@ -1643,7 +1637,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
- sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS;
err = build_sit_info(sbi);
if (err)
@@ -1751,8 +1744,6 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
void destroy_segment_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_sm_info *sm_info = SM_I(sbi);
- if (!sm_info)
- return;
destroy_dirty_segmap(sbi);
destroy_curseg(sbi);
destroy_free_segmap(sbi);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 269f690..bdd10ea 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -14,8 +14,6 @@
#define NULL_SEGNO ((unsigned int)(~0))
#define NULL_SECNO ((unsigned int)(~0))
-#define DEF_RECLAIM_PREFREE_SEGMENTS 100 /* 200MB of prefree segments */
-
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
@@ -92,8 +90,6 @@
(blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE))
#define SECTOR_TO_BLOCK(sbi, sectors) \
(sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE))
-#define MAX_BIO_BLOCKS(max_hw_blocks) \
- (min((int)max_hw_blocks, BIO_MAX_PAGES))
/* during checkpoint, bio_private is used to synchronize the last bio */
struct bio_private {
@@ -474,11 +470,6 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
reserved_sections(sbi)));
}
-static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
-{
- return (prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments);
-}
-
static inline int utilization(struct f2fs_sb_info *sbi)
{
return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count);
@@ -522,13 +513,16 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type)
return curseg->next_blkoff;
}
-#ifdef CONFIG_F2FS_CHECK_FS
static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
{
unsigned int end_segno = SM_I(sbi)->segment_count - 1;
BUG_ON(segno > end_segno);
}
+/*
+ * This function is used for only debugging.
+ * NOTE: In future, we have to remove this function.
+ */
static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
{
struct f2fs_sm_info *sm_info = SM_I(sbi);
@@ -547,9 +541,8 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
{
struct f2fs_sm_info *sm_info = SM_I(sbi);
unsigned int end_segno = sm_info->segment_count - 1;
- bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
int valid_blocks = 0;
- int cur_pos = 0, next_pos;
+ int i;
/* check segment usage */
BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg);
@@ -558,26 +551,11 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
BUG_ON(segno > end_segno);
/* check bitmap with valid block count */
- do {
- if (is_valid) {
- next_pos = find_next_zero_bit_le(&raw_sit->valid_map,
- sbi->blocks_per_seg,
- cur_pos);
- valid_blocks += next_pos - cur_pos;
- } else
- next_pos = find_next_bit_le(&raw_sit->valid_map,
- sbi->blocks_per_seg,
- cur_pos);
- cur_pos = next_pos;
- is_valid = !is_valid;
- } while (cur_pos < sbi->blocks_per_seg);
+ for (i = 0; i < sbi->blocks_per_seg; i++)
+ if (f2fs_test_bit(i, raw_sit->valid_map))
+ valid_blocks++;
BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
}
-#else
-#define check_seg_range(sbi, segno)
-#define verify_block_addr(sbi, blk_addr)
-#define check_block_count(sbi, segno, raw_sit)
-#endif
static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
unsigned int start)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index bafff72..13d0a0f 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -43,9 +43,7 @@ enum {
Opt_disable_roll_forward,
Opt_discard,
Opt_noheap,
- Opt_user_xattr,
Opt_nouser_xattr,
- Opt_acl,
Opt_noacl,
Opt_active_logs,
Opt_disable_ext_identify,
@@ -58,9 +56,7 @@ static match_table_t f2fs_tokens = {
{Opt_disable_roll_forward, "disable_roll_forward"},
{Opt_discard, "discard"},
{Opt_noheap, "no_heap"},
- {Opt_user_xattr, "user_xattr"},
{Opt_nouser_xattr, "nouser_xattr"},
- {Opt_acl, "acl"},
{Opt_noacl, "noacl"},
{Opt_active_logs, "active_logs=%u"},
{Opt_disable_ext_identify, "disable_ext_identify"},
@@ -69,40 +65,24 @@ static match_table_t f2fs_tokens = {
};
/* Sysfs support for f2fs */
-enum {
- GC_THREAD, /* struct f2fs_gc_thread */
- SM_INFO, /* struct f2fs_sm_info */
-};
-
struct f2fs_attr {
struct attribute attr;
ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
const char *, size_t);
- int struct_type;
int offset;
};
-static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
-{
- if (struct_type == GC_THREAD)
- return (unsigned char *)sbi->gc_thread;
- else if (struct_type == SM_INFO)
- return (unsigned char *)SM_I(sbi);
- return NULL;
-}
-
static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
- unsigned char *ptr = NULL;
+ struct f2fs_gc_kthread *gc_kth = sbi->gc_thread;
unsigned int *ui;
- ptr = __struct_ptr(sbi, a->struct_type);
- if (!ptr)
+ if (!gc_kth)
return -EINVAL;
- ui = (unsigned int *)(ptr + a->offset);
+ ui = (unsigned int *)(((char *)gc_kth) + a->offset);
return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
}
@@ -111,16 +91,15 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
struct f2fs_sb_info *sbi,
const char *buf, size_t count)
{
- unsigned char *ptr;
+ struct f2fs_gc_kthread *gc_kth = sbi->gc_thread;
unsigned long t;
unsigned int *ui;
ssize_t ret;
- ptr = __struct_ptr(sbi, a->struct_type);
- if (!ptr)
+ if (!gc_kth)
return -EINVAL;
- ui = (unsigned int *)(ptr + a->offset);
+ ui = (unsigned int *)(((char *)gc_kth) + a->offset);
ret = kstrtoul(skip_spaces(buf), 0, &t);
if (ret < 0)
@@ -156,25 +135,21 @@ static void f2fs_sb_release(struct kobject *kobj)
complete(&sbi->s_kobj_unregister);
}
-#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
+#define F2FS_ATTR_OFFSET(_name, _mode, _show, _store, _elname) \
static struct f2fs_attr f2fs_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
.show = _show, \
.store = _store, \
- .struct_type = _struct_type, \
- .offset = _offset \
+ .offset = offsetof(struct f2fs_gc_kthread, _elname), \
}
-#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \
- F2FS_ATTR_OFFSET(struct_type, name, 0644, \
- f2fs_sbi_show, f2fs_sbi_store, \
- offsetof(struct struct_name, elname))
+#define F2FS_RW_ATTR(name, elname) \
+ F2FS_ATTR_OFFSET(name, 0644, f2fs_sbi_show, f2fs_sbi_store, elname)
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
+F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time);
+F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time);
+F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time);
+F2FS_RW_ATTR(gc_idle, gc_idle);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
@@ -182,7 +157,6 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_max_sleep_time),
ATTR_LIST(gc_no_gc_sleep_time),
ATTR_LIST(gc_idle),
- ATTR_LIST(reclaim_segments),
NULL,
};
@@ -263,9 +237,6 @@ static int parse_options(struct super_block *sb, char *options)
set_opt(sbi, NOHEAP);
break;
#ifdef CONFIG_F2FS_FS_XATTR
- case Opt_user_xattr:
- set_opt(sbi, XATTR_USER);
- break;
case Opt_nouser_xattr:
clear_opt(sbi, XATTR_USER);
break;
@@ -273,10 +244,6 @@ static int parse_options(struct super_block *sb, char *options)
set_opt(sbi, INLINE_XATTR);
break;
#else
- case Opt_user_xattr:
- f2fs_msg(sb, KERN_INFO,
- "user_xattr options not supported");
- break;
case Opt_nouser_xattr:
f2fs_msg(sb, KERN_INFO,
"nouser_xattr options not supported");
@@ -287,16 +254,10 @@ static int parse_options(struct super_block *sb, char *options)
break;
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
- case Opt_acl:
- set_opt(sbi, POSIX_ACL);
- break;
case Opt_noacl:
clear_opt(sbi, POSIX_ACL);
break;
#else
- case Opt_acl:
- f2fs_msg(sb, KERN_INFO, "acl options not supported");
- break;
case Opt_noacl:
f2fs_msg(sb, KERN_INFO, "noacl options not supported");
break;
@@ -394,9 +355,7 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_destroy_stats(sbi);
stop_gc_thread(sbi);
- /* We don't need to do checkpoint when it's clean */
- if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES))
- write_checkpoint(sbi, true);
+ write_checkpoint(sbi, true);
iput(sbi->node_inode);
iput(sbi->meta_inode);
@@ -768,47 +727,30 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
atomic_set(&sbi->nr_pages[i], 0);
}
-/*
- * Read f2fs raw super block.
- * Because we have two copies of super block, so read the first one at first,
- * if the first one is invalid, move to read the second one.
- */
-static int read_raw_super_block(struct super_block *sb,
- struct f2fs_super_block **raw_super,
- struct buffer_head **raw_super_buf)
+static int validate_superblock(struct super_block *sb,
+ struct f2fs_super_block **raw_super,
+ struct buffer_head **raw_super_buf, sector_t block)
{
- int block = 0;
+ const char *super = (block == 0 ? "first" : "second");
-retry:
+ /* read f2fs raw super block */
*raw_super_buf = sb_bread(sb, block);
if (!*raw_super_buf) {
- f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
- block + 1);
- if (block == 0) {
- block++;
- goto retry;
- } else {
- return -EIO;
- }
+ f2fs_msg(sb, KERN_ERR, "unable to read %s superblock",
+ super);
+ return -EIO;
}
*raw_super = (struct f2fs_super_block *)
((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET);
/* sanity checking of raw super */
- if (sanity_check_raw_super(sb, *raw_super)) {
- brelse(*raw_super_buf);
- f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem "
- "in %dth superblock", block + 1);
- if(block == 0) {
- block++;
- goto retry;
- } else {
- return -EINVAL;
- }
- }
+ if (!sanity_check_raw_super(sb, *raw_super))
+ return 0;
- return 0;
+ f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem "
+ "in %s superblock", super);
+ return -EINVAL;
}
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
@@ -818,6 +760,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
struct buffer_head *raw_super_buf;
struct inode *root;
long err = -EINVAL;
+ int i;
/* allocate memory for f2fs-specific super block info */
sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
@@ -830,10 +773,14 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_sbi;
}
- err = read_raw_super_block(sb, &raw_super, &raw_super_buf);
- if (err)
- goto free_sbi;
-
+ err = validate_superblock(sb, &raw_super, &raw_super_buf, 0);
+ if (err) {
+ brelse(raw_super_buf);
+ /* check secondary superblock when primary failed */
+ err = validate_superblock(sb, &raw_super, &raw_super_buf, 1);
+ if (err)
+ goto free_sb_buf;
+ }
sb->s_fs_info = sbi;
/* init some FS parameters */
sbi->active_logs = NR_CURSEG_TYPE;
@@ -871,12 +818,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
mutex_init(&sbi->gc_mutex);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
+ for (i = 0; i < NR_GLOBAL_LOCKS; i++)
+ mutex_init(&sbi->fs_lock[i]);
mutex_init(&sbi->node_write);
- sbi->por_doing = false;
+ sbi->por_doing = 0;
spin_lock_init(&sbi->stat_lock);
init_rwsem(&sbi->bio_sem);
- init_rwsem(&sbi->cp_rwsem);
- init_waitqueue_head(&sbi->cp_wait);
init_sb_info(sbi);
/* get an inode for meta space */
@@ -975,12 +922,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
/* After POR, we can run background GC thread.*/
err = start_gc_thread(sbi);
if (err)
- goto free_gc;
+ goto fail;
}
err = f2fs_build_stats(sbi);
if (err)
- goto free_gc;
+ goto fail;
if (f2fs_proc_root)
sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
@@ -1006,12 +953,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
return 0;
fail:
- if (sbi->s_proc) {
- remove_proc_entry("segment_info", sbi->s_proc);
- remove_proc_entry(sb->s_id, f2fs_proc_root);
- }
- f2fs_destroy_stats(sbi);
-free_gc:
stop_gc_thread(sbi);
free_root_inode:
dput(sb->s_root);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index aa7a3f1..1ac8a5f 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -154,9 +154,6 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name,
}
#ifdef CONFIG_F2FS_FS_SECURITY
-static int __f2fs_setxattr(struct inode *inode, int name_index,
- const char *name, const void *value, size_t value_len,
- struct page *ipage);
static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
void *page)
{
@@ -164,7 +161,7 @@ static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
int err = 0;
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
- err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,
+ err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,
xattr->name, xattr->value,
xattr->value_len, (struct page *)page);
if (err < 0)
@@ -372,7 +369,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
alloc_nid_failed(sbi, new_nid);
return PTR_ERR(xpage);
}
- f2fs_bug_on(new_nid);
+ BUG_ON(new_nid);
} else {
struct dnode_of_data dn;
set_new_dnode(&dn, inode, NULL, NULL, new_nid);
@@ -472,15 +469,16 @@ cleanup:
return error;
}
-static int __f2fs_setxattr(struct inode *inode, int name_index,
- const char *name, const void *value, size_t value_len,
- struct page *ipage)
+int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
+ const void *value, size_t value_len, struct page *ipage)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_xattr_entry *here, *last;
void *base_addr;
int found, newsize;
size_t name_len;
+ int ilock;
__u32 new_hsize;
int error = -ENOMEM;
@@ -495,6 +493,10 @@ static int __f2fs_setxattr(struct inode *inode, int name_index,
if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode))
return -ERANGE;
+ f2fs_balance_fs(sbi);
+
+ ilock = mutex_lock_op(sbi);
+
base_addr = read_all_xattrs(inode, ipage);
if (!base_addr)
goto exit;
@@ -520,7 +522,7 @@ static int __f2fs_setxattr(struct inode *inode, int name_index,
*/
free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr);
if (found)
- free = free + ENTRY_SIZE(here);
+ free = free - ENTRY_SIZE(here);
if (free < newsize) {
error = -ENOSPC;
@@ -576,21 +578,7 @@ static int __f2fs_setxattr(struct inode *inode, int name_index,
else
update_inode_page(inode);
exit:
+ mutex_unlock_op(sbi, ilock);
kzfree(base_addr);
return error;
}
-
-int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
- const void *value, size_t value_len, struct page *ipage)
-{
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- int err;
-
- f2fs_balance_fs(sbi);
-
- f2fs_lock_op(sbi);
- err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage);
- f2fs_unlock_op(sbi);
-
- return err;
-}
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 7c31f4b..4241e6f 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -102,7 +102,6 @@ struct msdos_sb_info {
struct hlist_head dir_hashtable[FAT_HASH_SIZE];
unsigned int dirty; /* fs state before mount */
- struct rcu_head rcu;
};
#define FAT_CACHE_VALID 0 /* special case for valid cache */
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 854b578..0062da2 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -548,16 +548,6 @@ static void fat_set_state(struct super_block *sb,
brelse(bh);
}
-static void delayed_free(struct rcu_head *p)
-{
- struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu);
- unload_nls(sbi->nls_disk);
- unload_nls(sbi->nls_io);
- if (sbi->options.iocharset != fat_default_iocharset)
- kfree(sbi->options.iocharset);
- kfree(sbi);
-}
-
static void fat_put_super(struct super_block *sb)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
@@ -567,7 +557,14 @@ static void fat_put_super(struct super_block *sb)
iput(sbi->fsinfo_inode);
iput(sbi->fat_inode);
- call_rcu(&sbi->rcu, delayed_free);
+ unload_nls(sbi->nls_disk);
+ unload_nls(sbi->nls_io);
+
+ if (sbi->options.iocharset != fat_default_iocharset)
+ kfree(sbi->options.iocharset);
+
+ sb->s_fs_info = NULL;
+ kfree(sbi);
}
static struct kmem_cache *fat_inode_cachep;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ef68665..65343c3 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -56,7 +56,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
return -EINVAL;
}
- if (filp->f_op->check_flags)
+ if (filp->f_op && filp->f_op->check_flags)
error = filp->f_op->check_flags(arg);
if (error)
return error;
@@ -64,7 +64,8 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
/*
* ->fasync() is responsible for setting the FASYNC bit.
*/
- if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
+ if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op &&
+ filp->f_op->fasync) {
error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
if (error < 0)
goto out;
diff --git a/fs/file_table.c b/fs/file_table.c
index 5fff903..e900ca5 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -36,6 +36,8 @@ struct files_stat_struct files_stat = {
.max_files = NR_FILE
};
+DEFINE_STATIC_LGLOCK(files_lglock);
+
/* SLAB cache for file structures */
static struct kmem_cache *filp_cachep __read_mostly;
@@ -132,6 +134,7 @@ struct file *get_empty_filp(void)
return ERR_PTR(error);
}
+ INIT_LIST_HEAD(&f->f_u.fu_list);
atomic_long_set(&f->f_count, 1);
rwlock_init(&f->f_owner.lock);
spin_lock_init(&f->f_lock);
@@ -237,11 +240,11 @@ static void __fput(struct file *file)
locks_remove_flock(file);
if (unlikely(file->f_flags & FASYNC)) {
- if (file->f_op->fasync)
+ if (file->f_op && file->f_op->fasync)
file->f_op->fasync(-1, file, 0);
}
ima_file_free(file);
- if (file->f_op->release)
+ if (file->f_op && file->f_op->release)
file->f_op->release(inode, file);
security_file_free(file);
if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
@@ -301,6 +304,7 @@ void fput(struct file *file)
if (atomic_long_dec_and_test(&file->f_count)) {
struct task_struct *task = current;
+ file_sb_list_del(file);
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
init_task_work(&file->f_u.fu_rcuhead, ____fput);
if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
@@ -329,6 +333,7 @@ void __fput_sync(struct file *file)
{
if (atomic_long_dec_and_test(&file->f_count)) {
struct task_struct *task = current;
+ file_sb_list_del(file);
BUG_ON(!(task->flags & PF_KTHREAD));
__fput(file);
}
@@ -340,10 +345,129 @@ void put_filp(struct file *file)
{
if (atomic_long_dec_and_test(&file->f_count)) {
security_file_free(file);
+ file_sb_list_del(file);
file_free(file);
}
}
+static inline int file_list_cpu(struct file *file)
+{
+#ifdef CONFIG_SMP
+ return file->f_sb_list_cpu;
+#else
+ return smp_processor_id();
+#endif
+}
+
+/* helper for file_sb_list_add to reduce ifdefs */
+static inline void __file_sb_list_add(struct file *file, struct super_block *sb)
+{
+ struct list_head *list;
+#ifdef CONFIG_SMP
+ int cpu;
+ cpu = smp_processor_id();
+ file->f_sb_list_cpu = cpu;
+ list = per_cpu_ptr(sb->s_files, cpu);
+#else
+ list = &sb->s_files;
+#endif
+ list_add(&file->f_u.fu_list, list);
+}
+
+/**
+ * file_sb_list_add - add a file to the sb's file list
+ * @file: file to add
+ * @sb: sb to add it to
+ *
+ * Use this function to associate a file with the superblock of the inode it
+ * refers to.
+ */
+void file_sb_list_add(struct file *file, struct super_block *sb)
+{
+ if (likely(!(file->f_mode & FMODE_WRITE)))
+ return;
+ if (!S_ISREG(file_inode(file)->i_mode))
+ return;
+ lg_local_lock(&files_lglock);
+ __file_sb_list_add(file, sb);
+ lg_local_unlock(&files_lglock);
+}
+
+/**
+ * file_sb_list_del - remove a file from the sb's file list
+ * @file: file to remove
+ * @sb: sb to remove it from
+ *
+ * Use this function to remove a file from its superblock.
+ */
+void file_sb_list_del(struct file *file)
+{
+ if (!list_empty(&file->f_u.fu_list)) {
+ lg_local_lock_cpu(&files_lglock, file_list_cpu(file));
+ list_del_init(&file->f_u.fu_list);
+ lg_local_unlock_cpu(&files_lglock, file_list_cpu(file));
+ }
+}
+
+#ifdef CONFIG_SMP
+
+/*
+ * These macros iterate all files on all CPUs for a given superblock.
+ * files_lglock must be held globally.
+ */
+#define do_file_list_for_each_entry(__sb, __file) \
+{ \
+ int i; \
+ for_each_possible_cpu(i) { \
+ struct list_head *list; \
+ list = per_cpu_ptr((__sb)->s_files, i); \
+ list_for_each_entry((__file), list, f_u.fu_list)
+
+#define while_file_list_for_each_entry \
+ } \
+}
+
+#else
+
+#define do_file_list_for_each_entry(__sb, __file) \
+{ \
+ struct list_head *list; \
+ list = &(sb)->s_files; \
+ list_for_each_entry((__file), list, f_u.fu_list)
+
+#define while_file_list_for_each_entry \
+}
+
+#endif
+
+/**
+ * mark_files_ro - mark all files read-only
+ * @sb: superblock in question
+ *
+ * All files are marked read-only. We don't care about pending
+ * delete files so this should be used in 'force' mode only.
+ */
+void mark_files_ro(struct super_block *sb)
+{
+ struct file *f;
+
+ lg_global_lock(&files_lglock);
+ do_file_list_for_each_entry(sb, f) {
+ if (!file_count(f))
+ continue;
+ if (!(f->f_mode & FMODE_WRITE))
+ continue;
+ spin_lock(&f->f_lock);
+ f->f_mode &= ~FMODE_WRITE;
+ spin_unlock(&f->f_lock);
+ if (file_check_writeable(f) != 0)
+ continue;
+ __mnt_drop_write(f->f_path.mnt);
+ file_release_write(f);
+ } while_file_list_for_each_entry;
+ lg_global_unlock(&files_lglock);
+}
+
void __init files_init(unsigned long mempages)
{
unsigned long n;
@@ -359,5 +483,6 @@ void __init files_init(unsigned long mempages)
n = (mempages * (PAGE_SIZE / 1024)) / 10;
files_stat.max_files = max_t(unsigned long, n, NR_FILE);
files_defer_init();
+ lg_lock_init(&files_lglock, "files_lglock");
percpu_counter_init(&nr_files, 0);
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1f4a10e..9f4935b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -26,7 +26,6 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/tracepoint.h>
-#include <linux/device.h>
#include "internal.h"
/*
@@ -40,18 +39,13 @@
struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
- /*
- * Write only inodes dirtied before this time. Don't forget to set
- * older_than_this_is_set when you set this.
- */
- unsigned long older_than_this;
+ unsigned long *older_than_this;
enum writeback_sync_modes sync_mode;
unsigned int tagged_writepages:1;
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
- unsigned int older_than_this_is_set:1;
enum wb_reason reason; /* why was writeback initiated? */
struct list_head list; /* pending work list */
@@ -252,10 +246,10 @@ static int move_expired_inodes(struct list_head *delaying_queue,
int do_sb_sort = 0;
int moved = 0;
- WARN_ON_ONCE(!work->older_than_this_is_set);
while (!list_empty(delaying_queue)) {
inode = wb_inode(delaying_queue->prev);
- if (inode_dirtied_after(inode, work->older_than_this))
+ if (work->older_than_this &&
+ inode_dirtied_after(inode, *work->older_than_this))
break;
list_move(&inode->i_wb_list, &tmp);
moved++;
@@ -739,8 +733,6 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
.sync_mode = WB_SYNC_NONE,
.range_cyclic = 1,
.reason = reason,
- .older_than_this = jiffies,
- .older_than_this_is_set = 1,
};
spin_lock(&wb->list_lock);
@@ -799,13 +791,12 @@ static long wb_writeback(struct bdi_writeback *wb,
{
unsigned long wb_start = jiffies;
long nr_pages = work->nr_pages;
+ unsigned long oldest_jif;
struct inode *inode;
long progress;
- if (!work->older_than_this_is_set) {
- work->older_than_this = jiffies;
- work->older_than_this_is_set = 1;
- }
+ oldest_jif = jiffies;
+ work->older_than_this = &oldest_jif;
spin_lock(&wb->list_lock);
for (;;) {
@@ -839,10 +830,10 @@ static long wb_writeback(struct bdi_writeback *wb,
* safe.
*/
if (work->for_kupdate) {
- work->older_than_this = jiffies -
+ oldest_jif = jiffies -
msecs_to_jiffies(dirty_expire_interval * 10);
} else if (work->for_background)
- work->older_than_this = jiffies;
+ oldest_jif = jiffies;
trace_writeback_start(wb->bdi, work);
if (list_empty(&wb->b_io))
@@ -1354,21 +1345,18 @@ EXPORT_SYMBOL(try_to_writeback_inodes_sb);
/**
* sync_inodes_sb - sync sb inode pages
- * @sb: the superblock
- * @older_than_this: timestamp
+ * @sb: the superblock
*
* This function writes and waits on any dirty inode belonging to this
- * superblock that has been dirtied before given timestamp.
+ * super_block.
*/
-void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this)
+void sync_inodes_sb(struct super_block *sb)
{
DECLARE_COMPLETION_ONSTACK(done);
struct wb_writeback_work work = {
.sb = sb,
.sync_mode = WB_SYNC_ALL,
.nr_pages = LONG_MAX,
- .older_than_this = older_than_this,
- .older_than_this_is_set = 1,
.range_cyclic = 0,
.done = &done,
.reason = WB_REASON_SYNC,
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 7dca743..d8ac61d 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -161,6 +161,6 @@ EXPORT_SYMBOL(current_umask);
struct fs_struct init_fs = {
.users = 1,
.lock = __SPIN_LOCK_UNLOCKED(init_fs.lock),
- .seq = SEQCNT_ZERO(init_fs.seq),
+ .seq = SEQCNT_ZERO,
.umask = 0022,
};
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 29d7feb..b2a86e3 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -58,16 +58,15 @@ void fscache_cookie_init_once(void *_cookie)
struct fscache_cookie *__fscache_acquire_cookie(
struct fscache_cookie *parent,
const struct fscache_cookie_def *def,
- void *netfs_data,
- bool enable)
+ void *netfs_data)
{
struct fscache_cookie *cookie;
BUG_ON(!def);
- _enter("{%s},{%s},%p,%u",
+ _enter("{%s},{%s},%p",
parent ? (char *) parent->def->name : "<no-parent>",
- def->name, netfs_data, enable);
+ def->name, netfs_data);
fscache_stat(&fscache_n_acquires);
@@ -107,7 +106,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
cookie->def = def;
cookie->parent = parent;
cookie->netfs_data = netfs_data;
- cookie->flags = (1 << FSCACHE_COOKIE_NO_DATA_YET);
+ cookie->flags = 0;
/* radix tree insertion won't use the preallocation pool unless it's
* told it may not wait */
@@ -125,22 +124,16 @@ struct fscache_cookie *__fscache_acquire_cookie(
break;
}
- if (enable) {
- /* if the object is an index then we need do nothing more here
- * - we create indices on disk when we need them as an index
- * may exist in multiple caches */
- if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
- if (fscache_acquire_non_index_cookie(cookie) == 0) {
- set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
- } else {
- atomic_dec(&parent->n_children);
- __fscache_cookie_put(cookie);
- fscache_stat(&fscache_n_acquires_nobufs);
- _leave(" = NULL");
- return NULL;
- }
- } else {
- set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
+ /* if the object is an index then we need do nothing more here - we
+ * create indices on disk when we need them as an index may exist in
+ * multiple caches */
+ if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
+ if (fscache_acquire_non_index_cookie(cookie) < 0) {
+ atomic_dec(&parent->n_children);
+ __fscache_cookie_put(cookie);
+ fscache_stat(&fscache_n_acquires_nobufs);
+ _leave(" = NULL");
+ return NULL;
}
}
@@ -151,39 +144,6 @@ struct fscache_cookie *__fscache_acquire_cookie(
EXPORT_SYMBOL(__fscache_acquire_cookie);
/*
- * Enable a cookie to permit it to accept new operations.
- */
-void __fscache_enable_cookie(struct fscache_cookie *cookie,
- bool (*can_enable)(void *data),
- void *data)
-{
- _enter("%p", cookie);
-
- wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
- fscache_wait_bit, TASK_UNINTERRUPTIBLE);
-
- if (test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
- goto out_unlock;
-
- if (can_enable && !can_enable(data)) {
- /* The netfs decided it didn't want to enable after all */
- } else if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
- /* Wait for outstanding disablement to complete */
- __fscache_wait_on_invalidate(cookie);
-
- if (fscache_acquire_non_index_cookie(cookie) == 0)
- set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
- } else {
- set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
- }
-
-out_unlock:
- clear_bit_unlock(FSCACHE_COOKIE_ENABLEMENT_LOCK, &cookie->flags);
- wake_up_bit(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK);
-}
-EXPORT_SYMBOL(__fscache_enable_cookie);
-
-/*
* acquire a non-index cookie
* - this must make sure the index chain is instantiated and instantiate the
* object representation too
@@ -197,7 +157,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
_enter("");
- set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags);
+ cookie->flags = 1 << FSCACHE_COOKIE_UNAVAILABLE;
/* now we need to see whether the backing objects for this cookie yet
* exist, if not there'll be nothing to search */
@@ -220,7 +180,9 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
_debug("cache %s", cache->tag->name);
- set_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags);
+ cookie->flags =
+ (1 << FSCACHE_COOKIE_LOOKING_UP) |
+ (1 << FSCACHE_COOKIE_NO_DATA_YET);
/* ask the cache to allocate objects for this cookie and its parent
* chain */
@@ -436,8 +398,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie)
if (!hlist_empty(&cookie->backing_objects)) {
spin_lock(&cookie->lock);
- if (fscache_cookie_enabled(cookie) &&
- !hlist_empty(&cookie->backing_objects) &&
+ if (!hlist_empty(&cookie->backing_objects) &&
!test_and_set_bit(FSCACHE_COOKIE_INVALIDATING,
&cookie->flags)) {
object = hlist_entry(cookie->backing_objects.first,
@@ -491,14 +452,10 @@ void __fscache_update_cookie(struct fscache_cookie *cookie)
spin_lock(&cookie->lock);
- if (fscache_cookie_enabled(cookie)) {
- /* update the index entry on disk in each cache backing this
- * cookie.
- */
- hlist_for_each_entry(object,
- &cookie->backing_objects, cookie_link) {
- fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE);
- }
+ /* update the index entry on disk in each cache backing this cookie */
+ hlist_for_each_entry(object,
+ &cookie->backing_objects, cookie_link) {
+ fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE);
}
spin_unlock(&cookie->lock);
@@ -507,80 +464,15 @@ void __fscache_update_cookie(struct fscache_cookie *cookie)
EXPORT_SYMBOL(__fscache_update_cookie);
/*
- * Disable a cookie to stop it from accepting new requests from the netfs.
- */
-void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
-{
- struct fscache_object *object;
- bool awaken = false;
-
- _enter("%p,%u", cookie, invalidate);
-
- ASSERTCMP(atomic_read(&cookie->n_active), >, 0);
-
- if (atomic_read(&cookie->n_children) != 0) {
- printk(KERN_ERR "FS-Cache: Cookie '%s' still has children\n",
- cookie->def->name);
- BUG();
- }
-
- wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
- fscache_wait_bit, TASK_UNINTERRUPTIBLE);
- if (!test_and_clear_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
- goto out_unlock_enable;
-
- /* If the cookie is being invalidated, wait for that to complete first
- * so that we can reuse the flag.
- */
- __fscache_wait_on_invalidate(cookie);
-
- /* Dispose of the backing objects */
- set_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags);
-
- spin_lock(&cookie->lock);
- if (!hlist_empty(&cookie->backing_objects)) {
- hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) {
- if (invalidate)
- set_bit(FSCACHE_OBJECT_RETIRED, &object->flags);
- fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL);
- }
- } else {
- if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags))
- awaken = true;
- }
- spin_unlock(&cookie->lock);
- if (awaken)
- wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING);
-
- /* Wait for cessation of activity requiring access to the netfs (when
- * n_active reaches 0). This makes sure outstanding reads and writes
- * have completed.
- */
- if (!atomic_dec_and_test(&cookie->n_active))
- wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t,
- TASK_UNINTERRUPTIBLE);
-
- /* Reset the cookie state if it wasn't relinquished */
- if (!test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags)) {
- atomic_inc(&cookie->n_active);
- set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);
- }
-
-out_unlock_enable:
- clear_bit_unlock(FSCACHE_COOKIE_ENABLEMENT_LOCK, &cookie->flags);
- wake_up_bit(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK);
- _leave("");
-}
-EXPORT_SYMBOL(__fscache_disable_cookie);
-
-/*
* release a cookie back to the cache
* - the object will be marked as recyclable on disk if retire is true
* - all dependents of this cookie must have already been unregistered
* (indices/files/pages)
*/
-void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
+void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire)
{
+ struct fscache_object *object;
+
fscache_stat(&fscache_n_relinquishes);
if (retire)
fscache_stat(&fscache_n_relinquishes_retire);
@@ -595,10 +487,31 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
cookie, cookie->def->name, cookie->netfs_data,
atomic_read(&cookie->n_active), retire);
+ ASSERTCMP(atomic_read(&cookie->n_active), >, 0);
+
+ if (atomic_read(&cookie->n_children) != 0) {
+ printk(KERN_ERR "FS-Cache: Cookie '%s' still has children\n",
+ cookie->def->name);
+ BUG();
+ }
+
/* No further netfs-accessing operations on this cookie permitted */
set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags);
+ if (retire)
+ set_bit(FSCACHE_COOKIE_RETIRED, &cookie->flags);
- __fscache_disable_cookie(cookie, retire);
+ spin_lock(&cookie->lock);
+ hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) {
+ fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL);
+ }
+ spin_unlock(&cookie->lock);
+
+ /* Wait for cessation of activity requiring access to the netfs (when
+ * n_active reaches 0).
+ */
+ if (!atomic_dec_and_test(&cookie->n_active))
+ wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t,
+ TASK_UNINTERRUPTIBLE);
/* Clear pointers back to the netfs */
cookie->netfs_data = NULL;
@@ -655,7 +568,6 @@ int __fscache_check_consistency(struct fscache_cookie *cookie)
{
struct fscache_operation *op;
struct fscache_object *object;
- bool wake_cookie = false;
int ret;
_enter("%p,", cookie);
@@ -679,8 +591,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie)
spin_lock(&cookie->lock);
- if (!fscache_cookie_enabled(cookie) ||
- hlist_empty(&cookie->backing_objects))
+ if (hlist_empty(&cookie->backing_objects))
goto inconsistent;
object = hlist_entry(cookie->backing_objects.first,
struct fscache_object, cookie_link);
@@ -689,7 +600,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie)
op->debug_id = atomic_inc_return(&fscache_op_debug_id);
- __fscache_use_cookie(cookie);
+ atomic_inc(&cookie->n_active);
if (fscache_submit_op(object, op) < 0)
goto submit_failed;
@@ -711,11 +622,9 @@ int __fscache_check_consistency(struct fscache_cookie *cookie)
return ret;
submit_failed:
- wake_cookie = __fscache_unuse_cookie(cookie);
+ atomic_dec(&cookie->n_active);
inconsistent:
spin_unlock(&cookie->lock);
- if (wake_cookie)
- __fscache_wake_unused_cookie(cookie);
kfree(op);
_leave(" = -ESTALE");
return -ESTALE;
diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c
index 5a117df..10a2ade 100644
--- a/fs/fscache/fsdef.c
+++ b/fs/fscache/fsdef.c
@@ -59,7 +59,6 @@ struct fscache_cookie fscache_fsdef_index = {
.lock = __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock),
.backing_objects = HLIST_HEAD_INIT,
.def = &fscache_fsdef_index_def,
- .flags = 1 << FSCACHE_COOKIE_ENABLED,
};
EXPORT_SYMBOL(fscache_fsdef_index);
diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c
index 989f394..b1bb611 100644
--- a/fs/fscache/netfs.c
+++ b/fs/fscache/netfs.c
@@ -45,7 +45,6 @@ int __fscache_register_netfs(struct fscache_netfs *netfs)
netfs->primary_index->def = &fscache_fsdef_netfs_def;
netfs->primary_index->parent = &fscache_fsdef_index;
netfs->primary_index->netfs_data = netfs;
- netfs->primary_index->flags = 1 << FSCACHE_COOKIE_ENABLED;
atomic_inc(&netfs->primary_index->parent->usage);
atomic_inc(&netfs->primary_index->parent->n_children);
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 53d35c5..86d75a6 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -495,7 +495,6 @@ void fscache_object_lookup_negative(struct fscache_object *object)
* returning ENODATA.
*/
set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);
- clear_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags);
_debug("wake up lookup %p", &cookie->flags);
clear_bit_unlock(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags);
@@ -528,7 +527,6 @@ void fscache_obtained_object(struct fscache_object *object)
/* We do (presumably) have data */
clear_bit_unlock(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);
- clear_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags);
/* Allow write requests to begin stacking up and read requests
* to begin shovelling data.
@@ -681,8 +679,7 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob
*/
spin_lock(&cookie->lock);
hlist_del_init(&object->cookie_link);
- if (hlist_empty(&cookie->backing_objects) &&
- test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags))
+ if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags))
awaken = true;
spin_unlock(&cookie->lock);
@@ -799,7 +796,7 @@ void fscache_enqueue_object(struct fscache_object *object)
*/
bool fscache_object_sleep_till_congested(signed long *timeoutp)
{
- wait_queue_head_t *cong_wq = this_cpu_ptr(&fscache_object_cong_wait);
+ wait_queue_head_t *cong_wq = &__get_cpu_var(fscache_object_cong_wait);
DEFINE_WAIT(wait);
if (fscache_object_congested())
@@ -930,7 +927,7 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj
*/
if (!fscache_use_cookie(object)) {
ASSERT(object->cookie->stores.rnode == NULL);
- set_bit(FSCACHE_OBJECT_RETIRED, &object->flags);
+ set_bit(FSCACHE_COOKIE_RETIRED, &cookie->flags);
_leave(" [no cookie]");
return transit_to(KILL_OBJECT);
}
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 7f5c658..73899c1 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -163,10 +163,12 @@ static void fscache_attr_changed_op(struct fscache_operation *op)
fscache_stat(&fscache_n_attr_changed_calls);
- if (fscache_object_is_active(object)) {
+ if (fscache_object_is_active(object) &&
+ fscache_use_cookie(object)) {
fscache_stat(&fscache_n_cop_attr_changed);
ret = object->cache->ops->attr_changed(object);
fscache_stat_d(&fscache_n_cop_attr_changed);
+ fscache_unuse_cookie(object);
if (ret < 0)
fscache_abort_object(object);
}
@@ -182,7 +184,6 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
{
struct fscache_operation *op;
struct fscache_object *object;
- bool wake_cookie;
_enter("%p", cookie);
@@ -198,19 +199,15 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
}
fscache_operation_init(op, fscache_attr_changed_op, NULL);
- op->flags = FSCACHE_OP_ASYNC |
- (1 << FSCACHE_OP_EXCLUSIVE) |
- (1 << FSCACHE_OP_UNUSE_COOKIE);
+ op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE);
spin_lock(&cookie->lock);
- if (!fscache_cookie_enabled(cookie) ||
- hlist_empty(&cookie->backing_objects))
+ if (hlist_empty(&cookie->backing_objects))
goto nobufs;
object = hlist_entry(cookie->backing_objects.first,
struct fscache_object, cookie_link);
- __fscache_use_cookie(cookie);
if (fscache_submit_exclusive_op(object, op) < 0)
goto nobufs;
spin_unlock(&cookie->lock);
@@ -220,11 +217,8 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
return 0;
nobufs:
- wake_cookie = __fscache_unuse_cookie(cookie);
spin_unlock(&cookie->lock);
kfree(op);
- if (wake_cookie)
- __fscache_wake_unused_cookie(cookie);
fscache_stat(&fscache_n_attr_changed_nobufs);
_leave(" = %d", -ENOBUFS);
return -ENOBUFS;
@@ -269,6 +263,7 @@ static struct fscache_retrieval *fscache_alloc_retrieval(
}
fscache_operation_init(&op->op, NULL, fscache_release_retrieval_op);
+ atomic_inc(&cookie->n_active);
op->op.flags = FSCACHE_OP_MYTHREAD |
(1UL << FSCACHE_OP_WAITING) |
(1UL << FSCACHE_OP_UNUSE_COOKIE);
@@ -389,7 +384,6 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
{
struct fscache_retrieval *op;
struct fscache_object *object;
- bool wake_cookie = false;
int ret;
_enter("%p,%p,,,", cookie, page);
@@ -411,7 +405,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
return -ERESTARTSYS;
op = fscache_alloc_retrieval(cookie, page->mapping,
- end_io_func, context);
+ end_io_func,context);
if (!op) {
_leave(" = -ENOMEM");
return -ENOMEM;
@@ -420,15 +414,13 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
spin_lock(&cookie->lock);
- if (!fscache_cookie_enabled(cookie) ||
- hlist_empty(&cookie->backing_objects))
+ if (hlist_empty(&cookie->backing_objects))
goto nobufs_unlock;
object = hlist_entry(cookie->backing_objects.first,
struct fscache_object, cookie_link);
ASSERT(test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags));
- __fscache_use_cookie(cookie);
atomic_inc(&object->n_reads);
__set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
@@ -483,11 +475,9 @@ error:
nobufs_unlock_dec:
atomic_dec(&object->n_reads);
- wake_cookie = __fscache_unuse_cookie(cookie);
nobufs_unlock:
spin_unlock(&cookie->lock);
- if (wake_cookie)
- __fscache_wake_unused_cookie(cookie);
+ atomic_dec(&cookie->n_active);
kfree(op);
nobufs:
fscache_stat(&fscache_n_retrievals_nobufs);
@@ -524,7 +514,6 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
{
struct fscache_retrieval *op;
struct fscache_object *object;
- bool wake_cookie = false;
int ret;
_enter("%p,,%d,,,", cookie, *nr_pages);
@@ -553,13 +542,11 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
spin_lock(&cookie->lock);
- if (!fscache_cookie_enabled(cookie) ||
- hlist_empty(&cookie->backing_objects))
+ if (hlist_empty(&cookie->backing_objects))
goto nobufs_unlock;
object = hlist_entry(cookie->backing_objects.first,
struct fscache_object, cookie_link);
- __fscache_use_cookie(cookie);
atomic_inc(&object->n_reads);
__set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
@@ -614,12 +601,10 @@ error:
nobufs_unlock_dec:
atomic_dec(&object->n_reads);
- wake_cookie = __fscache_unuse_cookie(cookie);
nobufs_unlock:
spin_unlock(&cookie->lock);
+ atomic_dec(&cookie->n_active);
kfree(op);
- if (wake_cookie)
- __fscache_wake_unused_cookie(cookie);
nobufs:
fscache_stat(&fscache_n_retrievals_nobufs);
_leave(" = -ENOBUFS");
@@ -641,7 +626,6 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
{
struct fscache_retrieval *op;
struct fscache_object *object;
- bool wake_cookie = false;
int ret;
_enter("%p,%p,,,", cookie, page);
@@ -669,15 +653,13 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
spin_lock(&cookie->lock);
- if (!fscache_cookie_enabled(cookie) ||
- hlist_empty(&cookie->backing_objects))
+ if (hlist_empty(&cookie->backing_objects))
goto nobufs_unlock;
object = hlist_entry(cookie->backing_objects.first,
struct fscache_object, cookie_link);
- __fscache_use_cookie(cookie);
if (fscache_submit_op(object, &op->op) < 0)
- goto nobufs_unlock_dec;
+ goto nobufs_unlock;
spin_unlock(&cookie->lock);
fscache_stat(&fscache_n_alloc_ops);
@@ -707,13 +689,10 @@ error:
_leave(" = %d", ret);
return ret;
-nobufs_unlock_dec:
- wake_cookie = __fscache_unuse_cookie(cookie);
nobufs_unlock:
spin_unlock(&cookie->lock);
+ atomic_dec(&cookie->n_active);
kfree(op);
- if (wake_cookie)
- __fscache_wake_unused_cookie(cookie);
nobufs:
fscache_stat(&fscache_n_allocs_nobufs);
_leave(" = -ENOBUFS");
@@ -910,7 +889,6 @@ int __fscache_write_page(struct fscache_cookie *cookie,
{
struct fscache_storage *op;
struct fscache_object *object;
- bool wake_cookie = false;
int ret;
_enter("%p,%x,", cookie, (u32) page->flags);
@@ -942,8 +920,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
ret = -ENOBUFS;
spin_lock(&cookie->lock);
- if (!fscache_cookie_enabled(cookie) ||
- hlist_empty(&cookie->backing_objects))
+ if (hlist_empty(&cookie->backing_objects))
goto nobufs;
object = hlist_entry(cookie->backing_objects.first,
struct fscache_object, cookie_link);
@@ -980,7 +957,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
op->op.debug_id = atomic_inc_return(&fscache_op_debug_id);
op->store_limit = object->store_limit;
- __fscache_use_cookie(cookie);
+ atomic_inc(&cookie->n_active);
if (fscache_submit_op(object, &op->op) < 0)
goto submit_failed;
@@ -1007,10 +984,10 @@ already_pending:
return 0;
submit_failed:
+ atomic_dec(&cookie->n_active);
spin_lock(&cookie->stores_lock);
radix_tree_delete(&cookie->stores, page->index);
spin_unlock(&cookie->stores_lock);
- wake_cookie = __fscache_unuse_cookie(cookie);
page_cache_release(page);
ret = -ENOBUFS;
goto nobufs;
@@ -1022,8 +999,6 @@ nobufs:
spin_unlock(&cookie->lock);
radix_tree_preload_end();
kfree(op);
- if (wake_cookie)
- __fscache_wake_unused_cookie(cookie);
fscache_stat(&fscache_n_stores_nobufs);
_leave(" = -ENOBUFS");
return -ENOBUFS;
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index b96a49b..adbfd66 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -473,7 +473,7 @@ err:
static void cuse_fc_release(struct fuse_conn *fc)
{
struct cuse_conn *cc = fc_to_cc(fc);
- kfree_rcu(cc, fc.rcu);
+ kfree(cc);
}
/**
@@ -589,14 +589,11 @@ static struct attribute *cuse_class_dev_attrs[] = {
ATTRIBUTE_GROUPS(cuse_class_dev);
static struct miscdevice cuse_miscdev = {
- .minor = CUSE_MINOR,
+ .minor = MISC_DYNAMIC_MINOR,
.name = "cuse",
.fops = &cuse_channel_fops,
};
-MODULE_ALIAS_MISCDEV(CUSE_MINOR);
-MODULE_ALIAS("devname:cuse");
-
static int __init cuse_init(void)
{
int i, rc;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c3eb2c4..b7989f2 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -342,6 +342,24 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
return err;
}
+static struct dentry *fuse_materialise_dentry(struct dentry *dentry,
+ struct inode *inode)
+{
+ struct dentry *newent;
+
+ if (inode && S_ISDIR(inode->i_mode)) {
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ mutex_lock(&fc->inst_mutex);
+ newent = d_materialise_unique(dentry, inode);
+ mutex_unlock(&fc->inst_mutex);
+ } else {
+ newent = d_materialise_unique(dentry, inode);
+ }
+
+ return newent;
+}
+
static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
unsigned int flags)
{
@@ -364,7 +382,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
if (inode && get_node_id(inode) == FUSE_ROOT_ID)
goto out_iput;
- newent = d_materialise_unique(entry, inode);
+ newent = fuse_materialise_dentry(entry, inode);
err = PTR_ERR(newent);
if (IS_ERR(newent))
goto out_err;
@@ -583,9 +601,21 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
}
kfree(forget);
- err = d_instantiate_no_diralias(entry, inode);
- if (err)
- return err;
+ if (S_ISDIR(inode->i_mode)) {
+ struct dentry *alias;
+ mutex_lock(&fc->inst_mutex);
+ alias = d_find_alias(inode);
+ if (alias) {
+ /* New directory must have moved since mkdir */
+ mutex_unlock(&fc->inst_mutex);
+ dput(alias);
+ iput(inode);
+ return -EBUSY;
+ }
+ d_instantiate(entry, inode);
+ mutex_unlock(&fc->inst_mutex);
+ } else
+ d_instantiate(entry, inode);
fuse_change_entry_timeout(entry, &outarg);
fuse_invalidate_attr(dir);
@@ -1254,7 +1284,7 @@ static int fuse_direntplus_link(struct file *file,
if (!inode)
goto out;
- alias = d_materialise_unique(dentry, inode);
+ alias = fuse_materialise_dentry(dentry, inode);
err = PTR_ERR(alias);
if (IS_ERR(alias))
goto out;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 7e70506..4598345 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -334,8 +334,7 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
BUG_ON(req->inode != inode);
curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
- if (curr_index <= index &&
- index < curr_index + req->num_pages) {
+ if (curr_index == index) {
found = true;
break;
}
@@ -1410,13 +1409,8 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
{
- int i;
-
- for (i = 0; i < req->num_pages; i++)
- __free_page(req->pages[i]);
-
- if (req->ff)
- fuse_file_put(req->ff, false);
+ __free_page(req->pages[0]);
+ fuse_file_put(req->ff, false);
}
static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
@@ -1424,34 +1418,30 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
struct inode *inode = req->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
- int i;
list_del(&req->writepages_entry);
- for (i = 0; i < req->num_pages; i++) {
- dec_bdi_stat(bdi, BDI_WRITEBACK);
- dec_zone_page_state(req->pages[i], NR_WRITEBACK_TEMP);
- bdi_writeout_inc(bdi);
- }
+ dec_bdi_stat(bdi, BDI_WRITEBACK);
+ dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP);
+ bdi_writeout_inc(bdi);
wake_up(&fi->page_waitq);
}
/* Called under fc->lock, may release and reacquire it */
-static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req,
- loff_t size)
+static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
__releases(fc->lock)
__acquires(fc->lock)
{
struct fuse_inode *fi = get_fuse_inode(req->inode);
+ loff_t size = i_size_read(req->inode);
struct fuse_write_in *inarg = &req->misc.write.in;
- __u64 data_size = req->num_pages * PAGE_CACHE_SIZE;
if (!fc->connected)
goto out_free;
- if (inarg->offset + data_size <= size) {
- inarg->size = data_size;
+ if (inarg->offset + PAGE_CACHE_SIZE <= size) {
+ inarg->size = PAGE_CACHE_SIZE;
} else if (inarg->offset < size) {
- inarg->size = size - inarg->offset;
+ inarg->size = size & (PAGE_CACHE_SIZE - 1);
} else {
/* Got truncated off completely */
goto out_free;
@@ -1482,13 +1472,12 @@ __acquires(fc->lock)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
- size_t crop = i_size_read(inode);
struct fuse_req *req;
while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
req = list_entry(fi->queued_writes.next, struct fuse_req, list);
list_del_init(&req->list);
- fuse_send_writepage(fc, req, crop);
+ fuse_send_writepage(fc, req);
}
}
@@ -1499,62 +1488,12 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
mapping_set_error(inode->i_mapping, req->out.h.error);
spin_lock(&fc->lock);
- while (req->misc.write.next) {
- struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_write_in *inarg = &req->misc.write.in;
- struct fuse_req *next = req->misc.write.next;
- req->misc.write.next = next->misc.write.next;
- next->misc.write.next = NULL;
- next->ff = fuse_file_get(req->ff);
- list_add(&next->writepages_entry, &fi->writepages);
-
- /*
- * Skip fuse_flush_writepages() to make it easy to crop requests
- * based on primary request size.
- *
- * 1st case (trivial): there are no concurrent activities using
- * fuse_set/release_nowrite. Then we're on safe side because
- * fuse_flush_writepages() would call fuse_send_writepage()
- * anyway.
- *
- * 2nd case: someone called fuse_set_nowrite and it is waiting
- * now for completion of all in-flight requests. This happens
- * rarely and no more than once per page, so this should be
- * okay.
- *
- * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle
- * of fuse_set_nowrite..fuse_release_nowrite section. The fact
- * that fuse_set_nowrite returned implies that all in-flight
- * requests were completed along with all of their secondary
- * requests. Further primary requests are blocked by negative
- * writectr. Hence there cannot be any in-flight requests and
- * no invocations of fuse_writepage_end() while we're in
- * fuse_set_nowrite..fuse_release_nowrite section.
- */
- fuse_send_writepage(fc, next, inarg->offset + inarg->size);
- }
fi->writectr--;
fuse_writepage_finish(fc, req);
spin_unlock(&fc->lock);
fuse_writepage_free(fc, req);
}
-static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
- struct fuse_inode *fi)
-{
- struct fuse_file *ff = NULL;
-
- spin_lock(&fc->lock);
- if (!WARN_ON(list_empty(&fi->write_files))) {
- ff = list_entry(fi->write_files.next, struct fuse_file,
- write_entry);
- fuse_file_get(ff);
- }
- spin_unlock(&fc->lock);
-
- return ff;
-}
-
static int fuse_writepage_locked(struct page *page)
{
struct address_space *mapping = page->mapping;
@@ -1562,8 +1501,8 @@ static int fuse_writepage_locked(struct page *page)
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_req *req;
+ struct fuse_file *ff;
struct page *tmp_page;
- int error = -ENOMEM;
set_page_writeback(page);
@@ -1576,16 +1515,16 @@ static int fuse_writepage_locked(struct page *page)
if (!tmp_page)
goto err_free;
- error = -EIO;
- req->ff = fuse_write_file_get(fc, fi);
- if (!req->ff)
- goto err_free;
+ spin_lock(&fc->lock);
+ BUG_ON(list_empty(&fi->write_files));
+ ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
+ req->ff = fuse_file_get(ff);
+ spin_unlock(&fc->lock);
- fuse_write_fill(req, req->ff, page_offset(page), 0);
+ fuse_write_fill(req, ff, page_offset(page), 0);
copy_highpage(tmp_page, page);
req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
- req->misc.write.next = NULL;
req->in.argpages = 1;
req->num_pages = 1;
req->pages[0] = tmp_page;
@@ -1611,263 +1550,19 @@ err_free:
fuse_request_free(req);
err:
end_page_writeback(page);
- return error;
+ return -ENOMEM;
}
static int fuse_writepage(struct page *page, struct writeback_control *wbc)
{
int err;
- if (fuse_page_is_writeback(page->mapping->host, page->index)) {
- /*
- * ->writepages() should be called for sync() and friends. We
- * should only get here on direct reclaim and then we are
- * allowed to skip a page which is already in flight
- */
- WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
-
- redirty_page_for_writepage(wbc, page);
- return 0;
- }
-
err = fuse_writepage_locked(page);
unlock_page(page);
return err;
}
-struct fuse_fill_wb_data {
- struct fuse_req *req;
- struct fuse_file *ff;
- struct inode *inode;
- struct page **orig_pages;
-};
-
-static void fuse_writepages_send(struct fuse_fill_wb_data *data)
-{
- struct fuse_req *req = data->req;
- struct inode *inode = data->inode;
- struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_inode *fi = get_fuse_inode(inode);
- int num_pages = req->num_pages;
- int i;
-
- req->ff = fuse_file_get(data->ff);
- spin_lock(&fc->lock);
- list_add_tail(&req->list, &fi->queued_writes);
- fuse_flush_writepages(inode);
- spin_unlock(&fc->lock);
-
- for (i = 0; i < num_pages; i++)
- end_page_writeback(data->orig_pages[i]);
-}
-
-static bool fuse_writepage_in_flight(struct fuse_req *new_req,
- struct page *page)
-{
- struct fuse_conn *fc = get_fuse_conn(new_req->inode);
- struct fuse_inode *fi = get_fuse_inode(new_req->inode);
- struct fuse_req *tmp;
- struct fuse_req *old_req;
- bool found = false;
- pgoff_t curr_index;
-
- BUG_ON(new_req->num_pages != 0);
-
- spin_lock(&fc->lock);
- list_del(&new_req->writepages_entry);
- list_for_each_entry(old_req, &fi->writepages, writepages_entry) {
- BUG_ON(old_req->inode != new_req->inode);
- curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
- if (curr_index <= page->index &&
- page->index < curr_index + old_req->num_pages) {
- found = true;
- break;
- }
- }
- if (!found) {
- list_add(&new_req->writepages_entry, &fi->writepages);
- goto out_unlock;
- }
-
- new_req->num_pages = 1;
- for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
- BUG_ON(tmp->inode != new_req->inode);
- curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT;
- if (tmp->num_pages == 1 &&
- curr_index == page->index) {
- old_req = tmp;
- }
- }
-
- if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT ||
- old_req->state == FUSE_REQ_PENDING)) {
- struct backing_dev_info *bdi = page->mapping->backing_dev_info;
-
- copy_highpage(old_req->pages[0], page);
- spin_unlock(&fc->lock);
-
- dec_bdi_stat(bdi, BDI_WRITEBACK);
- dec_zone_page_state(page, NR_WRITEBACK_TEMP);
- bdi_writeout_inc(bdi);
- fuse_writepage_free(fc, new_req);
- fuse_request_free(new_req);
- goto out;
- } else {
- new_req->misc.write.next = old_req->misc.write.next;
- old_req->misc.write.next = new_req;
- }
-out_unlock:
- spin_unlock(&fc->lock);
-out:
- return found;
-}
-
-static int fuse_writepages_fill(struct page *page,
- struct writeback_control *wbc, void *_data)
-{
- struct fuse_fill_wb_data *data = _data;
- struct fuse_req *req = data->req;
- struct inode *inode = data->inode;
- struct fuse_conn *fc = get_fuse_conn(inode);
- struct page *tmp_page;
- bool is_writeback;
- int err;
-
- if (!data->ff) {
- err = -EIO;
- data->ff = fuse_write_file_get(fc, get_fuse_inode(inode));
- if (!data->ff)
- goto out_unlock;
- }
-
- /*
- * Being under writeback is unlikely but possible. For example direct
- * read to an mmaped fuse file will set the page dirty twice; once when
- * the pages are faulted with get_user_pages(), and then after the read
- * completed.
- */
- is_writeback = fuse_page_is_writeback(inode, page->index);
-
- if (req && req->num_pages &&
- (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
- (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
- data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
- fuse_writepages_send(data);
- data->req = NULL;
- }
- err = -ENOMEM;
- tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
- if (!tmp_page)
- goto out_unlock;
-
- /*
- * The page must not be redirtied until the writeout is completed
- * (i.e. userspace has sent a reply to the write request). Otherwise
- * there could be more than one temporary page instance for each real
- * page.
- *
- * This is ensured by holding the page lock in page_mkwrite() while
- * checking fuse_page_is_writeback(). We already hold the page lock
- * since clear_page_dirty_for_io() and keep it held until we add the
- * request to the fi->writepages list and increment req->num_pages.
- * After this fuse_page_is_writeback() will indicate that the page is
- * under writeback, so we can release the page lock.
- */
- if (data->req == NULL) {
- struct fuse_inode *fi = get_fuse_inode(inode);
-
- err = -ENOMEM;
- req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
- if (!req) {
- __free_page(tmp_page);
- goto out_unlock;
- }
-
- fuse_write_fill(req, data->ff, page_offset(page), 0);
- req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
- req->misc.write.next = NULL;
- req->in.argpages = 1;
- req->background = 1;
- req->num_pages = 0;
- req->end = fuse_writepage_end;
- req->inode = inode;
-
- spin_lock(&fc->lock);
- list_add(&req->writepages_entry, &fi->writepages);
- spin_unlock(&fc->lock);
-
- data->req = req;
- }
- set_page_writeback(page);
-
- copy_highpage(tmp_page, page);
- req->pages[req->num_pages] = tmp_page;
- req->page_descs[req->num_pages].offset = 0;
- req->page_descs[req->num_pages].length = PAGE_SIZE;
-
- inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK);
- inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
-
- err = 0;
- if (is_writeback && fuse_writepage_in_flight(req, page)) {
- end_page_writeback(page);
- data->req = NULL;
- goto out_unlock;
- }
- data->orig_pages[req->num_pages] = page;
-
- /*
- * Protected by fc->lock against concurrent access by
- * fuse_page_is_writeback().
- */
- spin_lock(&fc->lock);
- req->num_pages++;
- spin_unlock(&fc->lock);
-
-out_unlock:
- unlock_page(page);
-
- return err;
-}
-
-static int fuse_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
-{
- struct inode *inode = mapping->host;
- struct fuse_fill_wb_data data;
- int err;
-
- err = -EIO;
- if (is_bad_inode(inode))
- goto out;
-
- data.inode = inode;
- data.req = NULL;
- data.ff = NULL;
-
- err = -ENOMEM;
- data.orig_pages = kzalloc(sizeof(struct page *) *
- FUSE_MAX_PAGES_PER_REQ,
- GFP_NOFS);
- if (!data.orig_pages)
- goto out;
-
- err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
- if (data.req) {
- /* Ignore errors if we can write at least one page */
- BUG_ON(!data.req->num_pages);
- fuse_writepages_send(&data);
- err = 0;
- }
- if (data.ff)
- fuse_file_put(data.ff, false);
-
- kfree(data.orig_pages);
-out:
- return err;
-}
-
static int fuse_launder_page(struct page *page)
{
int err = 0;
@@ -1907,17 +1602,14 @@ static void fuse_vma_close(struct vm_area_struct *vma)
static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
-
- file_update_time(vma->vm_file);
- lock_page(page);
- if (page->mapping != inode->i_mapping) {
- unlock_page(page);
- return VM_FAULT_NOPAGE;
- }
+ /*
+ * Don't use page->mapping as it may become NULL from a
+ * concurrent truncate.
+ */
+ struct inode *inode = vma->vm_file->f_mapping->host;
fuse_wait_on_page_writeback(inode, page->index);
- return VM_FAULT_LOCKED;
+ return 0;
}
static const struct vm_operations_struct fuse_file_vm_ops = {
@@ -2889,7 +2581,6 @@ static const struct file_operations fuse_direct_io_file_operations = {
static const struct address_space_operations fuse_file_aops = {
.readpage = fuse_readpage,
.writepage = fuse_writepage,
- .writepages = fuse_writepages,
.launder_page = fuse_launder_page,
.readpages = fuse_readpages,
.set_page_dirty = __set_page_dirty_nobuffers,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 7d27309..5b9e6f3 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -321,7 +321,6 @@ struct fuse_req {
struct {
struct fuse_write_in in;
struct fuse_write_out out;
- struct fuse_req *next;
} write;
struct fuse_notify_retrieve_in retrieve_in;
struct fuse_lk_in lk_in;
@@ -375,11 +374,12 @@ struct fuse_conn {
/** Lock protecting accessess to members of this structure */
spinlock_t lock;
+ /** Mutex protecting against directory alias creation */
+ struct mutex inst_mutex;
+
/** Refcount */
atomic_t count;
- struct rcu_head rcu;
-
/** The user id for this mount */
kuid_t user_id;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index d468643..a8ce6da 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -565,6 +565,7 @@ void fuse_conn_init(struct fuse_conn *fc)
{
memset(fc, 0, sizeof(*fc));
spin_lock_init(&fc->lock);
+ mutex_init(&fc->inst_mutex);
init_rwsem(&fc->killsb);
atomic_set(&fc->count, 1);
init_waitqueue_head(&fc->waitq);
@@ -595,6 +596,7 @@ void fuse_conn_put(struct fuse_conn *fc)
if (atomic_dec_and_test(&fc->count)) {
if (fc->destroy_req)
fuse_request_free(fc->destroy_req);
+ mutex_destroy(&fc->inst_mutex);
fc->release(fc);
}
}
@@ -918,7 +920,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
static void fuse_free_conn(struct fuse_conn *fc)
{
- kfree_rcu(fc, rcu);
+ kfree(fc);
}
static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index b7fc035..1f7d805 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -611,14 +611,12 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
if (alloc_required) {
- struct gfs2_alloc_parms ap = { .aflags = 0, };
error = gfs2_quota_lock_check(ip);
if (error)
goto out_unlock;
requested = data_blocks + ind_blocks;
- ap.target = requested;
- error = gfs2_inplace_reserve(ip, &ap);
+ error = gfs2_inplace_reserve(ip, requested, 0);
if (error)
goto out_qunlock;
}
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index fe0500c..62a65fc 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1216,7 +1216,6 @@ static int do_grow(struct inode *inode, u64 size)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct gfs2_alloc_parms ap = { .target = 1, };
struct buffer_head *dibh;
int error;
int unstuff = 0;
@@ -1227,7 +1226,7 @@ static int do_grow(struct inode *inode, u64 size)
if (error)
return error;
- error = gfs2_inplace_reserve(ip, &ap);
+ error = gfs2_inplace_reserve(ip, 1, 0);
if (error)
goto do_grow_qunlock;
unstuff = 1;
@@ -1280,7 +1279,6 @@ do_grow_qunlock:
int gfs2_setattr_size(struct inode *inode, u64 newsize)
{
- struct gfs2_inode *ip = GFS2_I(inode);
int ret;
u64 oldsize;
@@ -1296,7 +1294,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
inode_dio_wait(inode);
- ret = gfs2_rs_alloc(ip);
+ ret = gfs2_rs_alloc(GFS2_I(inode));
if (ret)
goto out;
@@ -1306,7 +1304,6 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
goto out;
}
- gfs2_rs_deltree(ip->i_res);
ret = do_shrink(inode, oldsize, newsize);
out:
put_write_access(inode);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index efc078f..0621b46 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -383,7 +383,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
struct inode *inode = file_inode(vma->vm_file);
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct gfs2_alloc_parms ap = { .aflags = 0, };
unsigned long last_index;
u64 pos = page->index << PAGE_CACHE_SHIFT;
unsigned int data_blocks, ind_blocks, rblocks;
@@ -431,8 +430,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
if (ret)
goto out_unlock;
gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
- ap.target = data_blocks + ind_blocks;
- ret = gfs2_inplace_reserve(ip, &ap);
+ ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0);
if (ret)
goto out_quota_unlock;
@@ -622,7 +620,7 @@ static int gfs2_release(struct inode *inode, struct file *file)
if (!(file->f_mode & FMODE_WRITE))
return 0;
- gfs2_rs_delete(ip, &inode->i_writecount);
+ gfs2_rs_delete(ip);
return 0;
}
@@ -802,7 +800,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
struct inode *inode = file_inode(file);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_alloc_parms ap = { .aflags = 0, };
unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
loff_t bytes, max_bytes;
int error;
@@ -853,8 +850,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
retry:
gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
- ap.target = data_blocks + ind_blocks;
- error = gfs2_inplace_reserve(ip, &ap);
+ error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0);
if (error) {
if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
bytes >>= 1;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c8420f7..c2f41b4 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -31,7 +31,6 @@
#include <linux/bit_spinlock.h>
#include <linux/percpu.h>
#include <linux/list_sort.h>
-#include <linux/lockref.h>
#include "gfs2.h"
#include "incore.h"
@@ -130,10 +129,10 @@ void gfs2_glock_free(struct gfs2_glock *gl)
*
*/
-static void gfs2_glock_hold(struct gfs2_glock *gl)
+void gfs2_glock_hold(struct gfs2_glock *gl)
{
- GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
- lockref_get(&gl->gl_lockref);
+ GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0);
+ atomic_inc(&gl->gl_ref);
}
/**
@@ -188,6 +187,20 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
}
/**
+ * gfs2_glock_put_nolock() - Decrement reference count on glock
+ * @gl: The glock to put
+ *
+ * This function should only be used if the caller has its own reference
+ * to the glock, in addition to the one it is dropping.
+ */
+
+void gfs2_glock_put_nolock(struct gfs2_glock *gl)
+{
+ if (atomic_dec_and_test(&gl->gl_ref))
+ GLOCK_BUG_ON(gl, 1);
+}
+
+/**
* gfs2_glock_put() - Decrement reference count on glock
* @gl: The glock to put
*
@@ -198,22 +211,17 @@ void gfs2_glock_put(struct gfs2_glock *gl)
struct gfs2_sbd *sdp = gl->gl_sbd;
struct address_space *mapping = gfs2_glock2aspace(gl);
- if (lockref_put_or_lock(&gl->gl_lockref))
- return;
-
- lockref_mark_dead(&gl->gl_lockref);
-
- spin_lock(&lru_lock);
- __gfs2_glock_remove_from_lru(gl);
- spin_unlock(&lru_lock);
- spin_unlock(&gl->gl_lockref.lock);
- spin_lock_bucket(gl->gl_hash);
- hlist_bl_del_rcu(&gl->gl_list);
- spin_unlock_bucket(gl->gl_hash);
- GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
- GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
- trace_gfs2_glock_put(gl);
- sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
+ if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) {
+ __gfs2_glock_remove_from_lru(gl);
+ spin_unlock(&lru_lock);
+ spin_lock_bucket(gl->gl_hash);
+ hlist_bl_del_rcu(&gl->gl_list);
+ spin_unlock_bucket(gl->gl_hash);
+ GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
+ GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
+ trace_gfs2_glock_put(gl);
+ sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
+ }
}
/**
@@ -236,7 +244,7 @@ static struct gfs2_glock *search_bucket(unsigned int hash,
continue;
if (gl->gl_sbd != sdp)
continue;
- if (lockref_get_not_dead(&gl->gl_lockref))
+ if (atomic_inc_not_zero(&gl->gl_ref))
return gl;
}
@@ -388,11 +396,10 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
held2 = (new_state != LM_ST_UNLOCKED);
if (held1 != held2) {
- GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
if (held2)
- gl->gl_lockref.count++;
+ gfs2_glock_hold(gl);
else
- gl->gl_lockref.count--;
+ gfs2_glock_put_nolock(gl);
}
if (held1 && held2 && list_empty(&gl->gl_holders))
clear_bit(GLF_QUEUED, &gl->gl_flags);
@@ -619,9 +626,9 @@ out:
out_sched:
clear_bit(GLF_LOCK, &gl->gl_flags);
smp_mb__after_clear_bit();
- gl->gl_lockref.count++;
+ gfs2_glock_hold(gl);
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
- gl->gl_lockref.count--;
+ gfs2_glock_put_nolock(gl);
return;
out_unlock:
@@ -747,7 +754,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
gl->gl_sbd = sdp;
gl->gl_flags = 0;
gl->gl_name = name;
- gl->gl_lockref.count = 1;
+ atomic_set(&gl->gl_ref, 1);
gl->gl_state = LM_ST_UNLOCKED;
gl->gl_target = LM_ST_UNLOCKED;
gl->gl_demote_state = LM_ST_EXCLUSIVE;
@@ -1349,10 +1356,10 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
}
}
- gl->gl_lockref.count++;
- set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
spin_unlock(&gl->gl_spin);
-
+ set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+ smp_wmb();
+ gfs2_glock_hold(gl);
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
gfs2_glock_put(gl);
}
@@ -1397,19 +1404,15 @@ __acquires(&lru_lock)
while(!list_empty(list)) {
gl = list_entry(list->next, struct gfs2_glock, gl_lru);
list_del_init(&gl->gl_lru);
- if (!spin_trylock(&gl->gl_spin)) {
- list_add(&gl->gl_lru, &lru_list);
- atomic_inc(&lru_count);
- continue;
- }
clear_bit(GLF_LRU, &gl->gl_flags);
+ gfs2_glock_hold(gl);
spin_unlock(&lru_lock);
- gl->gl_lockref.count++;
+ spin_lock(&gl->gl_spin);
if (demote_ok(gl))
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
- gl->gl_lockref.count--;
+ gfs2_glock_put_nolock(gl);
spin_unlock(&gl->gl_spin);
spin_lock(&lru_lock);
}
@@ -1490,7 +1493,7 @@ static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp,
rcu_read_lock();
hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
- if ((gl->gl_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref))
+ if ((gl->gl_sbd == sdp) && atomic_inc_not_zero(&gl->gl_ref))
examiner(gl);
}
rcu_read_unlock();
@@ -1743,7 +1746,7 @@ int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
state2str(gl->gl_demote_state), dtime,
atomic_read(&gl->gl_ail_count),
atomic_read(&gl->gl_revokes),
- (int)gl->gl_lockref.count, gl->gl_hold_time);
+ atomic_read(&gl->gl_ref), gl->gl_hold_time);
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
error = dump_holder(seq, gh);
@@ -1899,8 +1902,7 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
gi->nhash = 0;
}
/* Skip entries for other sb and dead entries */
- } while (gi->sdp != gi->gl->gl_sbd ||
- __lockref_is_dead(&gi->gl->gl_lockref));
+ } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0);
return 0;
}
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 6647d77..69f66e3 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -181,6 +181,8 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl)
extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
const struct gfs2_glock_operations *glops,
int create, struct gfs2_glock **glp);
+extern void gfs2_glock_hold(struct gfs2_glock *gl);
+extern void gfs2_glock_put_nolock(struct gfs2_glock *gl);
extern void gfs2_glock_put(struct gfs2_glock *gl);
extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state,
unsigned flags, struct gfs2_holder *gh);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index db908f6..e2e0a90 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -525,9 +525,9 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
if (gl->gl_demote_state == LM_ST_UNLOCKED &&
gl->gl_state == LM_ST_SHARED && ip) {
- gl->gl_lockref.count++;
+ gfs2_glock_hold(gl);
if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
- gl->gl_lockref.count--;
+ gfs2_glock_put_nolock(gl);
}
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index ba1ea67..26aabd7 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -21,7 +21,6 @@
#include <linux/rbtree.h>
#include <linux/ktime.h>
#include <linux/percpu.h>
-#include <linux/lockref.h>
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
@@ -72,7 +71,6 @@ struct gfs2_bitmap {
u32 bi_offset;
u32 bi_start;
u32 bi_len;
- u32 bi_blocks;
};
struct gfs2_rgrpd {
@@ -103,25 +101,19 @@ struct gfs2_rgrpd {
struct gfs2_rbm {
struct gfs2_rgrpd *rgd;
+ struct gfs2_bitmap *bi; /* Bitmap must belong to the rgd */
u32 offset; /* The offset is bitmap relative */
- int bii; /* Bitmap index */
};
-static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm)
-{
- return rbm->rgd->rd_bits + rbm->bii;
-}
-
static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
{
- return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) +
- rbm->offset;
+ return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset;
}
static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1,
const struct gfs2_rbm *rbm2)
{
- return (rbm1->rgd == rbm2->rgd) && (rbm1->bii == rbm2->bii) &&
+ return (rbm1->rgd == rbm2->rgd) && (rbm1->bi == rbm2->bi) &&
(rbm1->offset == rbm2->offset);
}
@@ -286,20 +278,6 @@ struct gfs2_blkreserv {
unsigned int rs_qa_qd_num;
};
-/*
- * Allocation parameters
- * @target: The number of blocks we'd ideally like to allocate
- * @aflags: The flags (e.g. Orlov flag)
- *
- * The intent is to gradually expand this structure over time in
- * order to give more information, e.g. alignment, min extent size
- * to the allocation code.
- */
-struct gfs2_alloc_parms {
- u32 target;
- u32 aflags;
-};
-
enum {
GLF_LOCK = 1,
GLF_DEMOTE = 3,
@@ -322,9 +300,9 @@ struct gfs2_glock {
struct gfs2_sbd *gl_sbd;
unsigned long gl_flags; /* GLF_... */
struct lm_lockname gl_name;
+ atomic_t gl_ref;
- struct lockref gl_lockref;
-#define gl_spin gl_lockref.lock
+ spinlock_t gl_spin;
/* State fields protected by gl_spin */
unsigned int gl_state:2, /* Current state */
@@ -420,10 +398,11 @@ enum {
struct gfs2_quota_data {
struct list_head qd_list;
- struct kqid qd_id;
- struct lockref qd_lockref;
- struct list_head qd_lru;
+ struct list_head qd_reclaim;
+ atomic_t qd_count;
+
+ struct kqid qd_id;
unsigned long qd_flags; /* QDF_... */
s64 qd_change;
@@ -537,6 +516,7 @@ struct gfs2_tune {
unsigned int gt_logd_secs;
+ unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
unsigned int gt_quota_scale_num; /* Numerator */
unsigned int gt_quota_scale_den; /* Denominator */
@@ -714,7 +694,6 @@ struct gfs2_sbd {
struct list_head sd_quota_list;
atomic_t sd_quota_count;
struct mutex sd_quota_mutex;
- struct mutex sd_quota_sync_mutex;
wait_queue_head_t sd_quota_wait;
struct list_head sd_trunc_list;
spinlock_t sd_trunc_lock;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 7119504..ced3257 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -379,7 +379,6 @@ static void munge_mode_uid_gid(const struct gfs2_inode *dip,
static int alloc_dinode(struct gfs2_inode *ip, u32 flags)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct gfs2_alloc_parms ap = { .target = RES_DINODE, .aflags = flags, };
int error;
int dblocks = 1;
@@ -387,7 +386,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags)
if (error)
goto out;
- error = gfs2_inplace_reserve(ip, &ap);
+ error = gfs2_inplace_reserve(ip, RES_DINODE, flags);
if (error)
goto out_quota;
@@ -473,7 +472,6 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
struct gfs2_inode *ip, int arq)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
- struct gfs2_alloc_parms ap = { .target = sdp->sd_max_dirres, };
int error;
if (arq) {
@@ -481,7 +479,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
if (error)
goto fail_quota_locks;
- error = gfs2_inplace_reserve(dip, &ap);
+ error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0);
if (error)
goto fail_quota_locks;
@@ -586,17 +584,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (!IS_ERR(inode)) {
d = d_splice_alias(inode, dentry);
error = 0;
- if (file) {
- if (S_ISREG(inode->i_mode)) {
- WARN_ON(d != NULL);
- error = finish_open(file, dentry, gfs2_open_common, opened);
- } else {
+ if (file && !IS_ERR(d)) {
+ if (d == NULL)
+ d = dentry;
+ if (S_ISREG(inode->i_mode))
+ error = finish_open(file, d, gfs2_open_common, opened);
+ else
error = finish_no_open(file, d);
- }
- } else {
- dput(d);
}
gfs2_glock_dq_uninit(ghs);
+ if (IS_ERR(d))
+ return PTR_ERR(d);
return error;
} else if (error != -ENOENT) {
goto fail_gunlock;
@@ -715,7 +713,7 @@ fail_gunlock2:
fail_free_inode:
if (ip->i_gl)
gfs2_glock_put(ip->i_gl);
- gfs2_rs_delete(ip, NULL);
+ gfs2_rs_delete(ip);
free_inode_nonrcu(inode);
inode = NULL;
fail_gunlock:
@@ -783,10 +781,8 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry,
error = finish_open(file, dentry, gfs2_open_common, opened);
gfs2_glock_dq_uninit(&gh);
- if (error) {
- dput(d);
+ if (error)
return ERR_PTR(error);
- }
return d;
}
@@ -878,12 +874,11 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
error = 0;
if (alloc_required) {
- struct gfs2_alloc_parms ap = { .target = sdp->sd_max_dirres, };
error = gfs2_quota_lock_check(dip);
if (error)
goto out_gunlock;
- error = gfs2_inplace_reserve(dip, &ap);
+ error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0);
if (error)
goto out_gunlock_q;
@@ -1168,19 +1163,14 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
d = __gfs2_lookup(dir, dentry, file, opened);
if (IS_ERR(d))
return PTR_ERR(d);
- if (d != NULL)
- dentry = d;
- if (dentry->d_inode) {
- if (!(*opened & FILE_OPENED)) {
- if (d == NULL)
- dget(dentry);
- return finish_no_open(file, dentry);
- }
- dput(d);
+ if (d == NULL)
+ d = dentry;
+ if (d->d_inode) {
+ if (!(*opened & FILE_OPENED))
+ return finish_no_open(file, d);
return 0;
}
- BUG_ON(d != NULL);
if (!(flags & O_CREAT))
return -ENOENT;
@@ -1395,12 +1385,11 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
goto out_gunlock;
if (alloc_required) {
- struct gfs2_alloc_parms ap = { .target = sdp->sd_max_dirres, };
error = gfs2_quota_lock_check(ndip);
if (error)
goto out_gunlock;
- error = gfs2_inplace_reserve(ndip, &ap);
+ error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres, 0);
if (error)
goto out_gunlock_q;
@@ -1517,6 +1506,13 @@ out:
return NULL;
}
+static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+{
+ char *s = nd_get_link(nd);
+ if (!IS_ERR(s))
+ kfree(s);
+}
+
/**
* gfs2_permission -
* @inode: The inode
@@ -1868,7 +1864,7 @@ const struct inode_operations gfs2_dir_iops = {
const struct inode_operations gfs2_symlink_iops = {
.readlink = generic_readlink,
.follow_link = gfs2_follow_link,
- .put_link = kfree_put_link,
+ .put_link = gfs2_put_link,
.permission = gfs2_permission,
.setattr = gfs2_setattr,
.getattr = gfs2_getattr,
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 2a6ba06..c8423d6 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -466,19 +466,19 @@ static void gdlm_cancel(struct gfs2_glock *gl)
static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen,
char *lvb_bits)
{
- __le32 gen;
+ uint32_t gen;
memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE);
- memcpy(&gen, lvb_bits, sizeof(__le32));
+ memcpy(&gen, lvb_bits, sizeof(uint32_t));
*lvb_gen = le32_to_cpu(gen);
}
static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen,
char *lvb_bits)
{
- __le32 gen;
+ uint32_t gen;
memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE);
gen = cpu_to_le32(lvb_gen);
- memcpy(ls->ls_control_lvb, &gen, sizeof(__le32));
+ memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t));
}
static int all_jid_bits_clear(char *lvb)
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 0650db2..351586e 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -31,6 +31,12 @@
struct workqueue_struct *gfs2_control_wq;
+static struct shrinker qd_shrinker = {
+ .count_objects = gfs2_qd_shrink_count,
+ .scan_objects = gfs2_qd_shrink_scan,
+ .seeks = DEFAULT_SEEKS,
+};
+
static void gfs2_init_inode_once(void *foo)
{
struct gfs2_inode *ip = foo;
@@ -81,10 +87,6 @@ static int __init init_gfs2_fs(void)
if (error)
return error;
- error = list_lru_init(&gfs2_qd_lru);
- if (error)
- goto fail_lru;
-
error = gfs2_glock_init();
if (error)
goto fail;
@@ -137,7 +139,7 @@ static int __init init_gfs2_fs(void)
if (!gfs2_rsrv_cachep)
goto fail;
- register_shrinker(&gfs2_qd_shrinker);
+ register_shrinker(&qd_shrinker);
error = register_filesystem(&gfs2_fs_type);
if (error)
@@ -177,9 +179,7 @@ fail_wq:
fail_unregister:
unregister_filesystem(&gfs2_fs_type);
fail:
- list_lru_destroy(&gfs2_qd_lru);
-fail_lru:
- unregister_shrinker(&gfs2_qd_shrinker);
+ unregister_shrinker(&qd_shrinker);
gfs2_glock_exit();
if (gfs2_rsrv_cachep)
@@ -214,14 +214,13 @@ fail_lru:
static void __exit exit_gfs2_fs(void)
{
- unregister_shrinker(&gfs2_qd_shrinker);
+ unregister_shrinker(&qd_shrinker);
gfs2_glock_exit();
gfs2_unregister_debugfs();
unregister_filesystem(&gfs2_fs_type);
unregister_filesystem(&gfs2meta_fs_type);
destroy_workqueue(gfs_recovery_wq);
destroy_workqueue(gfs2_control_wq);
- list_lru_destroy(&gfs2_qd_lru);
rcu_barrier();
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 82303b4..19ff5e8 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -51,6 +51,7 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
{
spin_lock_init(&gt->gt_spin);
+ gt->gt_quota_simul_sync = 64;
gt->gt_quota_warn_period = 10;
gt->gt_quota_scale_num = 1;
gt->gt_quota_scale_den = 1;
@@ -93,7 +94,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
INIT_LIST_HEAD(&sdp->sd_quota_list);
mutex_init(&sdp->sd_quota_mutex);
- mutex_init(&sdp->sd_quota_sync_mutex);
init_waitqueue_head(&sdp->sd_quota_wait);
INIT_LIST_HEAD(&sdp->sd_trunc_list);
spin_lock_init(&sdp->sd_trunc_lock);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 98236d0..db44135 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -50,8 +50,6 @@
#include <linux/freezer.h>
#include <linux/quota.h>
#include <linux/dqblk_xfs.h>
-#include <linux/lockref.h>
-#include <linux/list_lru.h>
#include "gfs2.h"
#include "incore.h"
@@ -73,25 +71,29 @@ struct gfs2_quota_change_host {
struct kqid qc_id;
};
-/* Lock order: qd_lock -> qd->lockref.lock -> lru lock */
-static DEFINE_SPINLOCK(qd_lock);
-struct list_lru gfs2_qd_lru;
+static LIST_HEAD(qd_lru_list);
+static atomic_t qd_lru_count = ATOMIC_INIT(0);
+static DEFINE_SPINLOCK(qd_lru_lock);
-static void gfs2_qd_dispose(struct list_head *list)
+unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
+ struct shrink_control *sc)
{
struct gfs2_quota_data *qd;
struct gfs2_sbd *sdp;
+ int nr_to_scan = sc->nr_to_scan;
+ long freed = 0;
- while (!list_empty(list)) {
- qd = list_entry(list->next, struct gfs2_quota_data, qd_lru);
- sdp = qd->qd_gl->gl_sbd;
+ if (!(sc->gfp_mask & __GFP_FS))
+ return SHRINK_STOP;
- list_del(&qd->qd_lru);
+ spin_lock(&qd_lru_lock);
+ while (nr_to_scan && !list_empty(&qd_lru_list)) {
+ qd = list_entry(qd_lru_list.next,
+ struct gfs2_quota_data, qd_reclaim);
+ sdp = qd->qd_gl->gl_sbd;
/* Free from the filesystem-specific list */
- spin_lock(&qd_lock);
list_del(&qd->qd_list);
- spin_unlock(&qd_lock);
gfs2_assert_warn(sdp, !qd->qd_change);
gfs2_assert_warn(sdp, !qd->qd_slot_count);
@@ -101,59 +103,24 @@ static void gfs2_qd_dispose(struct list_head *list)
atomic_dec(&sdp->sd_quota_count);
/* Delete it from the common reclaim list */
+ list_del_init(&qd->qd_reclaim);
+ atomic_dec(&qd_lru_count);
+ spin_unlock(&qd_lru_lock);
kmem_cache_free(gfs2_quotad_cachep, qd);
+ spin_lock(&qd_lru_lock);
+ nr_to_scan--;
+ freed++;
}
-}
-
-
-static enum lru_status gfs2_qd_isolate(struct list_head *item, spinlock_t *lock, void *arg)
-{
- struct list_head *dispose = arg;
- struct gfs2_quota_data *qd = list_entry(item, struct gfs2_quota_data, qd_lru);
-
- if (!spin_trylock(&qd->qd_lockref.lock))
- return LRU_SKIP;
-
- if (qd->qd_lockref.count == 0) {
- lockref_mark_dead(&qd->qd_lockref);
- list_move(&qd->qd_lru, dispose);
- }
-
- spin_unlock(&qd->qd_lockref.lock);
- return LRU_REMOVED;
-}
-
-static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
- struct shrink_control *sc)
-{
- LIST_HEAD(dispose);
- unsigned long freed;
-
- if (!(sc->gfp_mask & __GFP_FS))
- return SHRINK_STOP;
-
- freed = list_lru_walk_node(&gfs2_qd_lru, sc->nid, gfs2_qd_isolate,
- &dispose, &sc->nr_to_scan);
-
- gfs2_qd_dispose(&dispose);
-
+ spin_unlock(&qd_lru_lock);
return freed;
}
-static unsigned long gfs2_qd_shrink_count(struct shrinker *shrink,
- struct shrink_control *sc)
+unsigned long gfs2_qd_shrink_count(struct shrinker *shrink,
+ struct shrink_control *sc)
{
- return vfs_pressure_ratio(list_lru_count_node(&gfs2_qd_lru, sc->nid));
+ return vfs_pressure_ratio(atomic_read(&qd_lru_count));
}
-struct shrinker gfs2_qd_shrinker = {
- .count_objects = gfs2_qd_shrink_count,
- .scan_objects = gfs2_qd_shrink_scan,
- .seeks = DEFAULT_SEEKS,
- .flags = SHRINKER_NUMA_AWARE,
-};
-
-
static u64 qd2index(struct gfs2_quota_data *qd)
{
struct kqid qid = qd->qd_id;
@@ -181,11 +148,10 @@ static int qd_alloc(struct gfs2_sbd *sdp, struct kqid qid,
if (!qd)
return -ENOMEM;
- qd->qd_lockref.count = 1;
- spin_lock_init(&qd->qd_lockref.lock);
+ atomic_set(&qd->qd_count, 1);
qd->qd_id = qid;
qd->qd_slot = -1;
- INIT_LIST_HEAD(&qd->qd_lru);
+ INIT_LIST_HEAD(&qd->qd_reclaim);
error = gfs2_glock_get(sdp, qd2index(qd),
&gfs2_quota_glops, CREATE, &qd->qd_gl);
@@ -211,11 +177,16 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
for (;;) {
found = 0;
- spin_lock(&qd_lock);
+ spin_lock(&qd_lru_lock);
list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
- if (qid_eq(qd->qd_id, qid) &&
- lockref_get_not_dead(&qd->qd_lockref)) {
- list_lru_del(&gfs2_qd_lru, &qd->qd_lru);
+ if (qid_eq(qd->qd_id, qid)) {
+ if (!atomic_read(&qd->qd_count) &&
+ !list_empty(&qd->qd_reclaim)) {
+ /* Remove it from reclaim list */
+ list_del_init(&qd->qd_reclaim);
+ atomic_dec(&qd_lru_count);
+ }
+ atomic_inc(&qd->qd_count);
found = 1;
break;
}
@@ -231,7 +202,7 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
new_qd = NULL;
}
- spin_unlock(&qd_lock);
+ spin_unlock(&qd_lru_lock);
if (qd) {
if (new_qd) {
@@ -251,19 +222,18 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
static void qd_hold(struct gfs2_quota_data *qd)
{
struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
- gfs2_assert(sdp, !__lockref_is_dead(&qd->qd_lockref));
- lockref_get(&qd->qd_lockref);
+ gfs2_assert(sdp, atomic_read(&qd->qd_count));
+ atomic_inc(&qd->qd_count);
}
static void qd_put(struct gfs2_quota_data *qd)
{
- if (lockref_put_or_lock(&qd->qd_lockref))
- return;
-
- qd->qd_lockref.count = 0;
- list_lru_add(&gfs2_qd_lru, &qd->qd_lru);
- spin_unlock(&qd->qd_lockref.lock);
-
+ if (atomic_dec_and_lock(&qd->qd_count, &qd_lru_lock)) {
+ /* Add to the reclaim list */
+ list_add_tail(&qd->qd_reclaim, &qd_lru_list);
+ atomic_inc(&qd_lru_count);
+ spin_unlock(&qd_lru_lock);
+ }
}
static int slot_get(struct gfs2_quota_data *qd)
@@ -272,10 +242,10 @@ static int slot_get(struct gfs2_quota_data *qd)
unsigned int c, o = 0, b;
unsigned char byte = 0;
- spin_lock(&qd_lock);
+ spin_lock(&qd_lru_lock);
if (qd->qd_slot_count++) {
- spin_unlock(&qd_lock);
+ spin_unlock(&qd_lru_lock);
return 0;
}
@@ -299,13 +269,13 @@ found:
sdp->sd_quota_bitmap[c][o] |= 1 << b;
- spin_unlock(&qd_lock);
+ spin_unlock(&qd_lru_lock);
return 0;
fail:
qd->qd_slot_count--;
- spin_unlock(&qd_lock);
+ spin_unlock(&qd_lru_lock);
return -ENOSPC;
}
@@ -313,43 +283,23 @@ static void slot_hold(struct gfs2_quota_data *qd)
{
struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
- spin_lock(&qd_lock);
+ spin_lock(&qd_lru_lock);
gfs2_assert(sdp, qd->qd_slot_count);
qd->qd_slot_count++;
- spin_unlock(&qd_lock);
-}
-
-static void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
- unsigned int bit, int new_value)
-{
- unsigned int c, o, b = bit;
- int old_value;
-
- c = b / (8 * PAGE_SIZE);
- b %= 8 * PAGE_SIZE;
- o = b / 8;
- b %= 8;
-
- old_value = (bitmap[c][o] & (1 << b));
- gfs2_assert_withdraw(sdp, !old_value != !new_value);
-
- if (new_value)
- bitmap[c][o] |= 1 << b;
- else
- bitmap[c][o] &= ~(1 << b);
+ spin_unlock(&qd_lru_lock);
}
static void slot_put(struct gfs2_quota_data *qd)
{
struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
- spin_lock(&qd_lock);
+ spin_lock(&qd_lru_lock);
gfs2_assert(sdp, qd->qd_slot_count);
if (!--qd->qd_slot_count) {
gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0);
qd->qd_slot = -1;
}
- spin_unlock(&qd_lock);
+ spin_unlock(&qd_lru_lock);
}
static int bh_get(struct gfs2_quota_data *qd)
@@ -413,25 +363,6 @@ static void bh_put(struct gfs2_quota_data *qd)
mutex_unlock(&sdp->sd_quota_mutex);
}
-static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd,
- u64 *sync_gen)
-{
- if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
- !test_bit(QDF_CHANGE, &qd->qd_flags) ||
- (sync_gen && (qd->qd_sync_gen >= *sync_gen)))
- return 0;
-
- if (!lockref_get_not_dead(&qd->qd_lockref))
- return 0;
-
- list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
- set_bit(QDF_LOCKED, &qd->qd_flags);
- qd->qd_change_sync = qd->qd_change;
- gfs2_assert_warn(sdp, qd->qd_slot_count);
- qd->qd_slot_count++;
- return 1;
-}
-
static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
{
struct gfs2_quota_data *qd = NULL;
@@ -443,18 +374,31 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
if (sdp->sd_vfs->s_flags & MS_RDONLY)
return 0;
- spin_lock(&qd_lock);
+ spin_lock(&qd_lru_lock);
list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
- found = qd_check_sync(sdp, qd, &sdp->sd_quota_sync_gen);
- if (found)
- break;
+ if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
+ !test_bit(QDF_CHANGE, &qd->qd_flags) ||
+ qd->qd_sync_gen >= sdp->sd_quota_sync_gen)
+ continue;
+
+ list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
+
+ set_bit(QDF_LOCKED, &qd->qd_flags);
+ gfs2_assert_warn(sdp, atomic_read(&qd->qd_count));
+ atomic_inc(&qd->qd_count);
+ qd->qd_change_sync = qd->qd_change;
+ gfs2_assert_warn(sdp, qd->qd_slot_count);
+ qd->qd_slot_count++;
+ found = 1;
+
+ break;
}
if (!found)
qd = NULL;
- spin_unlock(&qd_lock);
+ spin_unlock(&qd_lru_lock);
if (qd) {
gfs2_assert_warn(sdp, qd->qd_change_sync);
@@ -472,6 +416,43 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
return 0;
}
+static int qd_trylock(struct gfs2_quota_data *qd)
+{
+ struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+
+ if (sdp->sd_vfs->s_flags & MS_RDONLY)
+ return 0;
+
+ spin_lock(&qd_lru_lock);
+
+ if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
+ !test_bit(QDF_CHANGE, &qd->qd_flags)) {
+ spin_unlock(&qd_lru_lock);
+ return 0;
+ }
+
+ list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
+
+ set_bit(QDF_LOCKED, &qd->qd_flags);
+ gfs2_assert_warn(sdp, atomic_read(&qd->qd_count));
+ atomic_inc(&qd->qd_count);
+ qd->qd_change_sync = qd->qd_change;
+ gfs2_assert_warn(sdp, qd->qd_slot_count);
+ qd->qd_slot_count++;
+
+ spin_unlock(&qd_lru_lock);
+
+ gfs2_assert_warn(sdp, qd->qd_change_sync);
+ if (bh_get(qd)) {
+ clear_bit(QDF_LOCKED, &qd->qd_flags);
+ slot_put(qd);
+ qd_put(qd);
+ return 0;
+ }
+
+ return 1;
+}
+
static void qd_unlock(struct gfs2_quota_data *qd)
{
gfs2_assert_warn(qd->qd_gl->gl_sbd,
@@ -621,9 +602,9 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
x = be64_to_cpu(qc->qc_change) + change;
qc->qc_change = cpu_to_be64(x);
- spin_lock(&qd_lock);
+ spin_lock(&qd_lru_lock);
qd->qd_change = x;
- spin_unlock(&qd_lock);
+ spin_unlock(&qd_lru_lock);
if (!x) {
gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
@@ -667,7 +648,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
struct buffer_head *bh;
struct page *page;
void *kaddr, *ptr;
- struct gfs2_quota q;
+ struct gfs2_quota q, *qp;
int err, nbytes;
u64 size;
@@ -683,25 +664,28 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
return err;
err = -EIO;
- be64_add_cpu(&q.qu_value, change);
- qd->qd_qb.qb_value = q.qu_value;
+ qp = &q;
+ qp->qu_value = be64_to_cpu(qp->qu_value);
+ qp->qu_value += change;
+ qp->qu_value = cpu_to_be64(qp->qu_value);
+ qd->qd_qb.qb_value = qp->qu_value;
if (fdq) {
if (fdq->d_fieldmask & FS_DQ_BSOFT) {
- q.qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
- qd->qd_qb.qb_warn = q.qu_warn;
+ qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
+ qd->qd_qb.qb_warn = qp->qu_warn;
}
if (fdq->d_fieldmask & FS_DQ_BHARD) {
- q.qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
- qd->qd_qb.qb_limit = q.qu_limit;
+ qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
+ qd->qd_qb.qb_limit = qp->qu_limit;
}
if (fdq->d_fieldmask & FS_DQ_BCOUNT) {
- q.qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
- qd->qd_qb.qb_value = q.qu_value;
+ qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
+ qd->qd_qb.qb_value = qp->qu_value;
}
}
/* Write the quota into the quota file on disk */
- ptr = &q;
+ ptr = qp;
nbytes = sizeof(struct gfs2_quota);
get_a_page:
page = find_or_create_page(mapping, index, GFP_NOFS);
@@ -779,7 +763,6 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
{
struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
- struct gfs2_alloc_parms ap = { .aflags = 0, };
unsigned int data_blocks, ind_blocks;
struct gfs2_holder *ghs, i_gh;
unsigned int qx, x;
@@ -832,8 +815,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3;
reserved = 1 + (nalloc * (data_blocks + ind_blocks));
- ap.target = reserved;
- error = gfs2_inplace_reserve(ip, &ap);
+ error = gfs2_inplace_reserve(ip, reserved, 0);
if (error)
goto out_alloc;
@@ -992,9 +974,9 @@ static int need_sync(struct gfs2_quota_data *qd)
if (!qd->qd_qb.qb_limit)
return 0;
- spin_lock(&qd_lock);
+ spin_lock(&qd_lru_lock);
value = qd->qd_change;
- spin_unlock(&qd_lock);
+ spin_unlock(&qd_lru_lock);
spin_lock(&gt->gt_spin);
num = gt->gt_quota_scale_num;
@@ -1019,11 +1001,9 @@ static int need_sync(struct gfs2_quota_data *qd)
void gfs2_quota_unlock(struct gfs2_inode *ip)
{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_quota_data *qda[4];
unsigned int count = 0;
unsigned int x;
- int found;
if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
goto out;
@@ -1036,25 +1016,9 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
sync = need_sync(qd);
gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);
- if (!sync)
- continue;
-
- spin_lock(&qd_lock);
- found = qd_check_sync(sdp, qd, NULL);
- spin_unlock(&qd_lock);
-
- if (!found)
- continue;
- gfs2_assert_warn(sdp, qd->qd_change_sync);
- if (bh_get(qd)) {
- clear_bit(QDF_LOCKED, &qd->qd_flags);
- slot_put(qd);
- qd_put(qd);
- continue;
- }
-
- qda[count++] = qd;
+ if (sync && qd_trylock(qd))
+ qda[count++] = qd;
}
if (count) {
@@ -1103,9 +1067,9 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
continue;
value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
- spin_lock(&qd_lock);
+ spin_lock(&qd_lru_lock);
value += qd->qd_change;
- spin_unlock(&qd_lock);
+ spin_unlock(&qd_lru_lock);
if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
print_message(qd, "exceeded");
@@ -1154,18 +1118,17 @@ int gfs2_quota_sync(struct super_block *sb, int type)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
struct gfs2_quota_data **qda;
- unsigned int max_qd = PAGE_SIZE/sizeof(struct gfs2_holder);
+ unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync);
unsigned int num_qd;
unsigned int x;
int error = 0;
+ sdp->sd_quota_sync_gen++;
+
qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL);
if (!qda)
return -ENOMEM;
- mutex_lock(&sdp->sd_quota_sync_mutex);
- sdp->sd_quota_sync_gen++;
-
do {
num_qd = 0;
@@ -1190,7 +1153,6 @@ int gfs2_quota_sync(struct super_block *sb, int type)
}
} while (!error && num_qd == max_qd);
- mutex_unlock(&sdp->sd_quota_sync_mutex);
kfree(qda);
return error;
@@ -1296,11 +1258,11 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
qd->qd_slot = slot;
qd->qd_slot_count = 1;
- spin_lock(&qd_lock);
+ spin_lock(&qd_lru_lock);
gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1);
list_add(&qd->qd_list, &sdp->sd_quota_list);
atomic_inc(&sdp->sd_quota_count);
- spin_unlock(&qd_lock);
+ spin_unlock(&qd_lru_lock);
found++;
}
@@ -1326,34 +1288,30 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
struct gfs2_quota_data *qd;
unsigned int x;
- spin_lock(&qd_lock);
+ spin_lock(&qd_lru_lock);
while (!list_empty(head)) {
qd = list_entry(head->prev, struct gfs2_quota_data, qd_list);
- /*
- * To be removed in due course... we should be able to
- * ensure that all refs to the qd have done by this point
- * so that this rather odd test is not required
- */
- spin_lock(&qd->qd_lockref.lock);
- if (qd->qd_lockref.count > 1 ||
- (qd->qd_lockref.count && !test_bit(QDF_CHANGE, &qd->qd_flags))) {
- spin_unlock(&qd->qd_lockref.lock);
+ if (atomic_read(&qd->qd_count) > 1 ||
+ (atomic_read(&qd->qd_count) &&
+ !test_bit(QDF_CHANGE, &qd->qd_flags))) {
list_move(&qd->qd_list, head);
- spin_unlock(&qd_lock);
+ spin_unlock(&qd_lru_lock);
schedule();
- spin_lock(&qd_lock);
+ spin_lock(&qd_lru_lock);
continue;
}
- spin_unlock(&qd->qd_lockref.lock);
list_del(&qd->qd_list);
/* Also remove if this qd exists in the reclaim list */
- list_lru_del(&gfs2_qd_lru, &qd->qd_lru);
+ if (!list_empty(&qd->qd_reclaim)) {
+ list_del_init(&qd->qd_reclaim);
+ atomic_dec(&qd_lru_count);
+ }
atomic_dec(&sdp->sd_quota_count);
- spin_unlock(&qd_lock);
+ spin_unlock(&qd_lru_lock);
- if (!qd->qd_lockref.count) {
+ if (!atomic_read(&qd->qd_count)) {
gfs2_assert_warn(sdp, !qd->qd_change);
gfs2_assert_warn(sdp, !qd->qd_slot_count);
} else
@@ -1363,9 +1321,9 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
gfs2_glock_put(qd->qd_gl);
kmem_cache_free(gfs2_quotad_cachep, qd);
- spin_lock(&qd_lock);
+ spin_lock(&qd_lru_lock);
}
- spin_unlock(&qd_lock);
+ spin_unlock(&qd_lru_lock);
gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
@@ -1504,7 +1462,7 @@ static int gfs2_quota_get_xstate(struct super_block *sb,
}
fqs->qs_uquota.qfs_nextents = 1; /* unsupported */
fqs->qs_gquota = fqs->qs_uquota; /* its the same inode in both cases */
- fqs->qs_incoredqs = list_lru_count(&gfs2_qd_lru);
+ fqs->qs_incoredqs = atomic_read(&qd_lru_count);
return 0;
}
@@ -1615,12 +1573,10 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
if (gfs2_is_stuffed(ip))
alloc_required = 1;
if (alloc_required) {
- struct gfs2_alloc_parms ap = { .aflags = 0, };
gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
&data_blocks, &ind_blocks);
blocks = 1 + data_blocks + ind_blocks;
- ap.target = blocks;
- error = gfs2_inplace_reserve(ip, &ap);
+ error = gfs2_inplace_reserve(ip, blocks, 0);
if (error)
goto out_i;
blocks += gfs2_rg_blocks(ip, blocks);
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 96e4f34..0f64d9d 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -10,10 +10,9 @@
#ifndef __QUOTA_DOT_H__
#define __QUOTA_DOT_H__
-#include <linux/list_lru.h>
-
struct gfs2_inode;
struct gfs2_sbd;
+struct shrink_control;
#define NO_UID_QUOTA_CHANGE INVALID_UID
#define NO_GID_QUOTA_CHANGE INVALID_GID
@@ -54,8 +53,10 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
return ret;
}
+extern unsigned long gfs2_qd_shrink_count(struct shrinker *shrink,
+ struct shrink_control *sc);
+extern unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
+ struct shrink_control *sc);
extern const struct quotactl_ops gfs2_quotactl_ops;
-extern struct shrinker gfs2_qd_shrinker;
-extern struct list_lru gfs2_qd_lru;
#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index c8d6161..6931743 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -81,12 +81,11 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
unsigned char new_state)
{
unsigned char *byte1, *byte2, *end, cur_state;
- struct gfs2_bitmap *bi = rbm_bi(rbm);
- unsigned int buflen = bi->bi_len;
+ unsigned int buflen = rbm->bi->bi_len;
const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
- byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
- end = bi->bi_bh->b_data + bi->bi_offset + buflen;
+ byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY);
+ end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen;
BUG_ON(byte1 >= end);
@@ -96,17 +95,18 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, "
"new_state=%d\n", rbm->offset, cur_state, new_state);
printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n",
- (unsigned long long)rbm->rgd->rd_addr, bi->bi_start);
+ (unsigned long long)rbm->rgd->rd_addr,
+ rbm->bi->bi_start);
printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n",
- bi->bi_offset, bi->bi_len);
+ rbm->bi->bi_offset, rbm->bi->bi_len);
dump_stack();
gfs2_consist_rgrpd(rbm->rgd);
return;
}
*byte1 ^= (cur_state ^ new_state) << bit;
- if (do_clone && bi->bi_clone) {
- byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
+ if (do_clone && rbm->bi->bi_clone) {
+ byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY);
cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
*byte2 ^= (cur_state ^ new_state) << bit;
}
@@ -121,8 +121,7 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
{
- struct gfs2_bitmap *bi = rbm_bi(rbm);
- const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset;
+ const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset;
const u8 *byte;
unsigned int bit;
@@ -253,53 +252,29 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
{
u64 rblock = block - rbm->rgd->rd_data0;
+ u32 x;
if (WARN_ON_ONCE(rblock > UINT_MAX))
return -EINVAL;
if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
return -E2BIG;
- rbm->bii = 0;
+ rbm->bi = rbm->rgd->rd_bits;
rbm->offset = (u32)(rblock);
/* Check if the block is within the first block */
- if (rbm->offset < rbm_bi(rbm)->bi_blocks)
+ if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY)
return 0;
/* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
rbm->offset += (sizeof(struct gfs2_rgrp) -
sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
- rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
- rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
+ x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
+ rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
+ rbm->bi += x;
return 0;
}
/**
- * gfs2_rbm_incr - increment an rbm structure
- * @rbm: The rbm with rgd already set correctly
- *
- * This function takes an existing rbm structure and increments it to the next
- * viable block offset.
- *
- * Returns: If incrementing the offset would cause the rbm to go past the
- * end of the rgrp, true is returned, otherwise false.
- *
- */
-
-static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
-{
- if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */
- rbm->offset++;
- return false;
- }
- if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */
- return true;
-
- rbm->offset = 0;
- rbm->bii++;
- return false;
-}
-
-/**
* gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
* @rbm: Position to search (value/result)
* @n_unaligned: Number of unaligned blocks to check
@@ -310,6 +285,7 @@ static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
{
+ u64 block;
u32 n;
u8 res;
@@ -320,7 +296,8 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le
(*len)--;
if (*len == 0)
return true;
- if (gfs2_rbm_incr(rbm))
+ block = gfs2_rbm_to_block(rbm);
+ if (gfs2_rbm_from_block(rbm, block + 1))
return true;
}
@@ -351,7 +328,6 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
u32 chunk_size;
u8 *ptr, *start, *end;
u64 block;
- struct gfs2_bitmap *bi;
if (n_unaligned &&
gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
@@ -360,12 +336,11 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
n_unaligned = len & 3;
/* Start is now byte aligned */
while (len > 3) {
- bi = rbm_bi(&rbm);
- start = bi->bi_bh->b_data;
- if (bi->bi_clone)
- start = bi->bi_clone;
- end = start + bi->bi_bh->b_size;
- start += bi->bi_offset;
+ start = rbm.bi->bi_bh->b_data;
+ if (rbm.bi->bi_clone)
+ start = rbm.bi->bi_clone;
+ end = start + rbm.bi->bi_bh->b_size;
+ start += rbm.bi->bi_offset;
BUG_ON(rbm.offset & 3);
start += (rbm.offset / GFS2_NBBY);
bytes = min_t(u32, len / GFS2_NBBY, (end - start));
@@ -630,13 +605,11 @@ static void __rs_deltree(struct gfs2_blkreserv *rs)
RB_CLEAR_NODE(&rs->rs_node);
if (rs->rs_free) {
- struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm);
-
/* return reserved blocks to the rgrp */
BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
rs->rs_free = 0;
- clear_bit(GBF_FULL, &bi->bi_flags);
+ clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags);
smp_mb__after_clear_bit();
}
}
@@ -661,13 +634,14 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
/**
* gfs2_rs_delete - delete a multi-block reservation
* @ip: The inode for this reservation
- * @wcount: The inode's write count, or NULL
*
*/
-void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
+void gfs2_rs_delete(struct gfs2_inode *ip)
{
+ struct inode *inode = &ip->i_inode;
+
down_write(&ip->i_rw_mutex);
- if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) {
+ if (ip->i_res && atomic_read(&inode->i_writecount) <= 1) {
gfs2_rs_deltree(ip->i_res);
BUG_ON(ip->i_res->rs_free);
kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
@@ -769,21 +743,18 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
bi->bi_offset = sizeof(struct gfs2_rgrp);
bi->bi_start = 0;
bi->bi_len = bytes;
- bi->bi_blocks = bytes * GFS2_NBBY;
/* header block */
} else if (x == 0) {
bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
bi->bi_offset = sizeof(struct gfs2_rgrp);
bi->bi_start = 0;
bi->bi_len = bytes;
- bi->bi_blocks = bytes * GFS2_NBBY;
/* last block */
} else if (x + 1 == length) {
bytes = bytes_left;
bi->bi_offset = sizeof(struct gfs2_meta_header);
bi->bi_start = rgd->rd_bitbytes - bytes_left;
bi->bi_len = bytes;
- bi->bi_blocks = bytes * GFS2_NBBY;
/* other blocks */
} else {
bytes = sdp->sd_sb.sb_bsize -
@@ -791,7 +762,6 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
bi->bi_offset = sizeof(struct gfs2_meta_header);
bi->bi_start = rgd->rd_bitbytes - bytes_left;
bi->bi_len = bytes;
- bi->bi_blocks = bytes * GFS2_NBBY;
}
bytes_left -= bytes;
@@ -1127,7 +1097,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
rgd->rd_free_clone = rgd->rd_free;
}
- if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
+ if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
rgd->rd_bits[0].bi_bh->b_data);
@@ -1161,7 +1131,7 @@ int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
if (rgd->rd_flags & GFS2_RDF_UPTODATE)
return 0;
- if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
+ if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
return gfs2_rgrp_bh_get(rgd);
rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
@@ -1422,12 +1392,12 @@ static void rs_insert(struct gfs2_inode *ip)
* rg_mblk_search - find a group of multiple free blocks to form a reservation
* @rgd: the resource group descriptor
* @ip: pointer to the inode for which we're reserving blocks
- * @ap: the allocation parameters
+ * @requested: number of blocks required for this allocation
*
*/
static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
- const struct gfs2_alloc_parms *ap)
+ unsigned requested)
{
struct gfs2_rbm rbm = { .rgd = rgd, };
u64 goal;
@@ -1440,7 +1410,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
if (S_ISDIR(inode->i_mode))
extlen = 1;
else {
- extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target);
+ extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested);
extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
}
if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
@@ -1584,14 +1554,14 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
const struct gfs2_inode *ip, bool nowrap)
{
struct buffer_head *bh;
- int initial_bii;
+ struct gfs2_bitmap *initial_bi;
u32 initial_offset;
u32 offset;
u8 *buffer;
+ int index;
int n = 0;
int iters = rbm->rgd->rd_length;
int ret;
- struct gfs2_bitmap *bi;
/* If we are not starting at the beginning of a bitmap, then we
* need to add one to the bitmap count to ensure that we search
@@ -1601,53 +1571,52 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
iters++;
while(1) {
- bi = rbm_bi(rbm);
- if (test_bit(GBF_FULL, &bi->bi_flags) &&
+ if (test_bit(GBF_FULL, &rbm->bi->bi_flags) &&
(state == GFS2_BLKST_FREE))
goto next_bitmap;
- bh = bi->bi_bh;
- buffer = bh->b_data + bi->bi_offset;
+ bh = rbm->bi->bi_bh;
+ buffer = bh->b_data + rbm->bi->bi_offset;
WARN_ON(!buffer_uptodate(bh));
- if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
- buffer = bi->bi_clone + bi->bi_offset;
+ if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone)
+ buffer = rbm->bi->bi_clone + rbm->bi->bi_offset;
initial_offset = rbm->offset;
- offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state);
+ offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state);
if (offset == BFITNOENT)
goto bitmap_full;
rbm->offset = offset;
if (ip == NULL)
return 0;
- initial_bii = rbm->bii;
+ initial_bi = rbm->bi;
ret = gfs2_reservation_check_and_update(rbm, ip, minext);
if (ret == 0)
return 0;
if (ret > 0) {
- n += (rbm->bii - initial_bii);
+ n += (rbm->bi - initial_bi);
goto next_iter;
}
if (ret == -E2BIG) {
- rbm->bii = 0;
+ index = 0;
rbm->offset = 0;
- n += (rbm->bii - initial_bii);
+ n += (rbm->bi - initial_bi);
goto res_covered_end_of_rgrp;
}
return ret;
bitmap_full: /* Mark bitmap as full and fall through */
- if ((state == GFS2_BLKST_FREE) && initial_offset == 0) {
- struct gfs2_bitmap *bi = rbm_bi(rbm);
- set_bit(GBF_FULL, &bi->bi_flags);
- }
+ if ((state == GFS2_BLKST_FREE) && initial_offset == 0)
+ set_bit(GBF_FULL, &rbm->bi->bi_flags);
next_bitmap: /* Find next bitmap in the rgrp */
rbm->offset = 0;
- rbm->bii++;
- if (rbm->bii == rbm->rgd->rd_length)
- rbm->bii = 0;
+ index = rbm->bi - rbm->rgd->rd_bits;
+ index++;
+ if (index == rbm->rgd->rd_length)
+ index = 0;
res_covered_end_of_rgrp:
- if ((rbm->bii == 0) && nowrap)
+ rbm->bi = &rbm->rgd->rd_bits[index];
+ if ((index == 0) && nowrap)
break;
n++;
next_iter:
@@ -1676,7 +1645,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
struct gfs2_inode *ip;
int error;
int found = 0;
- struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };
+ struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 };
while (1) {
down_write(&sdp->sd_log_flush_lock);
@@ -1831,12 +1800,12 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b
/**
* gfs2_inplace_reserve - Reserve space in the filesystem
* @ip: the inode to reserve space for
- * @ap: the allocation parameters
+ * @requested: the number of blocks to be reserved
*
* Returns: errno
*/
-int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap)
+int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *begin = NULL;
@@ -1848,16 +1817,17 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
if (sdp->sd_args.ar_rgrplvb)
flags |= GL_SKIP;
- if (gfs2_assert_warn(sdp, ap->target))
+ if (gfs2_assert_warn(sdp, requested))
return -EINVAL;
if (gfs2_rs_active(rs)) {
begin = rs->rs_rbm.rgd;
+ flags = 0; /* Yoda: Do or do not. There is no try */
} else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
rs->rs_rbm.rgd = begin = ip->i_rgd;
} else {
rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
}
- if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
+ if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV))
skip = gfs2_orlov_skip(ip);
if (rs->rs_rbm.rgd == NULL)
return -EBADSLT;
@@ -1899,14 +1869,14 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
/* Get a reservation if we don't already have one */
if (!gfs2_rs_active(rs))
- rg_mblk_search(rs->rs_rbm.rgd, ip, ap);
+ rg_mblk_search(rs->rs_rbm.rgd, ip, requested);
/* Skip rgrps when we can't get a reservation on first pass */
if (!gfs2_rs_active(rs) && (loops < 1))
goto check_rgrp;
/* If rgrp has enough free space, use it */
- if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) {
+ if (rs->rs_rbm.rgd->rd_free_clone >= requested) {
ip->i_rgd = rs->rs_rbm.rgd;
return 0;
}
@@ -2003,14 +1973,14 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
*n = 1;
block = gfs2_rbm_to_block(rbm);
- gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
+ gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm->bi->bi_bh);
gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
block++;
while (*n < elen) {
ret = gfs2_rbm_from_block(&pos, block);
if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
break;
- gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
+ gfs2_trans_add_meta(pos.rgd->rd_gl, pos.bi->bi_bh);
gfs2_setbit(&pos, true, GFS2_BLKST_USED);
(*n)++;
block++;
@@ -2031,7 +2001,6 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
u32 blen, unsigned char new_state)
{
struct gfs2_rbm rbm;
- struct gfs2_bitmap *bi;
rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
if (!rbm.rgd) {
@@ -2042,15 +2011,15 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
while (blen--) {
gfs2_rbm_from_block(&rbm, bstart);
- bi = rbm_bi(&rbm);
bstart++;
- if (!bi->bi_clone) {
- bi->bi_clone = kmalloc(bi->bi_bh->b_size,
- GFP_NOFS | __GFP_NOFAIL);
- memcpy(bi->bi_clone + bi->bi_offset,
- bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
+ if (!rbm.bi->bi_clone) {
+ rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size,
+ GFP_NOFS | __GFP_NOFAIL);
+ memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset,
+ rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
+ rbm.bi->bi_len);
}
- gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
+ gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.bi->bi_bh);
gfs2_setbit(&rbm, false, new_state);
}
@@ -2134,35 +2103,6 @@ out:
}
/**
- * gfs2_set_alloc_start - Set starting point for block allocation
- * @rbm: The rbm which will be set to the required location
- * @ip: The gfs2 inode
- * @dinode: Flag to say if allocation includes a new inode
- *
- * This sets the starting point from the reservation if one is active
- * otherwise it falls back to guessing a start point based on the
- * inode's goal block or the last allocation point in the rgrp.
- */
-
-static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
- const struct gfs2_inode *ip, bool dinode)
-{
- u64 goal;
-
- if (gfs2_rs_active(ip->i_res)) {
- *rbm = ip->i_res->rs_rbm;
- return;
- }
-
- if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
- goal = ip->i_goal;
- else
- goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
-
- gfs2_rbm_from_block(rbm, goal);
-}
-
-/**
* gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
* @ip: the inode to allocate the block for
* @bn: Used to return the starting block number
@@ -2180,14 +2120,22 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
struct buffer_head *dibh;
struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
unsigned int ndata;
+ u64 goal;
u64 block; /* block, within the file system scope */
int error;
- gfs2_set_alloc_start(&rbm, ip, dinode);
+ if (gfs2_rs_active(ip->i_res))
+ goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm);
+ else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal))
+ goal = ip->i_goal;
+ else
+ goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0;
+
+ gfs2_rbm_from_block(&rbm, goal);
error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false);
if (error == -ENOSPC) {
- gfs2_set_alloc_start(&rbm, ip, dinode);
+ gfs2_rbm_from_block(&rbm, goal);
error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false);
}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 3a10d2f..5b3f4a8 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -40,7 +40,7 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
#define GFS2_AF_ORLOV 1
-extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap);
+extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 flags);
extern void gfs2_inplace_release(struct gfs2_inode *ip);
extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
@@ -48,7 +48,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
extern int gfs2_rs_alloc(struct gfs2_inode *ip);
extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
-extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount);
+extern void gfs2_rs_delete(struct gfs2_inode *ip);
extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 35da5b1..e5639de 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1526,7 +1526,7 @@ out_unlock:
out:
/* Case 3 starts here */
truncate_inode_pages(&inode->i_data, 0);
- gfs2_rs_delete(ip, NULL);
+ gfs2_rs_delete(ip);
gfs2_ordered_del_inode(ip);
clear_inode(inode);
gfs2_dir_hash_inval(ip);
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index d09f6ed..aa5c480 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -587,6 +587,7 @@ TUNE_ATTR(max_readahead, 0);
TUNE_ATTR(complain_secs, 0);
TUNE_ATTR(statfs_slow, 0);
TUNE_ATTR(new_files_jdata, 0);
+TUNE_ATTR(quota_simul_sync, 1);
TUNE_ATTR(statfs_quantum, 1);
TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
@@ -596,6 +597,7 @@ static struct attribute *tune_attrs[] = {
&tune_attr_max_readahead.attr,
&tune_attr_complain_secs.attr,
&tune_attr_statfs_slow.attr,
+ &tune_attr_quota_simul_sync.attr,
&tune_attr_statfs_quantum.attr,
&tune_attr_quota_scale.attr,
&tune_attr_new_files_jdata.attr,
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index f7109f6..6402fb6 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -268,3 +268,23 @@ int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
return rv;
}
+void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
+ unsigned int bit, int new_value)
+{
+ unsigned int c, o, b = bit;
+ int old_value;
+
+ c = b / (8 * PAGE_SIZE);
+ b %= 8 * PAGE_SIZE;
+ o = b / 8;
+ b %= 8;
+
+ old_value = (bitmap[c][o] & (1 << b));
+ gfs2_assert_withdraw(sdp, !old_value != !new_value);
+
+ if (new_value)
+ bitmap[c][o] |= 1 << b;
+ else
+ bitmap[c][o] &= ~(1 << b);
+}
+
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index b7ffb09..8053573 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -164,6 +164,8 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
#define gfs2_tune_get(sdp, field) \
gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
+void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
+ unsigned int bit, int new_value);
int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...);
#endif /* __UTIL_DOT_H__ */
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 8c6a6f6..ecd37f3 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -723,7 +723,6 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
unsigned int blks,
ea_skeleton_call_t skeleton_call, void *private)
{
- struct gfs2_alloc_parms ap = { .target = blks };
struct buffer_head *dibh;
int error;
@@ -735,7 +734,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
if (error)
return error;
- error = gfs2_inplace_reserve(ip, &ap);
+ error = gfs2_inplace_reserve(ip, blks, 0);
if (error)
goto out_gunlock_q;
diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h
index f6bd266..2a1d712 100644
--- a/fs/hfs/btree.h
+++ b/fs/hfs/btree.h
@@ -153,6 +153,11 @@ struct hfs_btree_header_rec {
u32 reserved3[16];
} __packed;
+#define HFS_NODE_INDEX 0x00 /* An internal (index) node */
+#define HFS_NODE_HEADER 0x01 /* The tree header node (node 0) */
+#define HFS_NODE_MAP 0x02 /* Holds part of the bitmap of used nodes */
+#define HFS_NODE_LEAF 0xFF /* A leaf (ndNHeight==1) node */
+
#define BTREE_ATTR_BADCLOSE 0x00000001 /* b-tree not closed properly. not
used by hfsplus. */
#define HFS_TREE_BIGKEYS 0x00000002 /* key length is u16 instead of u8.
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 0fcec8b..0c6540c 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -15,118 +15,6 @@
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
-/*
- * Initial source code of clump size calculation is gotten
- * from http://opensource.apple.com/tarballs/diskdev_cmds/
- */
-#define CLUMP_ENTRIES 15
-
-static short clumptbl[CLUMP_ENTRIES * 3] = {
-/*
- * Volume Attributes Catalog Extents
- * Size Clump (MB) Clump (MB) Clump (MB)
- */
- /* 1GB */ 4, 4, 4,
- /* 2GB */ 6, 6, 4,
- /* 4GB */ 8, 8, 4,
- /* 8GB */ 11, 11, 5,
- /*
- * For volumes 16GB and larger, we want to make sure that a full OS
- * install won't require fragmentation of the Catalog or Attributes
- * B-trees. We do this by making the clump sizes sufficiently large,
- * and by leaving a gap after the B-trees for them to grow into.
- *
- * For SnowLeopard 10A298, a FullNetInstall with all packages selected
- * results in:
- * Catalog B-tree Header
- * nodeSize: 8192
- * totalNodes: 31616
- * freeNodes: 1978
- * (used = 231.55 MB)
- * Attributes B-tree Header
- * nodeSize: 8192
- * totalNodes: 63232
- * freeNodes: 958
- * (used = 486.52 MB)
- *
- * We also want Time Machine backup volumes to have a sufficiently
- * large clump size to reduce fragmentation.
- *
- * The series of numbers for Catalog and Attribute form a geometric
- * series. For Catalog (16GB to 512GB), each term is 8**(1/5) times
- * the previous term. For Attributes (16GB to 512GB), each term is
- * 4**(1/5) times the previous term. For 1TB to 16TB, each term is
- * 2**(1/5) times the previous term.
- */
- /* 16GB */ 64, 32, 5,
- /* 32GB */ 84, 49, 6,
- /* 64GB */ 111, 74, 7,
- /* 128GB */ 147, 111, 8,
- /* 256GB */ 194, 169, 9,
- /* 512GB */ 256, 256, 11,
- /* 1TB */ 294, 294, 14,
- /* 2TB */ 338, 338, 16,
- /* 4TB */ 388, 388, 20,
- /* 8TB */ 446, 446, 25,
- /* 16TB */ 512, 512, 32
-};
-
-u32 hfsplus_calc_btree_clump_size(u32 block_size, u32 node_size,
- u64 sectors, int file_id)
-{
- u32 mod = max(node_size, block_size);
- u32 clump_size;
- int column;
- int i;
-
- /* Figure out which column of the above table to use for this file. */
- switch (file_id) {
- case HFSPLUS_ATTR_CNID:
- column = 0;
- break;
- case HFSPLUS_CAT_CNID:
- column = 1;
- break;
- default:
- column = 2;
- break;
- }
-
- /*
- * The default clump size is 0.8% of the volume size. And
- * it must also be a multiple of the node and block size.
- */
- if (sectors < 0x200000) {
- clump_size = sectors << 2; /* 0.8 % */
- if (clump_size < (8 * node_size))
- clump_size = 8 * node_size;
- } else {
- /* turn exponent into table index... */
- for (i = 0, sectors = sectors >> 22;
- sectors && (i < CLUMP_ENTRIES - 1);
- ++i, sectors = sectors >> 1) {
- /* empty body */
- }
-
- clump_size = clumptbl[column + (i) * 3] * 1024 * 1024;
- }
-
- /*
- * Round the clump size to a multiple of node and block size.
- * NOTE: This rounds down.
- */
- clump_size /= mod;
- clump_size *= mod;
-
- /*
- * Rounding down could have rounded down to 0 if the block size was
- * greater than the clump size. If so, just use one block or node.
- */
- if (clump_size == 0)
- clump_size = mod;
-
- return clump_size;
-}
/* Get a reference to a B*Tree and do some initial checks */
struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 08846425b..2b9cd01 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -127,14 +127,6 @@ struct hfs_bnode {
#define HFS_BNODE_DELETED 4
/*
- * Attributes file states
- */
-#define HFSPLUS_EMPTY_ATTR_TREE 0
-#define HFSPLUS_CREATING_ATTR_TREE 1
-#define HFSPLUS_VALID_ATTR_TREE 2
-#define HFSPLUS_FAILED_ATTR_TREE 3
-
-/*
* HFS+ superblock info (built from Volume Header on disk)
*/
@@ -149,7 +141,6 @@ struct hfsplus_sb_info {
struct hfs_btree *ext_tree;
struct hfs_btree *cat_tree;
struct hfs_btree *attr_tree;
- atomic_t attr_tree_state;
struct inode *alloc_file;
struct inode *hidden_dir;
struct nls_table *nls;
@@ -389,7 +380,6 @@ int hfsplus_block_allocate(struct super_block *, u32, u32, u32 *);
int hfsplus_block_free(struct super_block *, u32, u32);
/* btree.c */
-u32 hfsplus_calc_btree_clump_size(u32, u32, u64, int);
struct hfs_btree *hfs_btree_open(struct super_block *, u32);
void hfs_btree_close(struct hfs_btree *);
int hfs_btree_write(struct hfs_btree *);
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 8ffb3a8..452ede0 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -156,10 +156,10 @@ struct hfs_bnode_desc {
} __packed;
/* HFS+ BTree node types */
-#define HFS_NODE_INDEX 0x00 /* An internal (index) node */
-#define HFS_NODE_HEADER 0x01 /* The tree header node (node 0) */
-#define HFS_NODE_MAP 0x02 /* Holds part of the bitmap of used nodes */
-#define HFS_NODE_LEAF 0xFF /* A leaf (ndNHeight==1) node */
+#define HFS_NODE_INDEX 0x00
+#define HFS_NODE_HEADER 0x01
+#define HFS_NODE_MAP 0x02
+#define HFS_NODE_LEAF 0xFF
/* HFS+ BTree header */
struct hfs_btree_header_rec {
@@ -187,9 +187,6 @@ struct hfs_btree_header_rec {
/* HFS+ BTree misc info */
#define HFSPLUS_TREE_HEAD 0
#define HFSPLUS_NODE_MXSZ 32768
-#define HFSPLUS_ATTR_TREE_NODE_SIZE 8192
-#define HFSPLUS_BTREE_HDR_NODE_RECS_COUNT 3
-#define HFSPLUS_BTREE_HDR_USER_BYTES 128
/* Some special File ID numbers (stolen from hfs.h) */
#define HFSPLUS_POR_CNID 1 /* Parent Of the Root */
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 80875aa..4c4d142 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -474,14 +474,12 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
pr_err("failed to load catalog file\n");
goto out_close_ext_tree;
}
- atomic_set(&sbi->attr_tree_state, HFSPLUS_EMPTY_ATTR_TREE);
if (vhdr->attr_file.total_blocks != 0) {
sbi->attr_tree = hfs_btree_open(sb, HFSPLUS_ATTR_CNID);
if (!sbi->attr_tree) {
pr_err("failed to load attributes file\n");
goto out_close_cat_tree;
}
- atomic_set(&sbi->attr_tree_state, HFSPLUS_VALID_ATTR_TREE);
}
sb->s_xattr = hfsplus_xattr_handlers;
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index e9a97a0..b51a607 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -24,6 +24,13 @@ struct hfsplus_wd {
u16 embed_count;
};
+static void hfsplus_end_io_sync(struct bio *bio, int err)
+{
+ if (err)
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ complete(bio->bi_private);
+}
+
/*
* hfsplus_submit_bio - Perfrom block I/O
* @sb: super block of volume for I/O
@@ -46,6 +53,7 @@ struct hfsplus_wd {
int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
void *buf, void **data, int rw)
{
+ DECLARE_COMPLETION_ONSTACK(wait);
struct bio *bio;
int ret = 0;
u64 io_size;
@@ -65,6 +73,8 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
bio = bio_alloc(GFP_NOIO, 1);
bio->bi_sector = sector;
bio->bi_bdev = sb->s_bdev;
+ bio->bi_end_io = hfsplus_end_io_sync;
+ bio->bi_private = &wait;
if (!(rw & WRITE) && data)
*data = (u8 *)buf + offset;
@@ -83,7 +93,12 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
buf = (u8 *)buf + len;
}
- ret = submit_bio_wait(rw, bio);
+ submit_bio(rw, bio);
+ wait_for_completion(&wait);
+
+ if (!bio_flagged(bio, BIO_UPTODATE))
+ ret = -EIO;
+
out:
bio_put(bio);
return ret < 0 ? ret : 0;
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index 3c6136f..bd8471f 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -127,211 +127,6 @@ static int can_set_xattr(struct inode *inode, const char *name,
return 0;
}
-static void hfsplus_init_header_node(struct inode *attr_file,
- u32 clump_size,
- char *buf, u16 node_size)
-{
- struct hfs_bnode_desc *desc;
- struct hfs_btree_header_rec *head;
- u16 offset;
- __be16 *rec_offsets;
- u32 hdr_node_map_rec_bits;
- char *bmp;
- u32 used_nodes;
- u32 used_bmp_bytes;
- loff_t tmp;
-
- hfs_dbg(ATTR_MOD, "init_hdr_attr_file: clump %u, node_size %u\n",
- clump_size, node_size);
-
- /* The end of the node contains list of record offsets */
- rec_offsets = (__be16 *)(buf + node_size);
-
- desc = (struct hfs_bnode_desc *)buf;
- desc->type = HFS_NODE_HEADER;
- desc->num_recs = cpu_to_be16(HFSPLUS_BTREE_HDR_NODE_RECS_COUNT);
- offset = sizeof(struct hfs_bnode_desc);
- *--rec_offsets = cpu_to_be16(offset);
-
- head = (struct hfs_btree_header_rec *)(buf + offset);
- head->node_size = cpu_to_be16(node_size);
- tmp = i_size_read(attr_file);
- do_div(tmp, node_size);
- head->node_count = cpu_to_be32(tmp);
- head->free_nodes = cpu_to_be32(be32_to_cpu(head->node_count) - 1);
- head->clump_size = cpu_to_be32(clump_size);
- head->attributes |= cpu_to_be32(HFS_TREE_BIGKEYS | HFS_TREE_VARIDXKEYS);
- head->max_key_len = cpu_to_be16(HFSPLUS_ATTR_KEYLEN - sizeof(u16));
- offset += sizeof(struct hfs_btree_header_rec);
- *--rec_offsets = cpu_to_be16(offset);
- offset += HFSPLUS_BTREE_HDR_USER_BYTES;
- *--rec_offsets = cpu_to_be16(offset);
-
- hdr_node_map_rec_bits = 8 * (node_size - offset - (4 * sizeof(u16)));
- if (be32_to_cpu(head->node_count) > hdr_node_map_rec_bits) {
- u32 map_node_bits;
- u32 map_nodes;
-
- desc->next = cpu_to_be32(be32_to_cpu(head->leaf_tail) + 1);
- map_node_bits = 8 * (node_size - sizeof(struct hfs_bnode_desc) -
- (2 * sizeof(u16)) - 2);
- map_nodes = (be32_to_cpu(head->node_count) -
- hdr_node_map_rec_bits +
- (map_node_bits - 1)) / map_node_bits;
- be32_add_cpu(&head->free_nodes, 0 - map_nodes);
- }
-
- bmp = buf + offset;
- used_nodes =
- be32_to_cpu(head->node_count) - be32_to_cpu(head->free_nodes);
- used_bmp_bytes = used_nodes / 8;
- if (used_bmp_bytes) {
- memset(bmp, 0xFF, used_bmp_bytes);
- bmp += used_bmp_bytes;
- used_nodes %= 8;
- }
- *bmp = ~(0xFF >> used_nodes);
- offset += hdr_node_map_rec_bits / 8;
- *--rec_offsets = cpu_to_be16(offset);
-}
-
-static int hfsplus_create_attributes_file(struct super_block *sb)
-{
- int err = 0;
- struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
- struct inode *attr_file;
- struct hfsplus_inode_info *hip;
- u32 clump_size;
- u16 node_size = HFSPLUS_ATTR_TREE_NODE_SIZE;
- char *buf;
- int index, written;
- struct address_space *mapping;
- struct page *page;
- int old_state = HFSPLUS_EMPTY_ATTR_TREE;
-
- hfs_dbg(ATTR_MOD, "create_attr_file: ino %d\n", HFSPLUS_ATTR_CNID);
-
-check_attr_tree_state_again:
- switch (atomic_read(&sbi->attr_tree_state)) {
- case HFSPLUS_EMPTY_ATTR_TREE:
- if (old_state != atomic_cmpxchg(&sbi->attr_tree_state,
- old_state,
- HFSPLUS_CREATING_ATTR_TREE))
- goto check_attr_tree_state_again;
- break;
- case HFSPLUS_CREATING_ATTR_TREE:
- /*
- * This state means that another thread is in process
- * of AttributesFile creation. Theoretically, it is
- * possible to be here. But really __setxattr() method
- * first of all calls hfs_find_init() for lookup in
- * B-tree of CatalogFile. This method locks mutex of
- * CatalogFile's B-tree. As a result, if some thread
- * is inside AttributedFile creation operation then
- * another threads will be waiting unlocking of
- * CatalogFile's B-tree's mutex. However, if code will
- * change then we will return error code (-EAGAIN) from
- * here. Really, it means that first try to set of xattr
- * fails with error but second attempt will have success.
- */
- return -EAGAIN;
- case HFSPLUS_VALID_ATTR_TREE:
- return 0;
- case HFSPLUS_FAILED_ATTR_TREE:
- return -EOPNOTSUPP;
- default:
- BUG();
- }
-
- attr_file = hfsplus_iget(sb, HFSPLUS_ATTR_CNID);
- if (IS_ERR(attr_file)) {
- pr_err("failed to load attributes file\n");
- return PTR_ERR(attr_file);
- }
-
- BUG_ON(i_size_read(attr_file) != 0);
-
- hip = HFSPLUS_I(attr_file);
-
- clump_size = hfsplus_calc_btree_clump_size(sb->s_blocksize,
- node_size,
- sbi->sect_count,
- HFSPLUS_ATTR_CNID);
-
- mutex_lock(&hip->extents_lock);
- hip->clump_blocks = clump_size >> sbi->alloc_blksz_shift;
- mutex_unlock(&hip->extents_lock);
-
- if (sbi->free_blocks <= (hip->clump_blocks << 1)) {
- err = -ENOSPC;
- goto end_attr_file_creation;
- }
-
- while (hip->alloc_blocks < hip->clump_blocks) {
- err = hfsplus_file_extend(attr_file);
- if (unlikely(err)) {
- pr_err("failed to extend attributes file\n");
- goto end_attr_file_creation;
- }
- hip->phys_size = attr_file->i_size =
- (loff_t)hip->alloc_blocks << sbi->alloc_blksz_shift;
- hip->fs_blocks = hip->alloc_blocks << sbi->fs_shift;
- inode_set_bytes(attr_file, attr_file->i_size);
- }
-
- buf = kzalloc(node_size, GFP_NOFS);
- if (!buf) {
- pr_err("failed to allocate memory for header node\n");
- err = -ENOMEM;
- goto end_attr_file_creation;
- }
-
- hfsplus_init_header_node(attr_file, clump_size, buf, node_size);
-
- mapping = attr_file->i_mapping;
-
- index = 0;
- written = 0;
- for (; written < node_size; index++, written += PAGE_CACHE_SIZE) {
- void *kaddr;
-
- page = read_mapping_page(mapping, index, NULL);
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- goto failed_header_node_init;
- }
-
- kaddr = kmap_atomic(page);
- memcpy(kaddr, buf + written,
- min_t(size_t, PAGE_CACHE_SIZE, node_size - written));
- kunmap_atomic(kaddr);
-
- set_page_dirty(page);
- page_cache_release(page);
- }
-
- hfsplus_mark_inode_dirty(attr_file, HFSPLUS_I_ATTR_DIRTY);
-
- sbi->attr_tree = hfs_btree_open(sb, HFSPLUS_ATTR_CNID);
- if (!sbi->attr_tree)
- pr_err("failed to load attributes file\n");
-
-failed_header_node_init:
- kfree(buf);
-
-end_attr_file_creation:
- iput(attr_file);
-
- if (!err)
- atomic_set(&sbi->attr_tree_state, HFSPLUS_VALID_ATTR_TREE);
- else if (err == -ENOSPC)
- atomic_set(&sbi->attr_tree_state, HFSPLUS_EMPTY_ATTR_TREE);
- else
- atomic_set(&sbi->attr_tree_state, HFSPLUS_FAILED_ATTR_TREE);
-
- return err;
-}
-
int __hfsplus_setxattr(struct inode *inode, const char *name,
const void *value, size_t size, int flags)
{
@@ -416,9 +211,8 @@ int __hfsplus_setxattr(struct inode *inode, const char *name,
}
if (!HFSPLUS_SB(inode->i_sb)->attr_tree) {
- err = hfsplus_create_attributes_file(inode->i_sb);
- if (unlikely(err))
- goto end_setxattr;
+ err = -EOPNOTSUPP;
+ goto end_setxattr;
}
if (hfsplus_attr_exists(inode, name)) {
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index db23ce1..2543728 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -33,6 +33,15 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
#define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file))
+static int hostfs_d_delete(const struct dentry *dentry)
+{
+ return 1;
+}
+
+static const struct dentry_operations hostfs_dentry_ops = {
+ .d_delete = hostfs_d_delete,
+};
+
/* Changed in hostfs_args before the kernel starts running */
static char *root_ino = "";
static int append = 0;
@@ -916,7 +925,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
sb->s_blocksize_bits = 10;
sb->s_magic = HOSTFS_SUPER_MAGIC;
sb->s_op = &hostfs_sbops;
- sb->s_d_op = &simple_dentry_operations;
+ sb->s_d_op = &hostfs_dentry_ops;
sb->s_maxbytes = MAX_LFS_FILESIZE;
/* NULL is printed as <NULL> by sprintf: avoid that. */
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 6797bf8..1b39863 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -80,7 +80,6 @@ struct hpfs_sb_info {
unsigned sb_c_bitmap; /* current bitmap */
unsigned sb_max_fwd_alloc; /* max forwad allocation */
int sb_timeshift;
- struct rcu_head rcu;
};
/* Four 512-byte buffers and the 2k block obtained by concatenating them */
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 1b39afd..345713d 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -407,7 +407,7 @@ again:
/*printk("HPFS: truncating file before delete.\n");*/
newattrs.ia_size = 0;
newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
- err = notify_change(dentry, &newattrs, NULL);
+ err = notify_change(dentry, &newattrs);
put_write_access(inode);
if (!err)
goto again;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index b8d01ef..4334cda 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -101,24 +101,18 @@ int hpfs_stop_cycles(struct super_block *s, int key, int *c1, int *c2,
return 0;
}
-static void free_sbi(struct hpfs_sb_info *sbi)
-{
- kfree(sbi->sb_cp_table);
- kfree(sbi->sb_bmp_dir);
- kfree(sbi);
-}
-
-static void lazy_free_sbi(struct rcu_head *rcu)
-{
- free_sbi(container_of(rcu, struct hpfs_sb_info, rcu));
-}
-
static void hpfs_put_super(struct super_block *s)
{
+ struct hpfs_sb_info *sbi = hpfs_sb(s);
+
hpfs_lock(s);
unmark_dirty(s);
hpfs_unlock(s);
- call_rcu(&hpfs_sb(s)->rcu, lazy_free_sbi);
+
+ kfree(sbi->sb_cp_table);
+ kfree(sbi->sb_bmp_dir);
+ s->s_fs_info = NULL;
+ kfree(sbi);
}
unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno)
@@ -491,6 +485,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
}
s->s_fs_info = sbi;
+ sbi->sb_bmp_dir = NULL;
+ sbi->sb_cp_table = NULL;
+
mutex_init(&sbi->hpfs_mutex);
hpfs_lock(s);
@@ -682,7 +679,10 @@ bail2: brelse(bh0);
bail1:
bail0:
hpfs_unlock(s);
- free_sbi(sbi);
+ kfree(sbi->sb_bmp_dir);
+ kfree(sbi->sb_cp_table);
+ s->s_fs_info = NULL;
+ kfree(sbi);
return -EINVAL;
}
diff --git a/fs/inode.c b/fs/inode.c
index 4bcdad3..b33ba8e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -773,11 +773,15 @@ static struct inode *find_inode(struct super_block *sb,
repeat:
hlist_for_each_entry(inode, head, i_hash) {
- if (inode->i_sb != sb)
+ spin_lock(&inode->i_lock);
+ if (inode->i_sb != sb) {
+ spin_unlock(&inode->i_lock);
continue;
- if (!test(inode, data))
+ }
+ if (!test(inode, data)) {
+ spin_unlock(&inode->i_lock);
continue;
- spin_lock(&inode->i_lock);
+ }
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
__wait_on_freeing_inode(inode);
goto repeat;
@@ -800,11 +804,15 @@ static struct inode *find_inode_fast(struct super_block *sb,
repeat:
hlist_for_each_entry(inode, head, i_hash) {
- if (inode->i_ino != ino)
+ spin_lock(&inode->i_lock);
+ if (inode->i_ino != ino) {
+ spin_unlock(&inode->i_lock);
continue;
- if (inode->i_sb != sb)
+ }
+ if (inode->i_sb != sb) {
+ spin_unlock(&inode->i_lock);
continue;
- spin_lock(&inode->i_lock);
+ }
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
__wait_on_freeing_inode(inode);
goto repeat;
@@ -943,42 +951,6 @@ void unlock_new_inode(struct inode *inode)
EXPORT_SYMBOL(unlock_new_inode);
/**
- * lock_two_nondirectories - take two i_mutexes on non-directory objects
- * @inode1: first inode to lock
- * @inode2: second inode to lock
- */
-void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
-{
- WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
- if (inode1 == inode2 || !inode2) {
- mutex_lock(&inode1->i_mutex);
- return;
- }
- WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
- if (inode1 < inode2) {
- mutex_lock(&inode1->i_mutex);
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2);
- } else {
- mutex_lock(&inode2->i_mutex);
- mutex_lock_nested(&inode1->i_mutex, I_MUTEX_NONDIR2);
- }
-}
-EXPORT_SYMBOL(lock_two_nondirectories);
-
-/**
- * unlock_two_nondirectories - release locks from lock_two_nondirectories()
- * @inode1: first inode to unlock
- * @inode2: second inode to unlock
- */
-void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
-{
- mutex_unlock(&inode1->i_mutex);
- if (inode2 && inode2 != inode1)
- mutex_unlock(&inode2->i_mutex);
-}
-EXPORT_SYMBOL(unlock_two_nondirectories);
-
-/**
* iget5_locked - obtain an inode from a mounted file system
* @sb: super block of file system
* @hashval: hash value (usually inode number) to get
@@ -1603,11 +1575,7 @@ static int __remove_suid(struct dentry *dentry, int kill)
struct iattr newattrs;
newattrs.ia_valid = ATTR_FORCE | kill;
- /*
- * Note we call this on write, so notify_change will not
- * encounter any conflicting delegations:
- */
- return notify_change(dentry, &newattrs, NULL);
+ return notify_change(dentry, &newattrs);
}
int file_remove_suid(struct file *file)
diff --git a/fs/internal.h b/fs/internal.h
index 4657424..513e0d8 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/lglock.h>
+
struct super_block;
struct file_system_type;
struct linux_binprm;
@@ -60,6 +62,8 @@ extern int sb_prepare_remount_readonly(struct super_block *);
extern void __init mnt_init(void);
+extern struct lglock vfsmount_lock;
+
extern int __mnt_want_write(struct vfsmount *);
extern int __mnt_want_write_file(struct file *);
extern void __mnt_drop_write(struct vfsmount *);
@@ -73,6 +77,9 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
/*
* file_table.c
*/
+extern void file_sb_list_add(struct file *f, struct super_block *sb);
+extern void file_sb_list_del(struct file *f);
+extern void mark_files_ro(struct super_block *);
extern struct file *get_empty_filp(void);
/*
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 8ac3fad..fd507fb 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -37,7 +37,7 @@ static long vfs_ioctl(struct file *filp, unsigned int cmd,
{
int error = -ENOTTY;
- if (!filp->f_op->unlocked_ioctl)
+ if (!filp->f_op || !filp->f_op->unlocked_ioctl)
goto out;
error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
@@ -501,7 +501,7 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
/* Did FASYNC state change ? */
if ((flag ^ filp->f_flags) & FASYNC) {
- if (filp->f_op->fasync)
+ if (filp->f_op && filp->f_op->fasync)
/* fasync() adjusts filp->f_flags */
error = filp->f_op->fasync(fd, filp, on);
else
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 4a9e10e..e5d408a 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -181,7 +181,7 @@ struct iso9660_options{
* Compute the hash for the isofs name corresponding to the dentry.
*/
static int
-isofs_hash_common(struct qstr *qstr, int ms)
+isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms)
{
const char *name;
int len;
@@ -202,7 +202,7 @@ isofs_hash_common(struct qstr *qstr, int ms)
* Compute the hash for the isofs name corresponding to the dentry.
*/
static int
-isofs_hashi_common(struct qstr *qstr, int ms)
+isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms)
{
const char *name;
int len;
@@ -259,13 +259,13 @@ static int isofs_dentry_cmp_common(
static int
isofs_hash(const struct dentry *dentry, struct qstr *qstr)
{
- return isofs_hash_common(qstr, 0);
+ return isofs_hash_common(dentry, qstr, 0);
}
static int
isofs_hashi(const struct dentry *dentry, struct qstr *qstr)
{
- return isofs_hashi_common(qstr, 0);
+ return isofs_hashi_common(dentry, qstr, 0);
}
static int
@@ -286,13 +286,13 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry,
static int
isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr)
{
- return isofs_hash_common(qstr, 1);
+ return isofs_hash_common(dentry, qstr, 1);
}
static int
isofs_hashi_ms(const struct dentry *dentry, struct qstr *qstr)
{
- return isofs_hashi_common(qstr, 1);
+ return isofs_hashi_common(dentry, qstr, 1);
}
static int
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index aa603e0..be0c39b 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -26,6 +26,7 @@
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hrtimer.h>
+#include <linux/backing-dev.h>
static void __journal_temp_unlink_buffer(struct journal_head *jh);
@@ -99,11 +100,10 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
alloc_transaction:
if (!journal->j_running_transaction) {
- new_transaction = kzalloc(sizeof(*new_transaction),
- GFP_NOFS|__GFP_NOFAIL);
+ new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS);
if (!new_transaction) {
- ret = -ENOMEM;
- goto out;
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto alloc_transaction;
}
}
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 09b3ed4..fe3c052 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -515,10 +515,6 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
c = JFFS2_SB_INFO(sb);
- /* Do not support the MLC nand */
- if (c->mtd->type == MTD_MLCNANDFLASH)
- return -EINVAL;
-
#ifndef CONFIG_JFFS2_FS_WRITEBUFFER
if (c->mtd->type == MTD_NANDFLASH) {
pr_err("Cannot operate on NAND flash unless jffs2 NAND support is compiled in\n");
diff --git a/fs/libfs.c b/fs/libfs.c
index a184424..3a3a9b5 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -10,7 +10,6 @@
#include <linux/vfs.h>
#include <linux/quotaops.h>
#include <linux/mutex.h>
-#include <linux/namei.h>
#include <linux/exportfs.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h> /* sync_mapping_buffers */
@@ -32,7 +31,6 @@ int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9);
return 0;
}
-EXPORT_SYMBOL(simple_getattr);
int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
{
@@ -41,22 +39,15 @@ int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_namelen = NAME_MAX;
return 0;
}
-EXPORT_SYMBOL(simple_statfs);
/*
* Retaining negative dentries for an in-memory filesystem just wastes
* memory and lookup time: arrange for them to be deleted immediately.
*/
-int always_delete_dentry(const struct dentry *dentry)
+static int simple_delete_dentry(const struct dentry *dentry)
{
return 1;
}
-EXPORT_SYMBOL(always_delete_dentry);
-
-const struct dentry_operations simple_dentry_operations = {
- .d_delete = always_delete_dentry,
-};
-EXPORT_SYMBOL(simple_dentry_operations);
/*
* Lookup the data. This is trivial - if the dentry didn't already
@@ -64,6 +55,10 @@ EXPORT_SYMBOL(simple_dentry_operations);
*/
struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
+ static const struct dentry_operations simple_dentry_operations = {
+ .d_delete = simple_delete_dentry,
+ };
+
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
if (!dentry->d_sb->s_d_op)
@@ -71,7 +66,6 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned
d_add(dentry, NULL);
return NULL;
}
-EXPORT_SYMBOL(simple_lookup);
int dcache_dir_open(struct inode *inode, struct file *file)
{
@@ -81,14 +75,12 @@ int dcache_dir_open(struct inode *inode, struct file *file)
return file->private_data ? 0 : -ENOMEM;
}
-EXPORT_SYMBOL(dcache_dir_open);
int dcache_dir_close(struct inode *inode, struct file *file)
{
dput(file->private_data);
return 0;
}
-EXPORT_SYMBOL(dcache_dir_close);
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
{
@@ -131,7 +123,6 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
mutex_unlock(&dentry->d_inode->i_mutex);
return offset;
}
-EXPORT_SYMBOL(dcache_dir_lseek);
/* Relationship between i_mode and the DT_xxx types */
static inline unsigned char dt_type(struct inode *inode)
@@ -181,13 +172,11 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
spin_unlock(&dentry->d_lock);
return 0;
}
-EXPORT_SYMBOL(dcache_readdir);
ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
{
return -EISDIR;
}
-EXPORT_SYMBOL(generic_read_dir);
const struct file_operations simple_dir_operations = {
.open = dcache_dir_open,
@@ -197,12 +186,10 @@ const struct file_operations simple_dir_operations = {
.iterate = dcache_readdir,
.fsync = noop_fsync,
};
-EXPORT_SYMBOL(simple_dir_operations);
const struct inode_operations simple_dir_inode_operations = {
.lookup = simple_lookup,
};
-EXPORT_SYMBOL(simple_dir_inode_operations);
static const struct super_operations simple_super_operations = {
.statfs = simple_statfs,
@@ -257,7 +244,6 @@ Enomem:
deactivate_locked_super(s);
return ERR_PTR(-ENOMEM);
}
-EXPORT_SYMBOL(mount_pseudo);
int simple_open(struct inode *inode, struct file *file)
{
@@ -265,7 +251,6 @@ int simple_open(struct inode *inode, struct file *file)
file->private_data = inode->i_private;
return 0;
}
-EXPORT_SYMBOL(simple_open);
int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
@@ -278,7 +263,6 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
d_instantiate(dentry, inode);
return 0;
}
-EXPORT_SYMBOL(simple_link);
int simple_empty(struct dentry *dentry)
{
@@ -299,7 +283,6 @@ out:
spin_unlock(&dentry->d_lock);
return ret;
}
-EXPORT_SYMBOL(simple_empty);
int simple_unlink(struct inode *dir, struct dentry *dentry)
{
@@ -310,7 +293,6 @@ int simple_unlink(struct inode *dir, struct dentry *dentry)
dput(dentry);
return 0;
}
-EXPORT_SYMBOL(simple_unlink);
int simple_rmdir(struct inode *dir, struct dentry *dentry)
{
@@ -322,7 +304,6 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry)
drop_nlink(dir);
return 0;
}
-EXPORT_SYMBOL(simple_rmdir);
int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
@@ -349,7 +330,6 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
return 0;
}
-EXPORT_SYMBOL(simple_rename);
/**
* simple_setattr - setattr for simple filesystem
@@ -390,7 +370,6 @@ int simple_readpage(struct file *file, struct page *page)
unlock_page(page);
return 0;
}
-EXPORT_SYMBOL(simple_readpage);
int simple_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
@@ -414,7 +393,6 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
}
return 0;
}
-EXPORT_SYMBOL(simple_write_begin);
/**
* simple_write_end - .write_end helper for non-block-device FSes
@@ -466,7 +444,6 @@ int simple_write_end(struct file *file, struct address_space *mapping,
return copied;
}
-EXPORT_SYMBOL(simple_write_end);
/*
* the inodes created here are not hashed. If you use iunique to generate
@@ -535,7 +512,6 @@ out:
dput(root);
return -ENOMEM;
}
-EXPORT_SYMBOL(simple_fill_super);
static DEFINE_SPINLOCK(pin_fs_lock);
@@ -558,7 +534,6 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c
mntput(mnt);
return 0;
}
-EXPORT_SYMBOL(simple_pin_fs);
void simple_release_fs(struct vfsmount **mount, int *count)
{
@@ -570,7 +545,6 @@ void simple_release_fs(struct vfsmount **mount, int *count)
spin_unlock(&pin_fs_lock);
mntput(mnt);
}
-EXPORT_SYMBOL(simple_release_fs);
/**
* simple_read_from_buffer - copy data from the buffer to user space
@@ -605,7 +579,6 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
*ppos = pos + count;
return count;
}
-EXPORT_SYMBOL(simple_read_from_buffer);
/**
* simple_write_to_buffer - copy data from user space to the buffer
@@ -640,7 +613,6 @@ ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
*ppos = pos + count;
return count;
}
-EXPORT_SYMBOL(simple_write_to_buffer);
/**
* memory_read_from_buffer - copy data from the buffer
@@ -672,7 +644,6 @@ ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
return count;
}
-EXPORT_SYMBOL(memory_read_from_buffer);
/*
* Transaction based IO.
@@ -694,7 +665,6 @@ void simple_transaction_set(struct file *file, size_t n)
smp_mb();
ar->size = n;
}
-EXPORT_SYMBOL(simple_transaction_set);
char *simple_transaction_get(struct file *file, const char __user *buf, size_t size)
{
@@ -726,7 +696,6 @@ char *simple_transaction_get(struct file *file, const char __user *buf, size_t s
return ar->data;
}
-EXPORT_SYMBOL(simple_transaction_get);
ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
{
@@ -736,14 +705,12 @@ ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size
return 0;
return simple_read_from_buffer(buf, size, pos, ar->data, ar->size);
}
-EXPORT_SYMBOL(simple_transaction_read);
int simple_transaction_release(struct inode *inode, struct file *file)
{
free_page((unsigned long)file->private_data);
return 0;
}
-EXPORT_SYMBOL(simple_transaction_release);
/* Simple attribute files */
@@ -779,14 +746,12 @@ int simple_attr_open(struct inode *inode, struct file *file,
return nonseekable_open(inode, file);
}
-EXPORT_SYMBOL_GPL(simple_attr_open);
int simple_attr_release(struct inode *inode, struct file *file)
{
kfree(file->private_data);
return 0;
}
-EXPORT_SYMBOL_GPL(simple_attr_release); /* GPL-only? This? Really? */
/* read from the buffer that is filled with the get function */
ssize_t simple_attr_read(struct file *file, char __user *buf,
@@ -822,7 +787,6 @@ out:
mutex_unlock(&attr->mutex);
return ret;
}
-EXPORT_SYMBOL_GPL(simple_attr_read);
/* interpret the buffer as a number to call the set function with */
ssize_t simple_attr_write(struct file *file, const char __user *buf,
@@ -855,7 +819,6 @@ out:
mutex_unlock(&attr->mutex);
return ret;
}
-EXPORT_SYMBOL_GPL(simple_attr_write);
/**
* generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
@@ -994,56 +957,39 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
return 0;
}
-EXPORT_SYMBOL(noop_fsync);
-
-void kfree_put_link(struct dentry *dentry, struct nameidata *nd,
- void *cookie)
-{
- char *s = nd_get_link(nd);
- if (!IS_ERR(s))
- kfree(s);
-}
-EXPORT_SYMBOL(kfree_put_link);
-
-/*
- * nop .set_page_dirty method so that people can use .page_mkwrite on
- * anon inodes.
- */
-static int anon_set_page_dirty(struct page *page)
-{
- return 0;
-};
-/*
- * A single inode exists for all anon_inode files. Contrary to pipes,
- * anon_inode inodes have no associated per-instance data, so we need
- * only allocate one of them.
- */
-struct inode *alloc_anon_inode(struct super_block *s)
-{
- static const struct address_space_operations anon_aops = {
- .set_page_dirty = anon_set_page_dirty,
- };
- struct inode *inode = new_inode_pseudo(s);
-
- if (!inode)
- return ERR_PTR(-ENOMEM);
-
- inode->i_ino = get_next_ino();
- inode->i_mapping->a_ops = &anon_aops;
-
- /*
- * Mark the inode dirty from the very beginning,
- * that way it will never be moved to the dirty
- * list because mark_inode_dirty() will think
- * that it already _is_ on the dirty list.
- */
- inode->i_state = I_DIRTY;
- inode->i_mode = S_IRUSR | S_IWUSR;
- inode->i_uid = current_fsuid();
- inode->i_gid = current_fsgid();
- inode->i_flags |= S_PRIVATE;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- return inode;
-}
-EXPORT_SYMBOL(alloc_anon_inode);
+EXPORT_SYMBOL(dcache_dir_close);
+EXPORT_SYMBOL(dcache_dir_lseek);
+EXPORT_SYMBOL(dcache_dir_open);
+EXPORT_SYMBOL(dcache_readdir);
+EXPORT_SYMBOL(generic_read_dir);
+EXPORT_SYMBOL(mount_pseudo);
+EXPORT_SYMBOL(simple_write_begin);
+EXPORT_SYMBOL(simple_write_end);
+EXPORT_SYMBOL(simple_dir_inode_operations);
+EXPORT_SYMBOL(simple_dir_operations);
+EXPORT_SYMBOL(simple_empty);
+EXPORT_SYMBOL(simple_fill_super);
+EXPORT_SYMBOL(simple_getattr);
+EXPORT_SYMBOL(simple_open);
+EXPORT_SYMBOL(simple_link);
+EXPORT_SYMBOL(simple_lookup);
+EXPORT_SYMBOL(simple_pin_fs);
+EXPORT_SYMBOL(simple_readpage);
+EXPORT_SYMBOL(simple_release_fs);
+EXPORT_SYMBOL(simple_rename);
+EXPORT_SYMBOL(simple_rmdir);
+EXPORT_SYMBOL(simple_statfs);
+EXPORT_SYMBOL(noop_fsync);
+EXPORT_SYMBOL(simple_unlink);
+EXPORT_SYMBOL(simple_read_from_buffer);
+EXPORT_SYMBOL(simple_write_to_buffer);
+EXPORT_SYMBOL(memory_read_from_buffer);
+EXPORT_SYMBOL(simple_transaction_set);
+EXPORT_SYMBOL(simple_transaction_get);
+EXPORT_SYMBOL(simple_transaction_read);
+EXPORT_SYMBOL(simple_transaction_release);
+EXPORT_SYMBOL_GPL(simple_attr_open);
+EXPORT_SYMBOL_GPL(simple_attr_release);
+EXPORT_SYMBOL_GPL(simple_attr_read);
+EXPORT_SYMBOL_GPL(simple_attr_write);
diff --git a/fs/locks.c b/fs/locks.c
index 92a0f0a..b27a300 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -134,7 +134,7 @@
#define IS_POSIX(fl) (fl->fl_flags & FL_POSIX)
#define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK)
-#define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG))
+#define IS_LEASE(fl) (fl->fl_flags & FL_LEASE)
static bool lease_breaking(struct file_lock *fl)
{
@@ -1292,40 +1292,28 @@ static void time_out_leases(struct inode *inode)
}
}
-static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
-{
- if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE))
- return false;
- return locks_conflict(breaker, lease);
-}
-
/**
* __break_lease - revoke all outstanding leases on file
* @inode: the inode of the file to return
- * @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR:
- * break all leases
- * @type: FL_LEASE: break leases and delegations; FL_DELEG: break
- * only delegations
+ * @mode: the open mode (read or write)
*
* break_lease (inlined for speed) has checked there already is at least
* some kind of lock (maybe a lease) on this file. Leases are broken on
* a call to open() or truncate(). This function can sleep unless you
* specified %O_NONBLOCK to your open().
*/
-int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
+int __break_lease(struct inode *inode, unsigned int mode)
{
int error = 0;
struct file_lock *new_fl, *flock;
struct file_lock *fl;
unsigned long break_time;
int i_have_this_lease = 0;
- bool lease_conflict = false;
int want_write = (mode & O_ACCMODE) != O_RDONLY;
new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
if (IS_ERR(new_fl))
return PTR_ERR(new_fl);
- new_fl->fl_flags = type;
spin_lock(&inode->i_lock);
@@ -1335,16 +1323,13 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
if ((flock == NULL) || !IS_LEASE(flock))
goto out;
- for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
- if (leases_conflict(fl, new_fl)) {
- lease_conflict = true;
- if (fl->fl_owner == current->files)
- i_have_this_lease = 1;
- }
- }
- if (!lease_conflict)
+ if (!locks_conflict(flock, new_fl))
goto out;
+ for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next)
+ if (fl->fl_owner == current->files)
+ i_have_this_lease = 1;
+
break_time = 0;
if (lease_break_time > 0) {
break_time = jiffies + lease_break_time * HZ;
@@ -1353,8 +1338,6 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
}
for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
- if (!leases_conflict(fl, new_fl))
- continue;
if (want_write) {
if (fl->fl_flags & FL_UNLOCK_PENDING)
continue;
@@ -1396,7 +1379,7 @@ restart:
*/
for (flock = inode->i_flock; flock && IS_LEASE(flock);
flock = flock->fl_next) {
- if (leases_conflict(new_fl, flock))
+ if (locks_conflict(new_fl, flock))
goto restart;
}
error = 0;
@@ -1477,27 +1460,9 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp
struct file_lock *fl, **before, **my_before = NULL, *lease;
struct dentry *dentry = filp->f_path.dentry;
struct inode *inode = dentry->d_inode;
- bool is_deleg = (*flp)->fl_flags & FL_DELEG;
int error;
lease = *flp;
- /*
- * In the delegation case we need mutual exclusion with
- * a number of operations that take the i_mutex. We trylock
- * because delegations are an optional optimization, and if
- * there's some chance of a conflict--we'd rather not
- * bother, maybe that's a sign this just isn't a good file to
- * hand out a delegation on.
- */
- if (is_deleg && !mutex_trylock(&inode->i_mutex))
- return -EAGAIN;
-
- if (is_deleg && arg == F_WRLCK) {
- /* Write delegations are not currently supported: */
- mutex_unlock(&inode->i_mutex);
- WARN_ON_ONCE(1);
- return -EINVAL;
- }
error = -EAGAIN;
if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
@@ -1549,10 +1514,9 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp
goto out;
locks_insert_lock(before, lease);
- error = 0;
+ return 0;
+
out:
- if (is_deleg)
- mutex_unlock(&inode->i_mutex);
return error;
}
@@ -1615,7 +1579,7 @@ EXPORT_SYMBOL(generic_setlease);
static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
{
- if (filp->f_op->setlease)
+ if (filp->f_op && filp->f_op->setlease)
return filp->f_op->setlease(filp, arg, lease);
else
return generic_setlease(filp, arg, lease);
@@ -1807,7 +1771,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
if (error)
goto out_free;
- if (f.file->f_op->flock)
+ if (f.file->f_op && f.file->f_op->flock)
error = f.file->f_op->flock(f.file,
(can_sleep) ? F_SETLKW : F_SETLK,
lock);
@@ -1833,7 +1797,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
*/
int vfs_test_lock(struct file *filp, struct file_lock *fl)
{
- if (filp->f_op->lock)
+ if (filp->f_op && filp->f_op->lock)
return filp->f_op->lock(filp, F_GETLK, fl);
posix_test_lock(filp, fl);
return 0;
@@ -1945,7 +1909,7 @@ out:
*/
int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
{
- if (filp->f_op->lock)
+ if (filp->f_op && filp->f_op->lock)
return filp->f_op->lock(filp, cmd, fl);
else
return posix_lock_file(filp, fl, conf);
@@ -2218,7 +2182,7 @@ void locks_remove_flock(struct file *filp)
if (!inode->i_flock)
return;
- if (filp->f_op->flock) {
+ if (filp->f_op && filp->f_op->flock) {
struct file_lock fl = {
.fl_pid = current->tgid,
.fl_file = filp,
@@ -2282,7 +2246,7 @@ EXPORT_SYMBOL(posix_unblock_lock);
*/
int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
{
- if (filp->f_op->lock)
+ if (filp->f_op && filp->f_op->lock)
return filp->f_op->lock(filp, F_CANCELLK, fl);
return 0;
}
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 0f95f0d..550475c 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -14,10 +14,16 @@
#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
+static void request_complete(struct bio *bio, int err)
+{
+ complete((struct completion *)bio->bi_private);
+}
+
static int sync_request(struct page *page, struct block_device *bdev, int rw)
{
struct bio bio;
struct bio_vec bio_vec;
+ struct completion complete;
bio_init(&bio);
bio.bi_max_vecs = 1;
@@ -29,8 +35,13 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
bio.bi_size = PAGE_SIZE;
bio.bi_bdev = bdev;
bio.bi_sector = page->index * (PAGE_SIZE >> 9);
+ init_completion(&complete);
+ bio.bi_private = &complete;
+ bio.bi_end_io = request_complete;
- return submit_bio_wait(rw, &bio);
+ submit_bio(rw, &bio);
+ wait_for_completion(&complete);
+ return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO;
}
static int bdev_readpage(void *_sb, struct page *page)
diff --git a/fs/minix/Kconfig b/fs/minix/Kconfig
index f2a0cfc..6624684 100644
--- a/fs/minix/Kconfig
+++ b/fs/minix/Kconfig
@@ -18,7 +18,7 @@ config MINIX_FS
config MINIX_FS_NATIVE_ENDIAN
def_bool MINIX_FS
- depends on M32R || MICROBLAZE || MIPS || S390 || SUPERH || SPARC || XTENSA || (M68K && !MMU)
+ depends on H8300 || M32R || MICROBLAZE || MIPS || S390 || SUPERH || SPARC || XTENSA || (M68K && !MMU)
config MINIX_FS_BIG_ENDIAN_16BIT_INDEXED
def_bool MINIX_FS
diff --git a/fs/mount.h b/fs/mount.h
index d64c594..64a8581 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -29,7 +29,6 @@ struct mount {
struct mount *mnt_parent;
struct dentry *mnt_mountpoint;
struct vfsmount mnt;
- struct rcu_head mnt_rcu;
#ifdef CONFIG_SMP
struct mnt_pcp __percpu *mnt_pcp;
#else
@@ -56,7 +55,7 @@ struct mount {
int mnt_group_id; /* peer group identifier */
int mnt_expiry_mark; /* true if marked for expiry */
int mnt_pinned;
- struct path mnt_ex_mountpoint;
+ int mnt_ghosts;
};
#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
@@ -77,28 +76,13 @@ static inline int is_mounted(struct vfsmount *mnt)
return !IS_ERR_OR_NULL(real_mount(mnt));
}
-extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
-extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
-
-extern bool legitimize_mnt(struct vfsmount *, unsigned);
+extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
static inline void get_mnt_ns(struct mnt_namespace *ns)
{
atomic_inc(&ns->count);
}
-extern seqlock_t mount_lock;
-
-static inline void lock_mount_hash(void)
-{
- write_seqlock(&mount_lock);
-}
-
-static inline void unlock_mount_hash(void)
-{
- write_sequnlock(&mount_lock);
-}
-
struct proc_mounts {
struct seq_file m;
struct mnt_namespace *ns;
diff --git a/fs/namei.c b/fs/namei.c
index c53d3a9..caa2805 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -482,6 +482,18 @@ EXPORT_SYMBOL(path_put);
* to restart the path walk from the beginning in ref-walk mode.
*/
+static inline void lock_rcu_walk(void)
+{
+ br_read_lock(&vfsmount_lock);
+ rcu_read_lock();
+}
+
+static inline void unlock_rcu_walk(void)
+{
+ rcu_read_unlock();
+ br_read_unlock(&vfsmount_lock);
+}
+
/**
* unlazy_walk - try to switch to ref-walk mode.
* @nd: nameidata pathwalk data
@@ -500,21 +512,25 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
BUG_ON(!(nd->flags & LOOKUP_RCU));
/*
- * After legitimizing the bastards, terminate_walk()
- * will do the right thing for non-RCU mode, and all our
- * subsequent exit cases should rcu_read_unlock()
- * before returning. Do vfsmount first; if dentry
- * can't be legitimized, just set nd->path.dentry to NULL
- * and rely on dput(NULL) being a no-op.
+ * Get a reference to the parent first: we're
+ * going to make "path_put(nd->path)" valid in
+ * non-RCU context for "terminate_walk()".
+ *
+ * If this doesn't work, return immediately with
+ * RCU walking still active (and then we will do
+ * the RCU walk cleanup in terminate_walk()).
*/
- if (!legitimize_mnt(nd->path.mnt, nd->m_seq))
+ if (!lockref_get_not_dead(&parent->d_lockref))
return -ECHILD;
- nd->flags &= ~LOOKUP_RCU;
- if (!lockref_get_not_dead(&parent->d_lockref)) {
- nd->path.dentry = NULL;
- goto out;
- }
+ /*
+ * After the mntget(), terminate_walk() will do
+ * the right thing for non-RCU mode, and all our
+ * subsequent exit cases should unlock_rcu_walk()
+ * before returning.
+ */
+ mntget(nd->path.mnt);
+ nd->flags &= ~LOOKUP_RCU;
/*
* For a negative lookup, the lookup sequence point is the parents
@@ -550,17 +566,17 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
spin_unlock(&fs->lock);
}
- rcu_read_unlock();
+ unlock_rcu_walk();
return 0;
unlock_and_drop_dentry:
spin_unlock(&fs->lock);
drop_dentry:
- rcu_read_unlock();
+ unlock_rcu_walk();
dput(dentry);
goto drop_root_mnt;
out:
- rcu_read_unlock();
+ unlock_rcu_walk();
drop_root_mnt:
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
@@ -592,22 +608,17 @@ static int complete_walk(struct nameidata *nd)
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
- if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) {
- rcu_read_unlock();
- return -ECHILD;
- }
if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
- rcu_read_unlock();
- mntput(nd->path.mnt);
+ unlock_rcu_walk();
return -ECHILD;
}
if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
- rcu_read_unlock();
+ unlock_rcu_walk();
dput(dentry);
- mntput(nd->path.mnt);
return -ECHILD;
}
- rcu_read_unlock();
+ mntget(nd->path.mnt);
+ unlock_rcu_walk();
}
if (likely(!(nd->flags & LOOKUP_JUMPED)))
@@ -898,15 +909,15 @@ int follow_up(struct path *path)
struct mount *parent;
struct dentry *mountpoint;
- read_seqlock_excl(&mount_lock);
+ br_read_lock(&vfsmount_lock);
parent = mnt->mnt_parent;
if (parent == mnt) {
- read_sequnlock_excl(&mount_lock);
+ br_read_unlock(&vfsmount_lock);
return 0;
}
mntget(&parent->mnt);
mountpoint = dget(mnt->mnt_mountpoint);
- read_sequnlock_excl(&mount_lock);
+ br_read_unlock(&vfsmount_lock);
dput(path->dentry);
path->dentry = mountpoint;
mntput(path->mnt);
@@ -1037,8 +1048,8 @@ static int follow_managed(struct path *path, unsigned flags)
/* Something is mounted on this dentry in another
* namespace and/or whatever was mounted there in this
- * namespace got unmounted before lookup_mnt() could
- * get it */
+ * namespace got unmounted before we managed to get the
+ * vfsmount_lock */
}
/* Handle an automount point */
@@ -1100,7 +1111,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
if (!d_mountpoint(path->dentry))
break;
- mounted = __lookup_mnt(path->mnt, path->dentry);
+ mounted = __lookup_mnt(path->mnt, path->dentry, 1);
if (!mounted)
break;
path->mnt = &mounted->mnt;
@@ -1121,7 +1132,7 @@ static void follow_mount_rcu(struct nameidata *nd)
{
while (d_mountpoint(nd->path.dentry)) {
struct mount *mounted;
- mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
+ mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1);
if (!mounted)
break;
nd->path.mnt = &mounted->mnt;
@@ -1163,7 +1174,7 @@ failed:
nd->flags &= ~LOOKUP_RCU;
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
- rcu_read_unlock();
+ unlock_rcu_walk();
return -ECHILD;
}
@@ -1297,8 +1308,8 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
}
/*
- * Call i_op->lookup on the dentry. The dentry must be negative and
- * unhashed.
+ * Call i_op->lookup on the dentry. The dentry must be negative but may be
+ * hashed if it was populated with DCACHE_NEED_LOOKUP.
*
* dir->d_inode->i_mutex must be held
*/
@@ -1490,7 +1501,7 @@ static void terminate_walk(struct nameidata *nd)
nd->flags &= ~LOOKUP_RCU;
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
- rcu_read_unlock();
+ unlock_rcu_walk();
}
}
@@ -1500,9 +1511,18 @@ static void terminate_walk(struct nameidata *nd)
* so we keep a cache of "no, this doesn't need follow_link"
* for the common case.
*/
-static inline int should_follow_link(struct dentry *dentry, int follow)
+static inline int should_follow_link(struct inode *inode, int follow)
{
- return unlikely(d_is_symlink(dentry)) ? follow : 0;
+ if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
+ if (likely(inode->i_op->follow_link))
+ return follow;
+
+ /* This gets set once for the inode lifetime */
+ spin_lock(&inode->i_lock);
+ inode->i_opflags |= IOP_NOFOLLOW;
+ spin_unlock(&inode->i_lock);
+ }
+ return 0;
}
static inline int walk_component(struct nameidata *nd, struct path *path,
@@ -1532,7 +1552,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
if (!inode)
goto out_path_put;
- if (should_follow_link(path->dentry, follow)) {
+ if (should_follow_link(inode, follow)) {
if (nd->flags & LOOKUP_RCU) {
if (unlikely(unlazy_walk(nd, path->dentry))) {
err = -ECHILD;
@@ -1591,6 +1611,26 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
}
/*
+ * We really don't want to look at inode->i_op->lookup
+ * when we don't have to. So we keep a cache bit in
+ * the inode ->i_opflags field that says "yes, we can
+ * do lookup on this inode".
+ */
+static inline int can_lookup(struct inode *inode)
+{
+ if (likely(inode->i_opflags & IOP_LOOKUP))
+ return 1;
+ if (likely(!inode->i_op->lookup))
+ return 0;
+
+ /* We do this once for the lifetime of the inode */
+ spin_lock(&inode->i_lock);
+ inode->i_opflags |= IOP_LOOKUP;
+ spin_unlock(&inode->i_lock);
+ return 1;
+}
+
+/*
* We can do the critical dentry name comparison and hashing
* operations one word at a time, but we are limited to:
*
@@ -1793,7 +1833,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
if (err)
return err;
}
- if (!d_is_directory(nd->path.dentry)) {
+ if (!can_lookup(nd->inode)) {
err = -ENOTDIR;
break;
}
@@ -1811,10 +1851,9 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->flags = flags | LOOKUP_JUMPED;
nd->depth = 0;
if (flags & LOOKUP_ROOT) {
- struct dentry *root = nd->root.dentry;
- struct inode *inode = root->d_inode;
+ struct inode *inode = nd->root.dentry->d_inode;
if (*name) {
- if (!d_is_directory(root))
+ if (!can_lookup(inode))
return -ENOTDIR;
retval = inode_permission(inode, MAY_EXEC);
if (retval)
@@ -1823,9 +1862,8 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->path = nd->root;
nd->inode = inode;
if (flags & LOOKUP_RCU) {
- rcu_read_lock();
+ lock_rcu_walk();
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- nd->m_seq = read_seqbegin(&mount_lock);
} else {
path_get(&nd->path);
}
@@ -1834,10 +1872,9 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->root.mnt = NULL;
- nd->m_seq = read_seqbegin(&mount_lock);
if (*name=='/') {
if (flags & LOOKUP_RCU) {
- rcu_read_lock();
+ lock_rcu_walk();
set_root_rcu(nd);
} else {
set_root(nd);
@@ -1849,7 +1886,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
struct fs_struct *fs = current->fs;
unsigned seq;
- rcu_read_lock();
+ lock_rcu_walk();
do {
seq = read_seqcount_begin(&fs->seq);
@@ -1870,7 +1907,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
dentry = f.file->f_path.dentry;
if (*name) {
- if (!d_is_directory(dentry)) {
+ if (!can_lookup(dentry->d_inode)) {
fdput(f);
return -ENOTDIR;
}
@@ -1881,7 +1918,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
if (f.need_put)
*fp = f.file;
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- rcu_read_lock();
+ lock_rcu_walk();
} else {
path_get(&nd->path);
fdput(f);
@@ -1952,7 +1989,7 @@ static int path_lookupat(int dfd, const char *name,
err = complete_walk(nd);
if (!err && nd->flags & LOOKUP_DIRECTORY) {
- if (!d_is_directory(nd->path.dentry)) {
+ if (!can_lookup(nd->inode)) {
path_put(&nd->path);
err = -ENOTDIR;
}
@@ -2244,7 +2281,7 @@ done:
}
path->dentry = dentry;
path->mnt = mntget(nd->path.mnt);
- if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW))
+ if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW))
return 1;
follow_mount(path);
error = 0;
@@ -2389,14 +2426,12 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
* 10. We don't allow removal of NFS sillyrenamed files; it's handled by
* nfs_async_unlink().
*/
-static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
+static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
{
- struct inode *inode = victim->d_inode;
int error;
- if (d_is_negative(victim))
+ if (!victim->d_inode)
return -ENOENT;
- BUG_ON(!inode);
BUG_ON(victim->d_parent->d_inode != dir);
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
@@ -2406,16 +2441,15 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
return error;
if (IS_APPEND(dir))
return -EPERM;
-
- if (check_sticky(dir, inode) || IS_APPEND(inode) ||
- IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
+ if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
+ IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
return -EPERM;
if (isdir) {
- if (!d_is_directory(victim) && !d_is_autodir(victim))
+ if (!S_ISDIR(victim->d_inode->i_mode))
return -ENOTDIR;
if (IS_ROOT(victim))
return -EBUSY;
- } else if (d_is_directory(victim) || d_is_autodir(victim))
+ } else if (S_ISDIR(victim->d_inode->i_mode))
return -EISDIR;
if (IS_DEADDIR(dir))
return -ENOENT;
@@ -2434,7 +2468,6 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
*/
static inline int may_create(struct inode *dir, struct dentry *child)
{
- audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
if (child->d_inode)
return -EEXIST;
if (IS_DEADDIR(dir))
@@ -2950,7 +2983,7 @@ retry_lookup:
/*
* create/update audit record if it already exists.
*/
- if (d_is_positive(path->dentry))
+ if (path->dentry->d_inode)
audit_inode(name, path->dentry, 0);
/*
@@ -2979,12 +3012,12 @@ retry_lookup:
finish_lookup:
/* we _can_ be in RCU mode here */
error = -ENOENT;
- if (d_is_negative(path->dentry)) {
+ if (!inode) {
path_to_nameidata(path, nd);
goto out;
}
- if (should_follow_link(path->dentry, !symlink_ok)) {
+ if (should_follow_link(inode, !symlink_ok)) {
if (nd->flags & LOOKUP_RCU) {
if (unlikely(unlazy_walk(nd, path->dentry))) {
error = -ECHILD;
@@ -3013,11 +3046,10 @@ finish_open:
}
audit_inode(name, nd->path.dentry, 0);
error = -EISDIR;
- if ((open_flag & O_CREAT) &&
- (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry)))
+ if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
goto out;
error = -ENOTDIR;
- if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry))
+ if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode))
goto out;
if (!S_ISREG(nd->inode->i_mode))
will_truncate = false;
@@ -3243,7 +3275,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
nd.root.mnt = mnt;
nd.root.dentry = dentry;
- if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN)
+ if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
return ERR_PTR(-ELOOP);
file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU);
@@ -3293,9 +3325,8 @@ struct dentry *kern_path_create(int dfd, const char *pathname,
goto unlock;
error = -EEXIST;
- if (d_is_positive(dentry))
+ if (dentry->d_inode)
goto fail;
-
/*
* Special case - lookup gave negative, but... we had foo/bar/
* From the vfs_mknod() POV we just have a negative dentry -
@@ -3616,27 +3647,8 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
return do_rmdir(AT_FDCWD, pathname);
}
-/**
- * vfs_unlink - unlink a filesystem object
- * @dir: parent directory
- * @dentry: victim
- * @delegated_inode: returns victim inode, if the inode is delegated.
- *
- * The caller must hold dir->i_mutex.
- *
- * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
- * return a reference to the inode in delegated_inode. The caller
- * should then break the delegation on that inode and retry. Because
- * breaking a delegation may take a long time, the caller should drop
- * dir->i_mutex before doing so.
- *
- * Alternatively, a caller may pass NULL for delegated_inode. This may
- * be appropriate for callers that expect the underlying filesystem not
- * to be NFS exported.
- */
-int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
+int vfs_unlink(struct inode *dir, struct dentry *dentry)
{
- struct inode *target = dentry->d_inode;
int error = may_delete(dir, dentry, 0);
if (error)
@@ -3645,26 +3657,22 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
if (!dir->i_op->unlink)
return -EPERM;
- mutex_lock(&target->i_mutex);
+ mutex_lock(&dentry->d_inode->i_mutex);
if (d_mountpoint(dentry))
error = -EBUSY;
else {
error = security_inode_unlink(dir, dentry);
if (!error) {
- error = try_break_deleg(target, delegated_inode);
- if (error)
- goto out;
error = dir->i_op->unlink(dir, dentry);
if (!error)
dont_mount(dentry);
}
}
-out:
- mutex_unlock(&target->i_mutex);
+ mutex_unlock(&dentry->d_inode->i_mutex);
/* We don't d_delete() NFS sillyrenamed files--they still exist. */
if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
- fsnotify_link_count(target);
+ fsnotify_link_count(dentry->d_inode);
d_delete(dentry);
}
@@ -3684,7 +3692,6 @@ static long do_unlinkat(int dfd, const char __user *pathname)
struct dentry *dentry;
struct nameidata nd;
struct inode *inode = NULL;
- struct inode *delegated_inode = NULL;
unsigned int lookup_flags = 0;
retry:
name = user_path_parent(dfd, pathname, &nd, lookup_flags);
@@ -3699,7 +3706,7 @@ retry:
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit1;
-retry_deleg:
+
mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_hash(&nd);
error = PTR_ERR(dentry);
@@ -3708,25 +3715,19 @@ retry_deleg:
if (nd.last.name[nd.last.len])
goto slashes;
inode = dentry->d_inode;
- if (d_is_negative(dentry))
+ if (!inode)
goto slashes;
ihold(inode);
error = security_path_unlink(&nd.path, dentry);
if (error)
goto exit2;
- error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode);
+ error = vfs_unlink(nd.path.dentry->d_inode, dentry);
exit2:
dput(dentry);
}
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
if (inode)
iput(inode); /* truncate the inode here */
- inode = NULL;
- if (delegated_inode) {
- error = break_deleg_wait(&delegated_inode);
- if (!error)
- goto retry_deleg;
- }
mnt_drop_write(nd.path.mnt);
exit1:
path_put(&nd.path);
@@ -3739,12 +3740,8 @@ exit1:
return error;
slashes:
- if (d_is_negative(dentry))
- error = -ENOENT;
- else if (d_is_directory(dentry) || d_is_autodir(dentry))
- error = -EISDIR;
- else
- error = -ENOTDIR;
+ error = !dentry->d_inode ? -ENOENT :
+ S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
goto exit2;
}
@@ -3820,26 +3817,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
return sys_symlinkat(oldname, AT_FDCWD, newname);
}
-/**
- * vfs_link - create a new link
- * @old_dentry: object to be linked
- * @dir: new parent
- * @new_dentry: where to create the new link
- * @delegated_inode: returns inode needing a delegation break
- *
- * The caller must hold dir->i_mutex
- *
- * If vfs_link discovers a delegation on the to-be-linked file in need
- * of breaking, it will return -EWOULDBLOCK and return a reference to the
- * inode in delegated_inode. The caller should then break the delegation
- * and retry. Because breaking a delegation may take a long time, the
- * caller should drop the i_mutex before doing so.
- *
- * Alternatively, a caller may pass NULL for delegated_inode. This may
- * be appropriate for callers that expect the underlying filesystem not
- * to be NFS exported.
- */
-int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
+int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
{
struct inode *inode = old_dentry->d_inode;
unsigned max_links = dir->i_sb->s_max_links;
@@ -3875,11 +3853,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
error = -ENOENT;
else if (max_links && inode->i_nlink >= max_links)
error = -EMLINK;
- else {
- error = try_break_deleg(inode, delegated_inode);
- if (!error)
- error = dir->i_op->link(old_dentry, dir, new_dentry);
- }
+ else
+ error = dir->i_op->link(old_dentry, dir, new_dentry);
if (!error && (inode->i_state & I_LINKABLE)) {
spin_lock(&inode->i_lock);
@@ -3906,7 +3881,6 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
{
struct dentry *new_dentry;
struct path old_path, new_path;
- struct inode *delegated_inode = NULL;
int how = 0;
int error;
@@ -3945,14 +3919,9 @@ retry:
error = security_path_link(old_path.dentry, &new_path, new_dentry);
if (error)
goto out_dput;
- error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
+ error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
out_dput:
done_path_create(&new_path, new_dentry);
- if (delegated_inode) {
- error = break_deleg_wait(&delegated_inode);
- if (!error)
- goto retry;
- }
if (retry_estale(error, how)) {
how |= LOOKUP_REVAL;
goto retry;
@@ -3977,8 +3946,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
* That's where 4.4 screws up. Current fix: serialization on
* sb->s_vfs_rename_mutex. We might be more accurate, but that's another
* story.
- * c) we have to lock _four_ objects - parents and victim (if it exists),
- * and source (if it is not a directory).
+ * c) we have to lock _three_ objects - parents and victim (if it exists).
* And that - after we got ->i_mutex on parents (until then we don't know
* whether the target exists). Solution: try to be smart with locking
* order for inodes. We rely on the fact that tree topology may change
@@ -4051,11 +4019,9 @@ out:
}
static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- struct inode **delegated_inode)
+ struct inode *new_dir, struct dentry *new_dentry)
{
struct inode *target = new_dentry->d_inode;
- struct inode *source = old_dentry->d_inode;
int error;
error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
@@ -4063,20 +4029,13 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
return error;
dget(new_dentry);
- lock_two_nondirectories(source, target);
+ if (target)
+ mutex_lock(&target->i_mutex);
error = -EBUSY;
if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
goto out;
- error = try_break_deleg(source, delegated_inode);
- if (error)
- goto out;
- if (target) {
- error = try_break_deleg(target, delegated_inode);
- if (error)
- goto out;
- }
error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
if (error)
goto out;
@@ -4086,38 +4045,17 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
d_move(old_dentry, new_dentry);
out:
- unlock_two_nondirectories(source, target);
+ if (target)
+ mutex_unlock(&target->i_mutex);
dput(new_dentry);
return error;
}
-/**
- * vfs_rename - rename a filesystem object
- * @old_dir: parent of source
- * @old_dentry: source
- * @new_dir: parent of destination
- * @new_dentry: destination
- * @delegated_inode: returns an inode needing a delegation break
- *
- * The caller must hold multiple mutexes--see lock_rename()).
- *
- * If vfs_rename discovers a delegation in need of breaking at either
- * the source or destination, it will return -EWOULDBLOCK and return a
- * reference to the inode in delegated_inode. The caller should then
- * break the delegation and retry. Because breaking a delegation may
- * take a long time, the caller should drop all locks before doing
- * so.
- *
- * Alternatively, a caller may pass NULL for delegated_inode. This may
- * be appropriate for callers that expect the underlying filesystem not
- * to be NFS exported.
- */
int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- struct inode **delegated_inode)
+ struct inode *new_dir, struct dentry *new_dentry)
{
int error;
- int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry);
+ int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
const unsigned char *old_name;
if (old_dentry->d_inode == new_dentry->d_inode)
@@ -4142,7 +4080,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (is_dir)
error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
else
- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode);
+ error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
if (!error)
fsnotify_move(old_dir, new_dir, old_name, is_dir,
new_dentry->d_inode, old_dentry);
@@ -4158,7 +4096,6 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
struct dentry *old_dentry, *new_dentry;
struct dentry *trap;
struct nameidata oldnd, newnd;
- struct inode *delegated_inode = NULL;
struct filename *from;
struct filename *to;
unsigned int lookup_flags = 0;
@@ -4198,7 +4135,6 @@ retry:
newnd.flags &= ~LOOKUP_PARENT;
newnd.flags |= LOOKUP_RENAME_TARGET;
-retry_deleg:
trap = lock_rename(new_dir, old_dir);
old_dentry = lookup_hash(&oldnd);
@@ -4207,10 +4143,10 @@ retry_deleg:
goto exit3;
/* source must exist */
error = -ENOENT;
- if (d_is_negative(old_dentry))
+ if (!old_dentry->d_inode)
goto exit4;
/* unless the source is a directory trailing slashes give -ENOTDIR */
- if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) {
+ if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
error = -ENOTDIR;
if (oldnd.last.name[oldnd.last.len])
goto exit4;
@@ -4235,19 +4171,13 @@ retry_deleg:
if (error)
goto exit5;
error = vfs_rename(old_dir->d_inode, old_dentry,
- new_dir->d_inode, new_dentry,
- &delegated_inode);
+ new_dir->d_inode, new_dentry);
exit5:
dput(new_dentry);
exit4:
dput(old_dentry);
exit3:
unlock_rename(new_dir, old_dir);
- if (delegated_inode) {
- error = break_deleg_wait(&delegated_inode);
- if (!error)
- goto retry_deleg;
- }
mnt_drop_write(oldnd.path.mnt);
exit2:
if (retry_estale(error, lookup_flags))
diff --git a/fs/namespace.c b/fs/namespace.c
index ac2ce8a..da5c494 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -39,7 +39,7 @@ static int mnt_group_start = 1;
static struct list_head *mount_hashtable __read_mostly;
static struct list_head *mountpoint_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
-static DECLARE_RWSEM(namespace_sem);
+static struct rw_semaphore namespace_sem;
/* /sys/fs */
struct kobject *fs_kobj;
@@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(fs_kobj);
* It should be taken for write in all cases where the vfsmount
* tree or hash is modified or when a vfsmount structure is modified.
*/
-__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
+DEFINE_BRLOCK(vfsmount_lock);
static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
{
@@ -63,6 +63,8 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
return tmp & (HASH_SIZE - 1);
}
+#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
+
/*
* allocation is serialized by namespace_sem, but we need the spinlock to
* serialize with freeing.
@@ -456,7 +458,7 @@ static int mnt_make_readonly(struct mount *mnt)
{
int ret = 0;
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
/*
* After storing MNT_WRITE_HOLD, we'll read the counters. This store
@@ -490,15 +492,15 @@ static int mnt_make_readonly(struct mount *mnt)
*/
smp_wmb();
mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
return ret;
}
static void __mnt_unmake_readonly(struct mount *mnt)
{
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
mnt->mnt.mnt_flags &= ~MNT_READONLY;
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
}
int sb_prepare_remount_readonly(struct super_block *sb)
@@ -510,7 +512,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
if (atomic_long_read(&sb->s_remove_count))
return -EBUSY;
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
@@ -532,7 +534,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
}
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
return err;
}
@@ -547,56 +549,30 @@ static void free_vfsmnt(struct mount *mnt)
kmem_cache_free(mnt_cache, mnt);
}
-/* call under rcu_read_lock */
-bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
-{
- struct mount *mnt;
- if (read_seqretry(&mount_lock, seq))
- return false;
- if (bastard == NULL)
- return true;
- mnt = real_mount(bastard);
- mnt_add_count(mnt, 1);
- if (likely(!read_seqretry(&mount_lock, seq)))
- return true;
- if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
- mnt_add_count(mnt, -1);
- return false;
- }
- rcu_read_unlock();
- mntput(bastard);
- rcu_read_lock();
- return false;
-}
-
/*
- * find the first mount at @dentry on vfsmount @mnt.
- * call under rcu_read_lock()
+ * find the first or last mount at @dentry on vfsmount @mnt depending on
+ * @dir. If @dir is set return the first mount else return the last mount.
+ * vfsmount_lock must be held for read or write.
*/
-struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
+struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
+ int dir)
{
struct list_head *head = mount_hashtable + hash(mnt, dentry);
- struct mount *p;
-
- list_for_each_entry_rcu(p, head, mnt_hash)
- if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
- return p;
- return NULL;
-}
+ struct list_head *tmp = head;
+ struct mount *p, *found = NULL;
-/*
- * find the last mount at @dentry on vfsmount @mnt.
- * mount_lock must be held.
- */
-struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
-{
- struct list_head *head = mount_hashtable + hash(mnt, dentry);
- struct mount *p;
-
- list_for_each_entry_reverse(p, head, mnt_hash)
- if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
- return p;
- return NULL;
+ for (;;) {
+ tmp = dir ? tmp->next : tmp->prev;
+ p = NULL;
+ if (tmp == head)
+ break;
+ p = list_entry(tmp, struct mount, mnt_hash);
+ if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) {
+ found = p;
+ break;
+ }
+ }
+ return found;
}
/*
@@ -618,17 +594,17 @@ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
struct vfsmount *lookup_mnt(struct path *path)
{
struct mount *child_mnt;
- struct vfsmount *m;
- unsigned seq;
- rcu_read_lock();
- do {
- seq = read_seqbegin(&mount_lock);
- child_mnt = __lookup_mnt(path->mnt, path->dentry);
- m = child_mnt ? &child_mnt->mnt : NULL;
- } while (!legitimize_mnt(m, seq));
- rcu_read_unlock();
- return m;
+ br_read_lock(&vfsmount_lock);
+ child_mnt = __lookup_mnt(path->mnt, path->dentry, 1);
+ if (child_mnt) {
+ mnt_add_count(child_mnt, 1);
+ br_read_unlock(&vfsmount_lock);
+ return &child_mnt->mnt;
+ } else {
+ br_read_unlock(&vfsmount_lock);
+ return NULL;
+ }
}
static struct mountpoint *new_mountpoint(struct dentry *dentry)
@@ -820,9 +796,9 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
mnt->mnt.mnt_sb = root->d_sb;
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
mnt->mnt_parent = mnt;
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
return &mnt->mnt;
}
EXPORT_SYMBOL_GPL(vfs_kern_mount);
@@ -863,9 +839,9 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
mnt->mnt.mnt_root = dget(root);
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
mnt->mnt_parent = mnt;
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
if ((flag & CL_SLAVE) ||
((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
@@ -896,66 +872,64 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
return ERR_PTR(err);
}
-static void delayed_free(struct rcu_head *head)
+static inline void mntfree(struct mount *mnt)
{
- struct mount *mnt = container_of(head, struct mount, mnt_rcu);
- kfree(mnt->mnt_devname);
-#ifdef CONFIG_SMP
- free_percpu(mnt->mnt_pcp);
-#endif
- kmem_cache_free(mnt_cache, mnt);
+ struct vfsmount *m = &mnt->mnt;
+ struct super_block *sb = m->mnt_sb;
+
+ /*
+ * This probably indicates that somebody messed
+ * up a mnt_want/drop_write() pair. If this
+ * happens, the filesystem was probably unable
+ * to make r/w->r/o transitions.
+ */
+ /*
+ * The locking used to deal with mnt_count decrement provides barriers,
+ * so mnt_get_writers() below is safe.
+ */
+ WARN_ON(mnt_get_writers(mnt));
+ fsnotify_vfsmount_delete(m);
+ dput(m->mnt_root);
+ free_vfsmnt(mnt);
+ deactivate_super(sb);
}
static void mntput_no_expire(struct mount *mnt)
{
put_again:
- rcu_read_lock();
- mnt_add_count(mnt, -1);
- if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
- rcu_read_unlock();
+#ifdef CONFIG_SMP
+ br_read_lock(&vfsmount_lock);
+ if (likely(mnt->mnt_ns)) {
+ /* shouldn't be the last one */
+ mnt_add_count(mnt, -1);
+ br_read_unlock(&vfsmount_lock);
return;
}
- lock_mount_hash();
+ br_read_unlock(&vfsmount_lock);
+
+ br_write_lock(&vfsmount_lock);
+ mnt_add_count(mnt, -1);
if (mnt_get_count(mnt)) {
- rcu_read_unlock();
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
return;
}
+#else
+ mnt_add_count(mnt, -1);
+ if (likely(mnt_get_count(mnt)))
+ return;
+ br_write_lock(&vfsmount_lock);
+#endif
if (unlikely(mnt->mnt_pinned)) {
mnt_add_count(mnt, mnt->mnt_pinned + 1);
mnt->mnt_pinned = 0;
- rcu_read_unlock();
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
acct_auto_close_mnt(&mnt->mnt);
goto put_again;
}
- if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
- rcu_read_unlock();
- unlock_mount_hash();
- return;
- }
- mnt->mnt.mnt_flags |= MNT_DOOMED;
- rcu_read_unlock();
list_del(&mnt->mnt_instance);
- unlock_mount_hash();
-
- /*
- * This probably indicates that somebody messed
- * up a mnt_want/drop_write() pair. If this
- * happens, the filesystem was probably unable
- * to make r/w->r/o transitions.
- */
- /*
- * The locking used to deal with mnt_count decrement provides barriers,
- * so mnt_get_writers() below is safe.
- */
- WARN_ON(mnt_get_writers(mnt));
- fsnotify_vfsmount_delete(&mnt->mnt);
- dput(mnt->mnt.mnt_root);
- deactivate_super(mnt->mnt.mnt_sb);
- mnt_free_id(mnt);
- call_rcu(&mnt->mnt_rcu, delayed_free);
+ br_write_unlock(&vfsmount_lock);
+ mntfree(mnt);
}
void mntput(struct vfsmount *mnt)
@@ -980,21 +954,21 @@ EXPORT_SYMBOL(mntget);
void mnt_pin(struct vfsmount *mnt)
{
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
real_mount(mnt)->mnt_pinned++;
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
}
EXPORT_SYMBOL(mnt_pin);
void mnt_unpin(struct vfsmount *m)
{
struct mount *mnt = real_mount(m);
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
if (mnt->mnt_pinned) {
mnt_add_count(mnt, 1);
mnt->mnt_pinned--;
}
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
}
EXPORT_SYMBOL(mnt_unpin);
@@ -1111,12 +1085,12 @@ int may_umount_tree(struct vfsmount *m)
BUG_ON(!m);
/* write lock needed for mnt_get_count */
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
for (p = mnt; p; p = next_mnt(p, mnt)) {
actual_refs += mnt_get_count(p);
minimum_refs += 2;
}
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
if (actual_refs > minimum_refs)
return 0;
@@ -1143,10 +1117,10 @@ int may_umount(struct vfsmount *mnt)
{
int ret = 1;
down_read(&namespace_sem);
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
if (propagate_mount_busy(real_mount(mnt), 2))
ret = 0;
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
up_read(&namespace_sem);
return ret;
}
@@ -1168,13 +1142,23 @@ static void namespace_unlock(void)
list_splice_init(&unmounted, &head);
up_write(&namespace_sem);
- synchronize_rcu();
-
while (!list_empty(&head)) {
mnt = list_first_entry(&head, struct mount, mnt_hash);
list_del_init(&mnt->mnt_hash);
- if (mnt->mnt_ex_mountpoint.mnt)
- path_put(&mnt->mnt_ex_mountpoint);
+ if (mnt_has_parent(mnt)) {
+ struct dentry *dentry;
+ struct mount *m;
+
+ br_write_lock(&vfsmount_lock);
+ dentry = mnt->mnt_mountpoint;
+ m = mnt->mnt_parent;
+ mnt->mnt_mountpoint = mnt->mnt.mnt_root;
+ mnt->mnt_parent = mnt;
+ m->mnt_ghosts--;
+ br_write_unlock(&vfsmount_lock);
+ dput(dentry);
+ mntput(&m->mnt);
+ }
mntput(&mnt->mnt);
}
}
@@ -1185,13 +1169,10 @@ static inline void namespace_lock(void)
}
/*
- * mount_lock must be held
+ * vfsmount lock must be held for write
* namespace_sem must be held for write
- * how = 0 => just this tree, don't propagate
- * how = 1 => propagate; we know that nobody else has reference to any victims
- * how = 2 => lazy umount
*/
-void umount_tree(struct mount *mnt, int how)
+void umount_tree(struct mount *mnt, int propagate)
{
LIST_HEAD(tmp_list);
struct mount *p;
@@ -1199,7 +1180,7 @@ void umount_tree(struct mount *mnt, int how)
for (p = mnt; p; p = next_mnt(p, mnt))
list_move(&p->mnt_hash, &tmp_list);
- if (how)
+ if (propagate)
propagate_umount(&tmp_list);
list_for_each_entry(p, &tmp_list, mnt_hash) {
@@ -1207,16 +1188,10 @@ void umount_tree(struct mount *mnt, int how)
list_del_init(&p->mnt_list);
__touch_mnt_namespace(p->mnt_ns);
p->mnt_ns = NULL;
- if (how < 2)
- p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
list_del_init(&p->mnt_child);
if (mnt_has_parent(p)) {
+ p->mnt_parent->mnt_ghosts++;
put_mountpoint(p->mnt_mp);
- /* move the reference to mountpoint into ->mnt_ex_mountpoint */
- p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint;
- p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt;
- p->mnt_mountpoint = p->mnt.mnt_root;
- p->mnt_parent = p;
p->mnt_mp = NULL;
}
change_mnt_propagation(p, MS_PRIVATE);
@@ -1250,12 +1225,12 @@ static int do_umount(struct mount *mnt, int flags)
* probably don't strictly need the lock here if we examined
* all race cases, but it's a slowpath.
*/
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
if (mnt_get_count(mnt) != 2) {
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
return -EBUSY;
}
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
if (!xchg(&mnt->mnt_expiry_mark, 1))
return -EAGAIN;
@@ -1297,23 +1272,19 @@ static int do_umount(struct mount *mnt, int flags)
}
namespace_lock();
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
event++;
- if (flags & MNT_DETACH) {
+ if (!(flags & MNT_DETACH))
+ shrink_submounts(mnt);
+
+ retval = -EBUSY;
+ if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
if (!list_empty(&mnt->mnt_list))
- umount_tree(mnt, 2);
+ umount_tree(mnt, 1);
retval = 0;
- } else {
- shrink_submounts(mnt);
- retval = -EBUSY;
- if (!propagate_mount_busy(mnt, 2)) {
- if (!list_empty(&mnt->mnt_list))
- umount_tree(mnt, 1);
- retval = 0;
- }
}
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
namespace_unlock();
return retval;
}
@@ -1456,18 +1427,18 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
q = clone_mnt(p, p->mnt.mnt_root, flag);
if (IS_ERR(q))
goto out;
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
list_add_tail(&q->mnt_list, &res->mnt_list);
attach_mnt(q, parent, p->mnt_mp);
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
}
}
return res;
out:
if (res) {
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
umount_tree(res, 0);
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
}
return q;
}
@@ -1489,9 +1460,9 @@ struct vfsmount *collect_mounts(struct path *path)
void drop_collected_mounts(struct vfsmount *mnt)
{
namespace_lock();
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
umount_tree(real_mount(mnt), 0);
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
namespace_unlock();
}
@@ -1618,7 +1589,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
if (err)
goto out_cleanup_ids;
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
if (IS_MNT_SHARED(dest_mnt)) {
for (p = source_mnt; p; p = next_mnt(p, source_mnt))
@@ -1637,7 +1608,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
list_del_init(&child->mnt_hash);
commit_tree(child);
}
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
return 0;
@@ -1739,10 +1710,10 @@ static int do_change_type(struct path *path, int flag)
goto out_unlock;
}
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
change_mnt_propagation(m, type);
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
out_unlock:
namespace_unlock();
@@ -1814,9 +1785,9 @@ static int do_loopback(struct path *path, const char *old_name,
err = graft_tree(mnt, parent, mp);
if (err) {
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
umount_tree(mnt, 0);
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
}
out2:
unlock_mount(mp);
@@ -1875,13 +1846,17 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
else
err = do_remount_sb(sb, flags, data, 0);
if (!err) {
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
mnt->mnt.mnt_flags = mnt_flags;
- touch_mnt_namespace(mnt->mnt_ns);
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
}
up_write(&sb->s_umount);
+ if (!err) {
+ br_write_lock(&vfsmount_lock);
+ touch_mnt_namespace(mnt->mnt_ns);
+ br_write_unlock(&vfsmount_lock);
+ }
return err;
}
@@ -1997,7 +1972,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
struct mount *parent;
int err;
- mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT);
+ mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
mp = lock_mount(path);
if (IS_ERR(mp))
@@ -2102,7 +2077,9 @@ fail:
/* remove m from any expiration list it may be on */
if (!list_empty(&mnt->mnt_expire)) {
namespace_lock();
+ br_write_lock(&vfsmount_lock);
list_del_init(&mnt->mnt_expire);
+ br_write_unlock(&vfsmount_lock);
namespace_unlock();
}
mntput(m);
@@ -2118,9 +2095,11 @@ fail:
void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
{
namespace_lock();
+ br_write_lock(&vfsmount_lock);
list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
+ br_write_unlock(&vfsmount_lock);
namespace_unlock();
}
EXPORT_SYMBOL(mnt_set_expiry);
@@ -2139,7 +2118,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
return;
namespace_lock();
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
/* extract from the expiration list every vfsmount that matches the
* following criteria:
@@ -2158,7 +2137,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
touch_mnt_namespace(mnt->mnt_ns);
umount_tree(mnt, 1);
}
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
namespace_unlock();
}
@@ -2214,7 +2193,7 @@ resume:
* process a list of expirable mountpoints with the intent of discarding any
* submounts of a specific parent mountpoint
*
- * mount_lock must be held for write
+ * vfsmount_lock must be held for write
*/
static void shrink_submounts(struct mount *mnt)
{
@@ -2435,25 +2414,20 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
return new_ns;
}
-struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
- struct user_namespace *user_ns, struct fs_struct *new_fs)
+/*
+ * Allocate a new namespace structure and populate it with contents
+ * copied from the namespace of the passed in task structure.
+ */
+static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
+ struct user_namespace *user_ns, struct fs_struct *fs)
{
struct mnt_namespace *new_ns;
struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
struct mount *p, *q;
- struct mount *old;
+ struct mount *old = mnt_ns->root;
struct mount *new;
int copy_flags;
- BUG_ON(!ns);
-
- if (likely(!(flags & CLONE_NEWNS))) {
- get_mnt_ns(ns);
- return ns;
- }
-
- old = ns->root;
-
new_ns = alloc_mnt_ns(user_ns);
if (IS_ERR(new_ns))
return new_ns;
@@ -2461,7 +2435,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
namespace_lock();
/* First pass: copy the tree topology */
copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
- if (user_ns != ns->user_ns)
+ if (user_ns != mnt_ns->user_ns)
copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
new = copy_tree(old, old->mnt.mnt_root, copy_flags);
if (IS_ERR(new)) {
@@ -2470,7 +2444,9 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
return ERR_CAST(new);
}
new_ns->root = new;
+ br_write_lock(&vfsmount_lock);
list_add_tail(&new_ns->list, &new->mnt_list);
+ br_write_unlock(&vfsmount_lock);
/*
* Second pass: switch the tsk->fs->* elements and mark new vfsmounts
@@ -2481,13 +2457,13 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
q = new;
while (p) {
q->mnt_ns = new_ns;
- if (new_fs) {
- if (&p->mnt == new_fs->root.mnt) {
- new_fs->root.mnt = mntget(&q->mnt);
+ if (fs) {
+ if (&p->mnt == fs->root.mnt) {
+ fs->root.mnt = mntget(&q->mnt);
rootmnt = &p->mnt;
}
- if (&p->mnt == new_fs->pwd.mnt) {
- new_fs->pwd.mnt = mntget(&q->mnt);
+ if (&p->mnt == fs->pwd.mnt) {
+ fs->pwd.mnt = mntget(&q->mnt);
pwdmnt = &p->mnt;
}
}
@@ -2508,6 +2484,23 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
return new_ns;
}
+struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
+ struct user_namespace *user_ns, struct fs_struct *new_fs)
+{
+ struct mnt_namespace *new_ns;
+
+ BUG_ON(!ns);
+ get_mnt_ns(ns);
+
+ if (!(flags & CLONE_NEWNS))
+ return ns;
+
+ new_ns = dup_mnt_ns(ns, user_ns, new_fs);
+
+ put_mnt_ns(ns);
+ return new_ns;
+}
+
/**
* create_mnt_ns - creates a private namespace and adds a root filesystem
* @mnt: pointer to the new root filesystem mountpoint
@@ -2600,7 +2593,7 @@ out_type:
/*
* Return true if path is reachable from root
*
- * namespace_sem or mount_lock is held
+ * namespace_sem or vfsmount_lock is held
*/
bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
const struct path *root)
@@ -2615,9 +2608,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
int path_is_under(struct path *path1, struct path *path2)
{
int res;
- read_seqlock_excl(&mount_lock);
+ br_read_lock(&vfsmount_lock);
res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
- read_sequnlock_excl(&mount_lock);
+ br_read_unlock(&vfsmount_lock);
return res;
}
EXPORT_SYMBOL(path_is_under);
@@ -2708,7 +2701,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
if (!is_path_reachable(old_mnt, old.dentry, &new))
goto out4;
root_mp->m_count++; /* pin it so it won't go away */
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
detach_mnt(new_mnt, &parent_path);
detach_mnt(root_mnt, &root_parent);
if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
@@ -2720,7 +2713,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
/* mount new_root on / */
attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
touch_mnt_namespace(current->nsproxy->mnt_ns);
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
chroot_fs_refs(&root, &new);
put_mountpoint(root_mp);
error = 0;
@@ -2774,6 +2767,8 @@ void __init mnt_init(void)
unsigned u;
int err;
+ init_rwsem(&namespace_sem);
+
mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
@@ -2790,6 +2785,8 @@ void __init mnt_init(void)
for (u = 0; u < HASH_SIZE; u++)
INIT_LIST_HEAD(&mountpoint_hashtable[u]);
+ br_lock_init(&vfsmount_lock);
+
err = sysfs_init();
if (err)
printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2805,7 +2802,11 @@ void put_mnt_ns(struct mnt_namespace *ns)
{
if (!atomic_dec_and_test(&ns->count))
return;
- drop_collected_mounts(&ns->root->mnt);
+ namespace_lock();
+ br_write_lock(&vfsmount_lock);
+ umount_tree(ns->root, 0);
+ br_write_unlock(&vfsmount_lock);
+ namespace_unlock();
free_mnt_ns(ns);
}
@@ -2828,8 +2829,9 @@ void kern_unmount(struct vfsmount *mnt)
{
/* release long term mount so mount point can be released */
if (!IS_ERR_OR_NULL(mnt)) {
+ br_write_lock(&vfsmount_lock);
real_mount(mnt)->mnt_ns = NULL;
- synchronize_rcu(); /* yecchhh... */
+ br_write_unlock(&vfsmount_lock);
mntput(mnt);
}
}
@@ -2873,7 +2875,7 @@ bool fs_fully_visible(struct file_system_type *type)
if (unlikely(!ns))
return false;
- down_read(&namespace_sem);
+ namespace_lock();
list_for_each_entry(mnt, &ns->list, mnt_list) {
struct mount *child;
if (mnt->mnt.mnt_sb->s_type != type)
@@ -2894,7 +2896,7 @@ bool fs_fully_visible(struct file_system_type *type)
next: ;
}
found:
- up_read(&namespace_sem);
+ namespace_unlock();
return visible;
}
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index c320ac5..3be0474 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -339,8 +339,9 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
if (val)
goto finished;
- DDPRINTK("ncp_lookup_validate: %pd2 not valid, age=%ld, server lookup\n",
- dentry, NCP_GET_AGE(dentry));
+ DDPRINTK("ncp_lookup_validate: %s/%s not valid, age=%ld, server lookup\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ NCP_GET_AGE(dentry));
len = sizeof(__name);
if (ncp_is_server_root(dir)) {
@@ -358,8 +359,8 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
res = ncp_obtain_info(server, dir, __name, &(finfo.i));
}
finfo.volume = finfo.i.volNumber;
- DDPRINTK("ncp_lookup_validate: looked for %pd/%s, res=%d\n",
- dentry->d_parent, __name, res);
+ DDPRINTK("ncp_lookup_validate: looked for %s/%s, res=%d\n",
+ dentry->d_parent->d_name.name, __name, res);
/*
* If we didn't find it, or if it has a different dirEntNum to
* what we remember, it's not valid any more.
@@ -453,7 +454,8 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx)
ctl.page = NULL;
ctl.cache = NULL;
- DDPRINTK("ncp_readdir: reading %pD2, pos=%d\n", file,
+ DDPRINTK("ncp_readdir: reading %s/%s, pos=%d\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
(int) ctx->pos);
result = -EIO;
@@ -738,10 +740,12 @@ ncp_do_readdir(struct file *file, struct dir_context *ctx,
int more;
size_t bufsize;
- DPRINTK("ncp_do_readdir: %pD2, fpos=%ld\n", file,
+ DPRINTK("ncp_do_readdir: %s/%s, fpos=%ld\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
(unsigned long) ctx->pos);
- PPRINTK("ncp_do_readdir: init %pD, volnum=%d, dirent=%u\n",
- file, NCP_FINFO(dir)->volNumber, NCP_FINFO(dir)->dirEntNum);
+ PPRINTK("ncp_do_readdir: init %s, volnum=%d, dirent=%u\n",
+ dentry->d_name.name, NCP_FINFO(dir)->volNumber,
+ NCP_FINFO(dir)->dirEntNum);
err = ncp_initialize_search(server, dir, &seq);
if (err) {
@@ -846,7 +850,8 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig
if (!ncp_conn_valid(server))
goto finished;
- PPRINTK("ncp_lookup: server lookup for %pd2\n", dentry);
+ PPRINTK("ncp_lookup: server lookup for %s/%s\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
len = sizeof(__name);
if (ncp_is_server_root(dir)) {
@@ -862,7 +867,8 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig
if (!res)
res = ncp_obtain_info(server, dir, __name, &(finfo.i));
}
- PPRINTK("ncp_lookup: looked for %pd2, res=%d\n", dentry, res);
+ PPRINTK("ncp_lookup: looked for %s/%s, res=%d\n",
+ dentry->d_parent->d_name.name, __name, res);
/*
* If we didn't find an entry, make a negative dentry.
*/
@@ -909,7 +915,8 @@ out:
return error;
out_close:
- PPRINTK("ncp_instantiate: %pd2 failed, closing file\n", dentry);
+ PPRINTK("ncp_instantiate: %s/%s failed, closing file\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
ncp_close_file(NCP_SERVER(dir), finfo->file_handle);
goto out;
}
@@ -923,7 +930,8 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
int opmode;
__u8 __name[NCP_MAXPATHLEN + 1];
- PPRINTK("ncp_create_new: creating %pd2, mode=%hx\n", dentry, mode);
+ PPRINTK("ncp_create_new: creating %s/%s, mode=%hx\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name, mode);
ncp_age_dentry(server, dentry);
len = sizeof(__name);
@@ -952,7 +960,8 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
error = -ENAMETOOLONG;
else if (result < 0)
error = result;
- DPRINTK("ncp_create: %pd2 failed\n", dentry);
+ DPRINTK("ncp_create: %s/%s failed\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
goto out;
}
opmode = O_WRONLY;
@@ -985,7 +994,8 @@ static int ncp_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
int error, len;
__u8 __name[NCP_MAXPATHLEN + 1];
- DPRINTK("ncp_mkdir: making %pd2\n", dentry);
+ DPRINTK("ncp_mkdir: making %s/%s\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
ncp_age_dentry(server, dentry);
len = sizeof(__name);
@@ -1022,7 +1032,8 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry)
int error, result, len;
__u8 __name[NCP_MAXPATHLEN + 1];
- DPRINTK("ncp_rmdir: removing %pd2\n", dentry);
+ DPRINTK("ncp_rmdir: removing %s/%s\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
len = sizeof(__name);
error = ncp_io2vol(server, __name, &len, dentry->d_name.name,
@@ -1067,7 +1078,8 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry)
int error;
server = NCP_SERVER(dir);
- DPRINTK("ncp_unlink: unlinking %pd2\n", dentry);
+ DPRINTK("ncp_unlink: unlinking %s/%s\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
/*
* Check whether to close the file ...
@@ -1087,7 +1099,8 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry)
#endif
switch (error) {
case 0x00:
- DPRINTK("ncp: removed %pd2\n", dentry);
+ DPRINTK("ncp: removed %s/%s\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
break;
case 0x85:
case 0x8A:
@@ -1120,7 +1133,9 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
int old_len, new_len;
__u8 __old_name[NCP_MAXPATHLEN + 1], __new_name[NCP_MAXPATHLEN + 1];
- DPRINTK("ncp_rename: %pd2 to %pd2\n", old_dentry, new_dentry);
+ DPRINTK("ncp_rename: %s/%s to %s/%s\n",
+ old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
+ new_dentry->d_parent->d_name.name, new_dentry->d_name.name);
ncp_age_dentry(server, old_dentry);
ncp_age_dentry(server, new_dentry);
@@ -1150,8 +1165,8 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
#endif
switch (error) {
case 0x00:
- DPRINTK("ncp renamed %pd -> %pd.\n",
- old_dentry, new_dentry);
+ DPRINTK("ncp renamed %s -> %s.\n",
+ old_dentry->d_name.name,new_dentry->d_name.name);
break;
case 0x9E:
error = -ENAMETOOLONG;
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 8f5074e..122e260 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -107,7 +107,8 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
void* freepage;
size_t freelen;
- DPRINTK("ncp_file_read: enter %pd2\n", dentry);
+ DPRINTK("ncp_file_read: enter %s/%s\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
pos = *ppos;
@@ -165,7 +166,8 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
file_accessed(file);
- DPRINTK("ncp_file_read: exit %pd2\n", dentry);
+ DPRINTK("ncp_file_read: exit %s/%s\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
outrel:
ncp_inode_close(inode);
return already_read ? already_read : error;
@@ -182,7 +184,8 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
int errno;
void* bouncebuffer;
- DPRINTK("ncp_file_write: enter %pd2\n", dentry);
+ DPRINTK("ncp_file_write: enter %s/%s\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
if ((ssize_t) count < 0)
return -EINVAL;
pos = *ppos;
@@ -261,7 +264,8 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
i_size_write(inode, pos);
mutex_unlock(&inode->i_mutex);
}
- DPRINTK("ncp_file_write: exit %pd2\n", dentry);
+ DPRINTK("ncp_file_write: exit %s/%s\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
outrel:
ncp_inode_close(inode);
return already_written ? already_written : errno;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 2cf2ebe..4659da6 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -782,17 +782,6 @@ out:
return error;
}
-static void delayed_free(struct rcu_head *p)
-{
- struct ncp_server *server = container_of(p, struct ncp_server, rcu);
-#ifdef CONFIG_NCPFS_NLS
- /* unload the NLS charsets */
- unload_nls(server->nls_vol);
- unload_nls(server->nls_io);
-#endif /* CONFIG_NCPFS_NLS */
- kfree(server);
-}
-
static void ncp_put_super(struct super_block *sb)
{
struct ncp_server *server = NCP_SBP(sb);
@@ -803,6 +792,11 @@ static void ncp_put_super(struct super_block *sb)
ncp_stop_tasks(server);
+#ifdef CONFIG_NCPFS_NLS
+ /* unload the NLS charsets */
+ unload_nls(server->nls_vol);
+ unload_nls(server->nls_io);
+#endif /* CONFIG_NCPFS_NLS */
mutex_destroy(&server->rcv.creq_mutex);
mutex_destroy(&server->root_setup_lock);
mutex_destroy(&server->mutex);
@@ -819,7 +813,8 @@ static void ncp_put_super(struct super_block *sb)
vfree(server->rxbuf);
vfree(server->txbuf);
vfree(server->packet);
- call_rcu(&server->rcu, delayed_free);
+ sb->s_fs_info = NULL;
+ kfree(server);
}
static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h
index b81e97a..c51b2c5 100644
--- a/fs/ncpfs/ncp_fs_sb.h
+++ b/fs/ncpfs/ncp_fs_sb.h
@@ -38,7 +38,7 @@ struct ncp_mount_data_kernel {
};
struct ncp_server {
- struct rcu_head rcu;
+
struct ncp_mount_data_kernel m; /* Nearly all of the mount data is of
interest for us later, so we store
it completely. */
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 3dece03..b5e80b0 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -116,17 +116,17 @@ config NFS_V4_2
config PNFS_FILE_LAYOUT
tristate
depends on NFS_V4_1
- default NFS_V4
+ default m
config PNFS_BLOCK
tristate
depends on NFS_V4_1 && BLK_DEV_DM
- default NFS_V4
+ default m
config PNFS_OBJLAYOUT
tristate
depends on NFS_V4_1 && SCSI_OSD_ULD
- default NFS_V4
+ default m
config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
string "NFSv4.1 Implementation ID Domain"
@@ -140,17 +140,6 @@ config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
If the NFS client is unchanged from the upstream kernel, this
option should be set to the default "kernel.org".
-config NFS_V4_1_MIGRATION
- bool "NFSv4.1 client support for migration"
- depends on NFS_V4_1
- default n
- help
- This option makes the NFS client advertise to NFSv4.1 servers that
- it can support NFSv4 migration.
-
- The NFSv4.1 pieces of the Linux NFSv4 migration implementation are
- still experimental. If you are not an NFSv4 developer, say N here.
-
config NFS_V4_SECURITY_LABEL
bool
depends on NFS_V4_2 && SECURITY
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 9838fb0..8485978 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -36,7 +36,6 @@
#include <linux/nfs_fs.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
-#include "../nfs4_fs.h"
#include "../pnfs.h"
#include "../netns.h"
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 4d01614..9c3e117 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -44,7 +44,7 @@
static inline sector_t normalize(sector_t s, int base)
{
sector_t tmp = s; /* Since do_div modifies its argument */
- return s - sector_div(tmp, base);
+ return s - do_div(tmp, base);
}
static inline sector_t normalize_up(sector_t s, int base)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 073b4cf..67cd732 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -164,7 +164,8 @@ nfs41_callback_up(struct svc_serv *serv)
svc_xprt_put(serv->sv_bc_xprt);
serv->sv_bc_xprt = NULL;
}
- dprintk("--> %s return %d\n", __func__, PTR_ERR_OR_ZERO(rqstp));
+ dprintk("--> %s return %ld\n", __func__,
+ IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0);
return rqstp;
}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 1d09289..2dceee4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -590,8 +590,6 @@ int nfs_create_rpc_client(struct nfs_client *clp,
if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_DISCRTRY;
- if (test_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags))
- args.flags |= RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT;
if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags))
@@ -786,10 +784,8 @@ static int nfs_init_server(struct nfs_server *server,
goto error;
server->port = data->nfs_server.port;
- server->auth_info = data->auth_info;
- error = nfs_init_server_rpcclient(server, &timeparms,
- data->selected_flavor);
+ error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
if (error < 0)
goto error;
@@ -930,7 +926,6 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour
target->acdirmax = source->acdirmax;
target->caps = source->caps;
target->options = source->options;
- target->auth_info = source->auth_info;
}
EXPORT_SYMBOL_GPL(nfs_server_copy_userdata);
@@ -948,7 +943,7 @@ void nfs_server_insert_lists(struct nfs_server *server)
}
EXPORT_SYMBOL_GPL(nfs_server_insert_lists);
-void nfs_server_remove_lists(struct nfs_server *server)
+static void nfs_server_remove_lists(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
struct nfs_net *nn;
@@ -965,7 +960,6 @@ void nfs_server_remove_lists(struct nfs_server *server)
synchronize_rcu();
}
-EXPORT_SYMBOL_GPL(nfs_server_remove_lists);
/*
* Allocate and initialise a server record
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 812154a..02b0df7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -98,7 +98,9 @@ nfs_opendir(struct inode *inode, struct file *filp)
struct nfs_open_dir_context *ctx;
struct rpc_cred *cred;
- dfprintk(FILE, "NFS: open dir(%pD2)\n", filp);
+ dfprintk(FILE, "NFS: open dir(%s/%s)\n",
+ filp->f_path.dentry->d_parent->d_name.name,
+ filp->f_path.dentry->d_name.name);
nfs_inc_stats(inode, NFSIOS_VFSOPEN);
@@ -295,10 +297,11 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
if (ctx->duped > 0
&& ctx->dup_cookie == *desc->dir_cookie) {
if (printk_ratelimit()) {
- pr_notice("NFS: directory %pD2 contains a readdir loop."
+ pr_notice("NFS: directory %s/%s contains a readdir loop."
"Please contact your server vendor. "
"The file: %s has duplicate cookie %llu\n",
- desc->file,
+ desc->file->f_dentry->d_parent->d_name.name,
+ desc->file->f_dentry->d_name.name,
array->array[i].string.name,
*desc->dir_cookie);
}
@@ -819,8 +822,9 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
struct nfs_open_dir_context *dir_ctx = file->private_data;
int res = 0;
- dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
- file, (long long)ctx->pos);
+ dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ (long long)ctx->pos);
nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
/*
@@ -876,17 +880,22 @@ out:
nfs_unblock_sillyrename(dentry);
if (res > 0)
res = 0;
- dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
+ dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ res);
return res;
}
static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
{
- struct inode *inode = file_inode(filp);
+ struct dentry *dentry = filp->f_path.dentry;
+ struct inode *inode = dentry->d_inode;
struct nfs_open_dir_context *dir_ctx = filp->private_data;
- dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
- filp, offset, whence);
+ dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n",
+ dentry->d_parent->d_name.name,
+ dentry->d_name.name,
+ offset, whence);
mutex_lock(&inode->i_mutex);
switch (whence) {
@@ -916,12 +925,15 @@ out:
static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
int datasync)
{
- struct inode *inode = file_inode(filp);
+ struct dentry *dentry = filp->f_path.dentry;
+ struct inode *inode = dentry->d_inode;
- dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
+ dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ datasync);
mutex_lock(&inode->i_mutex);
- nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
+ nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC);
mutex_unlock(&inode->i_mutex);
return 0;
}
@@ -1061,8 +1073,9 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
}
if (is_bad_inode(inode)) {
- dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
- __func__, dentry);
+ dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
+ __func__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
goto out_bad;
}
@@ -1112,8 +1125,9 @@ out_set_verifier:
nfs_advise_use_readdirplus(dir);
out_valid_noent:
dput(parent);
- dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
- __func__, dentry);
+ dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
+ __func__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
return 1;
out_zap_parent:
nfs_zap_caches(dir);
@@ -1125,13 +1139,7 @@ out_zap_parent:
if (inode && S_ISDIR(inode->i_mode)) {
/* Purge readdir caches. */
nfs_zap_caches(inode);
- /*
- * We can't d_drop the root of a disconnected tree:
- * its d_hash is on the s_anon list and d_drop() would hide
- * it from shrink_dcache_for_unmount(), leading to busy
- * inodes on unmount and further oopses.
- */
- if (IS_ROOT(dentry))
+ if (dentry->d_flags & DCACHE_DISCONNECTED)
goto out_valid;
}
/* If we have submounts, don't unhash ! */
@@ -1139,16 +1147,18 @@ out_zap_parent:
goto out_valid;
dput(parent);
- dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
- __func__, dentry);
+ dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
+ __func__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
return 0;
out_error:
nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle);
nfs4_label_free(label);
dput(parent);
- dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
- __func__, dentry, error);
+ dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n",
+ __func__, dentry->d_parent->d_name.name,
+ dentry->d_name.name, error);
return error;
}
@@ -1172,14 +1182,16 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
* eventually need to do something more here.
*/
if (!inode) {
- dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n",
- __func__, dentry);
+ dfprintk(LOOKUPCACHE, "%s: %s/%s has negative inode\n",
+ __func__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
return 1;
}
if (is_bad_inode(inode)) {
- dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
- __func__, dentry);
+ dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
+ __func__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
return 0;
}
@@ -1194,8 +1206,9 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
*/
static int nfs_dentry_delete(const struct dentry *dentry)
{
- dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n",
- dentry, dentry->d_flags);
+ dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ dentry->d_flags);
/* Unhash any dentry with a stale inode */
if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode))
@@ -1273,7 +1286,8 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
struct nfs4_label *label = NULL;
int error;
- dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
+ dfprintk(VFS, "NFS: lookup(%s/%s)\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
res = ERR_PTR(-ENAMETOOLONG);
@@ -1367,7 +1381,7 @@ static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, i
static int do_open(struct inode *inode, struct file *filp)
{
- nfs_fscache_open_file(inode, filp);
+ nfs_fscache_set_inode_cookie(inode, filp);
return 0;
}
@@ -1404,8 +1418,8 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
/* Expect a negative dentry */
BUG_ON(dentry->d_inode);
- dfprintk(VFS, "NFS: atomic_open(%s/%ld), %pd\n",
- dir->i_sb->s_id, dir->i_ino, dentry);
+ dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
err = nfs_check_flags(open_flags);
if (err)
@@ -1594,8 +1608,8 @@ int nfs_create(struct inode *dir, struct dentry *dentry,
int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
int error;
- dfprintk(VFS, "NFS: create(%s/%ld), %pd\n",
- dir->i_sb->s_id, dir->i_ino, dentry);
+ dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
attr.ia_mode = mode;
attr.ia_valid = ATTR_MODE;
@@ -1621,8 +1635,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
struct iattr attr;
int status;
- dfprintk(VFS, "NFS: mknod(%s/%ld), %pd\n",
- dir->i_sb->s_id, dir->i_ino, dentry);
+ dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
if (!new_valid_dev(rdev))
return -EINVAL;
@@ -1650,8 +1664,8 @@ int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct iattr attr;
int error;
- dfprintk(VFS, "NFS: mkdir(%s/%ld), %pd\n",
- dir->i_sb->s_id, dir->i_ino, dentry);
+ dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
attr.ia_valid = ATTR_MODE;
attr.ia_mode = mode | S_IFDIR;
@@ -1678,8 +1692,8 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
- dfprintk(VFS, "NFS: rmdir(%s/%ld), %pd\n",
- dir->i_sb->s_id, dir->i_ino, dentry);
+ dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
trace_nfs_rmdir_enter(dir, dentry);
if (dentry->d_inode) {
@@ -1714,7 +1728,8 @@ static int nfs_safe_remove(struct dentry *dentry)
struct inode *inode = dentry->d_inode;
int error = -EBUSY;
- dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry);
+ dfprintk(VFS, "NFS: safe_remove(%s/%s)\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
/* If the dentry was sillyrenamed, we simply call d_delete() */
if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
@@ -1747,8 +1762,8 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
int error;
int need_rehash = 0;
- dfprintk(VFS, "NFS: unlink(%s/%ld, %pd)\n", dir->i_sb->s_id,
- dir->i_ino, dentry);
+ dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
+ dir->i_ino, dentry->d_name.name);
trace_nfs_unlink_enter(dir, dentry);
spin_lock(&dentry->d_lock);
@@ -1798,8 +1813,8 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
unsigned int pathlen = strlen(symname);
int error;
- dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s)\n", dir->i_sb->s_id,
- dir->i_ino, dentry, symname);
+ dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
+ dir->i_ino, dentry->d_name.name, symname);
if (pathlen > PAGE_SIZE)
return -ENAMETOOLONG;
@@ -1821,9 +1836,9 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
trace_nfs_symlink_exit(dir, dentry, error);
if (error != 0) {
- dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s) error %d\n",
+ dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
dir->i_sb->s_id, dir->i_ino,
- dentry, symname, error);
+ dentry->d_name.name, symname, error);
d_drop(dentry);
__free_page(page);
return error;
@@ -1850,8 +1865,9 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
struct inode *inode = old_dentry->d_inode;
int error;
- dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n",
- old_dentry, dentry);
+ dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n",
+ old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
+ dentry->d_parent->d_name.name, dentry->d_name.name);
trace_nfs_link_enter(inode, dir, dentry);
NFS_PROTO(inode)->return_delegation(inode);
@@ -1899,8 +1915,9 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct dentry *dentry = NULL, *rehash = NULL;
int error = -EBUSY;
- dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n",
- old_dentry, new_dentry,
+ dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
+ old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
+ new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
d_count(new_dentry));
trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index d71d66c..91ff089 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -124,8 +124,9 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
{
#ifndef CONFIG_NFS_SWAP
- dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n",
- iocb->ki_filp, (long long) pos, nr_segs);
+ dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
+ iocb->ki_filp->f_path.dentry->d_name.name,
+ (long long) pos, nr_segs);
return -EINVAL;
#else
@@ -908,8 +909,10 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
count = iov_length(iov, nr_segs);
nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
- dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
- file, count, (long long) pos);
+ dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n",
+ file->f_path.dentry->d_parent->d_name.name,
+ file->f_path.dentry->d_name.name,
+ count, (long long) pos);
retval = 0;
if (!count)
@@ -962,8 +965,10 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
count = iov_length(iov, nr_segs);
nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
- dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
- file, count, (long long) pos);
+ dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n",
+ file->f_path.dentry->d_parent->d_name.name,
+ file->f_path.dentry->d_name.name,
+ count, (long long) pos);
retval = generic_write_checks(file, &pos, &count, 0);
if (retval)
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index d25f10f..fc0f95e 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -46,9 +46,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
#include <linux/sunrpc/cache.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
-#include <linux/nfs_fs.h>
-#include "nfs4_fs.h"
#include "dns_resolve.h"
#include "cache_lib.h"
#include "netns.h"
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e2fcacf..1e6bfdb 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -65,7 +65,9 @@ nfs_file_open(struct inode *inode, struct file *filp)
{
int res;
- dprintk("NFS: open file(%pD2)\n", filp);
+ dprintk("NFS: open file(%s/%s)\n",
+ filp->f_path.dentry->d_parent->d_name.name,
+ filp->f_path.dentry->d_name.name);
nfs_inc_stats(inode, NFSIOS_VFSOPEN);
res = nfs_check_flags(filp->f_flags);
@@ -79,7 +81,9 @@ nfs_file_open(struct inode *inode, struct file *filp)
int
nfs_file_release(struct inode *inode, struct file *filp)
{
- dprintk("NFS: release(%pD2)\n", filp);
+ dprintk("NFS: release(%s/%s)\n",
+ filp->f_path.dentry->d_parent->d_name.name,
+ filp->f_path.dentry->d_name.name);
nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
return nfs_release(inode, filp);
@@ -119,8 +123,10 @@ force_reval:
loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence)
{
- dprintk("NFS: llseek file(%pD2, %lld, %d)\n",
- filp, offset, whence);
+ dprintk("NFS: llseek file(%s/%s, %lld, %d)\n",
+ filp->f_path.dentry->d_parent->d_name.name,
+ filp->f_path.dentry->d_name.name,
+ offset, whence);
/*
* whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
@@ -144,9 +150,12 @@ EXPORT_SYMBOL_GPL(nfs_file_llseek);
int
nfs_file_flush(struct file *file, fl_owner_t id)
{
- struct inode *inode = file_inode(file);
+ struct dentry *dentry = file->f_path.dentry;
+ struct inode *inode = dentry->d_inode;
- dprintk("NFS: flush(%pD2)\n", file);
+ dprintk("NFS: flush(%s/%s)\n",
+ dentry->d_parent->d_name.name,
+ dentry->d_name.name);
nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
if ((file->f_mode & FMODE_WRITE) == 0)
@@ -168,14 +177,15 @@ ssize_t
nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
- struct inode *inode = file_inode(iocb->ki_filp);
+ struct dentry * dentry = iocb->ki_filp->f_path.dentry;
+ struct inode * inode = dentry->d_inode;
ssize_t result;
if (iocb->ki_filp->f_flags & O_DIRECT)
return nfs_file_direct_read(iocb, iov, nr_segs, pos, true);
- dprintk("NFS: read(%pD2, %lu@%lu)\n",
- iocb->ki_filp,
+ dprintk("NFS: read(%s/%s, %lu@%lu)\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
(unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);
result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
@@ -193,11 +203,13 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
struct pipe_inode_info *pipe, size_t count,
unsigned int flags)
{
- struct inode *inode = file_inode(filp);
+ struct dentry *dentry = filp->f_path.dentry;
+ struct inode *inode = dentry->d_inode;
ssize_t res;
- dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
- filp, (unsigned long) count, (unsigned long long) *ppos);
+ dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ (unsigned long) count, (unsigned long long) *ppos);
res = nfs_revalidate_mapping(inode, filp->f_mapping);
if (!res) {
@@ -212,10 +224,12 @@ EXPORT_SYMBOL_GPL(nfs_file_splice_read);
int
nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
{
- struct inode *inode = file_inode(file);
+ struct dentry *dentry = file->f_path.dentry;
+ struct inode *inode = dentry->d_inode;
int status;
- dprintk("NFS: mmap(%pD2)\n", file);
+ dprintk("NFS: mmap(%s/%s)\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
/* Note: generic_file_mmap() returns ENOSYS on nommu systems
* so we call that before revalidating the mapping
@@ -244,12 +258,15 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap);
int
nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
{
+ struct dentry *dentry = file->f_path.dentry;
struct nfs_open_context *ctx = nfs_file_open_context(file);
- struct inode *inode = file_inode(file);
+ struct inode *inode = dentry->d_inode;
int have_error, do_resend, status;
int ret = 0;
- dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);
+ dprintk("NFS: fsync file(%s/%s) datasync %d\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ datasync);
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
@@ -354,8 +371,10 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
struct page *page;
int once_thru = 0;
- dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%ld), %u@%lld)\n",
- file, mapping->host->i_ino, len, (long long) pos);
+ dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
+ file->f_path.dentry->d_parent->d_name.name,
+ file->f_path.dentry->d_name.name,
+ mapping->host->i_ino, len, (long long) pos);
start:
/*
@@ -395,8 +414,10 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
struct nfs_open_context *ctx = nfs_file_open_context(file);
int status;
- dfprintk(PAGECACHE, "NFS: write_end(%pD2(%ld), %u@%lld)\n",
- file, mapping->host->i_ino, len, (long long) pos);
+ dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n",
+ file->f_path.dentry->d_parent->d_name.name,
+ file->f_path.dentry->d_name.name,
+ mapping->host->i_ino, len, (long long) pos);
/*
* Zero any uninitialised parts of the page, and then mark the page
@@ -580,21 +601,22 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page = vmf->page;
struct file *filp = vma->vm_file;
- struct inode *inode = file_inode(filp);
+ struct dentry *dentry = filp->f_path.dentry;
unsigned pagelen;
int ret = VM_FAULT_NOPAGE;
struct address_space *mapping;
- dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%ld), offset %lld)\n",
- filp, filp->f_mapping->host->i_ino,
+ dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ filp->f_mapping->host->i_ino,
(long long)page_offset(page));
/* make sure the cache has finished storing the page */
- nfs_fscache_wait_on_page_write(NFS_I(inode), page);
+ nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page);
lock_page(page);
mapping = page_file_mapping(page);
- if (mapping != inode->i_mapping)
+ if (mapping != dentry->d_inode->i_mapping)
goto out_unlock;
wait_on_page_writeback(page);
@@ -637,21 +659,22 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode)
ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file_inode(file);
+ struct dentry * dentry = iocb->ki_filp->f_path.dentry;
+ struct inode * inode = dentry->d_inode;
unsigned long written = 0;
ssize_t result;
size_t count = iov_length(iov, nr_segs);
- result = nfs_key_timeout_notify(file, inode);
+ result = nfs_key_timeout_notify(iocb->ki_filp, inode);
if (result)
return result;
- if (file->f_flags & O_DIRECT)
+ if (iocb->ki_filp->f_flags & O_DIRECT)
return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);
- dprintk("NFS: write(%pD2, %lu@%Ld)\n",
- file, (unsigned long) count, (long long) pos);
+ dprintk("NFS: write(%s/%s, %lu@%Ld)\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ (unsigned long) count, (long long) pos);
result = -EBUSY;
if (IS_SWAPFILE(inode))
@@ -659,8 +682,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
/*
* O_APPEND implies that we must revalidate the file length.
*/
- if (file->f_flags & O_APPEND) {
- result = nfs_revalidate_file_size(inode, file);
+ if (iocb->ki_filp->f_flags & O_APPEND) {
+ result = nfs_revalidate_file_size(inode, iocb->ki_filp);
if (result)
goto out;
}
@@ -674,8 +697,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
written = result;
/* Return error values for O_DSYNC and IS_SYNC() */
- if (result >= 0 && nfs_need_sync_write(file, inode)) {
- int err = vfs_fsync(file, 0);
+ if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
+ int err = vfs_fsync(iocb->ki_filp, 0);
if (err < 0)
result = err;
}
@@ -694,12 +717,14 @@ ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
struct file *filp, loff_t *ppos,
size_t count, unsigned int flags)
{
- struct inode *inode = file_inode(filp);
+ struct dentry *dentry = filp->f_path.dentry;
+ struct inode *inode = dentry->d_inode;
unsigned long written = 0;
ssize_t ret;
- dprintk("NFS splice_write(%pD2, %lu@%llu)\n",
- filp, (unsigned long) count, (unsigned long long) *ppos);
+ dprintk("NFS splice_write(%s/%s, %lu@%llu)\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ (unsigned long) count, (unsigned long long) *ppos);
/*
* The combination of splice and an O_APPEND destination is disallowed.
@@ -858,8 +883,10 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
int ret = -ENOLCK;
int is_local = 0;
- dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n",
- filp, fl->fl_type, fl->fl_flags,
+ dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n",
+ filp->f_path.dentry->d_parent->d_name.name,
+ filp->f_path.dentry->d_name.name,
+ fl->fl_type, fl->fl_flags,
(long long)fl->fl_start, (long long)fl->fl_end);
nfs_inc_stats(inode, NFSIOS_VFSLOCK);
@@ -896,8 +923,10 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
struct inode *inode = filp->f_mapping->host;
int is_local = 0;
- dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n",
- filp, fl->fl_type, fl->fl_flags);
+ dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n",
+ filp->f_path.dentry->d_parent->d_name.name,
+ filp->f_path.dentry->d_name.name,
+ fl->fl_type, fl->fl_flags);
if (!(fl->fl_flags & FL_FLOCK))
return -ENOLCK;
@@ -931,7 +960,9 @@ EXPORT_SYMBOL_GPL(nfs_flock);
*/
int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
{
- dprintk("NFS: setlease(%pD2, arg=%ld)\n", file, arg);
+ dprintk("NFS: setlease(%s/%s, arg=%ld)\n",
+ file->f_path.dentry->d_parent->d_name.name,
+ file->f_path.dentry->d_name.name, arg);
return -EINVAL;
}
EXPORT_SYMBOL_GPL(nfs_setlease);
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 3ef01f0..24d1d1c 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -39,7 +39,7 @@ void nfs_fscache_get_client_cookie(struct nfs_client *clp)
/* create a cache index for looking up filehandles */
clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index,
&nfs_fscache_server_index_def,
- clp, true);
+ clp);
dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n",
clp, clp->fscache);
}
@@ -139,7 +139,7 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int
/* create a cache index for looking up filehandles */
nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache,
&nfs_fscache_super_index_def,
- nfss, true);
+ nfss);
dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n",
nfss, nfss->fscache);
return;
@@ -178,79 +178,163 @@ void nfs_fscache_release_super_cookie(struct super_block *sb)
/*
* Initialise the per-inode cache cookie pointer for an NFS inode.
*/
-void nfs_fscache_init_inode(struct inode *inode)
+void nfs_fscache_init_inode_cookie(struct inode *inode)
{
+ NFS_I(inode)->fscache = NULL;
+ if (S_ISREG(inode->i_mode))
+ set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
+}
+
+/*
+ * Get the per-inode cache cookie for an NFS inode.
+ */
+static void nfs_fscache_enable_inode_cookie(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
struct nfs_inode *nfsi = NFS_I(inode);
- nfsi->fscache = NULL;
- if (!S_ISREG(inode->i_mode))
+ if (nfsi->fscache || !NFS_FSCACHE(inode))
return;
- nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache,
- &nfs_fscache_inode_object_def,
- nfsi, false);
+
+ if ((NFS_SB(sb)->options & NFS_OPTION_FSCACHE)) {
+ nfsi->fscache = fscache_acquire_cookie(
+ NFS_SB(sb)->fscache,
+ &nfs_fscache_inode_object_def,
+ nfsi);
+
+ dfprintk(FSCACHE, "NFS: get FH cookie (0x%p/0x%p/0x%p)\n",
+ sb, nfsi, nfsi->fscache);
+ }
}
/*
* Release a per-inode cookie.
*/
-void nfs_fscache_clear_inode(struct inode *inode)
+void nfs_fscache_release_inode_cookie(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
- struct fscache_cookie *cookie = nfs_i_fscache(inode);
- dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie);
+ dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n",
+ nfsi, nfsi->fscache);
- fscache_relinquish_cookie(cookie, false);
+ fscache_relinquish_cookie(nfsi->fscache, 0);
nfsi->fscache = NULL;
}
-static bool nfs_fscache_can_enable(void *data)
+/*
+ * Retire a per-inode cookie, destroying the data attached to it.
+ */
+void nfs_fscache_zap_inode_cookie(struct inode *inode)
{
- struct inode *inode = data;
+ struct nfs_inode *nfsi = NFS_I(inode);
- return !inode_is_open_for_write(inode);
+ dfprintk(FSCACHE, "NFS: zapping cookie (0x%p/0x%p)\n",
+ nfsi, nfsi->fscache);
+
+ fscache_relinquish_cookie(nfsi->fscache, 1);
+ nfsi->fscache = NULL;
}
/*
- * Enable or disable caching for a file that is being opened as appropriate.
- * The cookie is allocated when the inode is initialised, but is not enabled at
- * that time. Enablement is deferred to file-open time to avoid stat() and
- * access() thrashing the cache.
- *
- * For now, with NFS, only regular files that are open read-only will be able
- * to use the cache.
- *
- * We enable the cache for an inode if we open it read-only and it isn't
- * currently open for writing. We disable the cache if the inode is open
- * write-only.
- *
- * The caller uses the file struct to pin i_writecount on the inode before
- * calling us when a file is opened for writing, so we can make use of that.
- *
- * Note that this may be invoked multiple times in parallel by parallel
- * nfs_open() functions.
+ * Turn off the cache with regard to a per-inode cookie if opened for writing,
+ * invalidating all the pages in the page cache relating to the associated
+ * inode to clear the per-page caching.
+ */
+static void nfs_fscache_disable_inode_cookie(struct inode *inode)
+{
+ clear_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
+
+ if (NFS_I(inode)->fscache) {
+ dfprintk(FSCACHE,
+ "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode));
+
+ /* Need to uncache any pages attached to this inode that
+ * fscache knows about before turning off the cache.
+ */
+ fscache_uncache_all_inode_pages(NFS_I(inode)->fscache, inode);
+ nfs_fscache_zap_inode_cookie(inode);
+ }
+}
+
+/*
+ * wait_on_bit() sleep function for uninterruptible waiting
*/
-void nfs_fscache_open_file(struct inode *inode, struct file *filp)
+static int nfs_fscache_wait_bit(void *flags)
+{
+ schedule();
+ return 0;
+}
+
+/*
+ * Lock against someone else trying to also acquire or relinquish a cookie
+ */
+static inline void nfs_fscache_inode_lock(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
- struct fscache_cookie *cookie = nfs_i_fscache(inode);
- if (!fscache_cookie_valid(cookie))
- return;
+ while (test_and_set_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags))
+ wait_on_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK,
+ nfs_fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+}
- if (inode_is_open_for_write(inode)) {
- dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi);
- clear_bit(NFS_INO_FSCACHE, &nfsi->flags);
- fscache_disable_cookie(cookie, true);
- fscache_uncache_all_inode_pages(cookie, inode);
- } else {
- dfprintk(FSCACHE, "NFS: nfsi 0x%p enabling cache\n", nfsi);
- fscache_enable_cookie(cookie, nfs_fscache_can_enable, inode);
- if (fscache_cookie_enabled(cookie))
- set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
+/*
+ * Unlock cookie management lock
+ */
+static inline void nfs_fscache_inode_unlock(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ smp_mb__before_clear_bit();
+ clear_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK);
+}
+
+/*
+ * Decide if we should enable or disable local caching for this inode.
+ * - For now, with NFS, only regular files that are open read-only will be able
+ * to use the cache.
+ * - May be invoked multiple times in parallel by parallel nfs_open() functions.
+ */
+void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
+{
+ if (NFS_FSCACHE(inode)) {
+ nfs_fscache_inode_lock(inode);
+ if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
+ nfs_fscache_disable_inode_cookie(inode);
+ else
+ nfs_fscache_enable_inode_cookie(inode);
+ nfs_fscache_inode_unlock(inode);
+ }
+}
+EXPORT_SYMBOL_GPL(nfs_fscache_set_inode_cookie);
+
+/*
+ * Replace a per-inode cookie due to revalidation detecting a file having
+ * changed on the server.
+ */
+void nfs_fscache_reset_inode_cookie(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_server *nfss = NFS_SERVER(inode);
+ NFS_IFDEBUG(struct fscache_cookie *old = nfsi->fscache);
+
+ nfs_fscache_inode_lock(inode);
+ if (nfsi->fscache) {
+ /* retire the current fscache cache and get a new one */
+ fscache_relinquish_cookie(nfsi->fscache, 1);
+
+ nfsi->fscache = fscache_acquire_cookie(
+ nfss->nfs_client->fscache,
+ &nfs_fscache_inode_object_def,
+ nfsi);
+
+ dfprintk(FSCACHE,
+ "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n",
+ nfss, nfsi, old, nfsi->fscache);
}
+ nfs_fscache_inode_unlock(inode);
}
-EXPORT_SYMBOL_GPL(nfs_fscache_open_file);
/*
* Release the caching state associated with a page, if the page isn't busy
@@ -260,11 +344,12 @@ EXPORT_SYMBOL_GPL(nfs_fscache_open_file);
int nfs_fscache_release_page(struct page *page, gfp_t gfp)
{
if (PageFsCache(page)) {
- struct fscache_cookie *cookie = nfs_i_fscache(page->mapping->host);
+ struct nfs_inode *nfsi = NFS_I(page->mapping->host);
+ struct fscache_cookie *cookie = nfsi->fscache;
BUG_ON(!cookie);
dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n",
- cookie, page, NFS_I(page->mapping->host));
+ cookie, page, nfsi);
if (!fscache_maybe_release_page(cookie, page, gfp))
return 0;
@@ -282,12 +367,13 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp)
*/
void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode)
{
- struct fscache_cookie *cookie = nfs_i_fscache(inode);
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct fscache_cookie *cookie = nfsi->fscache;
BUG_ON(!cookie);
dfprintk(FSCACHE, "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n",
- cookie, page, NFS_I(inode));
+ cookie, page, nfsi);
fscache_wait_on_page_write(cookie, page);
@@ -331,9 +417,9 @@ int __nfs_readpage_from_fscache(struct nfs_open_context *ctx,
dfprintk(FSCACHE,
"NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n",
- nfs_i_fscache(inode), page, page->index, page->flags, inode);
+ NFS_I(inode)->fscache, page, page->index, page->flags, inode);
- ret = fscache_read_or_alloc_page(nfs_i_fscache(inode),
+ ret = fscache_read_or_alloc_page(NFS_I(inode)->fscache,
page,
nfs_readpage_from_fscache_complete,
ctx,
@@ -373,9 +459,9 @@ int __nfs_readpages_from_fscache(struct nfs_open_context *ctx,
int ret;
dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n",
- nfs_i_fscache(inode), npages, inode);
+ NFS_I(inode)->fscache, npages, inode);
- ret = fscache_read_or_alloc_pages(nfs_i_fscache(inode),
+ ret = fscache_read_or_alloc_pages(NFS_I(inode)->fscache,
mapping, pages, nr_pages,
nfs_readpage_from_fscache_complete,
ctx,
@@ -420,15 +506,15 @@ void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync)
dfprintk(FSCACHE,
"NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n",
- nfs_i_fscache(inode), page, page->index, page->flags, sync);
+ NFS_I(inode)->fscache, page, page->index, page->flags, sync);
- ret = fscache_write_page(nfs_i_fscache(inode), page, GFP_KERNEL);
+ ret = fscache_write_page(NFS_I(inode)->fscache, page, GFP_KERNEL);
dfprintk(FSCACHE,
"NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n",
page, page->index, page->flags, ret);
if (ret != 0) {
- fscache_uncache_page(nfs_i_fscache(inode), page);
+ fscache_uncache_page(NFS_I(inode)->fscache, page);
nfs_add_fscache_stats(inode,
NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1);
nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index d7fe3e7..4ecb766 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -76,9 +76,11 @@ extern void nfs_fscache_release_client_cookie(struct nfs_client *);
extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int);
extern void nfs_fscache_release_super_cookie(struct super_block *);
-extern void nfs_fscache_init_inode(struct inode *);
-extern void nfs_fscache_clear_inode(struct inode *);
-extern void nfs_fscache_open_file(struct inode *, struct file *);
+extern void nfs_fscache_init_inode_cookie(struct inode *);
+extern void nfs_fscache_release_inode_cookie(struct inode *);
+extern void nfs_fscache_zap_inode_cookie(struct inode *);
+extern void nfs_fscache_set_inode_cookie(struct inode *, struct file *);
+extern void nfs_fscache_reset_inode_cookie(struct inode *);
extern void __nfs_fscache_invalidate_page(struct page *, struct inode *);
extern int nfs_fscache_release_page(struct page *, gfp_t);
@@ -185,10 +187,12 @@ static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
-static inline void nfs_fscache_init_inode(struct inode *inode) {}
-static inline void nfs_fscache_clear_inode(struct inode *inode) {}
-static inline void nfs_fscache_open_file(struct inode *inode,
- struct file *filp) {}
+static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {}
+static inline void nfs_fscache_release_inode_cookie(struct inode *inode) {}
+static inline void nfs_fscache_zap_inode_cookie(struct inode *inode) {}
+static inline void nfs_fscache_set_inode_cookie(struct inode *inode,
+ struct file *filp) {}
+static inline void nfs_fscache_reset_inode_cookie(struct inode *inode) {}
static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp)
{
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 00ad1c2..eda8879 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -122,7 +122,7 @@ void nfs_clear_inode(struct inode *inode)
WARN_ON_ONCE(!list_empty(&NFS_I(inode)->open_files));
nfs_zap_acl_cache(inode);
nfs_access_zap_cache(inode);
- nfs_fscache_clear_inode(inode);
+ nfs_fscache_release_inode_cookie(inode);
}
EXPORT_SYMBOL_GPL(nfs_clear_inode);
@@ -274,6 +274,12 @@ void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
if (label == NULL)
return;
+ if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL) == 0)
+ return;
+
+ if (NFS_SERVER(inode)->nfs_client->cl_minorversion < 2)
+ return;
+
if ((fattr->valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL) && inode->i_security) {
error = security_inode_notifysecctx(inode, label->label,
label->len);
@@ -312,7 +318,7 @@ struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags)
}
EXPORT_SYMBOL_GPL(nfs4_label_alloc);
#else
-void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
+void inline nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
struct nfs4_label *label)
{
}
@@ -453,7 +459,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
nfsi->attrtimeo_timestamp = now;
nfsi->access_cache = RB_ROOT;
- nfs_fscache_init_inode(inode);
+ nfs_fscache_init_inode_cookie(inode);
unlock_new_inode(inode);
} else
@@ -848,7 +854,7 @@ int nfs_open(struct inode *inode, struct file *filp)
return PTR_ERR(ctx);
nfs_file_set_open_context(filp, ctx);
put_nfs_open_context(ctx);
- nfs_fscache_open_file(inode, filp);
+ nfs_fscache_set_inode_cookie(inode, filp);
return 0;
}
@@ -917,8 +923,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
if (nfsi->cache_validity & NFS_INO_INVALID_ACL)
nfs_zap_acl_cache(inode);
- nfs_setsecurity(inode, fattr, label);
-
dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
inode->i_sb->s_id,
(long long)NFS_FILEID(inode));
@@ -1205,7 +1209,6 @@ u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh)
* not on the result */
return nfs_fhandle_hash(fh);
}
-EXPORT_SYMBOL_GPL(_nfs_display_fhandle_hash);
/*
* _nfs_display_fhandle - display an NFS file handle on the console
@@ -1250,7 +1253,6 @@ void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption)
}
}
}
-EXPORT_SYMBOL_GPL(_nfs_display_fhandle);
#endif
/**
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 8b5cc04..38da8c2 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -88,8 +88,8 @@ struct nfs_parsed_mount_data {
unsigned int namlen;
unsigned int options;
unsigned int bsize;
- struct nfs_auth_info auth_info;
- rpc_authflavor_t selected_flavor;
+ unsigned int auth_flavor_len;
+ rpc_authflavor_t auth_flavors[1];
char *client_address;
unsigned int version;
unsigned int minorversion;
@@ -154,7 +154,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *,
rpc_authflavor_t);
int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *);
void nfs_server_insert_lists(struct nfs_server *);
-void nfs_server_remove_lists(struct nfs_server *);
void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int);
int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t,
rpc_authflavor_t);
@@ -175,8 +174,6 @@ extern struct nfs_server *nfs4_create_server(
struct nfs_subversion *);
extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
struct nfs_fh *);
-extern int nfs4_update_server(struct nfs_server *server, const char *hostname,
- struct sockaddr *sap, size_t salen);
extern void nfs_free_server(struct nfs_server *server);
extern struct nfs_server *nfs_clone_server(struct nfs_server *,
struct nfs_fh *,
@@ -269,21 +266,6 @@ extern const u32 nfs41_maxgetdevinfo_overhead;
extern struct rpc_procinfo nfs4_procedures[];
#endif
-#ifdef CONFIG_NFS_V4_SECURITY_LABEL
-extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags);
-static inline void nfs4_label_free(struct nfs4_label *label)
-{
- if (label) {
- kfree(label->label);
- kfree(label);
- }
- return;
-}
-#else
-static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; }
-static inline void nfs4_label_free(void *label) {}
-#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
-
/* proc.c */
void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
@@ -341,7 +323,6 @@ extern struct file_system_type nfs_xdev_fs_type;
extern struct file_system_type nfs4_xdev_fs_type;
extern struct file_system_type nfs4_referral_fs_type;
#endif
-bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *,
struct nfs_subversion *);
void nfs_initialise_sb(struct super_block *);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index b5a0afc..348b535 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -253,8 +253,9 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
dprintk("--> nfs_do_submount()\n");
- dprintk("%s: submounting on %pd2\n", __func__,
- dentry);
+ dprintk("%s: submounting on %s/%s\n", __func__,
+ dentry->d_parent->d_name.name,
+ dentry->d_name.name);
if (page == NULL)
goto out;
devname = nfs_devname(dentry, page, PAGE_SIZE);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 01b6f6a..90cb10d 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -321,7 +321,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
umode_t mode = sattr->ia_mode;
int status = -ENOMEM;
- dprintk("NFS call create %pd\n", dentry);
+ dprintk("NFS call create %s\n", dentry->d_name.name);
data = nfs3_alloc_createdata();
if (data == NULL)
@@ -548,7 +548,7 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
if (len > NFS3_MAXPATHLEN)
return -ENAMETOOLONG;
- dprintk("NFS call symlink %pd\n", dentry);
+ dprintk("NFS call symlink %s\n", dentry->d_name.name);
data = nfs3_alloc_createdata();
if (data == NULL)
@@ -576,7 +576,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
umode_t mode = sattr->ia_mode;
int status = -ENOMEM;
- dprintk("NFS call mkdir %pd\n", dentry);
+ dprintk("NFS call mkdir %s\n", dentry->d_name.name);
sattr->ia_mode &= ~current_umask();
@@ -695,7 +695,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
umode_t mode = sattr->ia_mode;
int status = -ENOMEM;
- dprintk("NFS call mknod %pd %u:%u\n", dentry,
+ dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name,
MAJOR(rdev), MINOR(rdev));
sattr->ia_mode &= ~current_umask();
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 5609edc..28842ab 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -9,14 +9,6 @@
#ifndef __LINUX_FS_NFS_NFS4_FS_H
#define __LINUX_FS_NFS_NFS4_FS_H
-#if defined(CONFIG_NFS_V4_2)
-#define NFS4_MAX_MINOR_VERSION 2
-#elif defined(CONFIG_NFS_V4_1)
-#define NFS4_MAX_MINOR_VERSION 1
-#else
-#define NFS4_MAX_MINOR_VERSION 0
-#endif
-
#if IS_ENABLED(CONFIG_NFS_V4)
#define NFS4_MAX_LOOP_ON_RECOVER (10)
@@ -37,8 +29,6 @@ enum nfs4_client_state {
NFS4CLNT_SERVER_SCOPE_MISMATCH,
NFS4CLNT_PURGE_STATE,
NFS4CLNT_BIND_CONN_TO_SESSION,
- NFS4CLNT_MOVED,
- NFS4CLNT_LEASE_MOVED,
};
#define NFS4_RENEW_TIMEOUT 0x01
@@ -60,7 +50,6 @@ struct nfs4_minor_version_ops {
const struct nfs4_state_recovery_ops *reboot_recovery_ops;
const struct nfs4_state_recovery_ops *nograce_recovery_ops;
const struct nfs4_state_maintenance_ops *state_renewal_ops;
- const struct nfs4_mig_recovery_ops *mig_recovery_ops;
};
#define NFS_SEQID_CONFIRMED 1
@@ -214,12 +203,6 @@ struct nfs4_state_maintenance_ops {
int (*renew_lease)(struct nfs_client *, struct rpc_cred *);
};
-struct nfs4_mig_recovery_ops {
- int (*get_locations)(struct inode *, struct nfs4_fs_locations *,
- struct page *, struct rpc_cred *);
- int (*fsid_present)(struct inode *, struct rpc_cred *);
-};
-
extern const struct dentry_operations nfs4_dentry_operations;
/* dir.c */
@@ -230,11 +213,10 @@ int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
extern struct file_system_type nfs4_fs_type;
/* nfs4namespace.c */
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
struct nfs_fh *, struct nfs_fattr *);
-int nfs4_replace_transport(struct nfs_server *server,
- const struct nfs4_fs_locations *locations);
/* nfs4proc.c */
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
@@ -249,9 +231,6 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *,
struct nfs4_fs_locations *, struct page *);
-extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *,
- struct page *page, struct rpc_cred *);
-extern int nfs4_proc_fsid_present(struct inode *, struct rpc_cred *);
extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr *,
struct nfs_fh *, struct nfs_fattr *);
extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
@@ -432,8 +411,6 @@ extern int nfs4_client_recover_expired_lease(struct nfs_client *clp);
extern void nfs4_schedule_state_manager(struct nfs_client *);
extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
-extern int nfs4_schedule_migration_recovery(const struct nfs_server *);
-extern void nfs4_schedule_lease_moved_recovery(struct nfs_client *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_server_scope(struct nfs_client *,
struct nfs41_server_scope **);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index b4a160a..a860ab5 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -197,7 +197,6 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
clp->cl_minorversion = cl_init->minorversion;
clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
- clp->cl_mig_gen = 1;
return clp;
error:
@@ -369,7 +368,6 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
if (clp->cl_minorversion != 0)
__set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags);
__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
- __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_GSS_KRB5I);
if (error == -EINVAL)
error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
@@ -926,7 +924,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
dprintk("Server FSID: %llx:%llx\n",
(unsigned long long) server->fsid.major,
(unsigned long long) server->fsid.minor);
- nfs_display_fhandle(mntfh, "Pseudo-fs root FH");
+ dprintk("Mount FH: %d\n", mntfh->size);
nfs4_session_set_rwsize(server);
@@ -949,8 +947,9 @@ out:
* Create a version 4 volume record
*/
static int nfs4_init_server(struct nfs_server *server,
- struct nfs_parsed_mount_data *data)
+ const struct nfs_parsed_mount_data *data)
{
+ rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
struct rpc_timeout timeparms;
int error;
@@ -962,15 +961,9 @@ static int nfs4_init_server(struct nfs_server *server,
/* Initialise the client representation from the mount data */
server->flags = data->flags;
server->options = data->options;
- server->auth_info = data->auth_info;
- /* Use the first specified auth flavor. If this flavor isn't
- * allowed by the server, use the SECINFO path to try the
- * other specified flavors */
- if (data->auth_info.flavor_len >= 1)
- data->selected_flavor = data->auth_info.flavors[0];
- else
- data->selected_flavor = RPC_AUTH_UNIX;
+ if (data->auth_flavor_len >= 1)
+ pseudoflavor = data->auth_flavors[0];
/* Get a client record */
error = nfs4_set_client(server,
@@ -978,7 +971,7 @@ static int nfs4_init_server(struct nfs_server *server,
(const struct sockaddr *)&data->nfs_server.address,
data->nfs_server.addrlen,
data->client_address,
- data->selected_flavor,
+ pseudoflavor,
data->nfs_server.protocol,
&timeparms,
data->minorversion,
@@ -998,8 +991,7 @@ static int nfs4_init_server(struct nfs_server *server,
server->port = data->nfs_server.port;
- error = nfs_init_server_rpcclient(server, &timeparms,
- data->selected_flavor);
+ error = nfs_init_server_rpcclient(server, &timeparms, pseudoflavor);
error:
/* Done */
@@ -1026,7 +1018,7 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
if (!server)
return ERR_PTR(-ENOMEM);
- auth_probe = mount_info->parsed->auth_info.flavor_len < 1;
+ auth_probe = mount_info->parsed->auth_flavor_len < 1;
/* set up the general RPC client */
error = nfs4_init_server(server, mount_info->parsed);
@@ -1054,7 +1046,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
{
struct nfs_client *parent_client;
struct nfs_server *server, *parent_server;
- bool auth_probe;
int error;
dprintk("--> nfs4_create_referral_server()\n");
@@ -1087,9 +1078,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
if (error < 0)
goto error;
- auth_probe = parent_server->auth_info.flavor_len < 1;
-
- error = nfs4_server_common_setup(server, mntfh, auth_probe);
+ error = nfs4_server_common_setup(server, mntfh,
+ !(parent_server->flags & NFS_MOUNT_SECFLAVOUR));
if (error < 0)
goto error;
@@ -1101,111 +1091,3 @@ error:
dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
return ERR_PTR(error);
}
-
-/*
- * Grab the destination's particulars, including lease expiry time.
- *
- * Returns zero if probe succeeded and retrieved FSID matches the FSID
- * we have cached.
- */
-static int nfs_probe_destination(struct nfs_server *server)
-{
- struct inode *inode = server->super->s_root->d_inode;
- struct nfs_fattr *fattr;
- int error;
-
- fattr = nfs_alloc_fattr();
- if (fattr == NULL)
- return -ENOMEM;
-
- /* Sanity: the probe won't work if the destination server
- * does not recognize the migrated FH. */
- error = nfs_probe_fsinfo(server, NFS_FH(inode), fattr);
-
- nfs_free_fattr(fattr);
- return error;
-}
-
-/**
- * nfs4_update_server - Move an nfs_server to a different nfs_client
- *
- * @server: represents FSID to be moved
- * @hostname: new end-point's hostname
- * @sap: new end-point's socket address
- * @salen: size of "sap"
- *
- * The nfs_server must be quiescent before this function is invoked.
- * Either its session is drained (NFSv4.1+), or its transport is
- * plugged and drained (NFSv4.0).
- *
- * Returns zero on success, or a negative errno value.
- */
-int nfs4_update_server(struct nfs_server *server, const char *hostname,
- struct sockaddr *sap, size_t salen)
-{
- struct nfs_client *clp = server->nfs_client;
- struct rpc_clnt *clnt = server->client;
- struct xprt_create xargs = {
- .ident = clp->cl_proto,
- .net = &init_net,
- .dstaddr = sap,
- .addrlen = salen,
- .servername = hostname,
- };
- char buf[INET6_ADDRSTRLEN + 1];
- struct sockaddr_storage address;
- struct sockaddr *localaddr = (struct sockaddr *)&address;
- int error;
-
- dprintk("--> %s: move FSID %llx:%llx to \"%s\")\n", __func__,
- (unsigned long long)server->fsid.major,
- (unsigned long long)server->fsid.minor,
- hostname);
-
- error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout);
- if (error != 0) {
- dprintk("<-- %s(): rpc_switch_client_transport returned %d\n",
- __func__, error);
- goto out;
- }
-
- error = rpc_localaddr(clnt, localaddr, sizeof(address));
- if (error != 0) {
- dprintk("<-- %s(): rpc_localaddr returned %d\n",
- __func__, error);
- goto out;
- }
-
- error = -EAFNOSUPPORT;
- if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0) {
- dprintk("<-- %s(): rpc_ntop returned %d\n",
- __func__, error);
- goto out;
- }
-
- nfs_server_remove_lists(server);
- error = nfs4_set_client(server, hostname, sap, salen, buf,
- clp->cl_rpcclient->cl_auth->au_flavor,
- clp->cl_proto, clnt->cl_timeout,
- clp->cl_minorversion, clp->cl_net);
- nfs_put_client(clp);
- if (error != 0) {
- nfs_server_insert_lists(server);
- dprintk("<-- %s(): nfs4_set_client returned %d\n",
- __func__, error);
- goto out;
- }
-
- if (server->nfs_client->cl_hostname == NULL)
- server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
- nfs_server_insert_lists(server);
-
- error = nfs_probe_destination(server);
- if (error < 0)
- goto out;
-
- dprintk("<-- %s() succeeded\n", __func__);
-
-out:
- return error;
-}
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 8de3407..77efaf1 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -31,7 +31,9 @@ nfs4_file_open(struct inode *inode, struct file *filp)
* -EOPENSTALE. The VFS will retry the lookup/create/open.
*/
- dprintk("NFS: open file(%pd2)\n", dentry);
+ dprintk("NFS: open file(%s/%s)\n",
+ dentry->d_parent->d_name.name,
+ dentry->d_name.name);
if ((openflags & O_ACCMODE) == 3)
openflags--;
@@ -73,7 +75,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
nfs_file_set_open_context(filp, ctx);
- nfs_fscache_open_file(inode, filp);
+ nfs_fscache_set_inode_cookie(inode, filp);
err = 0;
out_put_ctx:
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 4e7f05d..2288cd3 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -137,7 +137,6 @@ static size_t nfs_parse_server_name(char *string, size_t len,
/**
* nfs_find_best_sec - Find a security mechanism supported locally
- * @server: NFS server struct
* @flavors: List of security tuples returned by SECINFO procedure
*
* Return the pseudoflavor of the first security mechanism in
@@ -146,8 +145,7 @@ static size_t nfs_parse_server_name(char *string, size_t len,
* is searched in the order returned from the server, per RFC 3530
* recommendation.
*/
-static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server,
- struct nfs4_secinfo_flavors *flavors)
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
{
rpc_authflavor_t pseudoflavor;
struct nfs4_secinfo4 *secinfo;
@@ -162,19 +160,12 @@ static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server,
case RPC_AUTH_GSS:
pseudoflavor = rpcauth_get_pseudoflavor(secinfo->flavor,
&secinfo->flavor_info);
- /* make sure pseudoflavor matches sec= mount opt */
- if (pseudoflavor != RPC_AUTH_MAXFLAVOR &&
- nfs_auth_info_match(&server->auth_info,
- pseudoflavor))
+ if (pseudoflavor != RPC_AUTH_MAXFLAVOR)
return pseudoflavor;
break;
}
}
- /* if there were any sec= options then nothing matched */
- if (server->auth_info.flavor_len > 0)
- return -EPERM;
-
return RPC_AUTH_UNIX;
}
@@ -196,7 +187,7 @@ static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr
goto out;
}
- flavor = nfs_find_best_sec(NFS_SERVER(inode), flavors);
+ flavor = nfs_find_best_sec(flavors);
out:
put_page(page);
@@ -292,7 +283,8 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
if (locations == NULL || locations->nlocations <= 0)
goto out;
- dprintk("%s: referral at %pd2\n", __func__, dentry);
+ dprintk("%s: referral at %s/%s\n", __func__,
+ dentry->d_parent->d_name.name, dentry->d_name.name);
page = (char *) __get_free_page(GFP_USER);
if (!page)
@@ -356,8 +348,8 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *
mnt = ERR_PTR(-ENOENT);
parent = dget_parent(dentry);
- dprintk("%s: getting locations for %pd2\n",
- __func__, dentry);
+ dprintk("%s: getting locations for %s/%s\n",
+ __func__, parent->d_name.name, dentry->d_name.name);
err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page);
dput(parent);
@@ -398,7 +390,7 @@ struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
if (client->cl_auth->au_flavor != flavor)
flavor = client->cl_auth->au_flavor;
- else {
+ else if (!(server->flags & NFS_MOUNT_SECFLAVOUR)) {
rpc_authflavor_t new = nfs4_negotiate_security(dir, name);
if ((int)new >= 0)
flavor = new;
@@ -408,104 +400,3 @@ out:
rpc_shutdown_client(client);
return mnt;
}
-
-/*
- * Try one location from the fs_locations array.
- *
- * Returns zero on success, or a negative errno value.
- */
-static int nfs4_try_replacing_one_location(struct nfs_server *server,
- char *page, char *page2,
- const struct nfs4_fs_location *location)
-{
- const size_t addr_bufsize = sizeof(struct sockaddr_storage);
- struct sockaddr *sap;
- unsigned int s;
- size_t salen;
- int error;
-
- sap = kmalloc(addr_bufsize, GFP_KERNEL);
- if (sap == NULL)
- return -ENOMEM;
-
- error = -ENOENT;
- for (s = 0; s < location->nservers; s++) {
- const struct nfs4_string *buf = &location->servers[s];
- char *hostname;
-
- if (buf->len <= 0 || buf->len > PAGE_SIZE)
- continue;
-
- if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len) != NULL)
- continue;
-
- salen = nfs_parse_server_name(buf->data, buf->len,
- sap, addr_bufsize, server);
- if (salen == 0)
- continue;
- rpc_set_port(sap, NFS_PORT);
-
- error = -ENOMEM;
- hostname = kstrndup(buf->data, buf->len, GFP_KERNEL);
- if (hostname == NULL)
- break;
-
- error = nfs4_update_server(server, hostname, sap, salen);
- kfree(hostname);
- if (error == 0)
- break;
- }
-
- kfree(sap);
- return error;
-}
-
-/**
- * nfs4_replace_transport - set up transport to destination server
- *
- * @server: export being migrated
- * @locations: fs_locations array
- *
- * Returns zero on success, or a negative errno value.
- *
- * The client tries all the entries in the "locations" array, in the
- * order returned by the server, until one works or the end of the
- * array is reached.
- */
-int nfs4_replace_transport(struct nfs_server *server,
- const struct nfs4_fs_locations *locations)
-{
- char *page = NULL, *page2 = NULL;
- int loc, error;
-
- error = -ENOENT;
- if (locations == NULL || locations->nlocations <= 0)
- goto out;
-
- error = -ENOMEM;
- page = (char *) __get_free_page(GFP_USER);
- if (!page)
- goto out;
- page2 = (char *) __get_free_page(GFP_USER);
- if (!page2)
- goto out;
-
- for (loc = 0; loc < locations->nlocations; loc++) {
- const struct nfs4_fs_location *location =
- &locations->locations[loc];
-
- if (location == NULL || location->nservers <= 0 ||
- location->rootpath.ncomponents == 0)
- continue;
-
- error = nfs4_try_replacing_one_location(server, page,
- page2, location);
- if (error == 0)
- break;
- }
-
-out:
- free_page((unsigned long)page);
- free_page((unsigned long)page2);
- return error;
-}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 15052b8..d53d678 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -105,6 +105,9 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry,
if (nfs_server_capable(dir, NFS_CAP_SECURITY_LABEL) == 0)
return NULL;
+ if (NFS_SERVER(dir)->nfs_client->cl_minorversion < 2)
+ return NULL;
+
err = security_dentry_init_security(dentry, sattr->ia_mode,
&dentry->d_name, (void **)&label->label, &label->len);
if (err == 0)
@@ -381,14 +384,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
case -NFS4ERR_STALE_CLIENTID:
nfs4_schedule_lease_recovery(clp);
goto wait_on_recovery;
- case -NFS4ERR_MOVED:
- ret = nfs4_schedule_migration_recovery(server);
- if (ret < 0)
- break;
- goto wait_on_recovery;
- case -NFS4ERR_LEASE_MOVED:
- nfs4_schedule_lease_moved_recovery(clp);
- goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -436,8 +431,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
return nfs4_map_errors(ret);
wait_on_recovery:
ret = nfs4_wait_clnt_recover(clp);
- if (test_bit(NFS_MIG_FAILED, &server->mig_status))
- return -EIO;
if (ret == 0)
exception->retry = 1;
return ret;
@@ -1325,24 +1318,31 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
int ret;
if (!data->rpc_done) {
- if (data->rpc_status) {
- ret = data->rpc_status;
- goto err;
- }
- /* cached opens have already been processed */
- goto update;
+ ret = data->rpc_status;
+ goto err;
}
+ ret = -ESTALE;
+ if (!(data->f_attr.valid & NFS_ATTR_FATTR_TYPE) ||
+ !(data->f_attr.valid & NFS_ATTR_FATTR_FILEID) ||
+ !(data->f_attr.valid & NFS_ATTR_FATTR_CHANGE))
+ goto err;
+
+ ret = -ENOMEM;
+ state = nfs4_get_open_state(inode, data->owner);
+ if (state == NULL)
+ goto err;
+
ret = nfs_refresh_inode(inode, &data->f_attr);
if (ret)
goto err;
+ nfs_setsecurity(inode, &data->f_attr, data->f_label);
+
if (data->o_res.delegation_type != 0)
nfs4_opendata_check_deleg(data, state);
-update:
update_open_stateid(state, &data->o_res.stateid, NULL,
data->o_arg.fmode);
- atomic_inc(&state->count);
return state;
err:
@@ -1575,12 +1575,6 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
/* Don't recall a delegation if it was lost */
nfs4_schedule_lease_recovery(server->nfs_client);
return -EAGAIN;
- case -NFS4ERR_MOVED:
- nfs4_schedule_migration_recovery(server);
- return -EAGAIN;
- case -NFS4ERR_LEASE_MOVED:
- nfs4_schedule_lease_moved_recovery(server->nfs_client);
- return -EAGAIN;
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
@@ -2518,8 +2512,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
calldata->roc_barrier);
nfs_set_open_stateid(state, &calldata->res.stateid, 0);
renew_lease(server, calldata->timestamp);
+ nfs4_close_clear_stateid_flags(state,
+ calldata->arg.fmode);
break;
- case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_BAD_STATEID:
@@ -2527,13 +2522,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
if (calldata->arg.fmode == 0)
break;
default:
- if (nfs4_async_handle_error(task, server, state) == -EAGAIN) {
+ if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
rpc_restart_call_prepare(task);
- goto out_release;
- }
}
- nfs4_close_clear_stateid_flags(state, calldata->arg.fmode);
-out_release:
nfs_release_seqid(calldata->arg.seqid);
nfs_refresh_inode(calldata->inode, calldata->res.fattr);
dprintk("%s: done, ret = %d!\n", __func__, task->tk_status);
@@ -2706,10 +2697,6 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
nfs4_close_state(ctx->state, ctx->mode);
}
-#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
-#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
-#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_CHANGE_SECURITY_LABEL - 1UL)
-
static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
{
struct nfs4_server_caps_arg args = {
@@ -2725,25 +2712,12 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
if (status == 0) {
- /* Sanity check the server answers */
- switch (server->nfs_client->cl_minorversion) {
- case 0:
- res.attr_bitmask[1] &= FATTR4_WORD1_NFS40_MASK;
- res.attr_bitmask[2] = 0;
- break;
- case 1:
- res.attr_bitmask[2] &= FATTR4_WORD2_NFS41_MASK;
- break;
- case 2:
- res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK;
- }
memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|
NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
- NFS_CAP_CTIME|NFS_CAP_MTIME|
- NFS_CAP_SECURITY_LABEL);
+ NFS_CAP_CTIME|NFS_CAP_MTIME);
if (res.attr_bitmask[0] & FATTR4_WORD0_ACL)
server->caps |= NFS_CAP_ACLS;
if (res.has_links != 0)
@@ -2772,12 +2746,14 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
#endif
memcpy(server->attr_bitmask_nl, res.attr_bitmask,
sizeof(server->attr_bitmask));
- server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+ if (server->caps & NFS_CAP_SECURITY_LABEL) {
+ server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+ res.attr_bitmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+ }
memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask));
server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
- server->cache_consistency_bitmask[2] = 0;
server->acl_bitmask = res.acl_bitmask;
server->fh_expire_type = res.fh_expire_type;
}
@@ -2888,24 +2864,11 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
int status = -EPERM;
size_t i;
- if (server->auth_info.flavor_len > 0) {
- /* try each flavor specified by user */
- for (i = 0; i < server->auth_info.flavor_len; i++) {
- status = nfs4_lookup_root_sec(server, fhandle, info,
- server->auth_info.flavors[i]);
- if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
- continue;
- break;
- }
- } else {
- /* no flavors specified by user, try default list */
- for (i = 0; i < ARRAY_SIZE(flav_array); i++) {
- status = nfs4_lookup_root_sec(server, fhandle, info,
- flav_array[i]);
- if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
- continue;
- break;
- }
+ for (i = 0; i < ARRAY_SIZE(flav_array); i++) {
+ status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]);
+ if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
+ continue;
+ break;
}
/*
@@ -2947,6 +2910,9 @@ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
status = nfs4_lookup_root(server, fhandle, info);
if (status != -NFS4ERR_WRONGSEC)
break;
+ /* Did user force a 'sec=' mount option? */
+ if (server->flags & NFS_MOUNT_SECFLAVOUR)
+ break;
default:
status = nfs4_do_find_root_sec(server, fhandle, info);
}
@@ -3015,16 +2981,11 @@ static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir,
status = nfs4_proc_fs_locations(client, dir, name, locations, page);
if (status != 0)
goto out;
-
- /*
- * If the fsid didn't change, this is a migration event, not a
- * referral. Cause us to drop into the exception handler, which
- * will kick off migration recovery.
- */
+ /* Make sure server returned a different fsid for the referral */
if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) {
dprintk("%s: server did not return a different fsid for"
" a referral at %s\n", __func__, name->name);
- status = -NFS4ERR_MOVED;
+ status = -EIO;
goto out;
}
/* Fixup attributes for the nfs_lookup() call to nfs_fhget() */
@@ -3204,6 +3165,9 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
err = -EPERM;
if (client != *clnt)
goto out;
+ /* No security negotiation if the user specified 'sec=' */
+ if (NFS_SERVER(dir)->flags & NFS_MOUNT_SECFLAVOUR)
+ goto out;
client = nfs4_create_sec_client(client, dir, name);
if (IS_ERR(client))
return PTR_ERR(client);
@@ -3774,8 +3738,9 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
};
int status;
- dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__,
- dentry,
+ dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __func__,
+ dentry->d_parent->d_name.name,
+ dentry->d_name.name,
(unsigned long long)cookie);
nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
res.pgbase = args.pgbase;
@@ -4256,13 +4221,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
unsigned long timestamp = data->timestamp;
trace_nfs4_renew_async(clp, task->tk_status);
- switch (task->tk_status) {
- case 0:
- break;
- case -NFS4ERR_LEASE_MOVED:
- nfs4_schedule_lease_moved_recovery(clp);
- break;
- default:
+ if (task->tk_status < 0) {
/* Unless we're shutting down, schedule state recovery! */
if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0)
return;
@@ -4616,7 +4575,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
struct nfs4_label label = {0, 0, buflen, buf};
u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL };
- struct nfs4_getattr_arg arg = {
+ struct nfs4_getattr_arg args = {
.fh = NFS_FH(inode),
.bitmask = bitmask,
};
@@ -4627,14 +4586,14 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
};
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR],
- .rpc_argp = &arg,
+ .rpc_argp = &args,
.rpc_resp = &res,
};
int ret;
nfs_fattr_init(&fattr);
- ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 0);
+ ret = rpc_call_sync(server->client, &msg, 0);
if (ret)
return ret;
if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL))
@@ -4671,7 +4630,7 @@ static int _nfs4_do_set_security_label(struct inode *inode,
struct iattr sattr = {0};
struct nfs_server *server = NFS_SERVER(inode);
const u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL };
- struct nfs_setattrargs arg = {
+ struct nfs_setattrargs args = {
.fh = NFS_FH(inode),
.iap = &sattr,
.server = server,
@@ -4685,14 +4644,14 @@ static int _nfs4_do_set_security_label(struct inode *inode,
};
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR],
- .rpc_argp = &arg,
+ .rpc_argp = &args,
.rpc_resp = &res,
};
int status;
- nfs4_stateid_copy(&arg.stateid, &zero_stateid);
+ nfs4_stateid_copy(&args.stateid, &zero_stateid);
- status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
+ status = rpc_call_sync(server->client, &msg, 0);
if (status)
dprintk("%s failed: %d\n", __func__, status);
@@ -4776,24 +4735,17 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
if (state == NULL)
break;
if (nfs4_schedule_stateid_recovery(server, state) < 0)
- goto recovery_failed;
+ goto stateid_invalid;
goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
if (state != NULL) {
if (nfs4_schedule_stateid_recovery(server, state) < 0)
- goto recovery_failed;
+ goto stateid_invalid;
}
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
nfs4_schedule_lease_recovery(clp);
goto wait_on_recovery;
- case -NFS4ERR_MOVED:
- if (nfs4_schedule_migration_recovery(server) < 0)
- goto recovery_failed;
- goto wait_on_recovery;
- case -NFS4ERR_LEASE_MOVED:
- nfs4_schedule_lease_moved_recovery(clp);
- goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -4805,28 +4757,29 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
- goto wait_on_recovery;
+ task->tk_status = 0;
+ return -EAGAIN;
#endif /* CONFIG_NFS_V4_1 */
case -NFS4ERR_DELAY:
nfs_inc_server_stats(server, NFSIOS_DELAY);
case -NFS4ERR_GRACE:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
+ task->tk_status = 0;
+ return -EAGAIN;
case -NFS4ERR_RETRY_UNCACHED_REP:
case -NFS4ERR_OLD_STATEID:
- goto restart_call;
+ task->tk_status = 0;
+ return -EAGAIN;
}
task->tk_status = nfs4_map_errors(task->tk_status);
return 0;
-recovery_failed:
+stateid_invalid:
task->tk_status = -EIO;
return 0;
wait_on_recovery:
rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
- if (test_bit(NFS_MIG_FAILED, &server->mig_status))
- goto recovery_failed;
-restart_call:
task->tk_status = 0;
return -EAGAIN;
}
@@ -4990,16 +4943,10 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status);
switch (task->tk_status) {
- case 0:
- renew_lease(data->res.server, data->timestamp);
- break;
- case -NFS4ERR_ADMIN_REVOKED:
- case -NFS4ERR_DELEG_REVOKED:
- case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
- task->tk_status = 0;
+ case 0:
+ renew_lease(data->res.server, data->timestamp);
break;
default:
if (nfs4_async_handle_error(task, data->res.server, NULL) ==
@@ -5159,7 +5106,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
status = 0;
}
request->fl_ops->fl_release_private(request);
- request->fl_ops = NULL;
out:
return status;
}
@@ -5833,7 +5779,6 @@ struct nfs_release_lockowner_data {
struct nfs_release_lockowner_args args;
struct nfs4_sequence_args seq_args;
struct nfs4_sequence_res seq_res;
- unsigned long timestamp;
};
static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata)
@@ -5841,27 +5786,12 @@ static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata
struct nfs_release_lockowner_data *data = calldata;
nfs40_setup_sequence(data->server,
&data->seq_args, &data->seq_res, task);
- data->timestamp = jiffies;
}
static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata)
{
struct nfs_release_lockowner_data *data = calldata;
- struct nfs_server *server = data->server;
-
nfs40_sequence_done(task, &data->seq_res);
-
- switch (task->tk_status) {
- case 0:
- renew_lease(server, data->timestamp);
- break;
- case -NFS4ERR_STALE_CLIENTID:
- case -NFS4ERR_EXPIRED:
- case -NFS4ERR_LEASE_MOVED:
- case -NFS4ERR_DELAY:
- if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN)
- rpc_restart_call_prepare(task);
- }
}
static void nfs4_release_lockowner_release(void *calldata)
@@ -6060,283 +5990,6 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
return err;
}
-/*
- * This operation also signals the server that this client is
- * performing migration recovery. The server can stop returning
- * NFS4ERR_LEASE_MOVED to this client. A RENEW operation is
- * appended to this compound to identify the client ID which is
- * performing recovery.
- */
-static int _nfs40_proc_get_locations(struct inode *inode,
- struct nfs4_fs_locations *locations,
- struct page *page, struct rpc_cred *cred)
-{
- struct nfs_server *server = NFS_SERVER(inode);
- struct rpc_clnt *clnt = server->client;
- u32 bitmask[2] = {
- [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
- };
- struct nfs4_fs_locations_arg args = {
- .clientid = server->nfs_client->cl_clientid,
- .fh = NFS_FH(inode),
- .page = page,
- .bitmask = bitmask,
- .migration = 1, /* skip LOOKUP */
- .renew = 1, /* append RENEW */
- };
- struct nfs4_fs_locations_res res = {
- .fs_locations = locations,
- .migration = 1,
- .renew = 1,
- };
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS],
- .rpc_argp = &args,
- .rpc_resp = &res,
- .rpc_cred = cred,
- };
- unsigned long now = jiffies;
- int status;
-
- nfs_fattr_init(&locations->fattr);
- locations->server = server;
- locations->nlocations = 0;
-
- nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
- nfs4_set_sequence_privileged(&args.seq_args);
- status = nfs4_call_sync_sequence(clnt, server, &msg,
- &args.seq_args, &res.seq_res);
- if (status)
- return status;
-
- renew_lease(server, now);
- return 0;
-}
-
-#ifdef CONFIG_NFS_V4_1
-
-/*
- * This operation also signals the server that this client is
- * performing migration recovery. The server can stop asserting
- * SEQ4_STATUS_LEASE_MOVED for this client. The client ID
- * performing this operation is identified in the SEQUENCE
- * operation in this compound.
- *
- * When the client supports GETATTR(fs_locations_info), it can
- * be plumbed in here.
- */
-static int _nfs41_proc_get_locations(struct inode *inode,
- struct nfs4_fs_locations *locations,
- struct page *page, struct rpc_cred *cred)
-{
- struct nfs_server *server = NFS_SERVER(inode);
- struct rpc_clnt *clnt = server->client;
- u32 bitmask[2] = {
- [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
- };
- struct nfs4_fs_locations_arg args = {
- .fh = NFS_FH(inode),
- .page = page,
- .bitmask = bitmask,
- .migration = 1, /* skip LOOKUP */
- };
- struct nfs4_fs_locations_res res = {
- .fs_locations = locations,
- .migration = 1,
- };
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS],
- .rpc_argp = &args,
- .rpc_resp = &res,
- .rpc_cred = cred,
- };
- int status;
-
- nfs_fattr_init(&locations->fattr);
- locations->server = server;
- locations->nlocations = 0;
-
- nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
- nfs4_set_sequence_privileged(&args.seq_args);
- status = nfs4_call_sync_sequence(clnt, server, &msg,
- &args.seq_args, &res.seq_res);
- if (status == NFS4_OK &&
- res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED)
- status = -NFS4ERR_LEASE_MOVED;
- return status;
-}
-
-#endif /* CONFIG_NFS_V4_1 */
-
-/**
- * nfs4_proc_get_locations - discover locations for a migrated FSID
- * @inode: inode on FSID that is migrating
- * @locations: result of query
- * @page: buffer
- * @cred: credential to use for this operation
- *
- * Returns NFS4_OK on success, a negative NFS4ERR status code if the
- * operation failed, or a negative errno if a local error occurred.
- *
- * On success, "locations" is filled in, but if the server has
- * no locations information, NFS_ATTR_FATTR_V4_LOCATIONS is not
- * asserted.
- *
- * -NFS4ERR_LEASE_MOVED is returned if the server still has leases
- * from this client that require migration recovery.
- */
-int nfs4_proc_get_locations(struct inode *inode,
- struct nfs4_fs_locations *locations,
- struct page *page, struct rpc_cred *cred)
-{
- struct nfs_server *server = NFS_SERVER(inode);
- struct nfs_client *clp = server->nfs_client;
- const struct nfs4_mig_recovery_ops *ops =
- clp->cl_mvops->mig_recovery_ops;
- struct nfs4_exception exception = { };
- int status;
-
- dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__,
- (unsigned long long)server->fsid.major,
- (unsigned long long)server->fsid.minor,
- clp->cl_hostname);
- nfs_display_fhandle(NFS_FH(inode), __func__);
-
- do {
- status = ops->get_locations(inode, locations, page, cred);
- if (status != -NFS4ERR_DELAY)
- break;
- nfs4_handle_exception(server, status, &exception);
- } while (exception.retry);
- return status;
-}
-
-/*
- * This operation also signals the server that this client is
- * performing "lease moved" recovery. The server can stop
- * returning NFS4ERR_LEASE_MOVED to this client. A RENEW operation
- * is appended to this compound to identify the client ID which is
- * performing recovery.
- */
-static int _nfs40_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
-{
- struct nfs_server *server = NFS_SERVER(inode);
- struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
- struct rpc_clnt *clnt = server->client;
- struct nfs4_fsid_present_arg args = {
- .fh = NFS_FH(inode),
- .clientid = clp->cl_clientid,
- .renew = 1, /* append RENEW */
- };
- struct nfs4_fsid_present_res res = {
- .renew = 1,
- };
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSID_PRESENT],
- .rpc_argp = &args,
- .rpc_resp = &res,
- .rpc_cred = cred,
- };
- unsigned long now = jiffies;
- int status;
-
- res.fh = nfs_alloc_fhandle();
- if (res.fh == NULL)
- return -ENOMEM;
-
- nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
- nfs4_set_sequence_privileged(&args.seq_args);
- status = nfs4_call_sync_sequence(clnt, server, &msg,
- &args.seq_args, &res.seq_res);
- nfs_free_fhandle(res.fh);
- if (status)
- return status;
-
- do_renew_lease(clp, now);
- return 0;
-}
-
-#ifdef CONFIG_NFS_V4_1
-
-/*
- * This operation also signals the server that this client is
- * performing "lease moved" recovery. The server can stop asserting
- * SEQ4_STATUS_LEASE_MOVED for this client. The client ID performing
- * this operation is identified in the SEQUENCE operation in this
- * compound.
- */
-static int _nfs41_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
-{
- struct nfs_server *server = NFS_SERVER(inode);
- struct rpc_clnt *clnt = server->client;
- struct nfs4_fsid_present_arg args = {
- .fh = NFS_FH(inode),
- };
- struct nfs4_fsid_present_res res = {
- };
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSID_PRESENT],
- .rpc_argp = &args,
- .rpc_resp = &res,
- .rpc_cred = cred,
- };
- int status;
-
- res.fh = nfs_alloc_fhandle();
- if (res.fh == NULL)
- return -ENOMEM;
-
- nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
- nfs4_set_sequence_privileged(&args.seq_args);
- status = nfs4_call_sync_sequence(clnt, server, &msg,
- &args.seq_args, &res.seq_res);
- nfs_free_fhandle(res.fh);
- if (status == NFS4_OK &&
- res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED)
- status = -NFS4ERR_LEASE_MOVED;
- return status;
-}
-
-#endif /* CONFIG_NFS_V4_1 */
-
-/**
- * nfs4_proc_fsid_present - Is this FSID present or absent on server?
- * @inode: inode on FSID to check
- * @cred: credential to use for this operation
- *
- * Server indicates whether the FSID is present, moved, or not
- * recognized. This operation is necessary to clear a LEASE_MOVED
- * condition for this client ID.
- *
- * Returns NFS4_OK if the FSID is present on this server,
- * -NFS4ERR_MOVED if the FSID is no longer present, a negative
- * NFS4ERR code if some error occurred on the server, or a
- * negative errno if a local failure occurred.
- */
-int nfs4_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
-{
- struct nfs_server *server = NFS_SERVER(inode);
- struct nfs_client *clp = server->nfs_client;
- const struct nfs4_mig_recovery_ops *ops =
- clp->cl_mvops->mig_recovery_ops;
- struct nfs4_exception exception = { };
- int status;
-
- dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__,
- (unsigned long long)server->fsid.major,
- (unsigned long long)server->fsid.minor,
- clp->cl_hostname);
- nfs_display_fhandle(NFS_FH(inode), __func__);
-
- do {
- status = ops->fsid_present(inode, cred);
- if (status != -NFS4ERR_DELAY)
- break;
- nfs4_handle_exception(server, status, &exception);
- } while (exception.retry);
- return status;
-}
-
/**
* If 'use_integrity' is true and the state managment nfs_client
* cl_rpcclient is using krb5i/p, use the integrity protected cl_rpcclient
@@ -6623,14 +6276,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
struct nfs41_exchange_id_args args = {
.verifier = &verifier,
.client = clp,
-#ifdef CONFIG_NFS_V4_1_MIGRATION
- .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER |
- EXCHGID4_FLAG_BIND_PRINC_STATEID |
- EXCHGID4_FLAG_SUPP_MOVED_MIGR,
-#else
.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER |
- EXCHGID4_FLAG_BIND_PRINC_STATEID,
-#endif
+ EXCHGID4_FLAG_BIND_PRINC_STATEID,
};
struct nfs41_exchange_id_res res = {
0
@@ -7598,14 +7245,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
return;
server = NFS_SERVER(lrp->args.inode);
- switch (task->tk_status) {
- default:
- task->tk_status = 0;
- case 0:
- break;
- case -NFS4ERR_DELAY:
- if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN)
- break;
+ if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
rpc_restart_call_prepare(task);
return;
}
@@ -7976,9 +7616,6 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
break;
}
- if (!nfs_auth_info_match(&server->auth_info, flavor))
- flavor = RPC_AUTH_MAXFLAVOR;
-
if (flavor != RPC_AUTH_MAXFLAVOR) {
err = nfs4_lookup_root_sec(server, fhandle,
info, flavor);
@@ -8250,18 +7887,6 @@ static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
};
#endif
-static const struct nfs4_mig_recovery_ops nfs40_mig_recovery_ops = {
- .get_locations = _nfs40_proc_get_locations,
- .fsid_present = _nfs40_proc_fsid_present,
-};
-
-#if defined(CONFIG_NFS_V4_1)
-static const struct nfs4_mig_recovery_ops nfs41_mig_recovery_ops = {
- .get_locations = _nfs41_proc_get_locations,
- .fsid_present = _nfs41_proc_fsid_present,
-};
-#endif /* CONFIG_NFS_V4_1 */
-
static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
.minor_version = 0,
.init_caps = NFS_CAP_READDIRPLUS
@@ -8277,7 +7902,6 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
.reboot_recovery_ops = &nfs40_reboot_recovery_ops,
.nograce_recovery_ops = &nfs40_nograce_recovery_ops,
.state_renewal_ops = &nfs40_state_renewal_ops,
- .mig_recovery_ops = &nfs40_mig_recovery_ops,
};
#if defined(CONFIG_NFS_V4_1)
@@ -8298,7 +7922,6 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
.state_renewal_ops = &nfs41_state_renewal_ops,
- .mig_recovery_ops = &nfs41_mig_recovery_ops,
};
#endif
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 059c01b..cc14cbb 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -239,12 +239,14 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
}
}
+#if defined(CONFIG_NFS_V4_1)
+
static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl)
{
set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
spin_lock(&tbl->slot_tbl_lock);
if (tbl->highest_used_slotid != NFS4_NO_SLOT) {
- reinit_completion(&tbl->complete);
+ INIT_COMPLETION(tbl->complete);
spin_unlock(&tbl->slot_tbl_lock);
return wait_for_completion_interruptible(&tbl->complete);
}
@@ -268,8 +270,6 @@ static int nfs4_begin_drain_session(struct nfs_client *clp)
return nfs4_drain_slot_tbl(&ses->fc_slot_table);
}
-#if defined(CONFIG_NFS_V4_1)
-
static int nfs41_setup_state_renewal(struct nfs_client *clp)
{
int status;
@@ -1197,74 +1197,20 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)
}
EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
-/**
- * nfs4_schedule_migration_recovery - trigger migration recovery
- *
- * @server: FSID that is migrating
- *
- * Returns zero if recovery has started, otherwise a negative NFS4ERR
- * value is returned.
- */
-int nfs4_schedule_migration_recovery(const struct nfs_server *server)
-{
- struct nfs_client *clp = server->nfs_client;
-
- if (server->fh_expire_type != NFS4_FH_PERSISTENT) {
- pr_err("NFS: volatile file handles not supported (server %s)\n",
- clp->cl_hostname);
- return -NFS4ERR_IO;
- }
-
- if (test_bit(NFS_MIG_FAILED, &server->mig_status))
- return -NFS4ERR_IO;
-
- dprintk("%s: scheduling migration recovery for (%llx:%llx) on %s\n",
- __func__,
- (unsigned long long)server->fsid.major,
- (unsigned long long)server->fsid.minor,
- clp->cl_hostname);
-
- set_bit(NFS_MIG_IN_TRANSITION,
- &((struct nfs_server *)server)->mig_status);
- set_bit(NFS4CLNT_MOVED, &clp->cl_state);
-
- nfs4_schedule_state_manager(clp);
- return 0;
-}
-EXPORT_SYMBOL_GPL(nfs4_schedule_migration_recovery);
-
-/**
- * nfs4_schedule_lease_moved_recovery - start lease-moved recovery
- *
- * @clp: server to check for moved leases
- *
- */
-void nfs4_schedule_lease_moved_recovery(struct nfs_client *clp)
-{
- dprintk("%s: scheduling lease-moved recovery for client ID %llx on %s\n",
- __func__, clp->cl_clientid, clp->cl_hostname);
-
- set_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state);
- nfs4_schedule_state_manager(clp);
-}
-EXPORT_SYMBOL_GPL(nfs4_schedule_lease_moved_recovery);
-
int nfs4_wait_clnt_recover(struct nfs_client *clp)
{
int res;
might_sleep();
- atomic_inc(&clp->cl_count);
res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
nfs_wait_bit_killable, TASK_KILLABLE);
if (res)
- goto out;
+ return res;
+
if (clp->cl_cons_state < 0)
- res = clp->cl_cons_state;
-out:
- nfs_put_client(clp);
- return res;
+ return clp->cl_cons_state;
+ return 0;
}
int nfs4_client_recover_expired_lease(struct nfs_client *clp)
@@ -1429,8 +1375,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
goto out;
default:
- printk(KERN_ERR "NFS: %s: unhandled error %d\n",
- __func__, status);
+ printk(KERN_ERR "NFS: %s: unhandled error %d. "
+ "Zeroing state\n", __func__, status);
case -ENOMEM:
case -NFS4ERR_DENIED:
case -NFS4ERR_RECLAIM_BAD:
@@ -1476,7 +1422,7 @@ restart:
if (status >= 0) {
status = nfs4_reclaim_locks(state, ops);
if (status >= 0) {
- if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+ if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) {
spin_lock(&state->state_lock);
list_for_each_entry(lock, &state->lock_states, ls_locks) {
if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
@@ -1493,12 +1439,15 @@ restart:
}
switch (status) {
default:
- printk(KERN_ERR "NFS: %s: unhandled error %d\n",
- __func__, status);
+ printk(KERN_ERR "NFS: %s: unhandled error %d. "
+ "Zeroing state\n", __func__, status);
case -ENOENT:
case -ENOMEM:
case -ESTALE:
- /* Open state on this file cannot be recovered */
+ /*
+ * Open state on this file cannot be recovered
+ * All we can do is revert to using the zero stateid.
+ */
nfs4_state_mark_recovery_failed(state, status);
break;
case -EAGAIN:
@@ -1679,6 +1628,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
nfs4_state_end_reclaim_reboot(clp);
break;
case -NFS4ERR_STALE_CLIENTID:
+ case -NFS4ERR_LEASE_MOVED:
set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
nfs4_state_clear_reclaim_reboot(clp);
nfs4_state_start_reclaim_reboot(clp);
@@ -1879,168 +1829,6 @@ static int nfs4_purge_lease(struct nfs_client *clp)
return 0;
}
-/*
- * Try remote migration of one FSID from a source server to a
- * destination server. The source server provides a list of
- * potential destinations.
- *
- * Returns zero or a negative NFS4ERR status code.
- */
-static int nfs4_try_migration(struct nfs_server *server, struct rpc_cred *cred)
-{
- struct nfs_client *clp = server->nfs_client;
- struct nfs4_fs_locations *locations = NULL;
- struct inode *inode;
- struct page *page;
- int status, result;
-
- dprintk("--> %s: FSID %llx:%llx on \"%s\"\n", __func__,
- (unsigned long long)server->fsid.major,
- (unsigned long long)server->fsid.minor,
- clp->cl_hostname);
-
- result = 0;
- page = alloc_page(GFP_KERNEL);
- locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
- if (page == NULL || locations == NULL) {
- dprintk("<-- %s: no memory\n", __func__);
- goto out;
- }
-
- inode = server->super->s_root->d_inode;
- result = nfs4_proc_get_locations(inode, locations, page, cred);
- if (result) {
- dprintk("<-- %s: failed to retrieve fs_locations: %d\n",
- __func__, result);
- goto out;
- }
-
- result = -NFS4ERR_NXIO;
- if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
- dprintk("<-- %s: No fs_locations data, migration skipped\n",
- __func__);
- goto out;
- }
-
- nfs4_begin_drain_session(clp);
-
- status = nfs4_replace_transport(server, locations);
- if (status != 0) {
- dprintk("<-- %s: failed to replace transport: %d\n",
- __func__, status);
- goto out;
- }
-
- result = 0;
- dprintk("<-- %s: migration succeeded\n", __func__);
-
-out:
- if (page != NULL)
- __free_page(page);
- kfree(locations);
- if (result) {
- pr_err("NFS: migration recovery failed (server %s)\n",
- clp->cl_hostname);
- set_bit(NFS_MIG_FAILED, &server->mig_status);
- }
- return result;
-}
-
-/*
- * Returns zero or a negative NFS4ERR status code.
- */
-static int nfs4_handle_migration(struct nfs_client *clp)
-{
- const struct nfs4_state_maintenance_ops *ops =
- clp->cl_mvops->state_renewal_ops;
- struct nfs_server *server;
- struct rpc_cred *cred;
-
- dprintk("%s: migration reported on \"%s\"\n", __func__,
- clp->cl_hostname);
-
- spin_lock(&clp->cl_lock);
- cred = ops->get_state_renewal_cred_locked(clp);
- spin_unlock(&clp->cl_lock);
- if (cred == NULL)
- return -NFS4ERR_NOENT;
-
- clp->cl_mig_gen++;
-restart:
- rcu_read_lock();
- list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
- int status;
-
- if (server->mig_gen == clp->cl_mig_gen)
- continue;
- server->mig_gen = clp->cl_mig_gen;
-
- if (!test_and_clear_bit(NFS_MIG_IN_TRANSITION,
- &server->mig_status))
- continue;
-
- rcu_read_unlock();
- status = nfs4_try_migration(server, cred);
- if (status < 0) {
- put_rpccred(cred);
- return status;
- }
- goto restart;
- }
- rcu_read_unlock();
- put_rpccred(cred);
- return 0;
-}
-
-/*
- * Test each nfs_server on the clp's cl_superblocks list to see
- * if it's moved to another server. Stop when the server no longer
- * returns NFS4ERR_LEASE_MOVED.
- */
-static int nfs4_handle_lease_moved(struct nfs_client *clp)
-{
- const struct nfs4_state_maintenance_ops *ops =
- clp->cl_mvops->state_renewal_ops;
- struct nfs_server *server;
- struct rpc_cred *cred;
-
- dprintk("%s: lease moved reported on \"%s\"\n", __func__,
- clp->cl_hostname);
-
- spin_lock(&clp->cl_lock);
- cred = ops->get_state_renewal_cred_locked(clp);
- spin_unlock(&clp->cl_lock);
- if (cred == NULL)
- return -NFS4ERR_NOENT;
-
- clp->cl_mig_gen++;
-restart:
- rcu_read_lock();
- list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
- struct inode *inode;
- int status;
-
- if (server->mig_gen == clp->cl_mig_gen)
- continue;
- server->mig_gen = clp->cl_mig_gen;
-
- rcu_read_unlock();
-
- inode = server->super->s_root->d_inode;
- status = nfs4_proc_fsid_present(inode, cred);
- if (status != -NFS4ERR_MOVED)
- goto restart; /* wasn't this one */
- if (nfs4_try_migration(server, cred) == -NFS4ERR_LEASE_MOVED)
- goto restart; /* there are more */
- goto out;
- }
- rcu_read_unlock();
-
-out:
- put_rpccred(cred);
- return 0;
-}
-
/**
* nfs4_discover_server_trunking - Detect server IP address trunking
*
@@ -2093,15 +1881,10 @@ again:
nfs4_root_machine_cred(clp);
goto again;
}
- if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX)
+ if (i > 2)
break;
case -NFS4ERR_CLID_INUSE:
case -NFS4ERR_WRONGSEC:
- /* No point in retrying if we already used RPC_AUTH_UNIX */
- if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX) {
- status = -EPERM;
- break;
- }
clnt = rpc_clone_client_set_auth(clnt, RPC_AUTH_UNIX);
if (IS_ERR(clnt)) {
status = PTR_ERR(clnt);
@@ -2234,10 +2017,9 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
nfs41_handle_server_reboot(clp);
if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
- SEQ4_STATUS_ADMIN_STATE_REVOKED))
+ SEQ4_STATUS_ADMIN_STATE_REVOKED |
+ SEQ4_STATUS_LEASE_MOVED))
nfs41_handle_state_revoked(clp);
- if (flags & SEQ4_STATUS_LEASE_MOVED)
- nfs4_schedule_lease_moved_recovery(clp);
if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
nfs41_handle_recallable_state_revoked(clp);
if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT)
@@ -2375,20 +2157,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
status = nfs4_check_lease(clp);
if (status < 0)
goto out_error;
- }
-
- if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) {
- section = "migration";
- status = nfs4_handle_migration(clp);
- if (status < 0)
- goto out_error;
- }
-
- if (test_and_clear_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state)) {
- section = "lease moved";
- status = nfs4_handle_lease_moved(clp);
- if (status < 0)
- goto out_error;
+ continue;
}
/* First recover reboot state... */
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 65ab0a0..e26acdd 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -261,9 +261,9 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name,
res = nfs_follow_remote_path(root_mnt, export_path);
- dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n",
- PTR_ERR_OR_ZERO(res),
- IS_ERR(res) ? " [error]" : "");
+ dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n",
+ IS_ERR(res) ? PTR_ERR(res) : 0,
+ IS_ERR(res) ? " [error]" : "");
return res;
}
@@ -319,9 +319,9 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
data->mnt_path = export_path;
res = nfs_follow_remote_path(root_mnt, export_path);
- dprintk("<-- nfs4_referral_mount() = %d%s\n",
- PTR_ERR_OR_ZERO(res),
- IS_ERR(res) ? " [error]" : "");
+ dprintk("<-- nfs4_referral_mount() = %ld%s\n",
+ IS_ERR(res) ? PTR_ERR(res) : 0,
+ IS_ERR(res) ? " [error]" : "");
return res;
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 5be2868..79210d2 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -105,8 +105,12 @@ static int nfs4_stat_to_errno(int);
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
/* PI(4 bytes) + LFS(4 bytes) + 1(for null terminator?) + MAXLABELLEN */
#define nfs4_label_maxsz (4 + 4 + 1 + XDR_QUADLEN(NFS4_MAXLABELLEN))
+#define encode_readdir_space 24
+#define encode_readdir_bitmask_sz 3
#else
#define nfs4_label_maxsz 0
+#define encode_readdir_space 20
+#define encode_readdir_bitmask_sz 2
#endif
/* We support only one layout type per file system */
#define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8)
@@ -591,13 +595,11 @@ static int nfs4_stat_to_errno(int);
#define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
- encode_getattr_maxsz + \
- encode_renew_maxsz)
+ encode_getattr_maxsz)
#define NFS4_dec_getattr_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
- decode_getattr_maxsz + \
- decode_renew_maxsz)
+ decode_getattr_maxsz)
#define NFS4_enc_lookup_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -734,15 +736,13 @@ static int nfs4_stat_to_errno(int);
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_lookup_maxsz + \
- encode_fs_locations_maxsz + \
- encode_renew_maxsz)
+ encode_fs_locations_maxsz)
#define NFS4_dec_fs_locations_sz \
(compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_lookup_maxsz + \
- decode_fs_locations_maxsz + \
- decode_renew_maxsz)
+ decode_fs_locations_maxsz)
#define NFS4_enc_secinfo_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -751,18 +751,6 @@ static int nfs4_stat_to_errno(int);
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_secinfo_maxsz)
-#define NFS4_enc_fsid_present_sz \
- (compound_encode_hdr_maxsz + \
- encode_sequence_maxsz + \
- encode_putfh_maxsz + \
- encode_getfh_maxsz + \
- encode_renew_maxsz)
-#define NFS4_dec_fsid_present_sz \
- (compound_decode_hdr_maxsz + \
- decode_sequence_maxsz + \
- decode_putfh_maxsz + \
- decode_getfh_maxsz + \
- decode_renew_maxsz)
#if defined(CONFIG_NFS_V4_1)
#define NFS4_enc_bind_conn_to_session_sz \
(compound_encode_hdr_maxsz + \
@@ -1577,8 +1565,6 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
};
uint32_t dircount = readdir->count >> 1;
__be32 *p, verf[2];
- uint32_t attrlen = 0;
- unsigned int i;
if (readdir->plus) {
attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
@@ -1587,27 +1573,26 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV|
FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS|
FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
- attrs[2] |= FATTR4_WORD2_SECURITY_LABEL;
dircount >>= 1;
}
/* Use mounted_on_fileid only if the server supports it */
if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID))
attrs[0] |= FATTR4_WORD0_FILEID;
- for (i = 0; i < ARRAY_SIZE(attrs); i++) {
- attrs[i] &= readdir->bitmask[i];
- if (attrs[i] != 0)
- attrlen = i+1;
- }
encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr);
encode_uint64(xdr, readdir->cookie);
encode_nfs4_verifier(xdr, &readdir->verifier);
- p = reserve_space(xdr, 12 + (attrlen << 2));
+ p = reserve_space(xdr, encode_readdir_space);
*p++ = cpu_to_be32(dircount);
*p++ = cpu_to_be32(readdir->count);
- *p++ = cpu_to_be32(attrlen);
- for (i = 0; i < attrlen; i++)
- *p++ = cpu_to_be32(attrs[i]);
+ *p++ = cpu_to_be32(encode_readdir_bitmask_sz);
+ *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
+ *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
+ if (encode_readdir_bitmask_sz > 2) {
+ if (hdr->minorversion > 1)
+ attrs[2] |= FATTR4_WORD2_SECURITY_LABEL;
+ p++, *p++ = cpu_to_be32(attrs[2] & readdir->bitmask[2]);
+ }
memcpy(verf, readdir->verifier.data, sizeof(verf));
dprintk("%s: cookie = %llu, verifier = %08x:%08x, bitmap = %08x:%08x:%08x\n",
@@ -2702,20 +2687,11 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
- if (args->migration) {
- encode_putfh(xdr, args->fh, &hdr);
- replen = hdr.replen;
- encode_fs_locations(xdr, args->bitmask, &hdr);
- if (args->renew)
- encode_renew(xdr, args->clientid, &hdr);
- } else {
- encode_putfh(xdr, args->dir_fh, &hdr);
- encode_lookup(xdr, args->name, &hdr);
- replen = hdr.replen;
- encode_fs_locations(xdr, args->bitmask, &hdr);
- }
+ encode_putfh(xdr, args->dir_fh, &hdr);
+ encode_lookup(xdr, args->name, &hdr);
+ replen = hdr.replen; /* get the attribute into args->page */
+ encode_fs_locations(xdr, args->bitmask, &hdr);
- /* Set up reply kvec to capture returned fs_locations array. */
xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page,
0, PAGE_SIZE);
encode_nops(&hdr);
@@ -2739,26 +2715,6 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
encode_nops(&hdr);
}
-/*
- * Encode FSID_PRESENT request
- */
-static void nfs4_xdr_enc_fsid_present(struct rpc_rqst *req,
- struct xdr_stream *xdr,
- struct nfs4_fsid_present_arg *args)
-{
- struct compound_hdr hdr = {
- .minorversion = nfs4_xdr_minorversion(&args->seq_args),
- };
-
- encode_compound_hdr(xdr, req, &hdr);
- encode_sequence(xdr, &args->seq_args, &hdr);
- encode_putfh(xdr, args->fh, &hdr);
- encode_getfh(xdr, &hdr);
- if (args->renew)
- encode_renew(xdr, args->clientid, &hdr);
- encode_nops(&hdr);
-}
-
#if defined(CONFIG_NFS_V4_1)
/*
* BIND_CONN_TO_SESSION request
@@ -6868,26 +6824,13 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
status = decode_putfh(xdr);
if (status)
goto out;
- if (res->migration) {
- xdr_enter_page(xdr, PAGE_SIZE);
- status = decode_getfattr_generic(xdr,
- &res->fs_locations->fattr,
- NULL, res->fs_locations,
- NULL, res->fs_locations->server);
- if (status)
- goto out;
- if (res->renew)
- status = decode_renew(xdr);
- } else {
- status = decode_lookup(xdr);
- if (status)
- goto out;
- xdr_enter_page(xdr, PAGE_SIZE);
- status = decode_getfattr_generic(xdr,
- &res->fs_locations->fattr,
+ status = decode_lookup(xdr);
+ if (status)
+ goto out;
+ xdr_enter_page(xdr, PAGE_SIZE);
+ status = decode_getfattr_generic(xdr, &res->fs_locations->fattr,
NULL, res->fs_locations,
NULL, res->fs_locations->server);
- }
out:
return status;
}
@@ -6916,34 +6859,6 @@ out:
return status;
}
-/*
- * Decode FSID_PRESENT response
- */
-static int nfs4_xdr_dec_fsid_present(struct rpc_rqst *rqstp,
- struct xdr_stream *xdr,
- struct nfs4_fsid_present_res *res)
-{
- struct compound_hdr hdr;
- int status;
-
- status = decode_compound_hdr(xdr, &hdr);
- if (status)
- goto out;
- status = decode_sequence(xdr, &res->seq_res, rqstp);
- if (status)
- goto out;
- status = decode_putfh(xdr);
- if (status)
- goto out;
- status = decode_getfh(xdr, res->fh);
- if (status)
- goto out;
- if (res->renew)
- status = decode_renew(xdr);
-out:
- return status;
-}
-
#if defined(CONFIG_NFS_V4_1)
/*
* Decode BIND_CONN_TO_SESSION response
@@ -7458,7 +7373,6 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
PROC(SECINFO, enc_secinfo, dec_secinfo),
- PROC(FSID_PRESENT, enc_fsid_present, dec_fsid_present),
#if defined(CONFIG_NFS_V4_1)
PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
PROC(CREATE_SESSION, enc_create_session, dec_create_session),
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index fddbba2..a8f57c7 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -235,7 +235,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
};
int status = -ENOMEM;
- dprintk("NFS call create %pd\n", dentry);
+ dprintk("NFS call create %s\n", dentry->d_name.name);
data = nfs_alloc_createdata(dir, dentry, sattr);
if (data == NULL)
goto out;
@@ -265,7 +265,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
umode_t mode;
int status = -ENOMEM;
- dprintk("NFS call mknod %pd\n", dentry);
+ dprintk("NFS call mknod %s\n", dentry->d_name.name);
mode = sattr->ia_mode;
if (S_ISFIFO(mode)) {
@@ -423,7 +423,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
};
int status = -ENAMETOOLONG;
- dprintk("NFS call symlink %pd\n", dentry);
+ dprintk("NFS call symlink %s\n", dentry->d_name.name);
if (len > NFS2_MAXPATHLEN)
goto out;
@@ -462,7 +462,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
};
int status = -ENOMEM;
- dprintk("NFS call mkdir %pd\n", dentry);
+ dprintk("NFS call mkdir %s\n", dentry->d_name.name);
data = nfs_alloc_createdata(dir, dentry, sattr);
if (data == NULL)
goto out;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 910ed90..a03b9c6 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -497,8 +497,7 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
static const struct {
rpc_authflavor_t flavour;
const char *str;
- } sec_flavours[NFS_AUTH_INFO_MAX_FLAVORS] = {
- /* update NFS_AUTH_INFO_MAX_FLAVORS when this list changes! */
+ } sec_flavours[] = {
{ RPC_AUTH_NULL, "null" },
{ RPC_AUTH_UNIX, "sys" },
{ RPC_AUTH_GSS_KRB5, "krb5" },
@@ -924,7 +923,8 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
data->mount_server.port = NFS_UNSPEC_PORT;
data->nfs_server.port = NFS_UNSPEC_PORT;
data->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- data->selected_flavor = RPC_AUTH_MAXFLAVOR;
+ data->auth_flavors[0] = RPC_AUTH_MAXFLAVOR;
+ data->auth_flavor_len = 0;
data->minorversion = 0;
data->need_mount = true;
data->net = current->nsproxy->net_ns;
@@ -1019,51 +1019,12 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt)
}
}
-/*
- * Add 'flavor' to 'auth_info' if not already present.
- * Returns true if 'flavor' ends up in the list, false otherwise
- */
-static bool nfs_auth_info_add(struct nfs_auth_info *auth_info,
- rpc_authflavor_t flavor)
-{
- unsigned int i;
- unsigned int max_flavor_len = (sizeof(auth_info->flavors) /
- sizeof(auth_info->flavors[0]));
-
- /* make sure this flavor isn't already in the list */
- for (i = 0; i < auth_info->flavor_len; i++) {
- if (flavor == auth_info->flavors[i])
- return true;
- }
-
- if (auth_info->flavor_len + 1 >= max_flavor_len) {
- dfprintk(MOUNT, "NFS: too many sec= flavors\n");
- return false;
- }
-
- auth_info->flavors[auth_info->flavor_len++] = flavor;
- return true;
-}
-
-/*
- * Return true if 'match' is in auth_info or auth_info is empty.
- * Return false otherwise.
- */
-bool nfs_auth_info_match(const struct nfs_auth_info *auth_info,
- rpc_authflavor_t match)
+static void nfs_set_auth_parsed_mount_data(struct nfs_parsed_mount_data *data,
+ rpc_authflavor_t pseudoflavor)
{
- int i;
-
- if (!auth_info->flavor_len)
- return true;
-
- for (i = 0; i < auth_info->flavor_len; i++) {
- if (auth_info->flavors[i] == match)
- return true;
- }
- return false;
+ data->auth_flavors[0] = pseudoflavor;
+ data->auth_flavor_len = 1;
}
-EXPORT_SYMBOL_GPL(nfs_auth_info_match);
/*
* Parse the value of the 'sec=' option.
@@ -1073,55 +1034,49 @@ static int nfs_parse_security_flavors(char *value,
{
substring_t args[MAX_OPT_ARGS];
rpc_authflavor_t pseudoflavor;
- char *p;
dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value);
- while ((p = strsep(&value, ":")) != NULL) {
- switch (match_token(p, nfs_secflavor_tokens, args)) {
- case Opt_sec_none:
- pseudoflavor = RPC_AUTH_NULL;
- break;
- case Opt_sec_sys:
- pseudoflavor = RPC_AUTH_UNIX;
- break;
- case Opt_sec_krb5:
- pseudoflavor = RPC_AUTH_GSS_KRB5;
- break;
- case Opt_sec_krb5i:
- pseudoflavor = RPC_AUTH_GSS_KRB5I;
- break;
- case Opt_sec_krb5p:
- pseudoflavor = RPC_AUTH_GSS_KRB5P;
- break;
- case Opt_sec_lkey:
- pseudoflavor = RPC_AUTH_GSS_LKEY;
- break;
- case Opt_sec_lkeyi:
- pseudoflavor = RPC_AUTH_GSS_LKEYI;
- break;
- case Opt_sec_lkeyp:
- pseudoflavor = RPC_AUTH_GSS_LKEYP;
- break;
- case Opt_sec_spkm:
- pseudoflavor = RPC_AUTH_GSS_SPKM;
- break;
- case Opt_sec_spkmi:
- pseudoflavor = RPC_AUTH_GSS_SPKMI;
- break;
- case Opt_sec_spkmp:
- pseudoflavor = RPC_AUTH_GSS_SPKMP;
- break;
- default:
- dfprintk(MOUNT,
- "NFS: sec= option '%s' not recognized\n", p);
- return 0;
- }
-
- if (!nfs_auth_info_add(&mnt->auth_info, pseudoflavor))
- return 0;
+ switch (match_token(value, nfs_secflavor_tokens, args)) {
+ case Opt_sec_none:
+ pseudoflavor = RPC_AUTH_NULL;
+ break;
+ case Opt_sec_sys:
+ pseudoflavor = RPC_AUTH_UNIX;
+ break;
+ case Opt_sec_krb5:
+ pseudoflavor = RPC_AUTH_GSS_KRB5;
+ break;
+ case Opt_sec_krb5i:
+ pseudoflavor = RPC_AUTH_GSS_KRB5I;
+ break;
+ case Opt_sec_krb5p:
+ pseudoflavor = RPC_AUTH_GSS_KRB5P;
+ break;
+ case Opt_sec_lkey:
+ pseudoflavor = RPC_AUTH_GSS_LKEY;
+ break;
+ case Opt_sec_lkeyi:
+ pseudoflavor = RPC_AUTH_GSS_LKEYI;
+ break;
+ case Opt_sec_lkeyp:
+ pseudoflavor = RPC_AUTH_GSS_LKEYP;
+ break;
+ case Opt_sec_spkm:
+ pseudoflavor = RPC_AUTH_GSS_SPKM;
+ break;
+ case Opt_sec_spkmi:
+ pseudoflavor = RPC_AUTH_GSS_SPKMI;
+ break;
+ case Opt_sec_spkmp:
+ pseudoflavor = RPC_AUTH_GSS_SPKMP;
+ break;
+ default:
+ return 0;
}
+ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+ nfs_set_auth_parsed_mount_data(mnt, pseudoflavor);
return 1;
}
@@ -1614,7 +1569,7 @@ static int nfs_parse_mount_options(char *raw,
goto out_minorversion_mismatch;
if (mnt->options & NFS_OPTION_MIGRATION &&
- (mnt->version != 4 || mnt->minorversion != 0))
+ mnt->version != 4 && mnt->minorversion != 0)
goto out_migration_misuse;
/*
@@ -1668,14 +1623,12 @@ out_security_failure:
}
/*
- * Ensure that a specified authtype in args->auth_info is supported by
- * the server. Returns 0 and sets args->selected_flavor if it's ok, and
- * -EACCES if not.
+ * Ensure that the specified authtype in args->auth_flavors[0] is supported by
+ * the server. Returns 0 if it's ok, and -EACCES if not.
*/
-static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
+static int nfs_verify_authflavor(struct nfs_parsed_mount_data *args,
rpc_authflavor_t *server_authlist, unsigned int count)
{
- rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR;
unsigned int i;
/*
@@ -1687,20 +1640,17 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
* can be used.
*/
for (i = 0; i < count; i++) {
- flavor = server_authlist[i];
-
- if (nfs_auth_info_match(&args->auth_info, flavor) ||
- flavor == RPC_AUTH_NULL)
+ if (args->auth_flavors[0] == server_authlist[i] ||
+ server_authlist[i] == RPC_AUTH_NULL)
goto out;
}
- dfprintk(MOUNT,
- "NFS: specified auth flavors not supported by server\n");
+ dfprintk(MOUNT, "NFS: auth flavor %u not supported by server\n",
+ args->auth_flavors[0]);
return -EACCES;
out:
- args->selected_flavor = flavor;
- dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->selected_flavor);
+ dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->auth_flavors[0]);
return 0;
}
@@ -1788,10 +1738,9 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf
* Was a sec= authflavor specified in the options? First, verify
* whether the server supports it, and then just try to use it if so.
*/
- if (args->auth_info.flavor_len > 0) {
- status = nfs_verify_authflavors(args, authlist, authlist_len);
- dfprintk(MOUNT, "NFS: using auth flavor %u\n",
- args->selected_flavor);
+ if (args->auth_flavor_len > 0) {
+ status = nfs_verify_authflavor(args, authlist, authlist_len);
+ dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->auth_flavors[0]);
if (status)
return ERR_PTR(status);
return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
@@ -1820,7 +1769,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf
/* Fallthrough */
}
dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor);
- args->selected_flavor = flavor;
+ nfs_set_auth_parsed_mount_data(args, flavor);
server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
if (!IS_ERR(server))
return server;
@@ -1836,7 +1785,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf
/* Last chance! Try AUTH_UNIX */
dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX);
- args->selected_flavor = RPC_AUTH_UNIX;
+ nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX);
return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
}
@@ -2023,9 +1972,9 @@ static int nfs23_validate_mount_data(void *options,
args->bsize = data->bsize;
if (data->flags & NFS_MOUNT_SECFLAVOUR)
- args->selected_flavor = data->pseudoflavor;
+ nfs_set_auth_parsed_mount_data(args, data->pseudoflavor);
else
- args->selected_flavor = RPC_AUTH_UNIX;
+ nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX);
if (!args->nfs_server.hostname)
goto out_nomem;
@@ -2159,6 +2108,9 @@ static int nfs_validate_text_mount_data(void *options,
nfs_set_port(sap, &args->nfs_server.port, port);
+ if (args->auth_flavor_len > 1)
+ goto out_bad_auth;
+
return nfs_parse_devname(dev_name,
&args->nfs_server.hostname,
max_namelen,
@@ -2178,6 +2130,10 @@ out_invalid_transport_udp:
out_no_address:
dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
return -EINVAL;
+
+out_bad_auth:
+ dfprintk(MOUNT, "NFS: Too many RPC auth flavours specified\n");
+ return -EINVAL;
}
static int
@@ -2187,10 +2143,8 @@ nfs_compare_remount_data(struct nfs_server *nfss,
if (data->flags != nfss->flags ||
data->rsize != nfss->rsize ||
data->wsize != nfss->wsize ||
- data->version != nfss->nfs_client->rpc_ops->version ||
- data->minorversion != nfss->nfs_client->cl_minorversion ||
data->retrans != nfss->client->cl_timeout->to_retries ||
- data->selected_flavor != nfss->client->cl_auth->au_flavor ||
+ data->auth_flavors[0] != nfss->client->cl_auth->au_flavor ||
data->acregmin != nfss->acregmin / HZ ||
data->acregmax != nfss->acregmax / HZ ||
data->acdirmin != nfss->acdirmin / HZ ||
@@ -2235,8 +2189,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
data->rsize = nfss->rsize;
data->wsize = nfss->wsize;
data->retrans = nfss->client->cl_timeout->to_retries;
- data->selected_flavor = nfss->client->cl_auth->au_flavor;
- data->auth_info = nfss->auth_info;
+ nfs_set_auth_parsed_mount_data(data, nfss->client->cl_auth->au_flavor);
data->acregmin = nfss->acregmin / HZ;
data->acregmax = nfss->acregmax / HZ;
data->acdirmin = nfss->acdirmin / HZ;
@@ -2244,14 +2197,12 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ;
data->nfs_server.port = nfss->port;
data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen;
- data->version = nfsvers;
- data->minorversion = nfss->nfs_client->cl_minorversion;
memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr,
data->nfs_server.addrlen);
/* overwrite those values with any that were specified */
- error = -EINVAL;
- if (!nfs_parse_mount_options((char *)options, data))
+ error = nfs_parse_mount_options((char *)options, data);
+ if (error < 0)
goto out;
/*
@@ -2381,7 +2332,7 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n
goto Ebusy;
if (a->acdirmax != b->acdirmax)
goto Ebusy;
- if (b->auth_info.flavor_len > 0 &&
+ if (b->flags & NFS_MOUNT_SECFLAVOUR &&
clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
goto Ebusy;
return 1;
@@ -2579,7 +2530,6 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
mntroot = ERR_PTR(error);
goto error_splat_bdi;
}
- server->super = s;
}
if (!s->s_root) {
@@ -2763,9 +2713,9 @@ static int nfs4_validate_mount_data(void *options,
data->auth_flavours,
sizeof(pseudoflavor)))
return -EFAULT;
- args->selected_flavor = pseudoflavor;
+ nfs_set_auth_parsed_mount_data(args, pseudoflavor);
} else
- args->selected_flavor = RPC_AUTH_UNIX;
+ nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX);
c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
if (IS_ERR(c))
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 11d7894..bb939ed 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -493,15 +493,17 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
unsigned long long fileid;
struct dentry *sdentry;
struct rpc_task *task;
- int error = -EBUSY;
+ int error = -EIO;
- dfprintk(VFS, "NFS: silly-rename(%pd2, ct=%d)\n",
- dentry, d_count(dentry));
+ dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ d_count(dentry));
nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
/*
* We don't allow a dentry to be silly-renamed twice.
*/
+ error = -EBUSY;
if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
goto out;
@@ -520,8 +522,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
SILLYNAME_FILEID_LEN, fileid,
SILLYNAME_COUNTER_LEN, sillycounter);
- dfprintk(VFS, "NFS: trying to rename %pd to %s\n",
- dentry, silly);
+ dfprintk(VFS, "NFS: trying to rename %s to %s\n",
+ dentry->d_name.name, silly);
sdentry = lookup_one_len(silly, dentry->d_parent, slen);
/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c1d5482..ac1dc33 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -954,8 +954,10 @@ int nfs_updatepage(struct file *file, struct page *page,
nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
- dprintk("NFS: nfs_updatepage(%pD2 %d@%lld)\n",
- file, count, (long long)(page_file_offset(page) + offset));
+ dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n",
+ file->f_path.dentry->d_parent->d_name.name,
+ file->f_path.dentry->d_name.name, count,
+ (long long)(page_file_offset(page) + offset));
if (nfs_can_extend_write(file, page, inode)) {
count = max(count + offset, nfs_page_length(page));
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index f994e75..dc8f1ef 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -95,7 +95,7 @@ config NFSD_V4_SECURITY_LABEL
Smack policies on NFSv4 files, say N.
WARNING: there is still a chance of backwards-incompatible protocol changes.
- For now we recommend "Y" only for developers and testers.
+ For now we recommend "Y" only for developers and testers.
config NFSD_FAULT_INJECTION
bool "NFS server manual fault injection"
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 8513c59..5f38ea3 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -536,12 +536,16 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
if (err)
goto out3;
exp.ex_anon_uid= make_kuid(&init_user_ns, an_int);
+ if (!uid_valid(exp.ex_anon_uid))
+ goto out3;
/* anon gid */
err = get_int(&mesg, &an_int);
if (err)
goto out3;
exp.ex_anon_gid= make_kgid(&init_user_ns, an_int);
+ if (!gid_valid(exp.ex_anon_gid))
+ goto out3;
/* fsid */
err = get_int(&mesg, &an_int);
@@ -579,26 +583,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
exp.ex_uuid);
if (err)
goto out4;
- /*
- * No point caching this if it would immediately expire.
- * Also, this protects exportfs's dummy export from the
- * anon_uid/anon_gid checks:
- */
- if (exp.h.expiry_time < seconds_since_boot())
- goto out4;
- /*
- * For some reason exportfs has been passing down an
- * invalid (-1) uid & gid on the "dummy" export which it
- * uses to test export support. To make sure exportfs
- * sees errors from check_export we therefore need to
- * delay these checks till after check_export:
- */
- err = -EINVAL;
- if (!uid_valid(exp.ex_anon_uid))
- goto out4;
- if (!gid_valid(exp.ex_anon_gid))
- goto out4;
- err = 0;
}
expp = svc_export_lookup(&exp);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 9c271f4..e0a65a9 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -385,8 +385,8 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
status = vfs_rmdir(parent->d_inode, child);
if (status)
- printk("failed to remove client recovery directory %pd\n",
- child);
+ printk("failed to remove client recovery directory %s\n",
+ child->d_name.name);
/* Keep trying, success or failure: */
return 0;
}
@@ -410,15 +410,15 @@ out:
nfs4_release_reclaim(nn);
if (status)
printk("nfsd4: failed to purge old clients from recovery"
- " directory %pD\n", nn->rec_file);
+ " directory %s\n", nn->rec_file->f_path.dentry->d_name.name);
}
static int
load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
{
if (child->d_name.len != HEXDIR_LEN - 1) {
- printk("nfsd4: illegal name %pd in recovery directory\n",
- child);
+ printk("nfsd4: illegal name %s in recovery directory\n",
+ child->d_name.name);
/* Keep trying; maybe the others are OK: */
return 0;
}
@@ -437,7 +437,7 @@ nfsd4_recdir_load(struct net *net) {
status = nfsd4_list_rec_dir(load_recdir, nn);
if (status)
printk("nfsd4: failed loading clients from recovery"
- " directory %pD\n", nn->rec_file);
+ " directory %s\n", nn->rec_file->f_path.dentry->d_name.name);
return status;
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 105d6fa..0874998 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -402,16 +402,11 @@ static void remove_stid(struct nfs4_stid *s)
idr_remove(stateids, s->sc_stateid.si_opaque.so_id);
}
-static void nfs4_free_stid(struct kmem_cache *slab, struct nfs4_stid *s)
-{
- kmem_cache_free(slab, s);
-}
-
void
nfs4_put_delegation(struct nfs4_delegation *dp)
{
if (atomic_dec_and_test(&dp->dl_count)) {
- nfs4_free_stid(deleg_slab, &dp->dl_stid);
+ kmem_cache_free(deleg_slab, dp);
num_delegations--;
}
}
@@ -615,7 +610,7 @@ static void close_generic_stateid(struct nfs4_ol_stateid *stp)
static void free_generic_stateid(struct nfs4_ol_stateid *stp)
{
remove_stid(&stp->st_stid);
- nfs4_free_stid(stateid_slab, &stp->st_stid);
+ kmem_cache_free(stateid_slab, stp);
}
static void release_lock_stateid(struct nfs4_ol_stateid *stp)
@@ -673,6 +668,7 @@ static void unhash_open_stateid(struct nfs4_ol_stateid *stp)
static void release_open_stateid(struct nfs4_ol_stateid *stp)
{
unhash_open_stateid(stp);
+ unhash_stid(&stp->st_stid);
free_generic_stateid(stp);
}
@@ -694,6 +690,7 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo)
struct nfs4_ol_stateid *s = oo->oo_last_closed_stid;
if (s) {
+ unhash_stid(&s->st_stid);
free_generic_stateid(s);
oo->oo_last_closed_stid = NULL;
}
@@ -1130,11 +1127,6 @@ destroy_client(struct nfs4_client *clp)
dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
destroy_delegation(dp);
}
- list_splice_init(&clp->cl_revoked, &reaplist);
- while (!list_empty(&reaplist)) {
- dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
- destroy_revoked_delegation(dp);
- }
while (!list_empty(&clp->cl_openowners)) {
oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient);
release_openowner(oo);
@@ -3016,7 +3008,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f
return NULL;
locks_init_lock(fl);
fl->fl_lmops = &nfsd_lease_mng_ops;
- fl->fl_flags = FL_DELEG;
+ fl->fl_flags = FL_LEASE;
fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
fl->fl_end = OFFSET_MAX;
fl->fl_owner = (fl_owner_t)(dp->dl_file);
@@ -3162,7 +3154,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
return;
out_free:
- remove_stid(&dp->dl_stid);
+ unhash_stid(&dp->dl_stid);
nfs4_put_delegation(dp);
out_no_deleg:
open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
@@ -3851,8 +3843,9 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfs4_ol_stateid *stp;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- dprintk("NFSD: nfsd4_open_confirm on file %pd\n",
- cstate->current_fh.fh_dentry);
+ dprintk("NFSD: nfsd4_open_confirm on file %.*s\n",
+ (int)cstate->current_fh.fh_dentry->d_name.len,
+ cstate->current_fh.fh_dentry->d_name.name);
status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
if (status)
@@ -3929,8 +3922,9 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
struct nfs4_ol_stateid *stp;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- dprintk("NFSD: nfsd4_open_downgrade on file %pd\n",
- cstate->current_fh.fh_dentry);
+ dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n",
+ (int)cstate->current_fh.fh_dentry->d_name.len,
+ cstate->current_fh.fh_dentry->d_name.name);
/* We don't yet support WANT bits: */
if (od->od_deleg_want)
@@ -3986,8 +3980,9 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- dprintk("NFSD: nfsd4_close on file %pd\n",
- cstate->current_fh.fh_dentry);
+ dprintk("NFSD: nfsd4_close on file %.*s\n",
+ (int)cstate->current_fh.fh_dentry->d_name.len,
+ cstate->current_fh.fh_dentry->d_name.name);
nfs4_lock_state();
status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
@@ -4003,9 +3998,10 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfsd4_close_open_stateid(stp);
- if (cstate->minorversion)
+ if (cstate->minorversion) {
+ unhash_stid(&stp->st_stid);
free_generic_stateid(stp);
- else
+ } else
oo->oo_last_closed_stid = stp;
if (list_empty(&oo->oo_owner.so_stateids)) {
@@ -5126,6 +5122,7 @@ out_recovery:
return ret;
}
+/* should be called with the state lock held */
void
nfs4_state_shutdown_net(struct net *net)
{
@@ -5136,7 +5133,6 @@ nfs4_state_shutdown_net(struct net *net)
cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);
- nfs4_lock_state();
INIT_LIST_HEAD(&reaplist);
spin_lock(&recall_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
@@ -5151,7 +5147,6 @@ nfs4_state_shutdown_net(struct net *net)
nfsd4_client_tracking_exit(net);
nfs4_state_destroy_net(net);
- nfs4_unlock_state();
}
void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index ee7237f..d9454fe 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -141,8 +141,8 @@ xdr_error: \
static void next_decode_page(struct nfsd4_compoundargs *argp)
{
- argp->p = page_address(argp->pagelist[0]);
argp->pagelist++;
+ argp->p = page_address(argp->pagelist[0]);
if (argp->pagelen < PAGE_SIZE) {
argp->end = argp->p + (argp->pagelen>>2);
argp->pagelen = 0;
@@ -411,7 +411,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
label->data = kzalloc(dummy32 + 1, GFP_KERNEL);
if (!label->data)
return nfserr_jukebox;
- label->len = dummy32;
defer_free(argp, kfree, label->data);
memcpy(label->data, buf, dummy32);
}
@@ -946,16 +945,13 @@ static __be32
nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf)
{
DECODE_HEAD;
-
- if (argp->minorversion >= 1)
- return nfserr_notsupp;
-
+
status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid);
if (status)
return status;
READ_BUF(4);
READ32(open_conf->oc_seqid);
-
+
DECODE_TAIL;
}
@@ -995,14 +991,6 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
}
static __be32
-nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p)
-{
- if (argp->minorversion == 0)
- return nfs_ok;
- return nfserr_notsupp;
-}
-
-static __be32
nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
{
DECODE_HEAD;
@@ -1073,9 +1061,6 @@ nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid)
{
DECODE_HEAD;
- if (argp->minorversion >= 1)
- return nfserr_notsupp;
-
READ_BUF(sizeof(clientid_t));
COPYMEM(clientid, sizeof(clientid_t));
@@ -1126,9 +1111,6 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient
{
DECODE_HEAD;
- if (argp->minorversion >= 1)
- return nfserr_notsupp;
-
READ_BUF(NFS4_VERIFIER_SIZE);
COPYMEM(setclientid->se_verf.data, NFS4_VERIFIER_SIZE);
@@ -1155,9 +1137,6 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s
{
DECODE_HEAD;
- if (argp->minorversion >= 1)
- return nfserr_notsupp;
-
READ_BUF(8 + NFS4_VERIFIER_SIZE);
COPYMEM(&scd_c->sc_clientid, 8);
COPYMEM(&scd_c->sc_confirm, NFS4_VERIFIER_SIZE);
@@ -1229,7 +1208,6 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
len -= pages * PAGE_SIZE;
argp->p = (__be32 *)page_address(argp->pagelist[0]);
- argp->pagelist++;
argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE);
}
argp->p += XDR_QUADLEN(len);
@@ -1242,9 +1220,6 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel
{
DECODE_HEAD;
- if (argp->minorversion >= 1)
- return nfserr_notsupp;
-
READ_BUF(12);
COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t));
READ32(rlockowner->rl_owner.len);
@@ -1544,7 +1519,7 @@ static nfsd4_dec nfsd4_dec_ops[] = {
[OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm,
[OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade,
[OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh,
- [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh,
+ [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_noop,
[OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop,
[OP_READ] = (nfsd4_dec)nfsd4_decode_read,
[OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir,
@@ -1561,6 +1536,46 @@ static nfsd4_dec nfsd4_dec_ops[] = {
[OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify,
[OP_WRITE] = (nfsd4_dec)nfsd4_decode_write,
[OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner,
+};
+
+static nfsd4_dec nfsd41_dec_ops[] = {
+ [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access,
+ [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close,
+ [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit,
+ [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create,
+ [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn,
+ [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr,
+ [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_LINK] = (nfsd4_dec)nfsd4_decode_link,
+ [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock,
+ [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt,
+ [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku,
+ [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup,
+ [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify,
+ [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open,
+ [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade,
+ [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh,
+ [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_READ] = (nfsd4_dec)nfsd4_decode_read,
+ [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir,
+ [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove,
+ [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename,
+ [OP_RENEW] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo,
+ [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr,
+ [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_SETCLIENTID_CONFIRM]= (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify,
+ [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write,
+ [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_notsupp,
/* new operations for NFSv4.1 */
[OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl,
@@ -1584,53 +1599,24 @@ static nfsd4_dec nfsd4_dec_ops[] = {
[OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
};
-static inline bool
-nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op)
-{
- if (op->opnum < FIRST_NFS4_OP)
- return false;
- else if (argp->minorversion == 0 && op->opnum > LAST_NFS40_OP)
- return false;
- else if (argp->minorversion == 1 && op->opnum > LAST_NFS41_OP)
- return false;
- else if (argp->minorversion == 2 && op->opnum > LAST_NFS42_OP)
- return false;
- return true;
-}
+struct nfsd4_minorversion_ops {
+ nfsd4_dec *decoders;
+ int nops;
+};
-/*
- * Return a rough estimate of the maximum possible reply size. Note the
- * estimate includes rpc headers so is meant to be passed to
- * svc_reserve, not svc_reserve_auth.
- *
- * Also note the current compound encoding permits only one operation to
- * use pages beyond the first one, so the maximum possible length is the
- * maximum over these values, not the sum.
- */
-static int nfsd4_max_reply(u32 opnum)
-{
- switch (opnum) {
- case OP_READLINK:
- case OP_READDIR:
- /*
- * Both of these ops take a single page for data and put
- * the head and tail in another page:
- */
- return 2 * PAGE_SIZE;
- case OP_READ:
- return INT_MAX;
- default:
- return PAGE_SIZE;
- }
-}
+static struct nfsd4_minorversion_ops nfsd4_minorversion[] = {
+ [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) },
+ [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) },
+ [2] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) },
+};
static __be32
nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
{
DECODE_HEAD;
struct nfsd4_op *op;
+ struct nfsd4_minorversion_ops *ops;
bool cachethis = false;
- int max_reply = PAGE_SIZE;
int i;
READ_BUF(4);
@@ -1654,9 +1640,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
}
}
- if (argp->minorversion > NFSD_SUPPORTED_MINOR_VERSION)
+ if (argp->minorversion >= ARRAY_SIZE(nfsd4_minorversion))
argp->opcnt = 0;
+ ops = &nfsd4_minorversion[argp->minorversion];
for (i = 0; i < argp->opcnt; i++) {
op = &argp->ops[i];
op->replay = NULL;
@@ -1664,8 +1651,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
READ_BUF(4);
READ32(op->opnum);
- if (nfsd4_opnum_in_range(argp, op))
- op->status = nfsd4_dec_ops[op->opnum](argp, &op->u);
+ if (op->opnum >= FIRST_NFS4_OP && op->opnum <= LAST_NFS4_OP)
+ op->status = ops->decoders[op->opnum](argp, &op->u);
else {
op->opnum = OP_ILLEGAL;
op->status = nfserr_op_illegal;
@@ -1680,14 +1667,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
* op in the compound wants to be cached:
*/
cachethis |= nfsd4_cache_this_op(op);
-
- max_reply = max(max_reply, nfsd4_max_reply(op->opnum));
}
/* Sessions make the DRC unnecessary: */
if (argp->minorversion)
cachethis = false;
- if (max_reply != INT_MAX)
- svc_reserve(argp->rqstp, max_reply);
argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
DECODE_TAIL;
@@ -2392,7 +2375,7 @@ out_acl:
if (bmval0 & FATTR4_WORD0_MAXFILESIZE) {
if ((buflen -= 8) < 0)
goto out_resource;
- WRITE64(exp->ex_path.mnt->mnt_sb->s_maxbytes);
+ WRITE64(~(u64)0);
}
if (bmval0 & FATTR4_WORD0_MAXLINK) {
if ((buflen -= 4) < 0)
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 3c37b16..814afaa 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -47,7 +47,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
tdentry = parent;
}
if (tdentry != exp->ex_path.dentry)
- dprintk("nfsd_acceptable failed at %p %pd\n", tdentry, tdentry);
+ dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name);
rv = (tdentry == exp->ex_path.dentry);
dput(tdentry);
return rv;
@@ -253,8 +253,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
if (S_ISDIR(dentry->d_inode->i_mode) &&
(dentry->d_flags & DCACHE_DISCONNECTED)) {
- printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %pd2\n",
- dentry);
+ printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
}
fhp->fh_dentry = dentry;
@@ -361,9 +361,10 @@ skip_pseudoflavor_check:
error = nfsd_permission(rqstp, exp, dentry, access);
if (error) {
- dprintk("fh_verify: %pd2 permission failure, "
+ dprintk("fh_verify: %s/%s permission failure, "
"acc=%x, error=%d\n",
- dentry,
+ dentry->d_parent->d_name.name,
+ dentry->d_name.name,
access, ntohl(error));
}
out:
@@ -513,13 +514,14 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
*/
struct inode * inode = dentry->d_inode;
+ struct dentry *parent = dentry->d_parent;
__u32 *datap;
dev_t ex_dev = exp_sb(exp)->s_dev;
- dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n",
+ dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
MAJOR(ex_dev), MINOR(ex_dev),
(long) exp->ex_path.dentry->d_inode->i_ino,
- dentry,
+ parent->d_name.name, dentry->d_name.name,
(inode ? inode->i_ino : 0));
/* Choose filehandle version and fsid type based on
@@ -532,13 +534,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
fh_put(ref_fh);
if (fhp->fh_locked || fhp->fh_dentry) {
- printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n",
- dentry);
+ printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n",
+ parent->d_name.name, dentry->d_name.name);
}
if (fhp->fh_maxsize < NFS_FHSIZE)
- printk(KERN_ERR "fh_compose: called with maxsize %d! %pd2\n",
+ printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n",
fhp->fh_maxsize,
- dentry);
+ parent->d_name.name, dentry->d_name.name);
fhp->fh_dentry = dget(dentry); /* our internal copy */
fhp->fh_export = exp;
@@ -598,20 +600,22 @@ fh_update(struct svc_fh *fhp)
_fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle);
} else {
if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT)
- return 0;
+ goto out;
_fh_update(fhp, fhp->fh_export, dentry);
if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID)
return nfserr_opnotsupp;
}
+out:
return 0;
+
out_bad:
printk(KERN_ERR "fh_update: fh not verified!\n");
- return nfserr_serverfault;
+ goto out;
out_negative:
- printk(KERN_ERR "fh_update: %pd2 still negative!\n",
- dentry);
- return nfserr_serverfault;
+ printk(KERN_ERR "fh_update: %s/%s still negative!\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
+ goto out;
}
/*
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 4775bc4..e5e6707 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -173,8 +173,8 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
BUG_ON(!dentry);
if (fhp->fh_locked) {
- printk(KERN_WARNING "fh_lock: %pd2 already locked!\n",
- dentry);
+ printk(KERN_WARNING "fh_lock: %s/%s already locked!\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
return;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7eea63c..c827acb 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -298,12 +298,41 @@ commit_metadata(struct svc_fh *fhp)
}
/*
- * Go over the attributes and take care of the small differences between
- * NFS semantics and what Linux expects.
+ * Set various file attributes.
+ * N.B. After this call fhp needs an fh_put
*/
-static void
-nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
+__be32
+nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
+ int check_guard, time_t guardtime)
{
+ struct dentry *dentry;
+ struct inode *inode;
+ int accmode = NFSD_MAY_SATTR;
+ umode_t ftype = 0;
+ __be32 err;
+ int host_err;
+ int size_change = 0;
+
+ if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
+ accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
+ if (iap->ia_valid & ATTR_SIZE)
+ ftype = S_IFREG;
+
+ /* Get inode */
+ err = fh_verify(rqstp, fhp, ftype, accmode);
+ if (err)
+ goto out;
+
+ dentry = fhp->fh_dentry;
+ inode = dentry->d_inode;
+
+ /* Ignore any mode updates on symlinks */
+ if (S_ISLNK(inode->i_mode))
+ iap->ia_valid &= ~ATTR_MODE;
+
+ if (!iap->ia_valid)
+ goto out;
+
/*
* NFSv2 does not differentiate between "set-[ac]time-to-now"
* which only requires access, and "set-[ac]time-to-X" which
@@ -313,7 +342,8 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
* convert to "set to now" instead of "set to explicit time"
*
* We only call inode_change_ok as the last test as technically
- * it is not an interface that we should be using.
+ * it is not an interface that we should be using. It is only
+ * valid if the filesystem does not define its own i_op->setattr.
*/
#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
#define MAX_TOUCH_TIME_ERROR (30*60)
@@ -339,6 +369,30 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
iap->ia_valid &= ~BOTH_TIME_SET;
}
}
+
+ /*
+ * The size case is special.
+ * It changes the file as well as the attributes.
+ */
+ if (iap->ia_valid & ATTR_SIZE) {
+ if (iap->ia_size < inode->i_size) {
+ err = nfsd_permission(rqstp, fhp->fh_export, dentry,
+ NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
+ if (err)
+ goto out;
+ }
+
+ host_err = get_write_access(inode);
+ if (host_err)
+ goto out_nfserr;
+
+ size_change = 1;
+ host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
+ if (host_err) {
+ put_write_access(inode);
+ goto out_nfserr;
+ }
+ }
/* sanitize the mode change */
if (iap->ia_valid & ATTR_MODE) {
@@ -361,111 +415,32 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
}
}
-}
-static __be32
-nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct iattr *iap)
-{
- struct inode *inode = fhp->fh_dentry->d_inode;
- int host_err;
-
- if (iap->ia_size < inode->i_size) {
- __be32 err;
-
- err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
- NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE);
- if (err)
- return err;
- }
-
- host_err = get_write_access(inode);
- if (host_err)
- goto out_nfserrno;
-
- host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
- if (host_err)
- goto out_put_write_access;
- return 0;
-
-out_put_write_access:
- put_write_access(inode);
-out_nfserrno:
- return nfserrno(host_err);
-}
-
-/*
- * Set various file attributes. After this call fhp needs an fh_put.
- */
-__be32
-nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
- int check_guard, time_t guardtime)
-{
- struct dentry *dentry;
- struct inode *inode;
- int accmode = NFSD_MAY_SATTR;
- umode_t ftype = 0;
- __be32 err;
- int host_err;
- int size_change = 0;
-
- if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
- accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
- if (iap->ia_valid & ATTR_SIZE)
- ftype = S_IFREG;
-
- /* Get inode */
- err = fh_verify(rqstp, fhp, ftype, accmode);
- if (err)
- goto out;
-
- dentry = fhp->fh_dentry;
- inode = dentry->d_inode;
-
- /* Ignore any mode updates on symlinks */
- if (S_ISLNK(inode->i_mode))
- iap->ia_valid &= ~ATTR_MODE;
-
- if (!iap->ia_valid)
- goto out;
-
- nfsd_sanitize_attrs(inode, iap);
-
- /*
- * The size case is special, it changes the file in addition to the
- * attributes.
- */
- if (iap->ia_valid & ATTR_SIZE) {
- err = nfsd_get_write_access(rqstp, fhp, iap);
- if (err)
- goto out;
- size_change = 1;
- }
+ /* Change the attributes. */
iap->ia_valid |= ATTR_CTIME;
- if (check_guard && guardtime != inode->i_ctime.tv_sec) {
- err = nfserr_notsync;
- goto out_put_write_access;
- }
-
- host_err = nfsd_break_lease(inode);
- if (host_err)
- goto out_put_write_access_nfserror;
-
- fh_lock(fhp);
- host_err = notify_change(dentry, iap, NULL);
- fh_unlock(fhp);
+ err = nfserr_notsync;
+ if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
+ host_err = nfsd_break_lease(inode);
+ if (host_err)
+ goto out_nfserr;
+ fh_lock(fhp);
-out_put_write_access_nfserror:
- err = nfserrno(host_err);
-out_put_write_access:
+ host_err = notify_change(dentry, iap);
+ err = nfserrno(host_err);
+ fh_unlock(fhp);
+ }
if (size_change)
put_write_access(inode);
if (!err)
commit_metadata(fhp);
out:
return err;
+
+out_nfserr:
+ err = nfserrno(host_err);
+ goto out;
}
#if defined(CONFIG_NFSD_V2_ACL) || \
@@ -1013,11 +988,7 @@ static void kill_suid(struct dentry *dentry)
ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
mutex_lock(&dentry->d_inode->i_mutex);
- /*
- * Note we call this on write, so notify_change will not
- * encounter any conflicting delegations:
- */
- notify_change(dentry, &ia, NULL);
+ notify_change(dentry, &ia);
mutex_unlock(&dentry->d_inode->i_mutex);
}
@@ -1346,8 +1317,9 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (!fhp->fh_locked) {
/* not actually possible */
printk(KERN_ERR
- "nfsd_create: parent %pd2 not locked!\n",
- dentry);
+ "nfsd_create: parent %s/%s not locked!\n",
+ dentry->d_parent->d_name.name,
+ dentry->d_name.name);
err = nfserr_io;
goto out;
}
@@ -1357,8 +1329,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
*/
err = nfserr_exist;
if (dchild->d_inode) {
- dprintk("nfsd_create: dentry %pd/%pd not negative!\n",
- dentry, dchild);
+ dprintk("nfsd_create: dentry %s/%s not negative!\n",
+ dentry->d_name.name, dchild->d_name.name);
goto out;
}
@@ -1765,7 +1737,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
err = nfserrno(host_err);
goto out_dput;
}
- host_err = vfs_link(dold, dirp, dnew, NULL);
+ host_err = vfs_link(dold, dirp, dnew);
if (!host_err) {
err = nfserrno(commit_metadata(ffhp));
if (!err)
@@ -1866,7 +1838,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
if (host_err)
goto out_dput_new;
}
- host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
+ host_err = vfs_rename(fdir, odentry, tdir, ndentry);
if (!host_err) {
host_err = commit_metadata(tfhp);
if (!host_err)
@@ -1939,7 +1911,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
if (host_err)
goto out_put;
if (type != S_IFDIR)
- host_err = vfs_unlink(dirp, rdentry, NULL);
+ host_err = vfs_unlink(dirp, rdentry);
else
host_err = vfs_rmdir(dirp, rdentry);
if (!host_err)
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index ffb9b36..2778b02 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -55,7 +55,7 @@
*
* Return 1 if the attributes match and 0 if not.
*
- * NOTE: This function runs with the inode_hash_lock spin lock held so it is not
+ * NOTE: This function runs with the inode->i_lock spin lock held so it is not
* allowed to sleep.
*/
int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index dc7411f..17e6bdd 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1025,7 +1025,7 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
for(i = count; i < (num_got + count); i++) {
bhs[i] = sb_getblk(osb->sb, first_blkno);
if (bhs[i] == NULL) {
- status = -ENOMEM;
+ status = -EIO;
mlog_errno(status);
goto bail;
}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index aeb44e8..f37d3c0 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -80,7 +80,6 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
le32_to_cpu(fe->i_clusters))) {
- err = -ENOMEM;
mlog(ML_ERROR, "block offset is outside the allocated size: "
"%llu\n", (unsigned long long)iblock);
goto bail;
@@ -93,7 +92,6 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
iblock;
buffer_cache_bh = sb_getblk(osb->sb, blkno);
if (!buffer_cache_bh) {
- err = -ENOMEM;
mlog(ML_ERROR, "couldn't getblock for symlink!\n");
goto bail;
}
@@ -594,11 +592,26 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
ocfs2_rw_unlock(inode, level);
}
+/*
+ * ocfs2_invalidatepage() and ocfs2_releasepage() are shamelessly stolen
+ * from ext3. PageChecked() bits have been removed as OCFS2 does not
+ * do journalled data.
+ */
+static void ocfs2_invalidatepage(struct page *page, unsigned int offset,
+ unsigned int length)
+{
+ journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal;
+
+ jbd2_journal_invalidatepage(journal, page, offset, length);
+}
+
static int ocfs2_releasepage(struct page *page, gfp_t wait)
{
+ journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal;
+
if (!page_has_buffers(page))
return 0;
- return try_to_free_buffers(page);
+ return jbd2_journal_try_to_free_buffers(journal, page, wait);
}
static ssize_t ocfs2_direct_IO(int rw,
@@ -1789,7 +1802,8 @@ try_again:
data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
credits = ocfs2_calc_extend_credits(inode->i_sb,
- &di->id2.i_list);
+ &di->id2.i_list,
+ clusters_to_alloc);
}
@@ -1883,14 +1897,10 @@ out_commit:
out:
ocfs2_free_write_ctxt(wc);
- if (data_ac) {
+ if (data_ac)
ocfs2_free_alloc_context(data_ac);
- data_ac = NULL;
- }
- if (meta_ac) {
+ if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
- meta_ac = NULL;
- }
if (ret == -ENOSPC && try_free) {
/*
@@ -2077,7 +2087,7 @@ const struct address_space_operations ocfs2_aops = {
.write_end = ocfs2_write_end,
.bmap = ocfs2_bmap,
.direct_IO = ocfs2_direct_IO,
- .invalidatepage = block_invalidatepage,
+ .invalidatepage = ocfs2_invalidatepage,
.releasepage = ocfs2_releasepage,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 5b704c6..5d18ad1 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -115,7 +115,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
if (bhs[i] == NULL) {
bhs[i] = sb_getblk(osb->sb, block++);
if (bhs[i] == NULL) {
- status = -ENOMEM;
+ status = -EIO;
mlog_errno(status);
goto bail;
}
@@ -214,7 +214,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
bhs[i] = sb_getblk(sb, block++);
if (bhs[i] == NULL) {
ocfs2_metadata_cache_io_unlock(ci);
- status = -ENOMEM;
+ status = -EIO;
mlog_errno(status);
goto bail;
}
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 73920ff..363f0dc 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -35,7 +35,6 @@
#include <linux/time.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
-#include <linux/bitmap.h>
#include "heartbeat.h"
#include "tcp.h"
@@ -283,6 +282,15 @@ struct o2hb_bio_wait_ctxt {
int wc_error;
};
+static int o2hb_pop_count(void *map, int count)
+{
+ int i = -1, pop = 0;
+
+ while ((i = find_next_bit(map, count, i + 1)) < count)
+ pop++;
+ return pop;
+}
+
static void o2hb_write_timeout(struct work_struct *work)
{
int failed, quorum;
@@ -299,9 +307,9 @@ static void o2hb_write_timeout(struct work_struct *work)
spin_lock_irqsave(&o2hb_live_lock, flags);
if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
set_bit(reg->hr_region_num, o2hb_failed_region_bitmap);
- failed = bitmap_weight(o2hb_failed_region_bitmap,
+ failed = o2hb_pop_count(&o2hb_failed_region_bitmap,
O2NM_MAX_REGIONS);
- quorum = bitmap_weight(o2hb_quorum_region_bitmap,
+ quorum = o2hb_pop_count(&o2hb_quorum_region_bitmap,
O2NM_MAX_REGIONS);
spin_unlock_irqrestore(&o2hb_live_lock, flags);
@@ -757,7 +765,7 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg)
* If global heartbeat active, unpin all regions if the
* region count > CUT_OFF
*/
- if (bitmap_weight(o2hb_quorum_region_bitmap,
+ if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
o2hb_region_unpin(NULL);
unlock:
@@ -946,9 +954,23 @@ out:
return changed;
}
-static int o2hb_highest_node(unsigned long *nodes, int numbits)
+/* This could be faster if we just implemented a find_last_bit, but I
+ * don't think the circumstances warrant it. */
+static int o2hb_highest_node(unsigned long *nodes,
+ int numbits)
{
- return find_last_bit(nodes, numbits);
+ int highest, node;
+
+ highest = numbits;
+ node = -1;
+ while ((node = find_next_bit(nodes, numbits, node + 1)) != -1) {
+ if (node >= numbits)
+ break;
+
+ highest = node;
+ }
+
+ return highest;
}
static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
@@ -1807,7 +1829,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
live_threshold = O2HB_LIVE_THRESHOLD;
if (o2hb_global_heartbeat_active()) {
spin_lock(&o2hb_live_lock);
- if (bitmap_weight(o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1)
+ if (o2hb_pop_count(&o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1)
live_threshold <<= 1;
spin_unlock(&o2hb_live_lock);
}
@@ -2158,7 +2180,7 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
if (!o2hb_dependent_users)
goto unlock;
- if (bitmap_weight(o2hb_quorum_region_bitmap,
+ if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
o2hb_region_pin(NULL);
@@ -2458,7 +2480,7 @@ static int o2hb_region_inc_user(const char *region_uuid)
if (o2hb_dependent_users > 1)
goto unlock;
- if (bitmap_weight(o2hb_quorum_region_bitmap,
+ if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
ret = o2hb_region_pin(NULL);
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 2260fb9..baa2b9e 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -199,8 +199,7 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
#define mlog_errno(st) do { \
int _st = (st); \
if (_st != -ERESTARTSYS && _st != -EINTR && \
- _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \
- _st != -EDQUOT) \
+ _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC) \
mlog(ML_ERROR, "status = %lld\n", (long long)_st); \
} while (0)
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 91a7e85..30544ce 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2349,7 +2349,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
dx_root_bh = sb_getblk(osb->sb, dr_blkno);
if (dx_root_bh == NULL) {
- ret = -ENOMEM;
+ ret = -EIO;
goto out;
}
ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dx_root_bh);
@@ -2422,7 +2422,7 @@ static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb,
for (i = 0; i < num_dx_leaves; i++) {
bh = sb_getblk(osb->sb, start_blk + i);
if (bh == NULL) {
- ret = -ENOMEM;
+ ret = -EIO;
goto out;
}
dx_leaves[i] = bh;
@@ -2929,7 +2929,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
dirdata_bh = sb_getblk(sb, blkno);
if (!dirdata_bh) {
- ret = -ENOMEM;
+ ret = -EIO;
mlog_errno(ret);
goto out_commit;
}
@@ -3159,7 +3159,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
*new_bh = sb_getblk(sb, p_blkno);
if (!*new_bh) {
- status = -ENOMEM;
+ status = -EIO;
mlog_errno(status);
goto bail;
}
@@ -3284,7 +3284,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
if (ocfs2_dir_resv_allowed(osb))
data_ac->ac_resv = &OCFS2_I(dir)->ip_la_data_resv;
- credits = ocfs2_calc_extend_credits(sb, el);
+ credits = ocfs2_calc_extend_credits(sb, el, 1);
} else {
spin_unlock(&OCFS2_I(dir)->ip_lock);
credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
@@ -3716,7 +3716,7 @@ static int ocfs2_dx_dir_rebalance_credits(struct ocfs2_super *osb,
{
int credits = ocfs2_clusters_to_blocks(osb->sb, 2);
- credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list);
+ credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list, 1);
credits += ocfs2_quota_trans_credits(osb->sb);
return credits;
}
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index af3f7aa..cf0f103 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1885,10 +1885,8 @@ ok:
* up nodes that this node contacted */
while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES,
nn+1)) < O2NM_MAX_NODES) {
- if (nn != dlm->node_num && nn != assert->node_idx) {
+ if (nn != dlm->node_num && nn != assert->node_idx)
master_request = 1;
- break;
- }
}
}
mle->master = assert->node_idx;
@@ -2356,10 +2354,6 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
assert_spin_locked(&res->spinlock);
- /* delay migration when the lockres is in MIGRATING state */
- if (res->state & DLM_LOCK_RES_MIGRATING)
- return 0;
-
if (res->owner != dlm->node_num)
return 0;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 7035af0..0b5adca 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1886,13 +1886,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
if (ml->type == LKM_NLMODE)
goto skip_lvb;
- /*
- * If the lock is in the blocked list it can't have a valid lvb,
- * so skip it
- */
- if (ml->list == DLM_BLOCKED_LIST)
- goto skip_lvb;
-
if (!dlm_lvb_is_empty(mres->lvb)) {
if (lksb->flags & DLM_LKSB_PUT_LVB) {
/* other node was trying to update
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 3407b2c..3a44a64 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1304,7 +1304,7 @@ static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
{
wait_for_completion(&mw->mw_complete);
/* Re-arm the completion in case we want to wait on it again */
- reinit_completion(&mw->mw_complete);
+ INIT_COMPLETION(mw->mw_complete);
return mw->mw_status;
}
@@ -1355,7 +1355,7 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
else
ret = mw->mw_status;
/* Re-arm the completion in case we want to wait on it again */
- reinit_completion(&mw->mw_complete);
+ INIT_COMPLETION(mw->mw_complete);
return ret;
}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6fff128..d71903c 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -580,7 +580,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
int did_quota = 0;
/*
- * Unwritten extent only exists for file systems which
+ * This function only exists for file systems which don't
* support holes.
*/
BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
@@ -603,7 +603,8 @@ restart_all:
goto leave;
}
- credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list);
+ credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list,
+ clusters_to_add);
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index f29a90f..f87f9bd 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -386,9 +386,19 @@ static int ocfs2_read_locked_inode(struct inode *inode,
u32 generation = 0;
status = -EINVAL;
+ if (inode == NULL || inode->i_sb == NULL) {
+ mlog(ML_ERROR, "bad inode\n");
+ return status;
+ }
sb = inode->i_sb;
osb = OCFS2_SB(sb);
+ if (!args) {
+ mlog(ML_ERROR, "bad inode args\n");
+ make_bad_inode(inode);
+ return status;
+ }
+
/*
* To improve performance of cold-cache inode stats, we take
* the cluster lock here if possible.
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 9ff4e8c..0b479ba 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -524,7 +524,8 @@ static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb)
* the result may be wrong.
*/
static inline int ocfs2_calc_extend_credits(struct super_block *sb,
- struct ocfs2_extent_list *root_el)
+ struct ocfs2_extent_list *root_el,
+ u32 bits_wanted)
{
int bitmap_blocks, sysfile_bitmap_blocks, extent_blocks;
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 631a982..3d3f3c8 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -201,7 +201,8 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode,
}
}
- *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el);
+ *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el,
+ clusters_to_move + 2);
mlog(0, "reserve metadata_blocks: %d, data_clusters: %u, credits: %d\n",
extra_blocks, clusters_to_move, *credits);
@@ -1066,10 +1067,8 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
if (status)
return status;
- if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) {
- status = -EPERM;
+ if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE))
goto out_drop;
- }
if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
status = -EPERM;
@@ -1091,10 +1090,8 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
goto out_free;
}
- if (range.me_start > i_size_read(inode)) {
- status = -EINVAL;
+ if (range.me_start > i_size_read(inode))
goto out_free;
- }
if (range.me_start + range.me_len > i_size_read(inode))
range.me_len = i_size_read(inode) - range.me_start;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 4f791f6..be3f867 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -489,7 +489,7 @@ static int __ocfs2_mknod_locked(struct inode *dir,
*new_fe_bh = sb_getblk(osb->sb, fe_blkno);
if (!*new_fe_bh) {
- status = -ENOMEM;
+ status = -EIO;
mlog_errno(status);
goto leave;
}
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 55767e1..bf4dfc1 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -612,11 +612,6 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
}
new_bh = sb_getblk(inode->i_sb, first_blkno);
- if (!new_bh) {
- ret = -ENOMEM;
- mlog_errno(ret);
- goto out_commit;
- }
ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh);
ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh,
@@ -1315,7 +1310,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
new_bh = sb_getblk(sb, blkno);
if (new_bh == NULL) {
- ret = -ENOMEM;
+ ret = -EIO;
mlog_errno(ret);
goto out;
}
@@ -1566,7 +1561,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
new_bh = sb_getblk(sb, blkno);
if (new_bh == NULL) {
- ret = -ENOMEM;
+ ret = -EIO;
mlog_errno(ret);
goto out;
}
@@ -2507,7 +2502,8 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
*meta_add += ocfs2_extend_meta_needed(et.et_root_el);
*credits += ocfs2_calc_extend_credits(sb,
- et.et_root_el);
+ et.et_root_el,
+ ref_blocks);
} else {
*credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
*meta_add += 1;
@@ -2878,7 +2874,8 @@ static int ocfs2_lock_refcount_allocators(struct super_block *sb,
meta_add =
ocfs2_extend_meta_needed(et->et_root_el);
- *credits += ocfs2_calc_extend_credits(sb, et->et_root_el);
+ *credits += ocfs2_calc_extend_credits(sb, et->et_root_el,
+ num_clusters + 2);
ret = ocfs2_calc_refcount_meta_credits(sb, ref_ci, ref_root_bh,
p_cluster, num_clusters,
@@ -3034,7 +3031,7 @@ int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
for (i = 0; i < blocks; i++, old_block++, new_block++) {
new_bh = sb_getblk(osb->sb, new_block);
if (new_bh == NULL) {
- ret = -ENOMEM;
+ ret = -EIO;
mlog_errno(ret);
break;
}
@@ -3628,7 +3625,8 @@ int ocfs2_refcounted_xattr_delete_need(struct inode *inode,
ocfs2_init_refcount_extent_tree(&et, ref_ci, ref_root_bh);
*credits += ocfs2_calc_extend_credits(inode->i_sb,
- et.et_root_el);
+ et.et_root_el,
+ ref_blocks);
}
out:
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 822ebc1..ec55add 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -469,7 +469,6 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
struct ocfs2_chain_list *cl;
struct ocfs2_chain_rec *cr;
u16 cl_bpc;
- u64 bg_ptr;
if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
return -EROFS;
@@ -514,7 +513,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh);
if (ret) {
mlog_errno(ret);
- goto out_free_group_bh;
+ goto out_unlock;
}
trace_ocfs2_group_add((unsigned long long)input->group,
@@ -524,7 +523,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
if (IS_ERR(handle)) {
mlog_errno(PTR_ERR(handle));
ret = -EINVAL;
- goto out_free_group_bh;
+ goto out_unlock;
}
cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
@@ -539,14 +538,12 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
}
group = (struct ocfs2_group_desc *)group_bh->b_data;
- bg_ptr = le64_to_cpu(group->bg_next_group);
group->bg_next_group = cr->c_blkno;
ocfs2_journal_dirty(handle, group_bh);
ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode),
main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE);
if (ret < 0) {
- group->bg_next_group = cpu_to_le64(bg_ptr);
mlog_errno(ret);
goto out_commit;
}
@@ -577,11 +574,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
out_commit:
ocfs2_commit_trans(osb, handle);
-
-out_free_group_bh:
- brelse(group_bh);
-
out_unlock:
+ brelse(group_bh);
brelse(main_bm_bh);
ocfs2_inode_unlock(main_bm_inode, 1);
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index cb7ec0b..39abf89 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -643,7 +643,7 @@ error:
#define FS_OCFS2_NM 1
-static struct ctl_table ocfs2_nm_table[] = {
+static ctl_table ocfs2_nm_table[] = {
{
.procname = "hb_ctl_path",
.data = ocfs2_hb_ctl_path,
@@ -654,7 +654,7 @@ static struct ctl_table ocfs2_nm_table[] = {
{ }
};
-static struct ctl_table ocfs2_mod_table[] = {
+static ctl_table ocfs2_mod_table[] = {
{
.procname = "nm",
.data = NULL,
@@ -665,7 +665,7 @@ static struct ctl_table ocfs2_mod_table[] = {
{ }
};
-static struct ctl_table ocfs2_kern_table[] = {
+static ctl_table ocfs2_kern_table[] = {
{
.procname = "ocfs2",
.data = NULL,
@@ -676,7 +676,7 @@ static struct ctl_table ocfs2_kern_table[] = {
{ }
};
-static struct ctl_table ocfs2_root_table[] = {
+static ctl_table ocfs2_root_table[] = {
{
.procname = "fs",
.data = NULL,
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 2c91452..5397c07 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -481,7 +481,7 @@ ocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle,
bg_bh = sb_getblk(osb->sb, bg_blkno);
if (!bg_bh) {
- status = -ENOMEM;
+ status = -EIO;
mlog_errno(status);
goto bail;
}
@@ -661,7 +661,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle,
bg_bh = sb_getblk(osb->sb, bg_blkno);
if (!bg_bh) {
- status = -ENOMEM;
+ status = -EIO;
mlog_errno(status);
goto bail;
}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index c414929..d4e81e4 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1848,8 +1848,8 @@ static int ocfs2_get_sector(struct super_block *sb,
*bh = sb_getblk(sb, block);
if (!*bh) {
- mlog_errno(-ENOMEM);
- return -ENOMEM;
+ mlog_errno(-EIO);
+ return -EIO;
}
lock_buffer(*bh);
if (!buffer_dirty(*bh))
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index f0a1326..6ce0686 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -377,7 +377,7 @@ static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
xb_blkno + i);
if (!bucket->bu_bhs[i]) {
- rc = -ENOMEM;
+ rc = -EIO;
mlog_errno(rc);
break;
}
@@ -754,7 +754,8 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
BUG_ON(why == RESTART_META);
credits = ocfs2_calc_extend_credits(inode->i_sb,
- &vb->vb_xv->xr_list);
+ &vb->vb_xv->xr_list,
+ clusters_to_add);
status = ocfs2_extend_trans(handle, credits);
if (status < 0) {
status = -ENOMEM;
@@ -2864,12 +2865,6 @@ static int ocfs2_create_xattr_block(struct inode *inode,
}
new_bh = sb_getblk(inode->i_sb, first_blkno);
- if (!new_bh) {
- ret = -ENOMEM;
- mlog_errno(ret);
- goto end;
- }
-
ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
@@ -3045,7 +3040,8 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
clusters_add += new_clusters;
credits += ocfs2_calc_extend_credits(inode->i_sb,
- &def_xv.xv.xr_list);
+ &def_xv.xv.xr_list,
+ new_clusters);
}
goto meta_guess;
@@ -3110,7 +3106,8 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
if (!ocfs2_xattr_is_local(xe))
credits += ocfs2_calc_extend_credits(
inode->i_sb,
- &def_xv.xv.xr_list);
+ &def_xv.xv.xr_list,
+ new_clusters);
goto out;
}
}
@@ -3135,7 +3132,9 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
clusters_add += new_clusters - old_clusters;
credits += ocfs2_calc_extend_credits(inode->i_sb,
- &xv->xr_list);
+ &xv->xr_list,
+ new_clusters -
+ old_clusters);
if (value_size >= OCFS2_XATTR_ROOT_SIZE)
goto out;
}
@@ -3181,7 +3180,7 @@ meta_guess:
&xb->xb_attrs.xb_root.xt_list;
meta_add += ocfs2_extend_meta_needed(el);
credits += ocfs2_calc_extend_credits(inode->i_sb,
- el);
+ el, 1);
} else
credits += OCFS2_SUBALLOC_ALLOC + 1;
@@ -6217,7 +6216,8 @@ static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
le16_to_cpu(xv->xr_list.l_next_free_rec);
*credits += ocfs2_calc_extend_credits(sb,
- &def_xv.xv.xr_list);
+ &def_xv.xv.xr_list,
+ le32_to_cpu(xv->xr_clusters));
/*
* If the value is a tree with depth > 1, We don't go deep
@@ -6782,7 +6782,7 @@ static int ocfs2_lock_reflink_xattr_rec_allocators(
metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
*credits += ocfs2_calc_extend_credits(osb->sb,
- xt_et->et_root_el);
+ xt_et->et_root_el, len);
if (metas.num_metas) {
ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
diff --git a/fs/open.c b/fs/open.c
index 4b3e1ed..d420331 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -57,8 +57,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
newattrs.ia_valid |= ret | ATTR_FORCE;
mutex_lock(&dentry->d_inode->i_mutex);
- /* Note any delegations or leases have already been broken: */
- ret = notify_change(dentry, &newattrs, NULL);
+ ret = notify_change(dentry, &newattrs);
mutex_unlock(&dentry->d_inode->i_mutex);
return ret;
}
@@ -465,28 +464,21 @@ out:
static int chmod_common(struct path *path, umode_t mode)
{
struct inode *inode = path->dentry->d_inode;
- struct inode *delegated_inode = NULL;
struct iattr newattrs;
int error;
error = mnt_want_write(path->mnt);
if (error)
return error;
-retry_deleg:
mutex_lock(&inode->i_mutex);
error = security_path_chmod(path, mode);
if (error)
goto out_unlock;
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- error = notify_change(path->dentry, &newattrs, &delegated_inode);
+ error = notify_change(path->dentry, &newattrs);
out_unlock:
mutex_unlock(&inode->i_mutex);
- if (delegated_inode) {
- error = break_deleg_wait(&delegated_inode);
- if (!error)
- goto retry_deleg;
- }
mnt_drop_write(path->mnt);
return error;
}
@@ -530,7 +522,6 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
static int chown_common(struct path *path, uid_t user, gid_t group)
{
struct inode *inode = path->dentry->d_inode;
- struct inode *delegated_inode = NULL;
int error;
struct iattr newattrs;
kuid_t uid;
@@ -555,17 +546,12 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |=
ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
-retry_deleg:
mutex_lock(&inode->i_mutex);
error = security_path_chown(path, uid, gid);
if (!error)
- error = notify_change(path->dentry, &newattrs, &delegated_inode);
+ error = notify_change(path->dentry, &newattrs);
mutex_unlock(&inode->i_mutex);
- if (delegated_inode) {
- error = break_deleg_wait(&delegated_inode);
- if (!error)
- goto retry_deleg;
- }
+
return error;
}
@@ -699,6 +685,7 @@ static int do_dentry_open(struct file *f,
}
f->f_mapping = inode->i_mapping;
+ file_sb_list_add(f, inode->i_sb);
if (unlikely(f->f_mode & FMODE_PATH)) {
f->f_op = &empty_fops;
@@ -706,10 +693,6 @@ static int do_dentry_open(struct file *f,
}
f->f_op = fops_get(inode->i_fop);
- if (unlikely(WARN_ON(!f->f_op))) {
- error = -ENODEV;
- goto cleanup_all;
- }
error = security_file_open(f, cred);
if (error)
@@ -719,7 +702,7 @@ static int do_dentry_open(struct file *f,
if (error)
goto cleanup_all;
- if (!open)
+ if (!open && f->f_op)
open = f->f_op->open;
if (open) {
error = open(inode, f);
@@ -737,6 +720,7 @@ static int do_dentry_open(struct file *f,
cleanup_all:
fops_put(f->f_op);
+ file_sb_list_del(f);
if (f->f_mode & FMODE_WRITE) {
put_write_access(inode);
if (!special_file(inode->i_mode)) {
@@ -1039,7 +1023,7 @@ int filp_close(struct file *filp, fl_owner_t id)
return 0;
}
- if (filp->f_op->flush)
+ if (filp->f_op && filp->f_op->flush)
retval = filp->f_op->flush(filp, id);
if (likely(!(filp->f_mode & FMODE_PATH))) {
diff --git a/fs/pipe.c b/fs/pipe.c
index 0e0752e..d2c45e1 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -726,25 +726,11 @@ pipe_poll(struct file *filp, poll_table *wait)
return mask;
}
-static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
-{
- int kill = 0;
-
- spin_lock(&inode->i_lock);
- if (!--pipe->files) {
- inode->i_pipe = NULL;
- kill = 1;
- }
- spin_unlock(&inode->i_lock);
-
- if (kill)
- free_pipe_info(pipe);
-}
-
static int
pipe_release(struct inode *inode, struct file *file)
{
- struct pipe_inode_info *pipe = file->private_data;
+ struct pipe_inode_info *pipe = inode->i_pipe;
+ int kill = 0;
__pipe_lock(pipe);
if (file->f_mode & FMODE_READ)
@@ -757,9 +743,17 @@ pipe_release(struct inode *inode, struct file *file)
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
+ spin_lock(&inode->i_lock);
+ if (!--pipe->files) {
+ inode->i_pipe = NULL;
+ kill = 1;
+ }
+ spin_unlock(&inode->i_lock);
__pipe_unlock(pipe);
- put_pipe_info(inode, pipe);
+ if (kill)
+ free_pipe_info(pipe);
+
return 0;
}
@@ -1020,6 +1014,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
{
struct pipe_inode_info *pipe;
bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
+ int kill = 0;
int ret;
filp->f_version = 0;
@@ -1135,9 +1130,15 @@ err_wr:
goto err;
err:
+ spin_lock(&inode->i_lock);
+ if (!--pipe->files) {
+ inode->i_pipe = NULL;
+ kill = 1;
+ }
+ spin_unlock(&inode->i_lock);
__pipe_unlock(pipe);
-
- put_pipe_info(inode, pipe);
+ if (kill)
+ free_pipe_info(pipe);
return ret;
}
diff --git a/fs/pnode.c b/fs/pnode.c
index c7221bb..9af0df1 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -264,12 +264,12 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
prev_src_mnt = child;
}
out:
- lock_mount_hash();
+ br_write_lock(&vfsmount_lock);
while (!list_empty(&tmp_list)) {
child = list_first_entry(&tmp_list, struct mount, mnt_hash);
umount_tree(child, 0);
}
- unlock_mount_hash();
+ br_write_unlock(&vfsmount_lock);
return ret;
}
@@ -278,7 +278,8 @@ out:
*/
static inline int do_refcount_check(struct mount *mnt, int count)
{
- return mnt_get_count(mnt) > count;
+ int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts;
+ return (mycount > count);
}
/*
@@ -310,7 +311,7 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
for (m = propagation_next(parent, parent); m;
m = propagation_next(m, parent)) {
- child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
+ child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint, 0);
if (child && list_empty(&child->mnt_mounts) &&
(ret = do_refcount_check(child, 1)))
break;
@@ -332,8 +333,8 @@ static void __propagate_umount(struct mount *mnt)
for (m = propagation_next(parent, parent); m;
m = propagation_next(m, parent)) {
- struct mount *child = __lookup_mnt_last(&m->mnt,
- mnt->mnt_mountpoint);
+ struct mount *child = __lookup_mnt(&m->mnt,
+ mnt->mnt_mountpoint, 0);
/*
* umount the child only if the child has no
* other children
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 2183fcf..15af622 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -31,10 +31,6 @@ config PROC_FS
config PROC_KCORE
bool "/proc/kcore support" if !ARM
depends on PROC_FS && MMU
- help
- Provides a virtual ELF core file of the live kernel. This can
- be read with gdb and other ELF tools. No modifications can be
- made using this mechanism.
config PROC_VMCORE
bool "/proc/vmcore support"
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 1bd2077..cbd0f1b 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -183,7 +183,6 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
seq_printf(m,
"State:\t%s\n"
"Tgid:\t%d\n"
- "Ngid:\t%d\n"
"Pid:\t%d\n"
"PPid:\t%d\n"
"TracerPid:\t%d\n"
@@ -191,7 +190,6 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
"Gid:\t%d\t%d\t%d\t%d\n",
get_task_state(p),
task_tgid_nr_ns(p, ns),
- task_numa_group_id(p),
pid_nr_ns(pid, ns),
ppid, tpid,
from_kuid_munged(user_ns, cred->uid),
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 03c8d74..1485e38 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1151,16 +1151,10 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
goto out_free_page;
}
-
- /* is userspace tring to explicitly UNSET the loginuid? */
- if (loginuid == AUDIT_UID_UNSET) {
- kloginuid = INVALID_UID;
- } else {
- kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
- if (!uid_valid(kloginuid)) {
- length = -EINVAL;
- goto out_free_page;
- }
+ kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
+ if (!uid_valid(kloginuid)) {
+ length = -EINVAL;
+ goto out_free_page;
}
length = audit_set_loginuid(kloginuid);
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index 51942d5..b701eaa 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -29,6 +29,7 @@ static int show_console_dev(struct seq_file *m, void *v)
char flags[ARRAY_SIZE(con_flags) + 1];
struct console *con = v;
unsigned int a;
+ int len;
dev_t dev = 0;
if (con->device) {
@@ -46,10 +47,11 @@ static int show_console_dev(struct seq_file *m, void *v)
con_flags[a].name : ' ';
flags[a] = 0;
- seq_setwidth(m, 21 - 1);
- seq_printf(m, "%s%d", con->name, con->index);
- seq_pad(m, ' ');
- seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-',
+ seq_printf(m, "%s%d%n", con->name, con->index, &len);
+ len = 21 - len;
+ if (len < 1)
+ len = 1;
+ seq_printf(m, "%*c%c%c%c (%s)", len, ' ', con->read ? 'R' : '-',
con->write ? 'W' : '-', con->unblank ? 'U' : '-',
flags);
if (dev)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index cca93b6..737e156 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -175,6 +175,22 @@ static const struct inode_operations proc_link_inode_operations = {
};
/*
+ * As some entries in /proc are volatile, we want to
+ * get rid of unused dentries. This could be made
+ * smarter: we could keep a "volatile" flag in the
+ * inode to indicate which ones to keep.
+ */
+static int proc_delete_dentry(const struct dentry * dentry)
+{
+ return 1;
+}
+
+static const struct dentry_operations proc_dentry_operations =
+{
+ .d_delete = proc_delete_dentry,
+};
+
+/*
* Don't create negative dentries here, return -ENOENT by hand
* instead.
*/
@@ -193,7 +209,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
inode = proc_get_inode(dir->i_sb, de);
if (!inode)
return ERR_PTR(-ENOMEM);
- d_set_d_op(dentry, &simple_dentry_operations);
+ d_set_d_op(dentry, &proc_dentry_operations);
d_add(dentry, inode);
return NULL;
}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 28955d4..8eaa1ba 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -285,23 +285,19 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
return rv;
}
-static unsigned long
-proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
- unsigned long len, unsigned long pgoff,
- unsigned long flags)
+static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
unsigned long rv = -EIO;
- unsigned long (*get_area)(struct file *, unsigned long, unsigned long,
- unsigned long, unsigned long) = NULL;
+ unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long) = NULL;
if (use_pde(pde)) {
#ifdef CONFIG_MMU
- get_area = current->mm->get_unmapped_area;
+ get_unmapped_area = current->mm->get_unmapped_area;
#endif
if (pde->proc_fops->get_unmapped_area)
- get_area = pde->proc_fops->get_unmapped_area;
- if (get_area)
- rv = get_area(file, orig_addr, len, pgoff, flags);
+ get_unmapped_area = pde->proc_fops->get_unmapped_area;
+ if (get_unmapped_area)
+ rv = get_unmapped_area(file, orig_addr, len, pgoff, flags);
unuse_pde(pde);
}
return rv;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 5ed0e52..06ea155 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -255,7 +255,8 @@ static int kcore_update_ram(void)
end_pfn = 0;
for_each_node_state(nid, N_MEMORY) {
unsigned long node_end;
- node_end = node_end_pfn(nid);
+ node_end = NODE_DATA(nid)->node_start_pfn +
+ NODE_DATA(nid)->node_spanned_pages;
if (end_pfn < node_end)
end_pfn = node_end;
}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index a77d2b2..59d85d6 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -1,8 +1,8 @@
#include <linux/fs.h>
+#include <linux/hugetlb.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/mmzone.h>
#include <linux/proc_fs.h>
@@ -24,6 +24,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
{
struct sysinfo i;
unsigned long committed;
+ unsigned long allowed;
struct vmalloc_info vmi;
long cached;
unsigned long pages[NR_LRU_LISTS];
@@ -36,6 +37,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
si_meminfo(&i);
si_swapinfo(&i);
committed = percpu_counter_read_positive(&vm_committed_as);
+ allowed = ((totalram_pages - hugetlb_total_pages())
+ * sysctl_overcommit_ratio / 100) + total_swap_pages;
cached = global_page_state(NR_FILE_PAGES) -
total_swapcache_pages() - i.bufferram;
@@ -144,7 +147,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
K(global_page_state(NR_UNSTABLE_NFS)),
K(global_page_state(NR_BOUNCE)),
K(global_page_state(NR_WRITEBACK_TEMP)),
- K(vm_commit_limit()),
+ K(allowed),
K(committed),
(unsigned long)VMALLOC_TOTAL >> 10,
vmi.used >> 10,
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 9ae46b8..49a7fff 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -42,6 +42,12 @@ static const struct inode_operations ns_inode_operations = {
.setattr = proc_setattr,
};
+static int ns_delete_dentry(const struct dentry *dentry)
+{
+ /* Don't cache namespace inodes when not in use */
+ return 1;
+}
+
static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
{
struct inode *inode = dentry->d_inode;
@@ -53,7 +59,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
const struct dentry_operations ns_dentry_operations =
{
- .d_delete = always_delete_dentry,
+ .d_delete = ns_delete_dentry,
.d_dname = ns_dname,
};
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 5f9bc8a..ccfd99b 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -39,7 +39,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
unsigned long ino = 0;
struct file *file;
dev_t dev = 0;
- int flags;
+ int flags, len;
flags = region->vm_flags;
file = region->vm_file;
@@ -50,9 +50,8 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
ino = inode->i_ino;
}
- seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
seq_printf(m,
- "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
+ "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
region->vm_start,
region->vm_end,
flags & VM_READ ? 'r' : '-',
@@ -60,10 +59,13 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
flags & VM_EXEC ? 'x' : '-',
flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
((loff_t)region->vm_pgoff) << PAGE_SHIFT,
- MAJOR(dev), MINOR(dev), ino);
+ MAJOR(dev), MINOR(dev), ino, &len);
if (file) {
- seq_pad(m, ' ');
+ len = 25 + sizeof(void *) * 6 - len;
+ if (len < 1)
+ len = 1;
+ seq_printf(m, "%*c", len, ' ');
seq_path(m, &file->f_path, "");
}
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 70779b2..106a835 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -14,13 +14,16 @@
#include <linux/of.h>
#include <linux/export.h>
#include <linux/slab.h>
+#include <asm/prom.h>
#include <asm/uaccess.h>
#include "internal.h"
static inline void set_node_proc_entry(struct device_node *np,
struct proc_dir_entry *de)
{
+#ifdef HAVE_ARCH_DEVTREE_FIXUPS
np->pde = de;
+#endif
}
static struct proc_dir_entry *proc_device_tree;
diff --git a/fs/proc/self.c b/fs/proc/self.c
index ffeb202..6b6a993 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -36,10 +36,18 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
return NULL;
}
+static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
+ void *cookie)
+{
+ char *s = nd_get_link(nd);
+ if (!IS_ERR(s))
+ kfree(s);
+}
+
static const struct inode_operations proc_self_inode_operations = {
.readlink = proc_self_readlink,
.follow_link = proc_self_follow_link,
- .put_link = kfree_put_link,
+ .put_link = proc_self_put_link,
};
static unsigned self_inum;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index fb52b54..390bdab 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -62,8 +62,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
total_rss << (PAGE_SHIFT-10),
data << (PAGE_SHIFT-10),
mm->stack_vm << (PAGE_SHIFT-10), text, lib,
- (PTRS_PER_PTE * sizeof(pte_t) *
- atomic_long_read(&mm->nr_ptes)) >> 10,
+ (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10,
swap << (PAGE_SHIFT-10));
}
@@ -84,6 +83,14 @@ unsigned long task_statm(struct mm_struct *mm,
return mm->total_vm;
}
+static void pad_len_spaces(struct seq_file *m, int len)
+{
+ len = 25 + sizeof(void*) * 6 - len;
+ if (len < 1)
+ len = 1;
+ seq_printf(m, "%*c", len, ' ');
+}
+
#ifdef CONFIG_NUMA
/*
* These functions are for numa_maps but called in generic **maps seq_file
@@ -261,6 +268,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
unsigned long long pgoff = 0;
unsigned long start, end;
dev_t dev = 0;
+ int len;
const char *name = NULL;
if (file) {
@@ -278,8 +286,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
if (stack_guard_page_end(vma, end))
end -= PAGE_SIZE;
- seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
- seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
+ seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
start,
end,
flags & VM_READ ? 'r' : '-',
@@ -287,14 +294,14 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
flags & VM_EXEC ? 'x' : '-',
flags & VM_MAYSHARE ? 's' : 'p',
pgoff,
- MAJOR(dev), MINOR(dev), ino);
+ MAJOR(dev), MINOR(dev), ino, &len);
/*
* Print the dentry name for named mappings, and a
* special [heap] marker for the heap:
*/
if (file) {
- seq_pad(m, ' ');
+ pad_len_spaces(m, len);
seq_path(m, &file->f_path, "\n");
goto done;
}
@@ -326,7 +333,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
name = "[stack]";
} else {
/* Thread stack in /proc/PID/maps */
- seq_pad(m, ' ');
+ pad_len_spaces(m, len);
seq_printf(m, "[stack:%d]", tid);
}
}
@@ -334,7 +341,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
done:
if (name) {
- seq_pad(m, ' ');
+ pad_len_spaces(m, len);
seq_puts(m, name);
}
seq_putc(m, '\n');
@@ -498,9 +505,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte_t *pte;
spinlock_t *ptl;
- if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+ if (pmd_trans_huge_lock(pmd, vma) == 1) {
smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
- spin_unlock(ptl);
+ spin_unlock(&walk->mm->page_table_lock);
mss->anonymous_thp += HPAGE_PMD_SIZE;
return 0;
}
@@ -554,9 +561,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_NONLINEAR)] = "nl",
[ilog2(VM_ARCH_1)] = "ar",
[ilog2(VM_DONTDUMP)] = "dd",
-#ifdef CONFIG_MEM_SOFT_DIRTY
- [ilog2(VM_SOFTDIRTY)] = "sd",
-#endif
[ilog2(VM_MIXEDMAP)] = "mm",
[ilog2(VM_HUGEPAGE)] = "hg",
[ilog2(VM_NOHUGEPAGE)] = "nh",
@@ -991,14 +995,13 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
{
struct vm_area_struct *vma;
struct pagemapread *pm = walk->private;
- spinlock_t *ptl;
pte_t *pte;
int err = 0;
pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
/* find the first VMA at or above 'addr' */
vma = find_vma(walk->mm, addr);
- if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+ if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
int pmd_flags2;
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
@@ -1016,7 +1019,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (err)
break;
}
- spin_unlock(ptl);
+ spin_unlock(&walk->mm->page_table_lock);
return err;
}
@@ -1318,7 +1321,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
md = walk->private;
- if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) {
+ if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
pte_t huge_pte = *(pte_t *)pmd;
struct page *page;
@@ -1326,7 +1329,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
if (page)
gather_stats(page, md, pte_dirty(huge_pte),
HPAGE_PMD_SIZE/PAGE_SIZE);
- spin_unlock(ptl);
+ spin_unlock(&walk->mm->page_table_lock);
return 0;
}
@@ -1384,8 +1387,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
struct mm_struct *mm = vma->vm_mm;
struct mm_walk walk = {};
struct mempolicy *pol;
- char buffer[64];
- int nid;
+ int n;
+ char buffer[50];
if (!mm)
return 0;
@@ -1401,8 +1404,10 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
walk.mm = mm;
pol = get_vma_policy(task, vma, vma->vm_start);
- mpol_to_str(buffer, sizeof(buffer), pol);
+ n = mpol_to_str(buffer, sizeof(buffer), pol);
mpol_cond_put(pol);
+ if (n < 0)
+ return n;
seq_printf(m, "%08lx %s", vma->vm_start, buffer);
@@ -1455,9 +1460,9 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
if (md->writeback)
seq_printf(m, " writeback=%lu", md->writeback);
- for_each_node_state(nid, N_MEMORY)
- if (md->node[nid])
- seq_printf(m, " N%d=%lu", nid, md->node[nid]);
+ for_each_node_state(n, N_MEMORY)
+ if (md->node[n])
+ seq_printf(m, " N%d=%lu", n, md->node[n]);
out:
seq_putc(m, '\n');
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 678455d..56123a6 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -123,6 +123,14 @@ unsigned long task_statm(struct mm_struct *mm,
return size;
}
+static void pad_len_spaces(struct seq_file *m, int len)
+{
+ len = 25 + sizeof(void*) * 6 - len;
+ if (len < 1)
+ len = 1;
+ seq_printf(m, "%*c", len, ' ');
+}
+
/*
* display a single VMA to a sequenced file
*/
@@ -134,7 +142,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
unsigned long ino = 0;
struct file *file;
dev_t dev = 0;
- int flags;
+ int flags, len;
unsigned long long pgoff = 0;
flags = vma->vm_flags;
@@ -147,9 +155,8 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
}
- seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
seq_printf(m,
- "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
+ "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
vma->vm_start,
vma->vm_end,
flags & VM_READ ? 'r' : '-',
@@ -157,16 +164,16 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
flags & VM_EXEC ? 'x' : '-',
flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
pgoff,
- MAJOR(dev), MINOR(dev), ino);
+ MAJOR(dev), MINOR(dev), ino, &len);
if (file) {
- seq_pad(m, ' ');
+ pad_len_spaces(m, len);
seq_path(m, &file->f_path, "");
} else if (mm) {
pid_t tid = vm_is_stack(priv->task, vma, is_pid);
if (tid != 0) {
- seq_pad(m, ' ');
+ pad_len_spaces(m, len);
/*
* Thread stack in /proc/PID/task/TID/maps or
* the main process stack.
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 439406e..5fe34c3 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -20,15 +20,15 @@ static unsigned mounts_poll(struct file *file, poll_table *wait)
struct proc_mounts *p = proc_mounts(file->private_data);
struct mnt_namespace *ns = p->ns;
unsigned res = POLLIN | POLLRDNORM;
- int event;
poll_wait(file, &p->ns->poll, wait);
- event = ACCESS_ONCE(ns->event);
- if (p->m.poll_event != event) {
- p->m.poll_event = event;
+ br_read_lock(&vfsmount_lock);
+ if (p->m.poll_event != ns->event) {
+ p->m.poll_event = ns->event;
res |= POLLERR | POLLPRI;
}
+ br_read_unlock(&vfsmount_lock);
return res;
}
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index e62c818..d024505 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -60,6 +60,10 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
struct buffer_head *bh;
*res_dir = NULL;
+ if (!dir->i_sb) {
+ printk(KERN_WARNING "qnx4: no superblock on dir.\n");
+ return NULL;
+ }
bh = NULL;
block = offset = blkofs = 0;
while (blkofs * QNX4_BLOCK_SIZE + offset < dir->i_size) {
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c
index 72d2917..16e8abb 100644
--- a/fs/quota/netlink.c
+++ b/fs/quota/netlink.c
@@ -9,25 +9,13 @@
#include <net/netlink.h>
#include <net/genetlink.h>
-static const struct genl_multicast_group quota_mcgrps[] = {
- { .name = "events", },
-};
-
/* Netlink family structure for quota */
static struct genl_family quota_genl_family = {
- /*
- * Needed due to multicast group ID abuse - old code assumed
- * the family ID was also a valid multicast group ID (which
- * isn't true) and userspace might thus rely on it. Assign a
- * static ID for this group to make dealing with that easier.
- */
- .id = GENL_ID_VFS_DQUOT,
+ .id = GENL_ID_GENERATE,
.hdrsize = 0,
.name = "VFS_DQUOT",
.version = 1,
.maxattr = QUOTA_NL_A_MAX,
- .mcgrps = quota_mcgrps,
- .n_mcgrps = ARRAY_SIZE(quota_mcgrps),
};
/**
@@ -90,7 +78,7 @@ void quota_send_warning(struct kqid qid, dev_t dev,
goto attr_err_out;
genlmsg_end(skb, msg_head);
- genlmsg_multicast(&quota_genl_family, skb, 0, 0, GFP_NOFS);
+ genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
return;
attr_err_out:
printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 2b363e2..dea86e8 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -117,7 +117,6 @@ static int quota_setinfo(struct super_block *sb, int type, void __user *addr)
static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src)
{
- memset(dst, 0, sizeof(*dst));
dst->dqb_bhardlimit = src->d_blk_hardlimit;
dst->dqb_bsoftlimit = src->d_blk_softlimit;
dst->dqb_curspace = src->d_bcount;
diff --git a/fs/read_write.c b/fs/read_write.c
index 58e440d..e3cd280 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -257,7 +257,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
fn = no_llseek;
if (file->f_mode & FMODE_LSEEK) {
- if (file->f_op->llseek)
+ if (file->f_op && file->f_op->llseek)
fn = file->f_op->llseek;
}
return fn(file, offset, whence);
@@ -384,7 +384,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
if (!(file->f_mode & FMODE_READ))
return -EBADF;
- if (!file->f_op->read && !file->f_op->aio_read)
+ if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
return -EINVAL;
if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
return -EFAULT;
@@ -433,7 +433,7 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t
const char __user *p;
ssize_t ret;
- if (!file->f_op->write && !file->f_op->aio_write)
+ if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
return -EINVAL;
old_fs = get_fs();
@@ -460,7 +460,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
- if (!file->f_op->write && !file->f_op->aio_write)
+ if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
return -EINVAL;
if (unlikely(!access_ok(VERIFY_READ, buf, count)))
return -EFAULT;
@@ -727,6 +727,11 @@ static ssize_t do_readv_writev(int type, struct file *file,
io_fn_t fn;
iov_fn_t fnv;
+ if (!file->f_op) {
+ ret = -EINVAL;
+ goto out;
+ }
+
ret = rw_copy_check_uvector(type, uvector, nr_segs,
ARRAY_SIZE(iovstack), iovstack, &iov);
if (ret <= 0)
@@ -773,7 +778,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
{
if (!(file->f_mode & FMODE_READ))
return -EBADF;
- if (!file->f_op->aio_read && !file->f_op->read)
+ if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
return -EINVAL;
return do_readv_writev(READ, file, vec, vlen, pos);
@@ -786,7 +791,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
{
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
- if (!file->f_op->aio_write && !file->f_op->write)
+ if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
return -EINVAL;
return do_readv_writev(WRITE, file, vec, vlen, pos);
@@ -901,6 +906,10 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
io_fn_t fn;
iov_fn_t fnv;
+ ret = -EINVAL;
+ if (!file->f_op)
+ goto out;
+
ret = -EFAULT;
if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
goto out;
@@ -956,7 +965,7 @@ static size_t compat_readv(struct file *file,
goto out;
ret = -EINVAL;
- if (!file->f_op->aio_read && !file->f_op->read)
+ if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
goto out;
ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
@@ -1023,7 +1032,7 @@ static size_t compat_writev(struct file *file,
goto out;
ret = -EINVAL;
- if (!file->f_op->aio_write && !file->f_op->write)
+ if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
goto out;
ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
diff --git a/fs/readdir.c b/fs/readdir.c
index 5b53d99..93d71e5 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -24,7 +24,7 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
int res = -ENOTDIR;
- if (!file->f_op->iterate)
+ if (!file->f_op || !file->f_op->iterate)
goto out;
res = security_file_permission(file, MAY_READ);
diff --git a/fs/select.c b/fs/select.c
index 467bb1c..dfd5cb1 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -454,7 +454,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
const struct file_operations *f_op;
f_op = f.file->f_op;
mask = DEFAULT_POLLMASK;
- if (f_op->poll) {
+ if (f_op && f_op->poll) {
wait_key_set(wait, in, out,
bit, busy_flag);
mask = (*f_op->poll)(f.file, wait);
@@ -761,7 +761,7 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait,
mask = POLLNVAL;
if (f.file) {
mask = DEFAULT_POLLMASK;
- if (f.file->f_op->poll) {
+ if (f.file->f_op && f.file->f_op->poll) {
pwait->_key = pollfd->events|POLLERR|POLLHUP;
pwait->_key |= busy_flag;
mask = f.file->f_op->poll(f.file, pwait);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 1d641bb..a290157 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -136,7 +136,6 @@ static int traverse(struct seq_file *m, loff_t offset)
Eoverflow:
m->op->stop(m, p);
kfree(m->buf);
- m->count = 0;
m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
return !m->buf ? -ENOMEM : -EAGAIN;
}
@@ -233,10 +232,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
goto Fill;
m->op->stop(m, p);
kfree(m->buf);
- m->count = 0;
m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
if (!m->buf)
goto Enomem;
+ m->count = 0;
m->version = 0;
pos = m->index;
p = m->op->start(m, &pos);
@@ -767,21 +766,6 @@ int seq_write(struct seq_file *seq, const void *data, size_t len)
}
EXPORT_SYMBOL(seq_write);
-/**
- * seq_pad - write padding spaces to buffer
- * @m: seq_file identifying the buffer to which data should be written
- * @c: the byte to append after padding if non-zero
- */
-void seq_pad(struct seq_file *m, char c)
-{
- int size = m->pad_until - m->count;
- if (size > 0)
- seq_printf(m, "%*s", size, "");
- if (c)
- seq_putc(m, c);
-}
-EXPORT_SYMBOL(seq_pad);
-
struct list_head *seq_list_start(struct list_head *head, loff_t pos)
{
struct list_head *lh;
diff --git a/fs/splice.c b/fs/splice.c
index 46a08f7..3b7ee65 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -695,7 +695,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
loff_t pos = sd->pos;
int more;
- if (!likely(file->f_op->sendpage))
+ if (!likely(file->f_op && file->f_op->sendpage))
return -EINVAL;
more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
@@ -1099,7 +1099,7 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
loff_t *, size_t, unsigned int);
- if (out->f_op->splice_write)
+ if (out->f_op && out->f_op->splice_write)
splice_write = out->f_op->splice_write;
else
splice_write = default_file_splice_write;
@@ -1125,7 +1125,7 @@ static long do_splice_to(struct file *in, loff_t *ppos,
if (unlikely(ret < 0))
return ret;
- if (in->f_op->splice_read)
+ if (in->f_op && in->f_op->splice_read)
splice_read = in->f_op->splice_read;
else
splice_read = default_file_splice_read;
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index b6fa865..c70111e 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -25,78 +25,6 @@ config SQUASHFS
If unsure, say N.
-choice
- prompt "File decompression options"
- depends on SQUASHFS
- help
- Squashfs now supports two options for decompressing file
- data. Traditionally Squashfs has decompressed into an
- intermediate buffer and then memcopied it into the page cache.
- Squashfs now supports the ability to decompress directly into
- the page cache.
-
- If unsure, select "Decompress file data into an intermediate buffer"
-
-config SQUASHFS_FILE_CACHE
- bool "Decompress file data into an intermediate buffer"
- help
- Decompress file data into an intermediate buffer and then
- memcopy it into the page cache.
-
-config SQUASHFS_FILE_DIRECT
- bool "Decompress files directly into the page cache"
- help
- Directly decompress file data into the page cache.
- Doing so can significantly improve performance because
- it eliminates a memcpy and it also removes the lock contention
- on the single buffer.
-
-endchoice
-
-choice
- prompt "Decompressor parallelisation options"
- depends on SQUASHFS
- help
- Squashfs now supports three parallelisation options for
- decompression. Each one exhibits various trade-offs between
- decompression performance and CPU and memory usage.
-
- If in doubt, select "Single threaded compression"
-
-config SQUASHFS_DECOMP_SINGLE
- bool "Single threaded compression"
- help
- Traditionally Squashfs has used single-threaded decompression.
- Only one block (data or metadata) can be decompressed at any
- one time. This limits CPU and memory usage to a minimum.
-
-config SQUASHFS_DECOMP_MULTI
- bool "Use multiple decompressors for parallel I/O"
- help
- By default Squashfs uses a single decompressor but it gives
- poor performance on parallel I/O workloads when using multiple CPU
- machines due to waiting on decompressor availability.
-
- If you have a parallel I/O workload and your system has enough memory,
- using this option may improve overall I/O performance.
-
- This decompressor implementation uses up to two parallel
- decompressors per core. It dynamically allocates decompressors
- on a demand basis.
-
-config SQUASHFS_DECOMP_MULTI_PERCPU
- bool "Use percpu multiple decompressors for parallel I/O"
- help
- By default Squashfs uses a single decompressor but it gives
- poor performance on parallel I/O workloads when using multiple CPU
- machines due to waiting on decompressor availability.
-
- This decompressor implementation uses a maximum of one
- decompressor per core. It uses percpu variables to ensure
- decompression is load-balanced across the cores.
-
-endchoice
-
config SQUASHFS_XATTR
bool "Squashfs XATTR support"
depends on SQUASHFS
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 4132520..110b047 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -5,11 +5,6 @@
obj-$(CONFIG_SQUASHFS) += squashfs.o
squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
squashfs-y += namei.o super.o symlink.o decompressor.o
-squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o
-squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o
-squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
-squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
-squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 0cea9b9..41d108e 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -36,7 +36,6 @@
#include "squashfs_fs_sb.h"
#include "squashfs.h"
#include "decompressor.h"
-#include "page_actor.h"
/*
* Read the metadata block length, this is stored in the first two
@@ -87,16 +86,16 @@ static struct buffer_head *get_block_length(struct super_block *sb,
* generated a larger block - this does occasionally happen with compression
* algorithms).
*/
-int squashfs_read_data(struct super_block *sb, u64 index, int length,
- u64 *next_index, struct squashfs_page_actor *output)
+int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
+ int length, u64 *next_index, int srclength, int pages)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
struct buffer_head **bh;
int offset = index & ((1 << msblk->devblksize_log2) - 1);
u64 cur_index = index >> msblk->devblksize_log2;
- int bytes, compressed, b = 0, k = 0, avail, i;
+ int bytes, compressed, b = 0, k = 0, page = 0, avail;
- bh = kcalloc(((output->length + msblk->devblksize - 1)
+ bh = kcalloc(((srclength + msblk->devblksize - 1)
>> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
if (bh == NULL)
return -ENOMEM;
@@ -112,9 +111,9 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
*next_index = index + length;
TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
- index, compressed ? "" : "un", length, output->length);
+ index, compressed ? "" : "un", length, srclength);
- if (length < 0 || length > output->length ||
+ if (length < 0 || length > srclength ||
(index + length) > msblk->bytes_used)
goto read_failure;
@@ -146,7 +145,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
compressed ? "" : "un", length);
- if (length < 0 || length > output->length ||
+ if (length < 0 || length > srclength ||
(index + length) > msblk->bytes_used)
goto block_release;
@@ -159,15 +158,9 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
ll_rw_block(READ, b - 1, bh + 1);
}
- for (i = 0; i < b; i++) {
- wait_on_buffer(bh[i]);
- if (!buffer_uptodate(bh[i]))
- goto block_release;
- }
-
if (compressed) {
- length = squashfs_decompress(msblk, bh, b, offset, length,
- output);
+ length = squashfs_decompress(msblk, buffer, bh, b, offset,
+ length, srclength, pages);
if (length < 0)
goto read_failure;
} else {
@@ -175,20 +168,22 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
* Block is uncompressed.
*/
int in, pg_offset = 0;
- void *data = squashfs_first_page(output);
for (bytes = length; k < b; k++) {
in = min(bytes, msblk->devblksize - offset);
bytes -= in;
+ wait_on_buffer(bh[k]);
+ if (!buffer_uptodate(bh[k]))
+ goto block_release;
while (in) {
if (pg_offset == PAGE_CACHE_SIZE) {
- data = squashfs_next_page(output);
+ page++;
pg_offset = 0;
}
avail = min_t(int, in, PAGE_CACHE_SIZE -
pg_offset);
- memcpy(data + pg_offset, bh[k]->b_data + offset,
- avail);
+ memcpy(buffer[page] + pg_offset,
+ bh[k]->b_data + offset, avail);
in -= avail;
pg_offset += avail;
offset += avail;
@@ -196,7 +191,6 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
offset = 0;
put_bh(bh[k]);
}
- squashfs_finish_page(output);
}
kfree(bh);
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 1cb70a0..af0b738 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -56,7 +56,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "squashfs.h"
-#include "page_actor.h"
/*
* Look-up block in cache, and increment usage count. If not in cache, read
@@ -120,8 +119,9 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
entry->error = 0;
spin_unlock(&cache->lock);
- entry->length = squashfs_read_data(sb, block, length,
- &entry->next_index, entry->actor);
+ entry->length = squashfs_read_data(sb, entry->data,
+ block, length, &entry->next_index,
+ cache->block_size, cache->pages);
spin_lock(&cache->lock);
@@ -220,7 +220,6 @@ void squashfs_cache_delete(struct squashfs_cache *cache)
kfree(cache->entry[i].data[j]);
kfree(cache->entry[i].data);
}
- kfree(cache->entry[i].actor);
}
kfree(cache->entry);
@@ -281,13 +280,6 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries,
goto cleanup;
}
}
-
- entry->actor = squashfs_page_actor_init(entry->data,
- cache->pages, 0);
- if (entry->actor == NULL) {
- ERROR("Failed to allocate %s cache entry\n", name);
- goto cleanup;
- }
}
return cache;
@@ -418,7 +410,6 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length)
int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
int i, res;
void *table, *buffer, **data;
- struct squashfs_page_actor *actor;
table = buffer = kmalloc(length, GFP_KERNEL);
if (table == NULL)
@@ -430,28 +421,19 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length)
goto failed;
}
- actor = squashfs_page_actor_init(data, pages, length);
- if (actor == NULL) {
- res = -ENOMEM;
- goto failed2;
- }
-
for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
data[i] = buffer;
- res = squashfs_read_data(sb, block, length |
- SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor);
+ res = squashfs_read_data(sb, data, block, length |
+ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages);
kfree(data);
- kfree(actor);
if (res < 0)
goto failed;
return table;
-failed2:
- kfree(data);
failed:
kfree(table);
return ERR_PTR(res);
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index ac22fe7..3f6271d 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -30,7 +30,6 @@
#include "squashfs_fs_sb.h"
#include "decompressor.h"
#include "squashfs.h"
-#include "page_actor.h"
/*
* This file (and decompressor.h) implements a decompressor framework for
@@ -38,29 +37,29 @@
*/
static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = {
- NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
+ NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
};
#ifndef CONFIG_SQUASHFS_LZO
static const struct squashfs_decompressor squashfs_lzo_comp_ops = {
- NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
+ NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
};
#endif
#ifndef CONFIG_SQUASHFS_XZ
static const struct squashfs_decompressor squashfs_xz_comp_ops = {
- NULL, NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
+ NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
};
#endif
#ifndef CONFIG_SQUASHFS_ZLIB
static const struct squashfs_decompressor squashfs_zlib_comp_ops = {
- NULL, NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0
+ NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0
};
#endif
static const struct squashfs_decompressor squashfs_unknown_comp_ops = {
- NULL, NULL, NULL, NULL, 0, "unknown", 0
+ NULL, NULL, NULL, 0, "unknown", 0
};
static const struct squashfs_decompressor *decompressor[] = {
@@ -84,11 +83,10 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
}
-static void *get_comp_opts(struct super_block *sb, unsigned short flags)
+void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
- void *buffer = NULL, *comp_opts;
- struct squashfs_page_actor *actor = NULL;
+ void *strm, *buffer = NULL;
int length = 0;
/*
@@ -96,46 +94,23 @@ static void *get_comp_opts(struct super_block *sb, unsigned short flags)
*/
if (SQUASHFS_COMP_OPTS(flags)) {
buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
- if (buffer == NULL) {
- comp_opts = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- actor = squashfs_page_actor_init(&buffer, 1, 0);
- if (actor == NULL) {
- comp_opts = ERR_PTR(-ENOMEM);
- goto out;
- }
+ if (buffer == NULL)
+ return ERR_PTR(-ENOMEM);
- length = squashfs_read_data(sb,
- sizeof(struct squashfs_super_block), 0, NULL, actor);
+ length = squashfs_read_data(sb, &buffer,
+ sizeof(struct squashfs_super_block), 0, NULL,
+ PAGE_CACHE_SIZE, 1);
if (length < 0) {
- comp_opts = ERR_PTR(length);
- goto out;
+ strm = ERR_PTR(length);
+ goto finished;
}
}
- comp_opts = squashfs_comp_opts(msblk, buffer, length);
+ strm = msblk->decompressor->init(msblk, buffer, length);
-out:
- kfree(actor);
+finished:
kfree(buffer);
- return comp_opts;
-}
-
-
-void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags)
-{
- struct squashfs_sb_info *msblk = sb->s_fs_info;
- void *stream, *comp_opts = get_comp_opts(sb, flags);
-
- if (IS_ERR(comp_opts))
- return comp_opts;
-
- stream = squashfs_decompressor_create(msblk, comp_opts);
- if (IS_ERR(stream))
- kfree(comp_opts);
- return stream;
+ return strm;
}
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index af09853..330073e 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -24,22 +24,28 @@
*/
struct squashfs_decompressor {
- void *(*init)(struct squashfs_sb_info *, void *);
- void *(*comp_opts)(struct squashfs_sb_info *, void *, int);
+ void *(*init)(struct squashfs_sb_info *, void *, int);
void (*free)(void *);
- int (*decompress)(struct squashfs_sb_info *, void *,
- struct buffer_head **, int, int, int,
- struct squashfs_page_actor *);
+ int (*decompress)(struct squashfs_sb_info *, void **,
+ struct buffer_head **, int, int, int, int, int);
int id;
char *name;
int supported;
};
-static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk,
- void *buff, int length)
+static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
+ void *s)
{
- return msblk->decompressor->comp_opts ?
- msblk->decompressor->comp_opts(msblk, buff, length) : NULL;
+ if (msblk->decompressor)
+ msblk->decompressor->free(s);
+}
+
+static inline int squashfs_decompress(struct squashfs_sb_info *msblk,
+ void **buffer, struct buffer_head **bh, int b, int offset, int length,
+ int srclength, int pages)
+{
+ return msblk->decompressor->decompress(msblk, buffer, bh, b, offset,
+ length, srclength, pages);
}
#ifdef CONFIG_SQUASHFS_XZ
diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c
deleted file mode 100644
index d6008a6..0000000
--- a/fs/squashfs/decompressor_multi.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright (c) 2013
- * Minchan Kim <minchan@kernel.org>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- */
-#include <linux/types.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include <linux/buffer_head.h>
-#include <linux/sched.h>
-#include <linux/wait.h>
-#include <linux/cpumask.h>
-
-#include "squashfs_fs.h"
-#include "squashfs_fs_sb.h"
-#include "decompressor.h"
-#include "squashfs.h"
-
-/*
- * This file implements multi-threaded decompression in the
- * decompressor framework
- */
-
-
-/*
- * The reason that multiply two is that a CPU can request new I/O
- * while it is waiting previous request.
- */
-#define MAX_DECOMPRESSOR (num_online_cpus() * 2)
-
-
-int squashfs_max_decompressors(void)
-{
- return MAX_DECOMPRESSOR;
-}
-
-
-struct squashfs_stream {
- void *comp_opts;
- struct list_head strm_list;
- struct mutex mutex;
- int avail_decomp;
- wait_queue_head_t wait;
-};
-
-
-struct decomp_stream {
- void *stream;
- struct list_head list;
-};
-
-
-static void put_decomp_stream(struct decomp_stream *decomp_strm,
- struct squashfs_stream *stream)
-{
- mutex_lock(&stream->mutex);
- list_add(&decomp_strm->list, &stream->strm_list);
- mutex_unlock(&stream->mutex);
- wake_up(&stream->wait);
-}
-
-void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
- void *comp_opts)
-{
- struct squashfs_stream *stream;
- struct decomp_stream *decomp_strm = NULL;
- int err = -ENOMEM;
-
- stream = kzalloc(sizeof(*stream), GFP_KERNEL);
- if (!stream)
- goto out;
-
- stream->comp_opts = comp_opts;
- mutex_init(&stream->mutex);
- INIT_LIST_HEAD(&stream->strm_list);
- init_waitqueue_head(&stream->wait);
-
- /*
- * We should have a decompressor at least as default
- * so if we fail to allocate new decompressor dynamically,
- * we could always fall back to default decompressor and
- * file system works.
- */
- decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL);
- if (!decomp_strm)
- goto out;
-
- decomp_strm->stream = msblk->decompressor->init(msblk,
- stream->comp_opts);
- if (IS_ERR(decomp_strm->stream)) {
- err = PTR_ERR(decomp_strm->stream);
- goto out;
- }
-
- list_add(&decomp_strm->list, &stream->strm_list);
- stream->avail_decomp = 1;
- return stream;
-
-out:
- kfree(decomp_strm);
- kfree(stream);
- return ERR_PTR(err);
-}
-
-
-void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
-{
- struct squashfs_stream *stream = msblk->stream;
- if (stream) {
- struct decomp_stream *decomp_strm;
-
- while (!list_empty(&stream->strm_list)) {
- decomp_strm = list_entry(stream->strm_list.prev,
- struct decomp_stream, list);
- list_del(&decomp_strm->list);
- msblk->decompressor->free(decomp_strm->stream);
- kfree(decomp_strm);
- stream->avail_decomp--;
- }
- WARN_ON(stream->avail_decomp);
- kfree(stream->comp_opts);
- kfree(stream);
- }
-}
-
-
-static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk,
- struct squashfs_stream *stream)
-{
- struct decomp_stream *decomp_strm;
-
- while (1) {
- mutex_lock(&stream->mutex);
-
- /* There is available decomp_stream */
- if (!list_empty(&stream->strm_list)) {
- decomp_strm = list_entry(stream->strm_list.prev,
- struct decomp_stream, list);
- list_del(&decomp_strm->list);
- mutex_unlock(&stream->mutex);
- break;
- }
-
- /*
- * If there is no available decomp and already full,
- * let's wait for releasing decomp from other users.
- */
- if (stream->avail_decomp >= MAX_DECOMPRESSOR)
- goto wait;
-
- /* Let's allocate new decomp */
- decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL);
- if (!decomp_strm)
- goto wait;
-
- decomp_strm->stream = msblk->decompressor->init(msblk,
- stream->comp_opts);
- if (IS_ERR(decomp_strm->stream)) {
- kfree(decomp_strm);
- goto wait;
- }
-
- stream->avail_decomp++;
- WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR);
-
- mutex_unlock(&stream->mutex);
- break;
-wait:
- /*
- * If system memory is tough, let's for other's
- * releasing instead of hurting VM because it could
- * make page cache thrashing.
- */
- mutex_unlock(&stream->mutex);
- wait_event(stream->wait,
- !list_empty(&stream->strm_list));
- }
-
- return decomp_strm;
-}
-
-
-int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
- int b, int offset, int length, struct squashfs_page_actor *output)
-{
- int res;
- struct squashfs_stream *stream = msblk->stream;
- struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream);
- res = msblk->decompressor->decompress(msblk, decomp_stream->stream,
- bh, b, offset, length, output);
- put_decomp_stream(decomp_stream, stream);
- if (res < 0)
- ERROR("%s decompression failed, data probably corrupt\n",
- msblk->decompressor->name);
- return res;
-}
diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c
deleted file mode 100644
index 23a9c28..0000000
--- a/fs/squashfs/decompressor_multi_percpu.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2013
- * Phillip Lougher <phillip@squashfs.org.uk>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- */
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/percpu.h>
-#include <linux/buffer_head.h>
-
-#include "squashfs_fs.h"
-#include "squashfs_fs_sb.h"
-#include "decompressor.h"
-#include "squashfs.h"
-
-/*
- * This file implements multi-threaded decompression using percpu
- * variables, one thread per cpu core.
- */
-
-struct squashfs_stream {
- void *stream;
-};
-
-void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
- void *comp_opts)
-{
- struct squashfs_stream *stream;
- struct squashfs_stream __percpu *percpu;
- int err, cpu;
-
- percpu = alloc_percpu(struct squashfs_stream);
- if (percpu == NULL)
- return ERR_PTR(-ENOMEM);
-
- for_each_possible_cpu(cpu) {
- stream = per_cpu_ptr(percpu, cpu);
- stream->stream = msblk->decompressor->init(msblk, comp_opts);
- if (IS_ERR(stream->stream)) {
- err = PTR_ERR(stream->stream);
- goto out;
- }
- }
-
- kfree(comp_opts);
- return (__force void *) percpu;
-
-out:
- for_each_possible_cpu(cpu) {
- stream = per_cpu_ptr(percpu, cpu);
- if (!IS_ERR_OR_NULL(stream->stream))
- msblk->decompressor->free(stream->stream);
- }
- free_percpu(percpu);
- return ERR_PTR(err);
-}
-
-void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
-{
- struct squashfs_stream __percpu *percpu =
- (struct squashfs_stream __percpu *) msblk->stream;
- struct squashfs_stream *stream;
- int cpu;
-
- if (msblk->stream) {
- for_each_possible_cpu(cpu) {
- stream = per_cpu_ptr(percpu, cpu);
- msblk->decompressor->free(stream->stream);
- }
- free_percpu(percpu);
- }
-}
-
-int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
- int b, int offset, int length, struct squashfs_page_actor *output)
-{
- struct squashfs_stream __percpu *percpu =
- (struct squashfs_stream __percpu *) msblk->stream;
- struct squashfs_stream *stream = get_cpu_ptr(percpu);
- int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
- offset, length, output);
- put_cpu_ptr(stream);
-
- if (res < 0)
- ERROR("%s decompression failed, data probably corrupt\n",
- msblk->decompressor->name);
-
- return res;
-}
-
-int squashfs_max_decompressors(void)
-{
- return num_possible_cpus();
-}
diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c
deleted file mode 100644
index a6c7592..0000000
--- a/fs/squashfs/decompressor_single.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2013
- * Phillip Lougher <phillip@squashfs.org.uk>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- */
-
-#include <linux/types.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include <linux/buffer_head.h>
-
-#include "squashfs_fs.h"
-#include "squashfs_fs_sb.h"
-#include "decompressor.h"
-#include "squashfs.h"
-
-/*
- * This file implements single-threaded decompression in the
- * decompressor framework
- */
-
-struct squashfs_stream {
- void *stream;
- struct mutex mutex;
-};
-
-void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
- void *comp_opts)
-{
- struct squashfs_stream *stream;
- int err = -ENOMEM;
-
- stream = kmalloc(sizeof(*stream), GFP_KERNEL);
- if (stream == NULL)
- goto out;
-
- stream->stream = msblk->decompressor->init(msblk, comp_opts);
- if (IS_ERR(stream->stream)) {
- err = PTR_ERR(stream->stream);
- goto out;
- }
-
- kfree(comp_opts);
- mutex_init(&stream->mutex);
- return stream;
-
-out:
- kfree(stream);
- return ERR_PTR(err);
-}
-
-void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
-{
- struct squashfs_stream *stream = msblk->stream;
-
- if (stream) {
- msblk->decompressor->free(stream->stream);
- kfree(stream);
- }
-}
-
-int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
- int b, int offset, int length, struct squashfs_page_actor *output)
-{
- int res;
- struct squashfs_stream *stream = msblk->stream;
-
- mutex_lock(&stream->mutex);
- res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
- offset, length, output);
- mutex_unlock(&stream->mutex);
-
- if (res < 0)
- ERROR("%s decompression failed, data probably corrupt\n",
- msblk->decompressor->name);
-
- return res;
-}
-
-int squashfs_max_decompressors(void)
-{
- return 1;
-}
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index e5c9689..8ca62c2 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -370,15 +370,77 @@ static int read_blocklist(struct inode *inode, int index, u64 *block)
return le32_to_cpu(size);
}
-/* Copy data into page cache */
-void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
- int bytes, int offset)
+
+static int squashfs_readpage(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int bytes, i, offset = 0, sparse = 0;
+ struct squashfs_cache_entry *buffer = NULL;
void *pageaddr;
- int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
- int start_index = page->index & ~mask, end_index = start_index | mask;
+
+ int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+ int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
+ int start_index = page->index & ~mask;
+ int end_index = start_index | mask;
+ int file_end = i_size_read(inode) >> msblk->block_log;
+
+ TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
+ page->index, squashfs_i(inode)->start);
+
+ if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT))
+ goto out;
+
+ if (index < file_end || squashfs_i(inode)->fragment_block ==
+ SQUASHFS_INVALID_BLK) {
+ /*
+ * Reading a datablock from disk. Need to read block list
+ * to get location and block size.
+ */
+ u64 block = 0;
+ int bsize = read_blocklist(inode, index, &block);
+ if (bsize < 0)
+ goto error_out;
+
+ if (bsize == 0) { /* hole */
+ bytes = index == file_end ?
+ (i_size_read(inode) & (msblk->block_size - 1)) :
+ msblk->block_size;
+ sparse = 1;
+ } else {
+ /*
+ * Read and decompress datablock.
+ */
+ buffer = squashfs_get_datablock(inode->i_sb,
+ block, bsize);
+ if (buffer->error) {
+ ERROR("Unable to read page, block %llx, size %x"
+ "\n", block, bsize);
+ squashfs_cache_put(buffer);
+ goto error_out;
+ }
+ bytes = buffer->length;
+ }
+ } else {
+ /*
+ * Datablock is stored inside a fragment (tail-end packed
+ * block).
+ */
+ buffer = squashfs_get_fragment(inode->i_sb,
+ squashfs_i(inode)->fragment_block,
+ squashfs_i(inode)->fragment_size);
+
+ if (buffer->error) {
+ ERROR("Unable to read page, block %llx, size %x\n",
+ squashfs_i(inode)->fragment_block,
+ squashfs_i(inode)->fragment_size);
+ squashfs_cache_put(buffer);
+ goto error_out;
+ }
+ bytes = i_size_read(inode) & (msblk->block_size - 1);
+ offset = squashfs_i(inode)->fragment_offset;
+ }
/*
* Loop copying datablock into pages. As the datablock likely covers
@@ -389,7 +451,7 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
for (i = start_index; i <= end_index && bytes > 0; i++,
bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
struct page *push_page;
- int avail = buffer ? min_t(int, bytes, PAGE_CACHE_SIZE) : 0;
+ int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE);
TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);
@@ -413,75 +475,11 @@ skip_page:
if (i != page->index)
page_cache_release(push_page);
}
-}
-
-/* Read datablock stored packed inside a fragment (tail-end packed block) */
-static int squashfs_readpage_fragment(struct page *page)
-{
- struct inode *inode = page->mapping->host;
- struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
- squashfs_i(inode)->fragment_block,
- squashfs_i(inode)->fragment_size);
- int res = buffer->error;
-
- if (res)
- ERROR("Unable to read page, block %llx, size %x\n",
- squashfs_i(inode)->fragment_block,
- squashfs_i(inode)->fragment_size);
- else
- squashfs_copy_cache(page, buffer, i_size_read(inode) &
- (msblk->block_size - 1),
- squashfs_i(inode)->fragment_offset);
-
- squashfs_cache_put(buffer);
- return res;
-}
-static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
-{
- struct inode *inode = page->mapping->host;
- struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- int bytes = index == file_end ?
- (i_size_read(inode) & (msblk->block_size - 1)) :
- msblk->block_size;
+ if (!sparse)
+ squashfs_cache_put(buffer);
- squashfs_copy_cache(page, NULL, bytes, 0);
return 0;
-}
-
-static int squashfs_readpage(struct file *file, struct page *page)
-{
- struct inode *inode = page->mapping->host;
- struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
- int file_end = i_size_read(inode) >> msblk->block_log;
- int res;
- void *pageaddr;
-
- TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
- page->index, squashfs_i(inode)->start);
-
- if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT))
- goto out;
-
- if (index < file_end || squashfs_i(inode)->fragment_block ==
- SQUASHFS_INVALID_BLK) {
- u64 block = 0;
- int bsize = read_blocklist(inode, index, &block);
- if (bsize < 0)
- goto error_out;
-
- if (bsize == 0)
- res = squashfs_readpage_sparse(page, index, file_end);
- else
- res = squashfs_readpage_block(page, block, bsize);
- } else
- res = squashfs_readpage_fragment(page);
-
- if (!res)
- return 0;
error_out:
SetPageError(page);
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c
deleted file mode 100644
index f2310d2..0000000
--- a/fs/squashfs/file_cache.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2013
- * Phillip Lougher <phillip@squashfs.org.uk>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- */
-
-#include <linux/fs.h>
-#include <linux/vfs.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/pagemap.h>
-#include <linux/mutex.h>
-
-#include "squashfs_fs.h"
-#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
-#include "squashfs.h"
-
-/* Read separately compressed datablock and memcopy into page cache */
-int squashfs_readpage_block(struct page *page, u64 block, int bsize)
-{
- struct inode *i = page->mapping->host;
- struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
- block, bsize);
- int res = buffer->error;
-
- if (res)
- ERROR("Unable to read page, block %llx, size %x\n", block,
- bsize);
- else
- squashfs_copy_cache(page, buffer, buffer->length, 0);
-
- squashfs_cache_put(buffer);
- return res;
-}
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
deleted file mode 100644
index 62a0de6..0000000
--- a/fs/squashfs/file_direct.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright (c) 2013
- * Phillip Lougher <phillip@squashfs.org.uk>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- */
-
-#include <linux/fs.h>
-#include <linux/vfs.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/pagemap.h>
-#include <linux/mutex.h>
-
-#include "squashfs_fs.h"
-#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
-#include "squashfs.h"
-#include "page_actor.h"
-
-static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
- int pages, struct page **page);
-
-/* Read separately compressed datablock directly into page cache */
-int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
-
-{
- struct inode *inode = target_page->mapping->host;
- struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
-
- int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
- int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
- int start_index = target_page->index & ~mask;
- int end_index = start_index | mask;
- int i, n, pages, missing_pages, bytes, res = -ENOMEM;
- struct page **page;
- struct squashfs_page_actor *actor;
- void *pageaddr;
-
- if (end_index > file_end)
- end_index = file_end;
-
- pages = end_index - start_index + 1;
-
- page = kmalloc(sizeof(void *) * pages, GFP_KERNEL);
- if (page == NULL)
- return res;
-
- /*
- * Create a "page actor" which will kmap and kunmap the
- * page cache pages appropriately within the decompressor
- */
- actor = squashfs_page_actor_init_special(page, pages, 0);
- if (actor == NULL)
- goto out;
-
- /* Try to grab all the pages covered by the Squashfs block */
- for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) {
- page[i] = (n == target_page->index) ? target_page :
- grab_cache_page_nowait(target_page->mapping, n);
-
- if (page[i] == NULL) {
- missing_pages++;
- continue;
- }
-
- if (PageUptodate(page[i])) {
- unlock_page(page[i]);
- page_cache_release(page[i]);
- page[i] = NULL;
- missing_pages++;
- }
- }
-
- if (missing_pages) {
- /*
- * Couldn't get one or more pages, this page has either
- * been VM reclaimed, but others are still in the page cache
- * and uptodate, or we're racing with another thread in
- * squashfs_readpage also trying to grab them. Fall back to
- * using an intermediate buffer.
- */
- res = squashfs_read_cache(target_page, block, bsize, pages,
- page);
- if (res < 0)
- goto mark_errored;
-
- goto out;
- }
-
- /* Decompress directly into the page cache buffers */
- res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
- if (res < 0)
- goto mark_errored;
-
- /* Last page may have trailing bytes not filled */
- bytes = res % PAGE_CACHE_SIZE;
- if (bytes) {
- pageaddr = kmap_atomic(page[pages - 1]);
- memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
- kunmap_atomic(pageaddr);
- }
-
- /* Mark pages as uptodate, unlock and release */
- for (i = 0; i < pages; i++) {
- flush_dcache_page(page[i]);
- SetPageUptodate(page[i]);
- unlock_page(page[i]);
- if (page[i] != target_page)
- page_cache_release(page[i]);
- }
-
- kfree(actor);
- kfree(page);
-
- return 0;
-
-mark_errored:
- /* Decompression failed, mark pages as errored. Target_page is
- * dealt with by the caller
- */
- for (i = 0; i < pages; i++) {
- if (page[i] == NULL || page[i] == target_page)
- continue;
- flush_dcache_page(page[i]);
- SetPageError(page[i]);
- unlock_page(page[i]);
- page_cache_release(page[i]);
- }
-
-out:
- kfree(actor);
- kfree(page);
- return res;
-}
-
-
-static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
- int pages, struct page **page)
-{
- struct inode *i = target_page->mapping->host;
- struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
- block, bsize);
- int bytes = buffer->length, res = buffer->error, n, offset = 0;
- void *pageaddr;
-
- if (res) {
- ERROR("Unable to read page, block %llx, size %x\n", block,
- bsize);
- goto out;
- }
-
- for (n = 0; n < pages && bytes > 0; n++,
- bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
- int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
-
- if (page[n] == NULL)
- continue;
-
- pageaddr = kmap_atomic(page[n]);
- squashfs_copy_data(pageaddr, buffer, offset, avail);
- memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
- kunmap_atomic(pageaddr);
- flush_dcache_page(page[n]);
- SetPageUptodate(page[n]);
- unlock_page(page[n]);
- if (page[n] != target_page)
- page_cache_release(page[n]);
- }
-
-out:
- squashfs_cache_put(buffer);
- return res;
-}
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 244b9fb..00f4dfc 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -31,14 +31,13 @@
#include "squashfs_fs_sb.h"
#include "squashfs.h"
#include "decompressor.h"
-#include "page_actor.h"
struct squashfs_lzo {
void *input;
void *output;
};
-static void *lzo_init(struct squashfs_sb_info *msblk, void *buff)
+static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len)
{
int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
@@ -75,16 +74,22 @@ static void lzo_free(void *strm)
}
-static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
- struct buffer_head **bh, int b, int offset, int length,
- struct squashfs_page_actor *output)
+static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
+ struct buffer_head **bh, int b, int offset, int length, int srclength,
+ int pages)
{
- struct squashfs_lzo *stream = strm;
- void *buff = stream->input, *data;
+ struct squashfs_lzo *stream = msblk->stream;
+ void *buff = stream->input;
int avail, i, bytes = length, res;
- size_t out_len = output->length;
+ size_t out_len = srclength;
+
+ mutex_lock(&msblk->read_data_mutex);
for (i = 0; i < b; i++) {
+ wait_on_buffer(bh[i]);
+ if (!buffer_uptodate(bh[i]))
+ goto block_release;
+
avail = min(bytes, msblk->devblksize - offset);
memcpy(buff, bh[i]->b_data + offset, avail);
buff += avail;
@@ -99,24 +104,24 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
goto failed;
res = bytes = (int)out_len;
- data = squashfs_first_page(output);
- buff = stream->output;
- while (data) {
- if (bytes <= PAGE_CACHE_SIZE) {
- memcpy(data, buff, bytes);
- break;
- } else {
- memcpy(data, buff, PAGE_CACHE_SIZE);
- buff += PAGE_CACHE_SIZE;
- bytes -= PAGE_CACHE_SIZE;
- data = squashfs_next_page(output);
- }
+ for (i = 0, buff = stream->output; bytes && i < pages; i++) {
+ avail = min_t(int, bytes, PAGE_CACHE_SIZE);
+ memcpy(buffer[i], buff, avail);
+ buff += avail;
+ bytes -= avail;
}
- squashfs_finish_page(output);
+ mutex_unlock(&msblk->read_data_mutex);
return res;
+block_release:
+ for (; i < b; i++)
+ put_bh(bh[i]);
+
failed:
+ mutex_unlock(&msblk->read_data_mutex);
+
+ ERROR("lzo decompression failed, data probably corrupt\n");
return -EIO;
}
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
deleted file mode 100644
index 5a1c11f..0000000
--- a/fs/squashfs/page_actor.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2013
- * Phillip Lougher <phillip@squashfs.org.uk>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/pagemap.h>
-#include "page_actor.h"
-
-/*
- * This file contains implementations of page_actor for decompressing into
- * an intermediate buffer, and for decompressing directly into the
- * page cache.
- *
- * Calling code should avoid sleeping between calls to squashfs_first_page()
- * and squashfs_finish_page().
- */
-
-/* Implementation of page_actor for decompressing into intermediate buffer */
-static void *cache_first_page(struct squashfs_page_actor *actor)
-{
- actor->next_page = 1;
- return actor->buffer[0];
-}
-
-static void *cache_next_page(struct squashfs_page_actor *actor)
-{
- if (actor->next_page == actor->pages)
- return NULL;
-
- return actor->buffer[actor->next_page++];
-}
-
-static void cache_finish_page(struct squashfs_page_actor *actor)
-{
- /* empty */
-}
-
-struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
- int pages, int length)
-{
- struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
-
- if (actor == NULL)
- return NULL;
-
- actor->length = length ? : pages * PAGE_CACHE_SIZE;
- actor->buffer = buffer;
- actor->pages = pages;
- actor->next_page = 0;
- actor->squashfs_first_page = cache_first_page;
- actor->squashfs_next_page = cache_next_page;
- actor->squashfs_finish_page = cache_finish_page;
- return actor;
-}
-
-/* Implementation of page_actor for decompressing directly into page cache. */
-static void *direct_first_page(struct squashfs_page_actor *actor)
-{
- actor->next_page = 1;
- return actor->pageaddr = kmap_atomic(actor->page[0]);
-}
-
-static void *direct_next_page(struct squashfs_page_actor *actor)
-{
- if (actor->pageaddr)
- kunmap_atomic(actor->pageaddr);
-
- return actor->pageaddr = actor->next_page == actor->pages ? NULL :
- kmap_atomic(actor->page[actor->next_page++]);
-}
-
-static void direct_finish_page(struct squashfs_page_actor *actor)
-{
- if (actor->pageaddr)
- kunmap_atomic(actor->pageaddr);
-}
-
-struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page,
- int pages, int length)
-{
- struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
-
- if (actor == NULL)
- return NULL;
-
- actor->length = length ? : pages * PAGE_CACHE_SIZE;
- actor->page = page;
- actor->pages = pages;
- actor->next_page = 0;
- actor->pageaddr = NULL;
- actor->squashfs_first_page = direct_first_page;
- actor->squashfs_next_page = direct_next_page;
- actor->squashfs_finish_page = direct_finish_page;
- return actor;
-}
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
deleted file mode 100644
index 26dd820..0000000
--- a/fs/squashfs/page_actor.h
+++ /dev/null
@@ -1,81 +0,0 @@
-#ifndef PAGE_ACTOR_H
-#define PAGE_ACTOR_H
-/*
- * Copyright (c) 2013
- * Phillip Lougher <phillip@squashfs.org.uk>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- */
-
-#ifndef CONFIG_SQUASHFS_FILE_DIRECT
-struct squashfs_page_actor {
- void **page;
- int pages;
- int length;
- int next_page;
-};
-
-static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page,
- int pages, int length)
-{
- struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
-
- if (actor == NULL)
- return NULL;
-
- actor->length = length ? : pages * PAGE_CACHE_SIZE;
- actor->page = page;
- actor->pages = pages;
- actor->next_page = 0;
- return actor;
-}
-
-static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
-{
- actor->next_page = 1;
- return actor->page[0];
-}
-
-static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
-{
- return actor->next_page == actor->pages ? NULL :
- actor->page[actor->next_page++];
-}
-
-static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
-{
- /* empty */
-}
-#else
-struct squashfs_page_actor {
- union {
- void **buffer;
- struct page **page;
- };
- void *pageaddr;
- void *(*squashfs_first_page)(struct squashfs_page_actor *);
- void *(*squashfs_next_page)(struct squashfs_page_actor *);
- void (*squashfs_finish_page)(struct squashfs_page_actor *);
- int pages;
- int length;
- int next_page;
-};
-
-extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int);
-extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page
- **, int, int);
-static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
-{
- return actor->squashfs_first_page(actor);
-}
-static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
-{
- return actor->squashfs_next_page(actor);
-}
-static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
-{
- actor->squashfs_finish_page(actor);
-}
-#endif
-#endif
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 9e1bb79..d126651 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -28,8 +28,8 @@
#define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args)
/* block.c */
-extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
- struct squashfs_page_actor *);
+extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *,
+ int, int);
/* cache.c */
extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
@@ -48,14 +48,7 @@ extern void *squashfs_read_table(struct super_block *, u64, int);
/* decompressor.c */
extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
-extern void *squashfs_decompressor_setup(struct super_block *, unsigned short);
-
-/* decompressor_xxx.c */
-extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *);
-extern void squashfs_decompressor_destroy(struct squashfs_sb_info *);
-extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **,
- int, int, int, struct squashfs_page_actor *);
-extern int squashfs_max_decompressors(void);
+extern void *squashfs_decompressor_init(struct super_block *, unsigned short);
/* export.c */
extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64,
@@ -66,13 +59,6 @@ extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *);
extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
u64, u64, unsigned int);
-/* file.c */
-void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int,
- int);
-
-/* file_xxx.c */
-extern int squashfs_readpage_block(struct page *, u64, int);
-
/* id.c */
extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);
extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64,
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 1da565c..52934a2 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -50,7 +50,6 @@ struct squashfs_cache_entry {
wait_queue_head_t wait_queue;
struct squashfs_cache *cache;
void **data;
- struct squashfs_page_actor *actor;
};
struct squashfs_sb_info {
@@ -64,9 +63,10 @@ struct squashfs_sb_info {
__le64 *id_table;
__le64 *fragment_index;
__le64 *xattr_id_table;
+ struct mutex read_data_mutex;
struct mutex meta_index_mutex;
struct meta_index *meta_index;
- struct squashfs_stream *stream;
+ void *stream;
__le64 *inode_lookup_table;
u64 inode_table;
u64 directory_table;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 202df63..60553a9 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -98,6 +98,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
msblk->devblksize_log2 = ffz(~msblk->devblksize);
+ mutex_init(&msblk->read_data_mutex);
mutex_init(&msblk->meta_index_mutex);
/*
@@ -205,14 +206,13 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
/* Allocate read_page block */
- msblk->read_page = squashfs_cache_init("data",
- squashfs_max_decompressors(), msblk->block_size);
+ msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size);
if (msblk->read_page == NULL) {
ERROR("Failed to allocate read_page block\n");
goto failed_mount;
}
- msblk->stream = squashfs_decompressor_setup(sb, flags);
+ msblk->stream = squashfs_decompressor_init(sb, flags);
if (IS_ERR(msblk->stream)) {
err = PTR_ERR(msblk->stream);
msblk->stream = NULL;
@@ -336,7 +336,7 @@ failed_mount:
squashfs_cache_delete(msblk->block_cache);
squashfs_cache_delete(msblk->fragment_cache);
squashfs_cache_delete(msblk->read_page);
- squashfs_decompressor_destroy(msblk);
+ squashfs_decompressor_free(msblk, msblk->stream);
kfree(msblk->inode_lookup_table);
kfree(msblk->fragment_index);
kfree(msblk->id_table);
@@ -383,7 +383,7 @@ static void squashfs_put_super(struct super_block *sb)
squashfs_cache_delete(sbi->block_cache);
squashfs_cache_delete(sbi->fragment_cache);
squashfs_cache_delete(sbi->read_page);
- squashfs_decompressor_destroy(sbi);
+ squashfs_decompressor_free(sbi, sbi->stream);
kfree(sbi->id_table);
kfree(sbi->fragment_index);
kfree(sbi->meta_index);
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index c609624..1760b7d1 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -32,70 +32,44 @@
#include "squashfs_fs_sb.h"
#include "squashfs.h"
#include "decompressor.h"
-#include "page_actor.h"
struct squashfs_xz {
struct xz_dec *state;
struct xz_buf buf;
};
-struct disk_comp_opts {
+struct comp_opts {
__le32 dictionary_size;
__le32 flags;
};
-struct comp_opts {
- int dict_size;
-};
-
-static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk,
- void *buff, int len)
+static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff,
+ int len)
{
- struct disk_comp_opts *comp_opts = buff;
- struct comp_opts *opts;
- int err = 0, n;
-
- opts = kmalloc(sizeof(*opts), GFP_KERNEL);
- if (opts == NULL) {
- err = -ENOMEM;
- goto out2;
- }
+ struct comp_opts *comp_opts = buff;
+ struct squashfs_xz *stream;
+ int dict_size = msblk->block_size;
+ int err, n;
if (comp_opts) {
/* check compressor options are the expected length */
if (len < sizeof(*comp_opts)) {
err = -EIO;
- goto out;
+ goto failed;
}
- opts->dict_size = le32_to_cpu(comp_opts->dictionary_size);
+ dict_size = le32_to_cpu(comp_opts->dictionary_size);
/* the dictionary size should be 2^n or 2^n+2^(n+1) */
- n = ffs(opts->dict_size) - 1;
- if (opts->dict_size != (1 << n) && opts->dict_size != (1 << n) +
+ n = ffs(dict_size) - 1;
+ if (dict_size != (1 << n) && dict_size != (1 << n) +
(1 << (n + 1))) {
err = -EIO;
- goto out;
+ goto failed;
}
- } else
- /* use defaults */
- opts->dict_size = max_t(int, msblk->block_size,
- SQUASHFS_METADATA_SIZE);
-
- return opts;
-
-out:
- kfree(opts);
-out2:
- return ERR_PTR(err);
-}
-
+ }
-static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff)
-{
- struct comp_opts *comp_opts = buff;
- struct squashfs_xz *stream;
- int err;
+ dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE);
stream = kmalloc(sizeof(*stream), GFP_KERNEL);
if (stream == NULL) {
@@ -103,7 +77,7 @@ static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff)
goto failed;
}
- stream->state = xz_dec_init(XZ_PREALLOC, comp_opts->dict_size);
+ stream->state = xz_dec_init(XZ_PREALLOC, dict_size);
if (stream->state == NULL) {
kfree(stream);
err = -ENOMEM;
@@ -129,37 +103,42 @@ static void squashfs_xz_free(void *strm)
}
-static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
- struct buffer_head **bh, int b, int offset, int length,
- struct squashfs_page_actor *output)
+static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
+ struct buffer_head **bh, int b, int offset, int length, int srclength,
+ int pages)
{
enum xz_ret xz_err;
- int avail, total = 0, k = 0;
- struct squashfs_xz *stream = strm;
+ int avail, total = 0, k = 0, page = 0;
+ struct squashfs_xz *stream = msblk->stream;
+
+ mutex_lock(&msblk->read_data_mutex);
xz_dec_reset(stream->state);
stream->buf.in_pos = 0;
stream->buf.in_size = 0;
stream->buf.out_pos = 0;
stream->buf.out_size = PAGE_CACHE_SIZE;
- stream->buf.out = squashfs_first_page(output);
+ stream->buf.out = buffer[page++];
do {
if (stream->buf.in_pos == stream->buf.in_size && k < b) {
avail = min(length, msblk->devblksize - offset);
length -= avail;
+ wait_on_buffer(bh[k]);
+ if (!buffer_uptodate(bh[k]))
+ goto release_mutex;
+
stream->buf.in = bh[k]->b_data + offset;
stream->buf.in_size = avail;
stream->buf.in_pos = 0;
offset = 0;
}
- if (stream->buf.out_pos == stream->buf.out_size) {
- stream->buf.out = squashfs_next_page(output);
- if (stream->buf.out != NULL) {
- stream->buf.out_pos = 0;
- total += PAGE_CACHE_SIZE;
- }
+ if (stream->buf.out_pos == stream->buf.out_size
+ && page < pages) {
+ stream->buf.out = buffer[page++];
+ stream->buf.out_pos = 0;
+ total += PAGE_CACHE_SIZE;
}
xz_err = xz_dec_run(stream->state, &stream->buf);
@@ -168,14 +147,23 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
put_bh(bh[k++]);
} while (xz_err == XZ_OK);
- squashfs_finish_page(output);
+ if (xz_err != XZ_STREAM_END) {
+ ERROR("xz_dec_run error, data probably corrupt\n");
+ goto release_mutex;
+ }
+
+ if (k < b) {
+ ERROR("xz_uncompress error, input remaining\n");
+ goto release_mutex;
+ }
- if (xz_err != XZ_STREAM_END || k < b)
- goto out;
+ total += stream->buf.out_pos;
+ mutex_unlock(&msblk->read_data_mutex);
+ return total;
- return total + stream->buf.out_pos;
+release_mutex:
+ mutex_unlock(&msblk->read_data_mutex);
-out:
for (; k < b; k++)
put_bh(bh[k]);
@@ -184,7 +172,6 @@ out:
const struct squashfs_decompressor squashfs_xz_comp_ops = {
.init = squashfs_xz_init,
- .comp_opts = squashfs_xz_comp_opts,
.free = squashfs_xz_free,
.decompress = squashfs_xz_uncompress,
.id = XZ_COMPRESSION,
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 8727cab..55d918f 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -32,9 +32,8 @@
#include "squashfs_fs_sb.h"
#include "squashfs.h"
#include "decompressor.h"
-#include "page_actor.h"
-static void *zlib_init(struct squashfs_sb_info *dummy, void *buff)
+static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len)
{
z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL);
if (stream == NULL)
@@ -62,37 +61,44 @@ static void zlib_free(void *strm)
}
-static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
- struct buffer_head **bh, int b, int offset, int length,
- struct squashfs_page_actor *output)
+static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
+ struct buffer_head **bh, int b, int offset, int length, int srclength,
+ int pages)
{
- int zlib_err, zlib_init = 0, k = 0;
- z_stream *stream = strm;
+ int zlib_err, zlib_init = 0;
+ int k = 0, page = 0;
+ z_stream *stream = msblk->stream;
+
+ mutex_lock(&msblk->read_data_mutex);
- stream->avail_out = PAGE_CACHE_SIZE;
- stream->next_out = squashfs_first_page(output);
+ stream->avail_out = 0;
stream->avail_in = 0;
do {
if (stream->avail_in == 0 && k < b) {
int avail = min(length, msblk->devblksize - offset);
length -= avail;
+ wait_on_buffer(bh[k]);
+ if (!buffer_uptodate(bh[k]))
+ goto release_mutex;
+
stream->next_in = bh[k]->b_data + offset;
stream->avail_in = avail;
offset = 0;
}
- if (stream->avail_out == 0) {
- stream->next_out = squashfs_next_page(output);
- if (stream->next_out != NULL)
- stream->avail_out = PAGE_CACHE_SIZE;
+ if (stream->avail_out == 0 && page < pages) {
+ stream->next_out = buffer[page++];
+ stream->avail_out = PAGE_CACHE_SIZE;
}
if (!zlib_init) {
zlib_err = zlib_inflateInit(stream);
if (zlib_err != Z_OK) {
- squashfs_finish_page(output);
- goto out;
+ ERROR("zlib_inflateInit returned unexpected "
+ "result 0x%x, srclength %d\n",
+ zlib_err, srclength);
+ goto release_mutex;
}
zlib_init = 1;
}
@@ -103,21 +109,29 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
put_bh(bh[k++]);
} while (zlib_err == Z_OK);
- squashfs_finish_page(output);
-
- if (zlib_err != Z_STREAM_END)
- goto out;
+ if (zlib_err != Z_STREAM_END) {
+ ERROR("zlib_inflate error, data probably corrupt\n");
+ goto release_mutex;
+ }
zlib_err = zlib_inflateEnd(stream);
- if (zlib_err != Z_OK)
- goto out;
+ if (zlib_err != Z_OK) {
+ ERROR("zlib_inflate error, data probably corrupt\n");
+ goto release_mutex;
+ }
+
+ if (k < b) {
+ ERROR("zlib_uncompress error, data remaining\n");
+ goto release_mutex;
+ }
- if (k < b)
- goto out;
+ length = stream->total_out;
+ mutex_unlock(&msblk->read_data_mutex);
+ return length;
- return stream->total_out;
+release_mutex:
+ mutex_unlock(&msblk->read_data_mutex);
-out:
for (; k < b; k++)
put_bh(bh[k]);
diff --git a/fs/stat.c b/fs/stat.c
index ae0c3ce..d0ea7ef 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -37,21 +37,14 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
EXPORT_SYMBOL(generic_fillattr);
-/**
- * vfs_getattr_nosec - getattr without security checks
- * @path: file to get attributes from
- * @stat: structure to return attributes in
- *
- * Get attributes without calling security_inode_getattr.
- *
- * Currently the only caller other than vfs_getattr is internal to the
- * filehandle lookup code, which uses only the inode number and returns
- * no attributes to any user. Any other code probably wants
- * vfs_getattr.
- */
-int vfs_getattr_nosec(struct path *path, struct kstat *stat)
+int vfs_getattr(struct path *path, struct kstat *stat)
{
struct inode *inode = path->dentry->d_inode;
+ int retval;
+
+ retval = security_inode_getattr(path->mnt, path->dentry);
+ if (retval)
+ return retval;
if (inode->i_op->getattr)
return inode->i_op->getattr(path->mnt, path->dentry, stat);
@@ -60,18 +53,6 @@ int vfs_getattr_nosec(struct path *path, struct kstat *stat)
return 0;
}
-EXPORT_SYMBOL(vfs_getattr_nosec);
-
-int vfs_getattr(struct path *path, struct kstat *stat)
-{
- int retval;
-
- retval = security_inode_getattr(path->mnt, path->dentry);
- if (retval)
- return retval;
- return vfs_getattr_nosec(path, stat);
-}
-
EXPORT_SYMBOL(vfs_getattr);
int vfs_fstat(unsigned int fd, struct kstat *stat)
diff --git a/fs/super.c b/fs/super.c
index e5f6c2c..0225c20 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -129,24 +129,33 @@ static unsigned long super_cache_count(struct shrinker *shrink,
return total_objects;
}
-/**
- * destroy_super - frees a superblock
- * @s: superblock to free
- *
- * Frees a superblock.
- */
-static void destroy_super(struct super_block *s)
+static int init_sb_writers(struct super_block *s, struct file_system_type *type)
{
+ int err;
int i;
- list_lru_destroy(&s->s_dentry_lru);
- list_lru_destroy(&s->s_inode_lru);
+
+ for (i = 0; i < SB_FREEZE_LEVELS; i++) {
+ err = percpu_counter_init(&s->s_writers.counter[i], 0);
+ if (err < 0)
+ goto err_out;
+ lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
+ &type->s_writers_key[i], 0);
+ }
+ init_waitqueue_head(&s->s_writers.wait);
+ init_waitqueue_head(&s->s_writers.wait_unfrozen);
+ return 0;
+err_out:
+ while (--i >= 0)
+ percpu_counter_destroy(&s->s_writers.counter[i]);
+ return err;
+}
+
+static void destroy_sb_writers(struct super_block *s)
+{
+ int i;
+
for (i = 0; i < SB_FREEZE_LEVELS; i++)
percpu_counter_destroy(&s->s_writers.counter[i]);
- security_sb_free(s);
- WARN_ON(!list_empty(&s->s_mounts));
- kfree(s->s_subtype);
- kfree(s->s_options);
- kfree_rcu(s, rcu);
}
/**
@@ -161,74 +170,111 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
{
struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
static const struct super_operations default_op;
- int i;
- if (!s)
- return NULL;
+ if (s) {
+ if (security_sb_alloc(s))
+ goto out_free_sb;
- if (security_sb_alloc(s))
- goto fail;
+#ifdef CONFIG_SMP
+ s->s_files = alloc_percpu(struct list_head);
+ if (!s->s_files)
+ goto err_out;
+ else {
+ int i;
- for (i = 0; i < SB_FREEZE_LEVELS; i++) {
- if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0)
- goto fail;
- lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
- &type->s_writers_key[i], 0);
+ for_each_possible_cpu(i)
+ INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
+ }
+#else
+ INIT_LIST_HEAD(&s->s_files);
+#endif
+ if (init_sb_writers(s, type))
+ goto err_out;
+ s->s_flags = flags;
+ s->s_bdi = &default_backing_dev_info;
+ INIT_HLIST_NODE(&s->s_instances);
+ INIT_HLIST_BL_HEAD(&s->s_anon);
+ INIT_LIST_HEAD(&s->s_inodes);
+
+ if (list_lru_init(&s->s_dentry_lru))
+ goto err_out;
+ if (list_lru_init(&s->s_inode_lru))
+ goto err_out_dentry_lru;
+
+ INIT_LIST_HEAD(&s->s_mounts);
+ init_rwsem(&s->s_umount);
+ lockdep_set_class(&s->s_umount, &type->s_umount_key);
+ /*
+ * sget() can have s_umount recursion.
+ *
+ * When it cannot find a suitable sb, it allocates a new
+ * one (this one), and tries again to find a suitable old
+ * one.
+ *
+ * In case that succeeds, it will acquire the s_umount
+ * lock of the old one. Since these are clearly distinct
+ * locks, and this object isn't exposed yet, there's no
+ * risk of deadlocks.
+ *
+ * Annotate this by putting this lock in a different
+ * subclass.
+ */
+ down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
+ s->s_count = 1;
+ atomic_set(&s->s_active, 1);
+ mutex_init(&s->s_vfs_rename_mutex);
+ lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
+ mutex_init(&s->s_dquot.dqio_mutex);
+ mutex_init(&s->s_dquot.dqonoff_mutex);
+ init_rwsem(&s->s_dquot.dqptr_sem);
+ s->s_maxbytes = MAX_NON_LFS;
+ s->s_op = &default_op;
+ s->s_time_gran = 1000000000;
+ s->cleancache_poolid = -1;
+
+ s->s_shrink.seeks = DEFAULT_SEEKS;
+ s->s_shrink.scan_objects = super_cache_scan;
+ s->s_shrink.count_objects = super_cache_count;
+ s->s_shrink.batch = 1024;
+ s->s_shrink.flags = SHRINKER_NUMA_AWARE;
}
- init_waitqueue_head(&s->s_writers.wait);
- init_waitqueue_head(&s->s_writers.wait_unfrozen);
- s->s_flags = flags;
- s->s_bdi = &default_backing_dev_info;
- INIT_HLIST_NODE(&s->s_instances);
- INIT_HLIST_BL_HEAD(&s->s_anon);
- INIT_LIST_HEAD(&s->s_inodes);
-
- if (list_lru_init(&s->s_dentry_lru))
- goto fail;
- if (list_lru_init(&s->s_inode_lru))
- goto fail;
-
- INIT_LIST_HEAD(&s->s_mounts);
- init_rwsem(&s->s_umount);
- lockdep_set_class(&s->s_umount, &type->s_umount_key);
- /*
- * sget() can have s_umount recursion.
- *
- * When it cannot find a suitable sb, it allocates a new
- * one (this one), and tries again to find a suitable old
- * one.
- *
- * In case that succeeds, it will acquire the s_umount
- * lock of the old one. Since these are clearly distrinct
- * locks, and this object isn't exposed yet, there's no
- * risk of deadlocks.
- *
- * Annotate this by putting this lock in a different
- * subclass.
- */
- down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
- s->s_count = 1;
- atomic_set(&s->s_active, 1);
- mutex_init(&s->s_vfs_rename_mutex);
- lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
- mutex_init(&s->s_dquot.dqio_mutex);
- mutex_init(&s->s_dquot.dqonoff_mutex);
- init_rwsem(&s->s_dquot.dqptr_sem);
- s->s_maxbytes = MAX_NON_LFS;
- s->s_op = &default_op;
- s->s_time_gran = 1000000000;
- s->cleancache_poolid = -1;
-
- s->s_shrink.seeks = DEFAULT_SEEKS;
- s->s_shrink.scan_objects = super_cache_scan;
- s->s_shrink.count_objects = super_cache_count;
- s->s_shrink.batch = 1024;
- s->s_shrink.flags = SHRINKER_NUMA_AWARE;
+out:
return s;
-fail:
- destroy_super(s);
- return NULL;
+err_out_dentry_lru:
+ list_lru_destroy(&s->s_dentry_lru);
+err_out:
+ security_sb_free(s);
+#ifdef CONFIG_SMP
+ if (s->s_files)
+ free_percpu(s->s_files);
+#endif
+ destroy_sb_writers(s);
+out_free_sb:
+ kfree(s);
+ s = NULL;
+ goto out;
+}
+
+/**
+ * destroy_super - frees a superblock
+ * @s: superblock to free
+ *
+ * Frees a superblock.
+ */
+static inline void destroy_super(struct super_block *s)
+{
+ list_lru_destroy(&s->s_dentry_lru);
+ list_lru_destroy(&s->s_inode_lru);
+#ifdef CONFIG_SMP
+ free_percpu(s->s_files);
+#endif
+ destroy_sb_writers(s);
+ security_sb_free(s);
+ WARN_ON(!list_empty(&s->s_mounts));
+ kfree(s->s_subtype);
+ kfree(s->s_options);
+ kfree(s);
}
/* Superblock refcounting */
@@ -710,8 +756,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
make sure there are no rw files opened */
if (remount_ro) {
if (force) {
- sb->s_readonly_remount = 1;
- smp_wmb();
+ mark_files_ro(sb);
} else {
retval = sb_prepare_remount_readonly(sb);
if (retval)
diff --git a/fs/sync.c b/fs/sync.c
index f155374..905f3f6 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -27,11 +27,10 @@
* wait == 1 case since in that case write_inode() functions do
* sync_dirty_buffer() and thus effectively write one block at a time.
*/
-static int __sync_filesystem(struct super_block *sb, int wait,
- unsigned long start)
+static int __sync_filesystem(struct super_block *sb, int wait)
{
if (wait)
- sync_inodes_sb(sb, start);
+ sync_inodes_sb(sb);
else
writeback_inodes_sb(sb, WB_REASON_SYNC);
@@ -48,7 +47,6 @@ static int __sync_filesystem(struct super_block *sb, int wait,
int sync_filesystem(struct super_block *sb)
{
int ret;
- unsigned long start = jiffies;
/*
* We need to be protected against the filesystem going from
@@ -62,17 +60,17 @@ int sync_filesystem(struct super_block *sb)
if (sb->s_flags & MS_RDONLY)
return 0;
- ret = __sync_filesystem(sb, 0, start);
+ ret = __sync_filesystem(sb, 0);
if (ret < 0)
return ret;
- return __sync_filesystem(sb, 1, start);
+ return __sync_filesystem(sb, 1);
}
EXPORT_SYMBOL_GPL(sync_filesystem);
static void sync_inodes_one_sb(struct super_block *sb, void *arg)
{
if (!(sb->s_flags & MS_RDONLY))
- sync_inodes_sb(sb, *((unsigned long *)arg));
+ sync_inodes_sb(sb);
}
static void sync_fs_one_sb(struct super_block *sb, void *arg)
@@ -104,10 +102,9 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
SYSCALL_DEFINE0(sync)
{
int nowait = 0, wait = 1;
- unsigned long start = jiffies;
wakeup_flusher_threads(0, WB_REASON_SYNC);
- iterate_supers(sync_inodes_one_sb, &start);
+ iterate_supers(sync_inodes_one_sb, NULL);
iterate_supers(sync_fs_one_sb, &nowait);
iterate_supers(sync_fs_one_sb, &wait);
iterate_bdevs(fdatawrite_one_bdev, NULL);
@@ -180,7 +177,7 @@ SYSCALL_DEFINE1(syncfs, int, fd)
*/
int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
{
- if (!file->f_op->fsync)
+ if (!file->f_op || !file->f_op->fsync)
return -EINVAL;
return file->f_op->fsync(file, start, end, datasync);
}
diff --git a/fs/sysfs/Makefile b/fs/sysfs/Makefile
index 8876ac1..7a1ceb9 100644
--- a/fs/sysfs/Makefile
+++ b/fs/sysfs/Makefile
@@ -2,4 +2,5 @@
# Makefile for the sysfs virtual filesystem
#
-obj-y := inode.o file.o dir.o symlink.o mount.o group.o
+obj-y := inode.o file.o dir.o symlink.o mount.o bin.o \
+ group.o
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
new file mode 100644
index 0000000..c590cab
--- /dev/null
+++ b/fs/sysfs/bin.c
@@ -0,0 +1,502 @@
+/*
+ * fs/sysfs/bin.c - sysfs binary file implementation
+ *
+ * Copyright (c) 2003 Patrick Mochel
+ * Copyright (c) 2003 Matthew Wilcox
+ * Copyright (c) 2004 Silicon Graphics, Inc.
+ * Copyright (c) 2007 SUSE Linux Products GmbH
+ * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
+ *
+ * This file is released under the GPLv2.
+ *
+ * Please see Documentation/filesystems/sysfs.txt for more information.
+ */
+
+#undef DEBUG
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+
+#include "sysfs.h"
+
+/*
+ * There's one bin_buffer for each open file.
+ *
+ * filp->private_data points to bin_buffer and
+ * sysfs_dirent->s_bin_attr.buffers points to the list of bin_buffers.
+ * sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock
+ */
+static DEFINE_MUTEX(sysfs_bin_lock);
+
+struct bin_buffer {
+ struct mutex mutex;
+ void *buffer;
+ int mmapped;
+ const struct vm_operations_struct *vm_ops;
+ struct file *file;
+ struct hlist_node list;
+};
+
+static int
+fill_read(struct file *file, char *buffer, loff_t off, size_t count)
+{
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
+ struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
+ int rc;
+
+ /* need attr_sd for attr, its parent for kobj */
+ if (!sysfs_get_active(attr_sd))
+ return -ENODEV;
+
+ rc = -EIO;
+ if (attr->read)
+ rc = attr->read(file, kobj, attr, buffer, off, count);
+
+ sysfs_put_active(attr_sd);
+
+ return rc;
+}
+
+static ssize_t
+read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
+{
+ struct bin_buffer *bb = file->private_data;
+ int size = file_inode(file)->i_size;
+ loff_t offs = *off;
+ int count = min_t(size_t, bytes, PAGE_SIZE);
+ char *temp;
+
+ if (!bytes)
+ return 0;
+
+ if (size) {
+ if (offs > size)
+ return 0;
+ if (offs + count > size)
+ count = size - offs;
+ }
+
+ temp = kmalloc(count, GFP_KERNEL);
+ if (!temp)
+ return -ENOMEM;
+
+ mutex_lock(&bb->mutex);
+
+ count = fill_read(file, bb->buffer, offs, count);
+ if (count < 0) {
+ mutex_unlock(&bb->mutex);
+ goto out_free;
+ }
+
+ memcpy(temp, bb->buffer, count);
+
+ mutex_unlock(&bb->mutex);
+
+ if (copy_to_user(userbuf, temp, count)) {
+ count = -EFAULT;
+ goto out_free;
+ }
+
+ pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
+
+ *off = offs + count;
+
+ out_free:
+ kfree(temp);
+ return count;
+}
+
+static int
+flush_write(struct file *file, char *buffer, loff_t offset, size_t count)
+{
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
+ struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
+ int rc;
+
+ /* need attr_sd for attr, its parent for kobj */
+ if (!sysfs_get_active(attr_sd))
+ return -ENODEV;
+
+ rc = -EIO;
+ if (attr->write)
+ rc = attr->write(file, kobj, attr, buffer, offset, count);
+
+ sysfs_put_active(attr_sd);
+
+ return rc;
+}
+
+static ssize_t write(struct file *file, const char __user *userbuf,
+ size_t bytes, loff_t *off)
+{
+ struct bin_buffer *bb = file->private_data;
+ int size = file_inode(file)->i_size;
+ loff_t offs = *off;
+ int count = min_t(size_t, bytes, PAGE_SIZE);
+ char *temp;
+
+ if (!bytes)
+ return 0;
+
+ if (size) {
+ if (offs > size)
+ return 0;
+ if (offs + count > size)
+ count = size - offs;
+ }
+
+ temp = memdup_user(userbuf, count);
+ if (IS_ERR(temp))
+ return PTR_ERR(temp);
+
+ mutex_lock(&bb->mutex);
+
+ memcpy(bb->buffer, temp, count);
+
+ count = flush_write(file, bb->buffer, offs, count);
+ mutex_unlock(&bb->mutex);
+
+ if (count > 0)
+ *off = offs + count;
+
+ kfree(temp);
+ return count;
+}
+
+static void bin_vma_open(struct vm_area_struct *vma)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+
+ if (!bb->vm_ops)
+ return;
+
+ if (!sysfs_get_active(attr_sd))
+ return;
+
+ if (bb->vm_ops->open)
+ bb->vm_ops->open(vma);
+
+ sysfs_put_active(attr_sd);
+}
+
+static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ int ret;
+
+ if (!bb->vm_ops)
+ return VM_FAULT_SIGBUS;
+
+ if (!sysfs_get_active(attr_sd))
+ return VM_FAULT_SIGBUS;
+
+ ret = VM_FAULT_SIGBUS;
+ if (bb->vm_ops->fault)
+ ret = bb->vm_ops->fault(vma, vmf);
+
+ sysfs_put_active(attr_sd);
+ return ret;
+}
+
+static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ int ret;
+
+ if (!bb->vm_ops)
+ return VM_FAULT_SIGBUS;
+
+ if (!sysfs_get_active(attr_sd))
+ return VM_FAULT_SIGBUS;
+
+ ret = 0;
+ if (bb->vm_ops->page_mkwrite)
+ ret = bb->vm_ops->page_mkwrite(vma, vmf);
+ else
+ file_update_time(file);
+
+ sysfs_put_active(attr_sd);
+ return ret;
+}
+
+static int bin_access(struct vm_area_struct *vma, unsigned long addr,
+ void *buf, int len, int write)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ int ret;
+
+ if (!bb->vm_ops)
+ return -EINVAL;
+
+ if (!sysfs_get_active(attr_sd))
+ return -EINVAL;
+
+ ret = -EINVAL;
+ if (bb->vm_ops->access)
+ ret = bb->vm_ops->access(vma, addr, buf, len, write);
+
+ sysfs_put_active(attr_sd);
+ return ret;
+}
+
+#ifdef CONFIG_NUMA
+static int bin_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ int ret;
+
+ if (!bb->vm_ops)
+ return 0;
+
+ if (!sysfs_get_active(attr_sd))
+ return -EINVAL;
+
+ ret = 0;
+ if (bb->vm_ops->set_policy)
+ ret = bb->vm_ops->set_policy(vma, new);
+
+ sysfs_put_active(attr_sd);
+ return ret;
+}
+
+static struct mempolicy *bin_get_policy(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ struct mempolicy *pol;
+
+ if (!bb->vm_ops)
+ return vma->vm_policy;
+
+ if (!sysfs_get_active(attr_sd))
+ return vma->vm_policy;
+
+ pol = vma->vm_policy;
+ if (bb->vm_ops->get_policy)
+ pol = bb->vm_ops->get_policy(vma, addr);
+
+ sysfs_put_active(attr_sd);
+ return pol;
+}
+
+static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
+ const nodemask_t *to, unsigned long flags)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ int ret;
+
+ if (!bb->vm_ops)
+ return 0;
+
+ if (!sysfs_get_active(attr_sd))
+ return 0;
+
+ ret = 0;
+ if (bb->vm_ops->migrate)
+ ret = bb->vm_ops->migrate(vma, from, to, flags);
+
+ sysfs_put_active(attr_sd);
+ return ret;
+}
+#endif
+
+static const struct vm_operations_struct bin_vm_ops = {
+ .open = bin_vma_open,
+ .fault = bin_fault,
+ .page_mkwrite = bin_page_mkwrite,
+ .access = bin_access,
+#ifdef CONFIG_NUMA
+ .set_policy = bin_set_policy,
+ .get_policy = bin_get_policy,
+ .migrate = bin_migrate,
+#endif
+};
+
+static int mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
+ struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
+ int rc;
+
+ mutex_lock(&bb->mutex);
+
+ /* need attr_sd for attr, its parent for kobj */
+ rc = -ENODEV;
+ if (!sysfs_get_active(attr_sd))
+ goto out_unlock;
+
+ rc = -EINVAL;
+ if (!attr->mmap)
+ goto out_put;
+
+ rc = attr->mmap(file, kobj, attr, vma);
+ if (rc)
+ goto out_put;
+
+ /*
+ * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
+ * to satisfy versions of X which crash if the mmap fails: that
+ * substitutes a new vm_file, and we don't then want bin_vm_ops.
+ */
+ if (vma->vm_file != file)
+ goto out_put;
+
+ rc = -EINVAL;
+ if (bb->mmapped && bb->vm_ops != vma->vm_ops)
+ goto out_put;
+
+ /*
+ * It is not possible to successfully wrap close.
+ * So error if someone is trying to use close.
+ */
+ rc = -EINVAL;
+ if (vma->vm_ops && vma->vm_ops->close)
+ goto out_put;
+
+ rc = 0;
+ bb->mmapped = 1;
+ bb->vm_ops = vma->vm_ops;
+ vma->vm_ops = &bin_vm_ops;
+out_put:
+ sysfs_put_active(attr_sd);
+out_unlock:
+ mutex_unlock(&bb->mutex);
+
+ return rc;
+}
+
+static int open(struct inode *inode, struct file *file)
+{
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
+ struct bin_buffer *bb = NULL;
+ int error;
+
+ /* binary file operations require both @attr_sd and its parent */
+ if (!sysfs_get_active(attr_sd))
+ return -ENODEV;
+
+ error = -EACCES;
+ if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
+ goto err_out;
+ if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap))
+ goto err_out;
+
+ error = -ENOMEM;
+ bb = kzalloc(sizeof(*bb), GFP_KERNEL);
+ if (!bb)
+ goto err_out;
+
+ bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!bb->buffer)
+ goto err_out;
+
+ mutex_init(&bb->mutex);
+ bb->file = file;
+ file->private_data = bb;
+
+ mutex_lock(&sysfs_bin_lock);
+ hlist_add_head(&bb->list, &attr_sd->s_bin_attr.buffers);
+ mutex_unlock(&sysfs_bin_lock);
+
+ /* open succeeded, put active references */
+ sysfs_put_active(attr_sd);
+ return 0;
+
+ err_out:
+ sysfs_put_active(attr_sd);
+ kfree(bb);
+ return error;
+}
+
+static int release(struct inode *inode, struct file *file)
+{
+ struct bin_buffer *bb = file->private_data;
+
+ mutex_lock(&sysfs_bin_lock);
+ hlist_del(&bb->list);
+ mutex_unlock(&sysfs_bin_lock);
+
+ kfree(bb->buffer);
+ kfree(bb);
+ return 0;
+}
+
+const struct file_operations bin_fops = {
+ .read = read,
+ .write = write,
+ .mmap = mmap,
+ .llseek = generic_file_llseek,
+ .open = open,
+ .release = release,
+};
+
+
+void unmap_bin_file(struct sysfs_dirent *attr_sd)
+{
+ struct bin_buffer *bb;
+
+ if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR)
+ return;
+
+ mutex_lock(&sysfs_bin_lock);
+
+ hlist_for_each_entry(bb, &attr_sd->s_bin_attr.buffers, list) {
+ struct inode *inode = file_inode(bb->file);
+
+ unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+ }
+
+ mutex_unlock(&sysfs_bin_lock);
+}
+
+/**
+ * sysfs_create_bin_file - create binary file for object.
+ * @kobj: object.
+ * @attr: attribute descriptor.
+ */
+int sysfs_create_bin_file(struct kobject *kobj,
+ const struct bin_attribute *attr)
+{
+ BUG_ON(!kobj || !kobj->sd || !attr);
+
+ return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
+}
+EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
+
+/**
+ * sysfs_remove_bin_file - remove binary file for object.
+ * @kobj: object.
+ * @attr: attribute descriptor.
+ */
+void sysfs_remove_bin_file(struct kobject *kobj,
+ const struct bin_attribute *attr)
+{
+ sysfs_hash_and_remove(kobj->sd, NULL, attr->attr.name);
+}
+EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 5e73d66..4d83ced 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -26,21 +26,21 @@
#include "sysfs.h"
DEFINE_MUTEX(sysfs_mutex);
-DEFINE_SPINLOCK(sysfs_symlink_target_lock);
+DEFINE_SPINLOCK(sysfs_assoc_lock);
-#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb)
+#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb);
static DEFINE_SPINLOCK(sysfs_ino_lock);
static DEFINE_IDA(sysfs_ino_ida);
/**
* sysfs_name_hash
- * @name: Null terminated string to hash
* @ns: Namespace tag to hash
+ * @name: Null terminated string to hash
*
* Returns 31 bit hash of ns + name (so it fits in an off_t )
*/
-static unsigned int sysfs_name_hash(const char *name, const void *ns)
+static unsigned int sysfs_name_hash(const void *ns, const char *name)
{
unsigned long hash = init_name_hash();
unsigned int len = strlen(name);
@@ -56,8 +56,8 @@ static unsigned int sysfs_name_hash(const char *name, const void *ns)
return hash;
}
-static int sysfs_name_compare(unsigned int hash, const char *name,
- const void *ns, const struct sysfs_dirent *sd)
+static int sysfs_name_compare(unsigned int hash, const void *ns,
+ const char *name, const struct sysfs_dirent *sd)
{
if (hash != sd->s_hash)
return hash - sd->s_hash;
@@ -69,7 +69,7 @@ static int sysfs_name_compare(unsigned int hash, const char *name,
static int sysfs_sd_compare(const struct sysfs_dirent *left,
const struct sysfs_dirent *right)
{
- return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns,
+ return sysfs_name_compare(left->s_hash, left->s_ns, left->s_name,
right);
}
@@ -132,6 +132,24 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
}
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+/* Test for attributes that want to ignore lockdep for read-locking */
+static bool ignore_lockdep(struct sysfs_dirent *sd)
+{
+ return sysfs_type(sd) == SYSFS_KOBJ_ATTR &&
+ sd->s_attr.attr->ignore_lockdep;
+}
+
+#else
+
+static inline bool ignore_lockdep(struct sysfs_dirent *sd)
+{
+ return true;
+}
+
+#endif
+
/**
* sysfs_get_active - get an active reference to sysfs_dirent
* @sd: sysfs_dirent to get an active reference to
@@ -150,7 +168,7 @@ struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
if (!atomic_inc_unless_negative(&sd->s_active))
return NULL;
- if (likely(!sysfs_ignore_lockdep(sd)))
+ if (likely(!ignore_lockdep(sd)))
rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
return sd;
}
@@ -169,7 +187,7 @@ void sysfs_put_active(struct sysfs_dirent *sd)
if (unlikely(!sd))
return;
- if (likely(!sysfs_ignore_lockdep(sd)))
+ if (likely(!ignore_lockdep(sd)))
rwsem_release(&sd->dep_map, 1, _RET_IP_);
v = atomic_dec_return(&sd->s_active);
if (likely(v != SD_DEACTIVATED_BIAS))
@@ -382,19 +400,22 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
/**
* sysfs_addrm_start - prepare for sysfs_dirent add/remove
* @acxt: pointer to sysfs_addrm_cxt to be used
+ * @parent_sd: parent sysfs_dirent
*
- * This function is called when the caller is about to add or remove
- * sysfs_dirent. This function acquires sysfs_mutex. @acxt is used
- * to keep and pass context to other addrm functions.
+ * This function is called when the caller is about to add or
+ * remove sysfs_dirent under @parent_sd. This function acquires
+ * sysfs_mutex. @acxt is used to keep and pass context to
+ * other addrm functions.
*
* LOCKING:
* Kernel thread context (may sleep). sysfs_mutex is locked on
* return.
*/
-void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt)
- __acquires(sysfs_mutex)
+void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
+ struct sysfs_dirent *parent_sd)
{
memset(acxt, 0, sizeof(*acxt));
+ acxt->parent_sd = parent_sd;
mutex_lock(&sysfs_mutex);
}
@@ -403,11 +424,10 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt)
* __sysfs_add_one - add sysfs_dirent to parent without warning
* @acxt: addrm context to use
* @sd: sysfs_dirent to be added
- * @parent_sd: the parent sysfs_dirent to add @sd to
*
- * Get @parent_sd and set @sd->s_parent to it and increment nlink of
- * the parent inode if @sd is a directory and link into the children
- * list of the parent.
+ * Get @acxt->parent_sd and set sd->s_parent to it and increment
+ * nlink of parent inode if @sd is a directory and link into the
+ * children list of the parent.
*
* This function should be called between calls to
* sysfs_addrm_start() and sysfs_addrm_finish() and should be
@@ -420,28 +440,27 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt)
* 0 on success, -EEXIST if entry with the given name already
* exists.
*/
-int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
- struct sysfs_dirent *parent_sd)
+int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
struct sysfs_inode_attrs *ps_iattr;
int ret;
- if (!!sysfs_ns_type(parent_sd) != !!sd->s_ns) {
+ if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) {
WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
- sysfs_ns_type(parent_sd) ? "required" : "invalid",
- parent_sd->s_name, sd->s_name);
+ sysfs_ns_type(acxt->parent_sd) ? "required" : "invalid",
+ acxt->parent_sd->s_name, sd->s_name);
return -EINVAL;
}
- sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
- sd->s_parent = sysfs_get(parent_sd);
+ sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name);
+ sd->s_parent = sysfs_get(acxt->parent_sd);
ret = sysfs_link_sibling(sd);
if (ret)
return ret;
/* Update timestamps on the parent */
- ps_iattr = parent_sd->s_iattr;
+ ps_iattr = acxt->parent_sd->s_iattr;
if (ps_iattr) {
struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
@@ -471,32 +490,14 @@ static char *sysfs_pathname(struct sysfs_dirent *sd, char *path)
return path;
}
-void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name)
-{
- char *path;
-
- path = kzalloc(PATH_MAX, GFP_KERNEL);
- if (path) {
- sysfs_pathname(parent, path);
- strlcat(path, "/", PATH_MAX);
- strlcat(path, name, PATH_MAX);
- }
-
- WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s'\n",
- path ? path : name);
-
- kfree(path);
-}
-
/**
* sysfs_add_one - add sysfs_dirent to parent
* @acxt: addrm context to use
* @sd: sysfs_dirent to be added
- * @parent_sd: the parent sysfs_dirent to add @sd to
*
- * Get @parent_sd and set @sd->s_parent to it and increment nlink of
- * the parent inode if @sd is a directory and link into the children
- * list of the parent.
+ * Get @acxt->parent_sd and set sd->s_parent to it and increment
+ * nlink of parent inode if @sd is a directory and link into the
+ * children list of the parent.
*
* This function should be called between calls to
* sysfs_addrm_start() and sysfs_addrm_finish() and should be
@@ -509,15 +510,23 @@ void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name)
* 0 on success, -EEXIST if entry with the given name already
* exists.
*/
-int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
- struct sysfs_dirent *parent_sd)
+int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
int ret;
- ret = __sysfs_add_one(acxt, sd, parent_sd);
+ ret = __sysfs_add_one(acxt, sd);
+ if (ret == -EEXIST) {
+ char *path = kzalloc(PATH_MAX, GFP_KERNEL);
+ WARN(1, KERN_WARNING
+ "sysfs: cannot create duplicate filename '%s'\n",
+ (path == NULL) ? sd->s_name
+ : (sysfs_pathname(acxt->parent_sd, path),
+ strlcat(path, "/", PATH_MAX),
+ strlcat(path, sd->s_name, PATH_MAX),
+ path));
+ kfree(path);
+ }
- if (ret == -EEXIST)
- sysfs_warn_dup(parent_sd, sd->s_name);
return ret;
}
@@ -536,22 +545,16 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
* LOCKING:
* Determined by sysfs_addrm_start().
*/
-static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
- struct sysfs_dirent *sd)
+void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
struct sysfs_inode_attrs *ps_iattr;
- /*
- * Removal can be called multiple times on the same node. Only the
- * first invocation is effective and puts the base ref.
- */
- if (sd->s_flags & SYSFS_FLAG_REMOVED)
- return;
+ BUG_ON(sd->s_flags & SYSFS_FLAG_REMOVED);
sysfs_unlink_sibling(sd);
/* Update timestamps on the parent */
- ps_iattr = sd->s_parent->s_iattr;
+ ps_iattr = acxt->parent_sd->s_iattr;
if (ps_iattr) {
struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
@@ -574,7 +577,6 @@ static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
* sysfs_mutex is released.
*/
void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
- __releases(sysfs_mutex)
{
/* release resources acquired by sysfs_addrm_start() */
mutex_unlock(&sysfs_mutex);
@@ -586,7 +588,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
acxt->removed = sd->u.removed_list;
sysfs_deactivate(sd);
- sysfs_unmap_bin_file(sd);
+ unmap_bin_file(sd);
sysfs_put(sd);
}
}
@@ -595,7 +597,6 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
* sysfs_find_dirent - find sysfs_dirent with the given name
* @parent_sd: sysfs_dirent to search under
* @name: name to look for
- * @ns: the namespace tag to use
*
* Look for sysfs_dirent with name @name under @parent_sd.
*
@@ -606,8 +607,8 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
* Pointer to sysfs_dirent if found, NULL if not.
*/
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
- const unsigned char *name,
- const void *ns)
+ const void *ns,
+ const unsigned char *name)
{
struct rb_node *node = parent_sd->s_dir.children.rb_node;
unsigned int hash;
@@ -619,13 +620,13 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
return NULL;
}
- hash = sysfs_name_hash(name, ns);
+ hash = sysfs_name_hash(ns, name);
while (node) {
struct sysfs_dirent *sd;
int result;
sd = to_sysfs_dirent(node);
- result = sysfs_name_compare(hash, name, ns, sd);
+ result = sysfs_name_compare(hash, ns, name, sd);
if (result < 0)
node = node->rb_left;
else if (result > 0)
@@ -637,10 +638,9 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
}
/**
- * sysfs_get_dirent_ns - find and get sysfs_dirent with the given name
+ * sysfs_get_dirent - find and get sysfs_dirent with the given name
* @parent_sd: sysfs_dirent to search under
* @name: name to look for
- * @ns: the namespace tag to use
*
* Look for sysfs_dirent with name @name under @parent_sd and get
* it if found.
@@ -651,25 +651,24 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
* RETURNS:
* Pointer to sysfs_dirent if found, NULL if not.
*/
-struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd,
- const unsigned char *name,
- const void *ns)
+struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
+ const void *ns,
+ const unsigned char *name)
{
struct sysfs_dirent *sd;
mutex_lock(&sysfs_mutex);
- sd = sysfs_find_dirent(parent_sd, name, ns);
+ sd = sysfs_find_dirent(parent_sd, ns, name);
sysfs_get(sd);
mutex_unlock(&sysfs_mutex);
return sd;
}
-EXPORT_SYMBOL_GPL(sysfs_get_dirent_ns);
+EXPORT_SYMBOL_GPL(sysfs_get_dirent);
static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
- enum kobj_ns_type type,
- const char *name, const void *ns,
- struct sysfs_dirent **p_sd)
+ enum kobj_ns_type type, const void *ns, const char *name,
+ struct sysfs_dirent **p_sd)
{
umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
struct sysfs_addrm_cxt acxt;
@@ -686,8 +685,8 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
sd->s_dir.kobj = kobj;
/* link in */
- sysfs_addrm_start(&acxt);
- rc = sysfs_add_one(&acxt, sd, parent_sd);
+ sysfs_addrm_start(&acxt, parent_sd);
+ rc = sysfs_add_one(&acxt, sd);
sysfs_addrm_finish(&acxt);
if (rc == 0)
@@ -702,7 +701,7 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name,
struct sysfs_dirent **p_sd)
{
return create_dir(kobj, kobj->sd,
- KOBJ_NS_TYPE_NONE, name, NULL, p_sd);
+ KOBJ_NS_TYPE_NONE, NULL, name, p_sd);
}
/**
@@ -731,14 +730,14 @@ static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj)
}
/**
- * sysfs_create_dir_ns - create a directory for an object with a namespace tag
- * @kobj: object we're creating directory for
- * @ns: the namespace tag to use
+ * sysfs_create_dir - create a directory for an object.
+ * @kobj: object we're creating directory for.
*/
-int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
+int sysfs_create_dir(struct kobject *kobj)
{
enum kobj_ns_type type;
struct sysfs_dirent *parent_sd, *sd;
+ const void *ns = NULL;
int error = 0;
BUG_ON(!kobj);
@@ -751,9 +750,11 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
if (!parent_sd)
return -ENOENT;
+ if (sysfs_ns_type(parent_sd))
+ ns = kobj->ktype->namespace(kobj);
type = sysfs_read_ns_type(kobj);
- error = create_dir(kobj, parent_sd, type, kobject_name(kobj), ns, &sd);
+ error = create_dir(kobj, parent_sd, type, ns, kobject_name(kobj), &sd);
if (!error)
kobj->sd = sd;
return error;
@@ -775,7 +776,7 @@ static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry,
type = sysfs_ns_type(parent_sd);
ns = sysfs_info(dir->i_sb)->ns[type];
- sd = sysfs_find_dirent(parent_sd, dentry->d_name.name, ns);
+ sd = sysfs_find_dirent(parent_sd, ns, dentry->d_name.name);
/* no such entry */
if (!sd) {
@@ -806,128 +807,41 @@ const struct inode_operations sysfs_dir_inode_operations = {
.setxattr = sysfs_setxattr,
};
-static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos)
+static void remove_dir(struct sysfs_dirent *sd)
{
- struct sysfs_dirent *last;
-
- while (true) {
- struct rb_node *rbn;
-
- last = pos;
-
- if (sysfs_type(pos) != SYSFS_DIR)
- break;
-
- rbn = rb_first(&pos->s_dir.children);
- if (!rbn)
- break;
-
- pos = to_sysfs_dirent(rbn);
- }
+ struct sysfs_addrm_cxt acxt;
- return last;
+ sysfs_addrm_start(&acxt, sd->s_parent);
+ sysfs_remove_one(&acxt, sd);
+ sysfs_addrm_finish(&acxt);
}
-/**
- * sysfs_next_descendant_post - find the next descendant for post-order walk
- * @pos: the current position (%NULL to initiate traversal)
- * @root: sysfs_dirent whose descendants to walk
- *
- * Find the next descendant to visit for post-order traversal of @root's
- * descendants. @root is included in the iteration and the last node to be
- * visited.
- */
-static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos,
- struct sysfs_dirent *root)
+void sysfs_remove_subdir(struct sysfs_dirent *sd)
{
- struct rb_node *rbn;
-
- lockdep_assert_held(&sysfs_mutex);
-
- /* if first iteration, visit leftmost descendant which may be root */
- if (!pos)
- return sysfs_leftmost_descendant(root);
-
- /* if we visited @root, we're done */
- if (pos == root)
- return NULL;
-
- /* if there's an unvisited sibling, visit its leftmost descendant */
- rbn = rb_next(&pos->s_rb);
- if (rbn)
- return sysfs_leftmost_descendant(to_sysfs_dirent(rbn));
-
- /* no sibling left, visit parent */
- return pos->s_parent;
+ remove_dir(sd);
}
-static void __sysfs_remove(struct sysfs_addrm_cxt *acxt,
- struct sysfs_dirent *sd)
-{
- struct sysfs_dirent *pos, *next;
-
- if (!sd)
- return;
- pr_debug("sysfs %s: removing\n", sd->s_name);
-
- next = NULL;
- do {
- pos = next;
- next = sysfs_next_descendant_post(pos, sd);
- if (pos)
- sysfs_remove_one(acxt, pos);
- } while (next);
-}
-
-/**
- * sysfs_remove - remove a sysfs_dirent recursively
- * @sd: the sysfs_dirent to remove
- *
- * Remove @sd along with all its subdirectories and files.
- */
-void sysfs_remove(struct sysfs_dirent *sd)
+static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
{
struct sysfs_addrm_cxt acxt;
+ struct rb_node *pos;
- sysfs_addrm_start(&acxt);
- __sysfs_remove(&acxt, sd);
- sysfs_addrm_finish(&acxt);
-}
-
-/**
- * sysfs_hash_and_remove - find a sysfs_dirent by name and remove it
- * @dir_sd: parent of the target
- * @name: name of the sysfs_dirent to remove
- * @ns: namespace tag of the sysfs_dirent to remove
- *
- * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove
- * it. Returns 0 on success, -ENOENT if such entry doesn't exist.
- */
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
- const void *ns)
-{
- struct sysfs_addrm_cxt acxt;
- struct sysfs_dirent *sd;
+ if (!dir_sd)
+ return;
- if (!dir_sd) {
- WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n",
- name);
- return -ENOENT;
+ pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
+ sysfs_addrm_start(&acxt, dir_sd);
+ pos = rb_first(&dir_sd->s_dir.children);
+ while (pos) {
+ struct sysfs_dirent *sd = to_sysfs_dirent(pos);
+ pos = rb_next(pos);
+ if (sysfs_type(sd) != SYSFS_DIR)
+ sysfs_remove_one(&acxt, sd);
}
-
- sysfs_addrm_start(&acxt);
-
- sd = sysfs_find_dirent(dir_sd, name, ns);
- if (sd)
- __sysfs_remove(&acxt, sd);
-
sysfs_addrm_finish(&acxt);
- if (sd)
- return 0;
- else
- return -ENOENT;
+ remove_dir(dir_sd);
}
/**
@@ -938,34 +852,21 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
* the directory before we remove the directory, and we've inlined
* what used to be sysfs_rmdir() below, instead of calling separately.
*/
+
void sysfs_remove_dir(struct kobject *kobj)
{
struct sysfs_dirent *sd = kobj->sd;
- /*
- * In general, kboject owner is responsible for ensuring removal
- * doesn't race with other operations and sysfs doesn't provide any
- * protection; however, when @kobj is used as a symlink target, the
- * symlinking entity usually doesn't own @kobj and thus has no
- * control over removal. @kobj->sd may be removed anytime and
- * symlink code may end up dereferencing an already freed sd.
- *
- * sysfs_symlink_target_lock synchronizes @kobj->sd disassociation
- * against symlink operations so that symlink code can safely
- * dereference @kobj->sd.
- */
- spin_lock(&sysfs_symlink_target_lock);
+ spin_lock(&sysfs_assoc_lock);
kobj->sd = NULL;
- spin_unlock(&sysfs_symlink_target_lock);
+ spin_unlock(&sysfs_assoc_lock);
- if (sd) {
- WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR);
- sysfs_remove(sd);
- }
+ __sysfs_remove_dir(sd);
}
-int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
- const char *new_name, const void *new_ns)
+int sysfs_rename(struct sysfs_dirent *sd,
+ struct sysfs_dirent *new_parent_sd, const void *new_ns,
+ const char *new_name)
{
int error;
@@ -977,7 +878,7 @@ int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
goto out; /* nothing to rename */
error = -EEXIST;
- if (sysfs_find_dirent(new_parent_sd, new_name, new_ns))
+ if (sysfs_find_dirent(new_parent_sd, new_ns, new_name))
goto out;
/* rename sysfs_dirent */
@@ -998,7 +899,7 @@ int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
sysfs_get(new_parent_sd);
sysfs_put(sd->s_parent);
sd->s_ns = new_ns;
- sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
+ sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name);
sd->s_parent = new_parent_sd;
sysfs_link_sibling(sd);
@@ -1008,25 +909,30 @@ int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
return error;
}
-int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name,
- const void *new_ns)
+int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
{
struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
+ const void *new_ns = NULL;
+
+ if (sysfs_ns_type(parent_sd))
+ new_ns = kobj->ktype->namespace(kobj);
- return sysfs_rename(kobj->sd, parent_sd, new_name, new_ns);
+ return sysfs_rename(kobj->sd, parent_sd, new_ns, new_name);
}
-int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj,
- const void *new_ns)
+int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
{
struct sysfs_dirent *sd = kobj->sd;
struct sysfs_dirent *new_parent_sd;
+ const void *new_ns = NULL;
BUG_ON(!sd->s_parent);
+ if (sysfs_ns_type(sd->s_parent))
+ new_ns = kobj->ktype->namespace(kobj);
new_parent_sd = new_parent_kobj && new_parent_kobj->sd ?
new_parent_kobj->sd : &sysfs_root;
- return sysfs_rename(sd, new_parent_sd, sd->s_name, new_ns);
+ return sysfs_rename(sd, new_parent_sd, new_ns, sd->s_name);
}
/* Relationship between s_mode and the DT_xxx types */
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index b94f936..15ef5eb 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -21,114 +21,70 @@
#include <linux/mutex.h>
#include <linux/limits.h>
#include <linux/uaccess.h>
-#include <linux/seq_file.h>
-#include <linux/mm.h>
#include "sysfs.h"
/*
- * There's one sysfs_open_file for each open file and one sysfs_open_dirent
- * for each sysfs_dirent with one or more open files.
+ * There's one sysfs_buffer for each open file and one
+ * sysfs_open_dirent for each sysfs_dirent with one or more open
+ * files.
*
- * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open is
- * protected by sysfs_open_dirent_lock.
- *
- * filp->private_data points to seq_file whose ->private points to
- * sysfs_open_file. sysfs_open_files are chained at
- * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex.
+ * filp->private_data points to sysfs_buffer and
+ * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open
+ * is protected by sysfs_open_dirent_lock.
*/
static DEFINE_SPINLOCK(sysfs_open_dirent_lock);
-static DEFINE_MUTEX(sysfs_open_file_mutex);
struct sysfs_open_dirent {
atomic_t refcnt;
atomic_t event;
wait_queue_head_t poll;
- struct list_head files; /* goes through sysfs_open_file.list */
+ struct list_head buffers; /* goes through sysfs_buffer.list */
};
-struct sysfs_open_file {
- struct sysfs_dirent *sd;
- struct file *file;
+struct sysfs_buffer {
+ size_t count;
+ loff_t pos;
+ char *page;
+ const struct sysfs_ops *ops;
struct mutex mutex;
+ int needs_read_fill;
int event;
struct list_head list;
-
- bool mmapped;
- const struct vm_operations_struct *vm_ops;
};
-static bool sysfs_is_bin(struct sysfs_dirent *sd)
-{
- return sysfs_type(sd) == SYSFS_KOBJ_BIN_ATTR;
-}
-
-static struct sysfs_open_file *sysfs_of(struct file *file)
-{
- return ((struct seq_file *)file->private_data)->private;
-}
-
-/*
- * Determine ktype->sysfs_ops for the given sysfs_dirent. This function
- * must be called while holding an active reference.
- */
-static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd)
-{
- struct kobject *kobj = sd->s_parent->s_dir.kobj;
-
- if (!sysfs_ignore_lockdep(sd))
- lockdep_assert_held(sd);
- return kobj->ktype ? kobj->ktype->sysfs_ops : NULL;
-}
-
-/*
- * Reads on sysfs are handled through seq_file, which takes care of hairy
- * details like buffering and seeking. The following function pipes
- * sysfs_ops->show() result through seq_file.
+/**
+ * fill_read_buffer - allocate and fill buffer from object.
+ * @dentry: dentry pointer.
+ * @buffer: data buffer for file.
+ *
+ * Allocate @buffer->page, if it hasn't been already, then call the
+ * kobject's show() method to fill the buffer with this attribute's
+ * data.
+ * This is called only once, on the file's first read unless an error
+ * is returned.
*/
-static int sysfs_seq_show(struct seq_file *sf, void *v)
+static int fill_read_buffer(struct dentry *dentry, struct sysfs_buffer *buffer)
{
- struct sysfs_open_file *of = sf->private;
- struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
- const struct sysfs_ops *ops;
- char *buf;
+ struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+ struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
+ const struct sysfs_ops *ops = buffer->ops;
+ int ret = 0;
ssize_t count;
- /* acquire buffer and ensure that it's >= PAGE_SIZE */
- count = seq_get_buf(sf, &buf);
- if (count < PAGE_SIZE) {
- seq_commit(sf, -1);
- return 0;
- }
+ if (!buffer->page)
+ buffer->page = (char *) get_zeroed_page(GFP_KERNEL);
+ if (!buffer->page)
+ return -ENOMEM;
- /*
- * Need @of->sd for attr and ops, its parent for kobj. @of->mutex
- * nests outside active ref and is just to ensure that the ops
- * aren't called concurrently for the same open file.
- */
- mutex_lock(&of->mutex);
- if (!sysfs_get_active(of->sd)) {
- mutex_unlock(&of->mutex);
+ /* need attr_sd for attr and ops, its parent for kobj */
+ if (!sysfs_get_active(attr_sd))
return -ENODEV;
- }
-
- of->event = atomic_read(&of->sd->s_attr.open->event);
-
- /*
- * Lookup @ops and invoke show(). Control may reach here via seq
- * file lseek even if @ops->show() isn't implemented.
- */
- ops = sysfs_file_ops(of->sd);
- if (ops->show)
- count = ops->show(kobj, of->sd->s_attr.attr, buf);
- else
- count = 0;
- sysfs_put_active(of->sd);
- mutex_unlock(&of->mutex);
+ buffer->event = atomic_read(&attr_sd->s_attr.open->event);
+ count = ops->show(kobj, attr_sd->s_attr.attr, buffer->page);
- if (count < 0)
- return count;
+ sysfs_put_active(attr_sd);
/*
* The code works fine with PAGE_SIZE return but it's likely to
@@ -140,389 +96,155 @@ static int sysfs_seq_show(struct seq_file *sf, void *v)
/* Try to struggle along */
count = PAGE_SIZE - 1;
}
- seq_commit(sf, count);
- return 0;
+ if (count >= 0) {
+ buffer->needs_read_fill = 0;
+ buffer->count = count;
+ } else {
+ ret = count;
+ }
+ return ret;
}
-/*
- * Read method for bin files. As reading a bin file can have side-effects,
- * the exact offset and bytes specified in read(2) call should be passed to
- * the read callback making it difficult to use seq_file. Implement
- * simplistic custom buffering for bin files.
+/**
+ * sysfs_read_file - read an attribute.
+ * @file: file pointer.
+ * @buf: buffer to fill.
+ * @count: number of bytes to read.
+ * @ppos: starting offset in file.
+ *
+ * Userspace wants to read an attribute file. The attribute descriptor
+ * is in the file's ->d_fsdata. The target object is in the directory's
+ * ->d_fsdata.
+ *
+ * We call fill_read_buffer() to allocate and fill the buffer from the
+ * object's show() method exactly once (if the read is happening from
+ * the beginning of the file). That should fill the entire buffer with
+ * all the data the object has to offer for that attribute.
+ * We then call flush_read_buffer() to copy the buffer to userspace
+ * in the increments specified.
*/
-static ssize_t sysfs_bin_read(struct file *file, char __user *userbuf,
- size_t bytes, loff_t *off)
-{
- struct sysfs_open_file *of = sysfs_of(file);
- struct bin_attribute *battr = of->sd->s_attr.bin_attr;
- struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
- loff_t size = file_inode(file)->i_size;
- int count = min_t(size_t, bytes, PAGE_SIZE);
- loff_t offs = *off;
- char *buf;
-
- if (!bytes)
- return 0;
-
- if (size) {
- if (offs > size)
- return 0;
- if (offs + count > size)
- count = size - offs;
- }
-
- buf = kmalloc(count, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
- /* need of->sd for battr, its parent for kobj */
- mutex_lock(&of->mutex);
- if (!sysfs_get_active(of->sd)) {
- count = -ENODEV;
- mutex_unlock(&of->mutex);
- goto out_free;
- }
-
- if (battr->read)
- count = battr->read(file, kobj, battr, buf, offs, count);
- else
- count = -EIO;
-
- sysfs_put_active(of->sd);
- mutex_unlock(&of->mutex);
-
- if (count < 0)
- goto out_free;
+static ssize_t
+sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+ struct sysfs_buffer *buffer = file->private_data;
+ ssize_t retval = 0;
- if (copy_to_user(userbuf, buf, count)) {
- count = -EFAULT;
- goto out_free;
+ mutex_lock(&buffer->mutex);
+ if (buffer->needs_read_fill || *ppos == 0) {
+ retval = fill_read_buffer(file->f_path.dentry, buffer);
+ if (retval)
+ goto out;
}
+ pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
+ __func__, count, *ppos, buffer->page);
+ retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
+ buffer->count);
+out:
+ mutex_unlock(&buffer->mutex);
+ return retval;
+}
- pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
+/**
+ * fill_write_buffer - copy buffer from userspace.
+ * @buffer: data buffer for file.
+ * @buf: data from user.
+ * @count: number of bytes in @userbuf.
+ *
+ * Allocate @buffer->page if it hasn't been already, then
+ * copy the user-supplied buffer into it.
+ */
+static int fill_write_buffer(struct sysfs_buffer *buffer,
+ const char __user *buf, size_t count)
+{
+ int error;
- *off = offs + count;
+ if (!buffer->page)
+ buffer->page = (char *)get_zeroed_page(GFP_KERNEL);
+ if (!buffer->page)
+ return -ENOMEM;
- out_free:
- kfree(buf);
- return count;
+ if (count >= PAGE_SIZE)
+ count = PAGE_SIZE - 1;
+ error = copy_from_user(buffer->page, buf, count);
+ buffer->needs_read_fill = 1;
+ /* if buf is assumed to contain a string, terminate it by \0,
+ so e.g. sscanf() can scan the string easily */
+ buffer->page[count] = 0;
+ return error ? -EFAULT : count;
}
+
/**
- * flush_write_buffer - push buffer to kobject
- * @of: open file
- * @buf: data buffer for file
- * @off: file offset to write to
- * @count: number of bytes
+ * flush_write_buffer - push buffer to kobject.
+ * @dentry: dentry to the attribute
+ * @buffer: data buffer for file.
+ * @count: number of bytes
*
- * Get the correct pointers for the kobject and the attribute we're dealing
- * with, then call the store() method for it with @buf.
+ * Get the correct pointers for the kobject and the attribute we're
+ * dealing with, then call the store() method for the attribute,
+ * passing the buffer that we acquired in fill_write_buffer().
*/
-static int flush_write_buffer(struct sysfs_open_file *of, char *buf, loff_t off,
- size_t count)
+static int flush_write_buffer(struct dentry *dentry,
+ struct sysfs_buffer *buffer, size_t count)
{
- struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
- int rc = 0;
+ struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+ struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
+ const struct sysfs_ops *ops = buffer->ops;
+ int rc;
- /*
- * Need @of->sd for attr and ops, its parent for kobj. @of->mutex
- * nests outside active ref and is just to ensure that the ops
- * aren't called concurrently for the same open file.
- */
- mutex_lock(&of->mutex);
- if (!sysfs_get_active(of->sd)) {
- mutex_unlock(&of->mutex);
+ /* need attr_sd for attr and ops, its parent for kobj */
+ if (!sysfs_get_active(attr_sd))
return -ENODEV;
- }
-
- if (sysfs_is_bin(of->sd)) {
- struct bin_attribute *battr = of->sd->s_attr.bin_attr;
-
- rc = -EIO;
- if (battr->write)
- rc = battr->write(of->file, kobj, battr, buf, off,
- count);
- } else {
- const struct sysfs_ops *ops = sysfs_file_ops(of->sd);
- rc = ops->store(kobj, of->sd->s_attr.attr, buf, count);
- }
+ rc = ops->store(kobj, attr_sd->s_attr.attr, buffer->page, count);
- sysfs_put_active(of->sd);
- mutex_unlock(&of->mutex);
+ sysfs_put_active(attr_sd);
return rc;
}
+
/**
- * sysfs_write_file - write an attribute
- * @file: file pointer
- * @user_buf: data to write
- * @count: number of bytes
- * @ppos: starting offset
+ * sysfs_write_file - write an attribute.
+ * @file: file pointer
+ * @buf: data to write
+ * @count: number of bytes
+ * @ppos: starting offset
*
- * Copy data in from userland and pass it to the matching
- * sysfs_ops->store() by invoking flush_write_buffer().
- *
- * There is no easy way for us to know if userspace is only doing a partial
- * write, so we don't support them. We expect the entire buffer to come on
- * the first write. Hint: if you're writing a value, first read the file,
- * modify only the the value you're changing, then write entire buffer
- * back.
+ * Similar to sysfs_read_file(), though working in the opposite direction.
+ * We allocate and fill the data from the user in fill_write_buffer(),
+ * then push it to the kobject in flush_write_buffer().
+ * There is no easy way for us to know if userspace is only doing a partial
+ * write, so we don't support them. We expect the entire buffer to come
+ * on the first write.
+ * Hint: if you're writing a value, first read the file, modify only the
+ * the value you're changing, then write entire buffer back.
*/
-static ssize_t sysfs_write_file(struct file *file, const char __user *user_buf,
+static ssize_t sysfs_write_file(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct sysfs_open_file *of = sysfs_of(file);
- ssize_t len = min_t(size_t, count, PAGE_SIZE);
- loff_t size = file_inode(file)->i_size;
- char *buf;
-
- if (sysfs_is_bin(of->sd) && size) {
- if (size <= *ppos)
- return 0;
- len = min_t(ssize_t, len, size - *ppos);
- }
-
- if (!len)
- return 0;
+ struct sysfs_buffer *buffer = file->private_data;
+ ssize_t len;
- buf = kmalloc(len + 1, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- if (copy_from_user(buf, user_buf, len)) {
- len = -EFAULT;
- goto out_free;
- }
- buf[len] = '\0'; /* guarantee string termination */
-
- len = flush_write_buffer(of, buf, *ppos, len);
+ mutex_lock(&buffer->mutex);
+ len = fill_write_buffer(buffer, buf, count);
+ if (len > 0)
+ len = flush_write_buffer(file->f_path.dentry, buffer, len);
if (len > 0)
*ppos += len;
-out_free:
- kfree(buf);
+ mutex_unlock(&buffer->mutex);
return len;
}
-static void sysfs_bin_vma_open(struct vm_area_struct *vma)
-{
- struct file *file = vma->vm_file;
- struct sysfs_open_file *of = sysfs_of(file);
-
- if (!of->vm_ops)
- return;
-
- if (!sysfs_get_active(of->sd))
- return;
-
- if (of->vm_ops->open)
- of->vm_ops->open(vma);
-
- sysfs_put_active(of->sd);
-}
-
-static int sysfs_bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct file *file = vma->vm_file;
- struct sysfs_open_file *of = sysfs_of(file);
- int ret;
-
- if (!of->vm_ops)
- return VM_FAULT_SIGBUS;
-
- if (!sysfs_get_active(of->sd))
- return VM_FAULT_SIGBUS;
-
- ret = VM_FAULT_SIGBUS;
- if (of->vm_ops->fault)
- ret = of->vm_ops->fault(vma, vmf);
-
- sysfs_put_active(of->sd);
- return ret;
-}
-
-static int sysfs_bin_page_mkwrite(struct vm_area_struct *vma,
- struct vm_fault *vmf)
-{
- struct file *file = vma->vm_file;
- struct sysfs_open_file *of = sysfs_of(file);
- int ret;
-
- if (!of->vm_ops)
- return VM_FAULT_SIGBUS;
-
- if (!sysfs_get_active(of->sd))
- return VM_FAULT_SIGBUS;
-
- ret = 0;
- if (of->vm_ops->page_mkwrite)
- ret = of->vm_ops->page_mkwrite(vma, vmf);
- else
- file_update_time(file);
-
- sysfs_put_active(of->sd);
- return ret;
-}
-
-static int sysfs_bin_access(struct vm_area_struct *vma, unsigned long addr,
- void *buf, int len, int write)
-{
- struct file *file = vma->vm_file;
- struct sysfs_open_file *of = sysfs_of(file);
- int ret;
-
- if (!of->vm_ops)
- return -EINVAL;
-
- if (!sysfs_get_active(of->sd))
- return -EINVAL;
-
- ret = -EINVAL;
- if (of->vm_ops->access)
- ret = of->vm_ops->access(vma, addr, buf, len, write);
-
- sysfs_put_active(of->sd);
- return ret;
-}
-
-#ifdef CONFIG_NUMA
-static int sysfs_bin_set_policy(struct vm_area_struct *vma,
- struct mempolicy *new)
-{
- struct file *file = vma->vm_file;
- struct sysfs_open_file *of = sysfs_of(file);
- int ret;
-
- if (!of->vm_ops)
- return 0;
-
- if (!sysfs_get_active(of->sd))
- return -EINVAL;
-
- ret = 0;
- if (of->vm_ops->set_policy)
- ret = of->vm_ops->set_policy(vma, new);
-
- sysfs_put_active(of->sd);
- return ret;
-}
-
-static struct mempolicy *sysfs_bin_get_policy(struct vm_area_struct *vma,
- unsigned long addr)
-{
- struct file *file = vma->vm_file;
- struct sysfs_open_file *of = sysfs_of(file);
- struct mempolicy *pol;
-
- if (!of->vm_ops)
- return vma->vm_policy;
-
- if (!sysfs_get_active(of->sd))
- return vma->vm_policy;
-
- pol = vma->vm_policy;
- if (of->vm_ops->get_policy)
- pol = of->vm_ops->get_policy(vma, addr);
-
- sysfs_put_active(of->sd);
- return pol;
-}
-
-static int sysfs_bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
- const nodemask_t *to, unsigned long flags)
-{
- struct file *file = vma->vm_file;
- struct sysfs_open_file *of = sysfs_of(file);
- int ret;
-
- if (!of->vm_ops)
- return 0;
-
- if (!sysfs_get_active(of->sd))
- return 0;
-
- ret = 0;
- if (of->vm_ops->migrate)
- ret = of->vm_ops->migrate(vma, from, to, flags);
-
- sysfs_put_active(of->sd);
- return ret;
-}
-#endif
-
-static const struct vm_operations_struct sysfs_bin_vm_ops = {
- .open = sysfs_bin_vma_open,
- .fault = sysfs_bin_fault,
- .page_mkwrite = sysfs_bin_page_mkwrite,
- .access = sysfs_bin_access,
-#ifdef CONFIG_NUMA
- .set_policy = sysfs_bin_set_policy,
- .get_policy = sysfs_bin_get_policy,
- .migrate = sysfs_bin_migrate,
-#endif
-};
-
-static int sysfs_bin_mmap(struct file *file, struct vm_area_struct *vma)
-{
- struct sysfs_open_file *of = sysfs_of(file);
- struct bin_attribute *battr = of->sd->s_attr.bin_attr;
- struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
- int rc;
-
- mutex_lock(&of->mutex);
-
- /* need of->sd for battr, its parent for kobj */
- rc = -ENODEV;
- if (!sysfs_get_active(of->sd))
- goto out_unlock;
-
- if (!battr->mmap)
- goto out_put;
-
- rc = battr->mmap(file, kobj, battr, vma);
- if (rc)
- goto out_put;
-
- /*
- * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
- * to satisfy versions of X which crash if the mmap fails: that
- * substitutes a new vm_file, and we don't then want bin_vm_ops.
- */
- if (vma->vm_file != file)
- goto out_put;
-
- rc = -EINVAL;
- if (of->mmapped && of->vm_ops != vma->vm_ops)
- goto out_put;
-
- /*
- * It is not possible to successfully wrap close.
- * So error if someone is trying to use close.
- */
- rc = -EINVAL;
- if (vma->vm_ops && vma->vm_ops->close)
- goto out_put;
-
- rc = 0;
- of->mmapped = 1;
- of->vm_ops = vma->vm_ops;
- vma->vm_ops = &sysfs_bin_vm_ops;
-out_put:
- sysfs_put_active(of->sd);
-out_unlock:
- mutex_unlock(&of->mutex);
-
- return rc;
-}
-
/**
* sysfs_get_open_dirent - get or create sysfs_open_dirent
* @sd: target sysfs_dirent
- * @of: sysfs_open_file for this instance of open
+ * @buffer: sysfs_buffer for this instance of open
*
* If @sd->s_attr.open exists, increment its reference count;
- * otherwise, create one. @of is chained to the files list.
+ * otherwise, create one. @buffer is chained to the buffers
+ * list.
*
* LOCKING:
* Kernel thread context (may sleep).
@@ -531,12 +253,11 @@ out_unlock:
* 0 on success, -errno on failure.
*/
static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
- struct sysfs_open_file *of)
+ struct sysfs_buffer *buffer)
{
struct sysfs_open_dirent *od, *new_od = NULL;
retry:
- mutex_lock(&sysfs_open_file_mutex);
spin_lock_irq(&sysfs_open_dirent_lock);
if (!sd->s_attr.open && new_od) {
@@ -547,11 +268,10 @@ static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
od = sd->s_attr.open;
if (od) {
atomic_inc(&od->refcnt);
- list_add_tail(&of->list, &od->files);
+ list_add_tail(&buffer->list, &od->buffers);
}
spin_unlock_irq(&sysfs_open_dirent_lock);
- mutex_unlock(&sysfs_open_file_mutex);
if (od) {
kfree(new_od);
@@ -566,40 +286,36 @@ static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
atomic_set(&new_od->refcnt, 0);
atomic_set(&new_od->event, 1);
init_waitqueue_head(&new_od->poll);
- INIT_LIST_HEAD(&new_od->files);
+ INIT_LIST_HEAD(&new_od->buffers);
goto retry;
}
/**
* sysfs_put_open_dirent - put sysfs_open_dirent
* @sd: target sysfs_dirent
- * @of: associated sysfs_open_file
+ * @buffer: associated sysfs_buffer
*
- * Put @sd->s_attr.open and unlink @of from the files list. If
- * reference count reaches zero, disassociate and free it.
+ * Put @sd->s_attr.open and unlink @buffer from the buffers list.
+ * If reference count reaches zero, disassociate and free it.
*
* LOCKING:
* None.
*/
static void sysfs_put_open_dirent(struct sysfs_dirent *sd,
- struct sysfs_open_file *of)
+ struct sysfs_buffer *buffer)
{
struct sysfs_open_dirent *od = sd->s_attr.open;
unsigned long flags;
- mutex_lock(&sysfs_open_file_mutex);
spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
- if (of)
- list_del(&of->list);
-
+ list_del(&buffer->list);
if (atomic_dec_and_test(&od->refcnt))
sd->s_attr.open = NULL;
else
od = NULL;
spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
- mutex_unlock(&sysfs_open_file_mutex);
kfree(od);
}
@@ -608,99 +324,67 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
{
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
- struct sysfs_open_file *of;
- bool has_read, has_write, has_mmap;
+ struct sysfs_buffer *buffer;
+ const struct sysfs_ops *ops;
int error = -EACCES;
/* need attr_sd for attr and ops, its parent for kobj */
if (!sysfs_get_active(attr_sd))
return -ENODEV;
- if (sysfs_is_bin(attr_sd)) {
- struct bin_attribute *battr = attr_sd->s_attr.bin_attr;
-
- has_read = battr->read || battr->mmap;
- has_write = battr->write || battr->mmap;
- has_mmap = battr->mmap;
- } else {
- const struct sysfs_ops *ops = sysfs_file_ops(attr_sd);
+ /* every kobject with an attribute needs a ktype assigned */
+ if (kobj->ktype && kobj->ktype->sysfs_ops)
+ ops = kobj->ktype->sysfs_ops;
+ else {
+ WARN(1, KERN_ERR
+ "missing sysfs attribute operations for kobject: %s\n",
+ kobject_name(kobj));
+ goto err_out;
+ }
- /* every kobject with an attribute needs a ktype assigned */
- if (WARN(!ops, KERN_ERR
- "missing sysfs attribute operations for kobject: %s\n",
- kobject_name(kobj)))
+ /* File needs write support.
+ * The inode's perms must say it's ok,
+ * and we must have a store method.
+ */
+ if (file->f_mode & FMODE_WRITE) {
+ if (!(inode->i_mode & S_IWUGO) || !ops->store)
goto err_out;
-
- has_read = ops->show;
- has_write = ops->store;
- has_mmap = false;
}
- /* check perms and supported operations */
- if ((file->f_mode & FMODE_WRITE) &&
- (!(inode->i_mode & S_IWUGO) || !has_write))
- goto err_out;
-
- if ((file->f_mode & FMODE_READ) &&
- (!(inode->i_mode & S_IRUGO) || !has_read))
- goto err_out;
-
- /* allocate a sysfs_open_file for the file */
- error = -ENOMEM;
- of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL);
- if (!of)
- goto err_out;
-
- /*
- * The following is done to give a different lockdep key to
- * @of->mutex for files which implement mmap. This is a rather
- * crude way to avoid false positive lockdep warning around
- * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and
- * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
- * which mm->mmap_sem nests, while holding @of->mutex. As each
- * open file has a separate mutex, it's okay as long as those don't
- * happen on the same file. At this point, we can't easily give
- * each file a separate locking class. Let's differentiate on
- * whether the file has mmap or not for now.
+ /* File needs read support.
+ * The inode's perms must say it's ok, and we there
+ * must be a show method for it.
*/
- if (has_mmap)
- mutex_init(&of->mutex);
- else
- mutex_init(&of->mutex);
-
- of->sd = attr_sd;
- of->file = file;
+ if (file->f_mode & FMODE_READ) {
+ if (!(inode->i_mode & S_IRUGO) || !ops->show)
+ goto err_out;
+ }
- /*
- * Always instantiate seq_file even if read access doesn't use
- * seq_file or is not requested. This unifies private data access
- * and readable regular files are the vast majority anyway.
+ /* No error? Great, allocate a buffer for the file, and store it
+ * it in file->private_data for easy access.
*/
- if (sysfs_is_bin(attr_sd))
- error = single_open(file, NULL, of);
- else
- error = single_open(file, sysfs_seq_show, of);
- if (error)
- goto err_free;
+ error = -ENOMEM;
+ buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL);
+ if (!buffer)
+ goto err_out;
- /* seq_file clears PWRITE unconditionally, restore it if WRITE */
- if (file->f_mode & FMODE_WRITE)
- file->f_mode |= FMODE_PWRITE;
+ mutex_init(&buffer->mutex);
+ buffer->needs_read_fill = 1;
+ buffer->ops = ops;
+ file->private_data = buffer;
/* make sure we have open dirent struct */
- error = sysfs_get_open_dirent(attr_sd, of);
+ error = sysfs_get_open_dirent(attr_sd, buffer);
if (error)
- goto err_close;
+ goto err_free;
/* open succeeded, put active references */
sysfs_put_active(attr_sd);
return 0;
-err_close:
- single_release(inode, file);
-err_free:
- kfree(of);
-err_out:
+ err_free:
+ kfree(buffer);
+ err_out:
sysfs_put_active(attr_sd);
return error;
}
@@ -708,39 +392,15 @@ err_out:
static int sysfs_release(struct inode *inode, struct file *filp)
{
struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata;
- struct sysfs_open_file *of = sysfs_of(filp);
-
- sysfs_put_open_dirent(sd, of);
- single_release(inode, filp);
- kfree(of);
-
- return 0;
-}
-
-void sysfs_unmap_bin_file(struct sysfs_dirent *sd)
-{
- struct sysfs_open_dirent *od;
- struct sysfs_open_file *of;
+ struct sysfs_buffer *buffer = filp->private_data;
- if (!sysfs_is_bin(sd))
- return;
-
- spin_lock_irq(&sysfs_open_dirent_lock);
- od = sd->s_attr.open;
- if (od)
- atomic_inc(&od->refcnt);
- spin_unlock_irq(&sysfs_open_dirent_lock);
- if (!od)
- return;
+ sysfs_put_open_dirent(sd, buffer);
- mutex_lock(&sysfs_open_file_mutex);
- list_for_each_entry(of, &od->files, list) {
- struct inode *inode = file_inode(of->file);
- unmap_mapping_range(inode->i_mapping, 0, 0, 1);
- }
- mutex_unlock(&sysfs_open_file_mutex);
+ if (buffer->page)
+ free_page((unsigned long)buffer->page);
+ kfree(buffer);
- sysfs_put_open_dirent(sd, NULL);
+ return 0;
}
/* Sysfs attribute files are pollable. The idea is that you read
@@ -758,7 +418,7 @@ void sysfs_unmap_bin_file(struct sysfs_dirent *sd)
*/
static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
{
- struct sysfs_open_file *of = sysfs_of(filp);
+ struct sysfs_buffer *buffer = filp->private_data;
struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
struct sysfs_open_dirent *od = attr_sd->s_attr.open;
@@ -770,12 +430,13 @@ static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
sysfs_put_active(attr_sd);
- if (of->event != atomic_read(&od->event))
+ if (buffer->event != atomic_read(&od->event))
goto trigger;
return DEFAULT_POLLMASK;
trigger:
+ buffer->needs_read_fill = 1;
return DEFAULT_POLLMASK|POLLERR|POLLPRI;
}
@@ -805,9 +466,9 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
mutex_lock(&sysfs_mutex);
if (sd && dir)
- sd = sysfs_find_dirent(sd, dir, NULL);
+ sd = sysfs_find_dirent(sd, NULL, dir);
if (sd && attr)
- sd = sysfs_find_dirent(sd, attr, NULL);
+ sd = sysfs_find_dirent(sd, NULL, attr);
if (sd)
sysfs_notify_dirent(sd);
@@ -816,7 +477,7 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
EXPORT_SYMBOL_GPL(sysfs_notify);
const struct file_operations sysfs_file_operations = {
- .read = seq_read,
+ .read = sysfs_read_file,
.write = sysfs_write_file,
.llseek = generic_file_llseek,
.open = sysfs_open_file,
@@ -824,25 +485,58 @@ const struct file_operations sysfs_file_operations = {
.poll = sysfs_poll,
};
-const struct file_operations sysfs_bin_operations = {
- .read = sysfs_bin_read,
- .write = sysfs_write_file,
- .llseek = generic_file_llseek,
- .mmap = sysfs_bin_mmap,
- .open = sysfs_open_file,
- .release = sysfs_release,
- .poll = sysfs_poll,
-};
+static int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr,
+ const void **pns)
+{
+ struct sysfs_dirent *dir_sd = kobj->sd;
+ const struct sysfs_ops *ops;
+ const void *ns = NULL;
+ int err;
+
+ if (!dir_sd) {
+ WARN(1, KERN_ERR "sysfs: kobject %s without dirent\n",
+ kobject_name(kobj));
+ return -ENOENT;
+ }
+
+ err = 0;
+ if (!sysfs_ns_type(dir_sd))
+ goto out;
-int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
- const struct attribute *attr, int type,
- umode_t amode, const void *ns)
+ err = -EINVAL;
+ if (!kobj->ktype)
+ goto out;
+ ops = kobj->ktype->sysfs_ops;
+ if (!ops)
+ goto out;
+ if (!ops->namespace)
+ goto out;
+
+ err = 0;
+ ns = ops->namespace(kobj, attr);
+out:
+ if (err) {
+ WARN(1, KERN_ERR
+ "missing sysfs namespace attribute operation for kobject: %s\n",
+ kobject_name(kobj));
+ }
+ *pns = ns;
+ return err;
+}
+
+int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
+ const struct attribute *attr, int type, umode_t amode)
{
umode_t mode = (amode & S_IALLUGO) | S_IFREG;
struct sysfs_addrm_cxt acxt;
struct sysfs_dirent *sd;
+ const void *ns;
int rc;
+ rc = sysfs_attr_ns(dir_sd->s_dir.kobj, attr, &ns);
+ if (rc)
+ return rc;
+
sd = sysfs_new_dirent(attr->name, mode, type);
if (!sd)
return -ENOMEM;
@@ -851,8 +545,8 @@ int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
sd->s_attr.attr = (void *)attr;
sysfs_dirent_init_lockdep(sd);
- sysfs_addrm_start(&acxt);
- rc = sysfs_add_one(&acxt, sd, dir_sd);
+ sysfs_addrm_start(&acxt, dir_sd);
+ rc = sysfs_add_one(&acxt, sd);
sysfs_addrm_finish(&acxt);
if (rc)
@@ -865,25 +559,23 @@ int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
int type)
{
- return sysfs_add_file_mode_ns(dir_sd, attr, type, attr->mode, NULL);
+ return sysfs_add_file_mode(dir_sd, attr, type, attr->mode);
}
+
/**
- * sysfs_create_file_ns - create an attribute file for an object with custom ns
- * @kobj: object we're creating for
- * @attr: attribute descriptor
- * @ns: namespace the new file should belong to
+ * sysfs_create_file - create an attribute file for an object.
+ * @kobj: object we're creating for.
+ * @attr: attribute descriptor.
*/
-int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
- const void *ns)
+int sysfs_create_file(struct kobject *kobj, const struct attribute *attr)
{
BUG_ON(!kobj || !kobj->sd || !attr);
- return sysfs_add_file_mode_ns(kobj->sd, attr, SYSFS_KOBJ_ATTR,
- attr->mode, ns);
+ return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR);
}
-EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
+EXPORT_SYMBOL_GPL(sysfs_create_file);
int sysfs_create_files(struct kobject *kobj, const struct attribute **ptr)
{
@@ -912,7 +604,7 @@ int sysfs_add_file_to_group(struct kobject *kobj,
int error;
if (group)
- dir_sd = sysfs_get_dirent(kobj->sd, group);
+ dir_sd = sysfs_get_dirent(kobj->sd, NULL, group);
else
dir_sd = sysfs_get(kobj->sd);
@@ -938,12 +630,17 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
{
struct sysfs_dirent *sd;
struct iattr newattrs;
+ const void *ns;
int rc;
+ rc = sysfs_attr_ns(kobj, attr, &ns);
+ if (rc)
+ return rc;
+
mutex_lock(&sysfs_mutex);
rc = -ENOENT;
- sd = sysfs_find_dirent(kobj->sd, attr->name, NULL);
+ sd = sysfs_find_dirent(kobj->sd, ns, attr->name);
if (!sd)
goto out;
@@ -958,21 +655,22 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
EXPORT_SYMBOL_GPL(sysfs_chmod_file);
/**
- * sysfs_remove_file_ns - remove an object attribute with a custom ns tag
- * @kobj: object we're acting for
- * @attr: attribute descriptor
- * @ns: namespace tag of the file to remove
+ * sysfs_remove_file - remove an object attribute.
+ * @kobj: object we're acting for.
+ * @attr: attribute descriptor.
*
- * Hash the attribute name and namespace tag and kill the victim.
+ * Hash the attribute name and kill the victim.
*/
-void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
- const void *ns)
+void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr)
{
- struct sysfs_dirent *dir_sd = kobj->sd;
+ const void *ns;
- sysfs_hash_and_remove(dir_sd, attr->name, ns);
+ if (sysfs_attr_ns(kobj, attr, &ns))
+ return;
+
+ sysfs_hash_and_remove(kobj->sd, ns, attr->name);
}
-EXPORT_SYMBOL_GPL(sysfs_remove_file_ns);
+EXPORT_SYMBOL_GPL(sysfs_remove_file);
void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr)
{
@@ -994,42 +692,16 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
struct sysfs_dirent *dir_sd;
if (group)
- dir_sd = sysfs_get_dirent(kobj->sd, group);
+ dir_sd = sysfs_get_dirent(kobj->sd, NULL, group);
else
dir_sd = sysfs_get(kobj->sd);
if (dir_sd) {
- sysfs_hash_and_remove(dir_sd, attr->name, NULL);
+ sysfs_hash_and_remove(dir_sd, NULL, attr->name);
sysfs_put(dir_sd);
}
}
EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
-/**
- * sysfs_create_bin_file - create binary file for object.
- * @kobj: object.
- * @attr: attribute descriptor.
- */
-int sysfs_create_bin_file(struct kobject *kobj,
- const struct bin_attribute *attr)
-{
- BUG_ON(!kobj || !kobj->sd || !attr);
-
- return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
-}
-EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
-
-/**
- * sysfs_remove_bin_file - remove binary file for object.
- * @kobj: object.
- * @attr: attribute descriptor.
- */
-void sysfs_remove_bin_file(struct kobject *kobj,
- const struct bin_attribute *attr)
-{
- sysfs_hash_and_remove(kobj->sd, attr->attr.name, NULL);
-}
-EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);
-
struct sysfs_schedule_callback_struct {
struct list_head workq_list;
struct kobject *kobj;
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 1898a10..5f92cd2 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -26,7 +26,7 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
if (grp->attrs)
for (attr = grp->attrs; *attr; attr++)
- sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL);
+ sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
if (grp->bin_attrs)
for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++)
sysfs_remove_bin_file(kobj, *bin_attr);
@@ -49,17 +49,16 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
* re-adding (if required) the file.
*/
if (update)
- sysfs_hash_and_remove(dir_sd, (*attr)->name,
- NULL);
+ sysfs_hash_and_remove(dir_sd, NULL,
+ (*attr)->name);
if (grp->is_visible) {
mode = grp->is_visible(kobj, *attr, i);
if (!mode)
continue;
}
- error = sysfs_add_file_mode_ns(dir_sd, *attr,
- SYSFS_KOBJ_ATTR,
- (*attr)->mode | mode,
- NULL);
+ error = sysfs_add_file_mode(dir_sd, *attr,
+ SYSFS_KOBJ_ATTR,
+ (*attr)->mode | mode);
if (unlikely(error))
break;
}
@@ -111,7 +110,7 @@ static int internal_create_group(struct kobject *kobj, int update,
error = create_files(sd, kobj, grp, update);
if (error) {
if (grp->name)
- sysfs_remove(sd);
+ sysfs_remove_subdir(sd);
}
sysfs_put(sd);
return error;
@@ -207,7 +206,7 @@ void sysfs_remove_group(struct kobject *kobj,
struct sysfs_dirent *sd;
if (grp->name) {
- sd = sysfs_get_dirent(dir_sd, grp->name);
+ sd = sysfs_get_dirent(dir_sd, NULL, grp->name);
if (!sd) {
WARN(!sd, KERN_WARNING
"sysfs group %p not found for kobject '%s'\n",
@@ -219,7 +218,7 @@ void sysfs_remove_group(struct kobject *kobj,
remove_files(sd, kobj, grp);
if (grp->name)
- sysfs_remove(sd);
+ sysfs_remove_subdir(sd);
sysfs_put(sd);
}
@@ -262,7 +261,7 @@ int sysfs_merge_group(struct kobject *kobj,
struct attribute *const *attr;
int i;
- dir_sd = sysfs_get_dirent(kobj->sd, grp->name);
+ dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
if (!dir_sd)
return -ENOENT;
@@ -270,7 +269,7 @@ int sysfs_merge_group(struct kobject *kobj,
error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR);
if (error) {
while (--i >= 0)
- sysfs_hash_and_remove(dir_sd, (*--attr)->name, NULL);
+ sysfs_hash_and_remove(dir_sd, NULL, (*--attr)->name);
}
sysfs_put(dir_sd);
@@ -289,10 +288,10 @@ void sysfs_unmerge_group(struct kobject *kobj,
struct sysfs_dirent *dir_sd;
struct attribute *const *attr;
- dir_sd = sysfs_get_dirent(kobj->sd, grp->name);
+ dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
if (dir_sd) {
for (attr = grp->attrs; *attr; ++attr)
- sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL);
+ sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
sysfs_put(dir_sd);
}
}
@@ -311,7 +310,7 @@ int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
struct sysfs_dirent *dir_sd;
int error = 0;
- dir_sd = sysfs_get_dirent(kobj->sd, group_name);
+ dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
if (!dir_sd)
return -ENOENT;
@@ -333,9 +332,9 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
{
struct sysfs_dirent *dir_sd;
- dir_sd = sysfs_get_dirent(kobj->sd, group_name);
+ dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
if (dir_sd) {
- sysfs_hash_and_remove(dir_sd, link_name, NULL);
+ sysfs_hash_and_remove(dir_sd, NULL, link_name);
sysfs_put(dir_sd);
}
}
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 1750f79..963f910 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -258,9 +258,9 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
inode->i_fop = &sysfs_file_operations;
break;
case SYSFS_KOBJ_BIN_ATTR:
- bin_attr = sd->s_attr.bin_attr;
+ bin_attr = sd->s_bin_attr.bin_attr;
inode->i_size = bin_attr->size;
- inode->i_fop = &sysfs_bin_operations;
+ inode->i_fop = &bin_fops;
break;
case SYSFS_KOBJ_LINK:
inode->i_op = &sysfs_symlink_inode_operations;
@@ -314,6 +314,32 @@ void sysfs_evict_inode(struct inode *inode)
sysfs_put(sd);
}
+int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns,
+ const char *name)
+{
+ struct sysfs_addrm_cxt acxt;
+ struct sysfs_dirent *sd;
+
+ if (!dir_sd) {
+ WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n",
+ name);
+ return -ENOENT;
+ }
+
+ sysfs_addrm_start(&acxt, dir_sd);
+
+ sd = sysfs_find_dirent(dir_sd, ns, name);
+ if (sd)
+ sysfs_remove_one(&acxt, sd);
+
+ sysfs_addrm_finish(&acxt);
+
+ if (sd)
+ return 0;
+ else
+ return -ENOENT;
+}
+
int sysfs_permission(struct inode *inode, int mask)
{
struct sysfs_dirent *sd;
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 3ae3f1b..2dd4507 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -33,15 +33,13 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
BUG_ON(!name || !parent_sd);
- /*
- * We don't own @target and it may be removed at any time.
- * Synchronize using sysfs_symlink_target_lock. See
- * sysfs_remove_dir() for details.
+ /* target->sd can go away beneath us but is protected with
+ * sysfs_assoc_lock. Fetch target_sd from it.
*/
- spin_lock(&sysfs_symlink_target_lock);
+ spin_lock(&sysfs_assoc_lock);
if (target->sd)
target_sd = sysfs_get(target->sd);
- spin_unlock(&sysfs_symlink_target_lock);
+ spin_unlock(&sysfs_assoc_lock);
error = -ENOENT;
if (!target_sd)
@@ -54,18 +52,18 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
ns_type = sysfs_ns_type(parent_sd);
if (ns_type)
- sd->s_ns = target_sd->s_ns;
+ sd->s_ns = target->ktype->namespace(target);
sd->s_symlink.target_sd = target_sd;
target_sd = NULL; /* reference is now owned by the symlink */
- sysfs_addrm_start(&acxt);
+ sysfs_addrm_start(&acxt, parent_sd);
/* Symlinks must be between directories with the same ns_type */
if (!ns_type ||
(ns_type == sysfs_ns_type(sd->s_symlink.target_sd->s_parent))) {
if (warn)
- error = sysfs_add_one(&acxt, sd, parent_sd);
+ error = sysfs_add_one(&acxt, sd);
else
- error = __sysfs_add_one(&acxt, sd, parent_sd);
+ error = __sysfs_add_one(&acxt, sd);
} else {
error = -EINVAL;
WARN(1, KERN_WARNING
@@ -157,17 +155,11 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
const char *name)
{
const void *ns = NULL;
-
- /*
- * We don't own @target and it may be removed at any time.
- * Synchronize using sysfs_symlink_target_lock. See
- * sysfs_remove_dir() for details.
- */
- spin_lock(&sysfs_symlink_target_lock);
+ spin_lock(&sysfs_assoc_lock);
if (targ->sd && sysfs_ns_type(kobj->sd))
ns = targ->sd->s_ns;
- spin_unlock(&sysfs_symlink_target_lock);
- sysfs_hash_and_remove(kobj->sd, name, ns);
+ spin_unlock(&sysfs_assoc_lock);
+ sysfs_hash_and_remove(kobj->sd, ns, name);
}
/**
@@ -184,25 +176,24 @@ void sysfs_remove_link(struct kobject *kobj, const char *name)
else
parent_sd = kobj->sd;
- sysfs_hash_and_remove(parent_sd, name, NULL);
+ sysfs_hash_and_remove(parent_sd, NULL, name);
}
EXPORT_SYMBOL_GPL(sysfs_remove_link);
/**
- * sysfs_rename_link_ns - rename symlink in object's directory.
+ * sysfs_rename_link - rename symlink in object's directory.
* @kobj: object we're acting for.
* @targ: object we're pointing to.
* @old: previous name of the symlink.
* @new: new name of the symlink.
- * @new_ns: new namespace of the symlink.
*
* A helper function for the common rename symlink idiom.
*/
-int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ,
- const char *old, const char *new, const void *new_ns)
+int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
+ const char *old, const char *new)
{
struct sysfs_dirent *parent_sd, *sd = NULL;
- const void *old_ns = NULL;
+ const void *old_ns = NULL, *new_ns = NULL;
int result;
if (!kobj)
@@ -214,7 +205,7 @@ int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ,
old_ns = targ->sd->s_ns;
result = -ENOENT;
- sd = sysfs_get_dirent_ns(parent_sd, old, old_ns);
+ sd = sysfs_get_dirent(parent_sd, old_ns, old);
if (!sd)
goto out;
@@ -224,13 +215,16 @@ int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ,
if (sd->s_symlink.target_sd->s_dir.kobj != targ)
goto out;
- result = sysfs_rename(sd, parent_sd, new, new_ns);
+ if (sysfs_ns_type(parent_sd))
+ new_ns = targ->ktype->namespace(targ);
+
+ result = sysfs_rename(sd, parent_sd, new_ns, new);
out:
sysfs_put(sd);
return result;
}
-EXPORT_SYMBOL_GPL(sysfs_rename_link_ns);
+EXPORT_SYMBOL_GPL(sysfs_rename_link);
static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
struct sysfs_dirent *target_sd, char *path)
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 0af09fb..b6deca3 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -29,13 +29,15 @@ struct sysfs_elem_symlink {
};
struct sysfs_elem_attr {
- union {
- struct attribute *attr;
- struct bin_attribute *bin_attr;
- };
+ struct attribute *attr;
struct sysfs_open_dirent *open;
};
+struct sysfs_elem_bin_attr {
+ struct bin_attribute *bin_attr;
+ struct hlist_head buffers;
+};
+
struct sysfs_inode_attrs {
struct iattr ia_iattr;
void *ia_secdata;
@@ -72,6 +74,7 @@ struct sysfs_dirent {
struct sysfs_elem_dir s_dir;
struct sysfs_elem_symlink s_symlink;
struct sysfs_elem_attr s_attr;
+ struct sysfs_elem_bin_attr s_bin_attr;
};
unsigned short s_flags;
@@ -112,7 +115,6 @@ static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd)
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-
#define sysfs_dirent_init_lockdep(sd) \
do { \
struct attribute *attr = sd->s_attr.attr; \
@@ -122,31 +124,15 @@ do { \
\
lockdep_init_map(&sd->dep_map, "s_active", key, 0); \
} while (0)
-
-/* Test for attributes that want to ignore lockdep for read-locking */
-static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd)
-{
- int type = sysfs_type(sd);
-
- return (type == SYSFS_KOBJ_ATTR || type == SYSFS_KOBJ_BIN_ATTR) &&
- sd->s_attr.attr->ignore_lockdep;
-}
-
#else
-
#define sysfs_dirent_init_lockdep(sd) do {} while (0)
-
-static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd)
-{
- return true;
-}
-
#endif
/*
* Context structure to be used while adding/removing nodes.
*/
struct sysfs_addrm_cxt {
+ struct sysfs_dirent *parent_sd;
struct sysfs_dirent *removed;
};
@@ -170,37 +156,38 @@ extern struct kmem_cache *sysfs_dir_cachep;
* dir.c
*/
extern struct mutex sysfs_mutex;
-extern spinlock_t sysfs_symlink_target_lock;
+extern spinlock_t sysfs_assoc_lock;
extern const struct dentry_operations sysfs_dentry_ops;
extern const struct file_operations sysfs_dir_operations;
extern const struct inode_operations sysfs_dir_inode_operations;
+struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd);
struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
void sysfs_put_active(struct sysfs_dirent *sd);
-void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt);
-void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name);
-int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
- struct sysfs_dirent *parent_sd);
-int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
- struct sysfs_dirent *parent_sd);
-void sysfs_remove(struct sysfs_dirent *sd);
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
- const void *ns);
+void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
+ struct sysfs_dirent *parent_sd);
+int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
+int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
+void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
- const unsigned char *name,
- const void *ns);
+ const void *ns,
+ const unsigned char *name);
+struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
+ const void *ns,
+ const unsigned char *name);
struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type);
void release_sysfs_dirent(struct sysfs_dirent *sd);
int sysfs_create_subdir(struct kobject *kobj, const char *name,
struct sysfs_dirent **p_sd);
+void sysfs_remove_subdir(struct sysfs_dirent *sd);
int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
- const char *new_name, const void *new_ns);
+ const void *ns, const char *new_name);
static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd)
{
@@ -231,21 +218,25 @@ int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags);
+int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns,
+ const char *name);
int sysfs_inode_init(void);
/*
* file.c
*/
extern const struct file_operations sysfs_file_operations;
-extern const struct file_operations sysfs_bin_operations;
int sysfs_add_file(struct sysfs_dirent *dir_sd,
const struct attribute *attr, int type);
-int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
- const struct attribute *attr, int type,
- umode_t amode, const void *ns);
-void sysfs_unmap_bin_file(struct sysfs_dirent *sd);
+int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
+ const struct attribute *attr, int type, umode_t amode);
+/*
+ * bin.c
+ */
+extern const struct file_operations bin_fops;
+void unmap_bin_file(struct sysfs_dirent *attr_sd);
/*
* symlink.c
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index cc1febd..6e025e0 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2563,9 +2563,9 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf,
unsigned int from, to, ffs = chance(1, 2);
unsigned char *p = (void *)buf;
- from = prandom_u32() % len;
- /* Corruption span max to end of write unit */
- to = min(len, ALIGN(from + 1, c->max_write_size));
+ from = prandom_u32() % (len + 1);
+ /* Corruption may only span one max. write unit */
+ to = min(len, ALIGN(from, c->max_write_size));
ubifs_warn("filled bytes %u-%u with %s", from, to - 1,
ffs ? "0xFFs" : "random data");
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index ea41649..6b4947f 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -192,7 +192,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
struct ubifs_dent_node *dent;
struct ubifs_info *c = dir->i_sb->s_fs_info;
- dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino);
+ dbg_gen("'%.*s' in dir ino %lu",
+ dentry->d_name.len, dentry->d_name.name, dir->i_ino);
if (dentry->d_name.len > UBIFS_MAX_NLEN)
return ERR_PTR(-ENAMETOOLONG);
@@ -224,8 +225,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
* checking.
*/
err = PTR_ERR(inode);
- ubifs_err("dead directory entry '%pd', error %d",
- dentry, err);
+ ubifs_err("dead directory entry '%.*s', error %d",
+ dentry->d_name.len, dentry->d_name.name, err);
ubifs_ro_mode(c, err);
goto out;
}
@@ -259,8 +260,8 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
* parent directory inode.
*/
- dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
- dentry, mode, dir->i_ino);
+ dbg_gen("dent '%.*s', mode %#hx in dir ino %lu",
+ dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
err = ubifs_budget_space(c, &req);
if (err)
@@ -508,8 +509,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
* changing the parent inode.
*/
- dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu",
- dentry, inode->i_ino,
+ dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu",
+ dentry->d_name.len, dentry->d_name.name, inode->i_ino,
inode->i_nlink, dir->i_ino);
ubifs_assert(mutex_is_locked(&dir->i_mutex));
ubifs_assert(mutex_is_locked(&inode->i_mutex));
@@ -565,8 +566,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
* deletions.
*/
- dbg_gen("dent '%pd' from ino %lu (nlink %d) in dir ino %lu",
- dentry, inode->i_ino,
+ dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu",
+ dentry->d_name.len, dentry->d_name.name, inode->i_ino,
inode->i_nlink, dir->i_ino);
ubifs_assert(mutex_is_locked(&dir->i_mutex));
ubifs_assert(mutex_is_locked(&inode->i_mutex));
@@ -655,8 +656,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
* because we have extra space reserved for deletions.
*/
- dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry,
- inode->i_ino, dir->i_ino);
+ dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len,
+ dentry->d_name.name, inode->i_ino, dir->i_ino);
ubifs_assert(mutex_is_locked(&dir->i_mutex));
ubifs_assert(mutex_is_locked(&inode->i_mutex));
err = check_dir_empty(c, dentry->d_inode);
@@ -715,8 +716,8 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
* directory inode.
*/
- dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
- dentry, mode, dir->i_ino);
+ dbg_gen("dent '%.*s', mode %#hx in dir ino %lu",
+ dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
err = ubifs_budget_space(c, &req);
if (err)
@@ -777,7 +778,8 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
* directory inode.
*/
- dbg_gen("dent '%pd' in dir ino %lu", dentry, dir->i_ino);
+ dbg_gen("dent '%.*s' in dir ino %lu",
+ dentry->d_name.len, dentry->d_name.name, dir->i_ino);
if (!new_valid_dev(rdev))
return -EINVAL;
@@ -851,8 +853,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
* directory inode.
*/
- dbg_gen("dent '%pd', target '%s' in dir ino %lu", dentry,
- symname, dir->i_ino);
+ dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len,
+ dentry->d_name.name, symname, dir->i_ino);
if (len > UBIFS_MAX_INO_DATA)
return -ENAMETOOLONG;
@@ -977,9 +979,10 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
* separately.
*/
- dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu",
- old_dentry, old_inode->i_ino, old_dir->i_ino,
- new_dentry, new_dir->i_ino);
+ dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in dir ino %lu",
+ old_dentry->d_name.len, old_dentry->d_name.name,
+ old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len,
+ new_dentry->d_name.name, new_dir->i_ino);
ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
ubifs_assert(mutex_is_locked(&new_dir->i_mutex));
if (unlink)
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 9718da8..76ca53c 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -668,7 +668,8 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
ubifs_assert(!wbuf->used);
for (i = 0; ; i++) {
- int space_before, space_after;
+ int space_before = c->leb_size - wbuf->offs - wbuf->used;
+ int space_after;
cond_resched();
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 0e045e7..afaad07 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -933,8 +933,10 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
int move = (old_dir != new_dir);
struct ubifs_inode *uninitialized_var(new_ui);
- dbg_jnl("dent '%pd' in dir ino %lu to dent '%pd' in dir ino %lu",
- old_dentry, old_dir->i_ino, new_dentry, new_dir->i_ino);
+ dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu",
+ old_dentry->d_name.len, old_dentry->d_name.name,
+ old_dir->i_ino, new_dentry->d_name.len,
+ new_dentry->d_name.name, new_dir->i_ino);
ubifs_assert(ubifs_inode(old_dir)->data_len == 0);
ubifs_assert(ubifs_inode(new_dir)->data_len == 0);
ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex));
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index f69daa5..3e4aa72 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1630,10 +1630,8 @@ static int ubifs_remount_rw(struct ubifs_info *c)
}
c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL);
- if (!c->write_reserve_buf) {
- err = -ENOMEM;
+ if (!c->write_reserve_buf)
goto out;
- }
err = ubifs_lpt_init(c, 0, 1);
if (err)
@@ -2066,10 +2064,8 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
}
sb->s_root = d_make_root(root);
- if (!sb->s_root) {
- err = -ENOMEM;
+ if (!sb->s_root)
goto out_umount;
- }
mutex_unlock(&c->umount_mutex);
return 0;
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 5e0a63b..0f7139b 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -303,8 +303,8 @@ int ubifs_setxattr(struct dentry *dentry, const char *name,
union ubifs_key key;
int err, type;
- dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", name,
- host->i_ino, dentry, size);
+ dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name,
+ host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
ubifs_assert(mutex_is_locked(&host->i_mutex));
if (size > UBIFS_MAX_INO_DATA)
@@ -367,8 +367,8 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
union ubifs_key key;
int err;
- dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name,
- host->i_ino, dentry, size);
+ dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name,
+ host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
err = check_namespace(&nm);
if (err < 0)
@@ -426,8 +426,8 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
int err, len, written = 0;
struct qstr nm = { .name = NULL };
- dbg_gen("ino %lu ('%pd'), buffer size %zd", host->i_ino,
- dentry, size);
+ dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino,
+ dentry->d_name.len, dentry->d_name.name, size);
len = host_ui->xattr_names + host_ui->xattr_cnt;
if (!buffer)
@@ -529,8 +529,8 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
union ubifs_key key;
int err;
- dbg_gen("xattr '%s', ino %lu ('%pd')", name,
- host->i_ino, dentry);
+ dbg_gen("xattr '%s', ino %lu ('%.*s')", name,
+ host->i_ino, dentry->d_name.len, dentry->d_name.name);
ubifs_assert(mutex_is_locked(&host->i_mutex));
err = check_namespace(&nm);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 3306b9f..9121938 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -76,9 +76,6 @@
#define UDF_DEFAULT_BLOCKSIZE 2048
-#define VSD_FIRST_SECTOR_OFFSET 32768
-#define VSD_MAX_SECTOR_OFFSET 0x800000
-
enum { UDF_MAX_LINKS = 0xffff };
/* These are the "meat" - everything else is stuffing */
@@ -688,7 +685,7 @@ out_unlock:
static loff_t udf_check_vsd(struct super_block *sb)
{
struct volStructDesc *vsd = NULL;
- loff_t sector = VSD_FIRST_SECTOR_OFFSET;
+ loff_t sector = 32768;
int sectorsize;
struct buffer_head *bh = NULL;
int nsr02 = 0;
@@ -706,18 +703,8 @@ static loff_t udf_check_vsd(struct super_block *sb)
udf_debug("Starting at sector %u (%ld byte sectors)\n",
(unsigned int)(sector >> sb->s_blocksize_bits),
sb->s_blocksize);
- /* Process the sequence (if applicable). The hard limit on the sector
- * offset is arbitrary, hopefully large enough so that all valid UDF
- * filesystems will be recognised. There is no mention of an upper
- * bound to the size of the volume recognition area in the standard.
- * The limit will prevent the code to read all the sectors of a
- * specially crafted image (like a bluray disc full of CD001 sectors),
- * potentially causing minutes or even hours of uninterruptible I/O
- * activity. This actually happened with uninitialised SSD partitions
- * (all 0xFF) before the check for the limit and all valid IDs were
- * added */
- for (; !nsr02 && !nsr03 && sector < VSD_MAX_SECTOR_OFFSET;
- sector += sectorsize) {
+ /* Process the sequence (if applicable) */
+ for (; !nsr02 && !nsr03; sector += sectorsize) {
/* Read a block */
bh = udf_tread(sb, sector >> sb->s_blocksize_bits);
if (!bh)
@@ -727,7 +714,10 @@ static loff_t udf_check_vsd(struct super_block *sb)
vsd = (struct volStructDesc *)(bh->b_data +
(sector & (sb->s_blocksize - 1)));
- if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001,
+ if (vsd->stdIdent[0] == 0) {
+ brelse(bh);
+ break;
+ } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001,
VSD_STD_ID_LEN)) {
switch (vsd->structType) {
case 0:
@@ -763,17 +753,6 @@ static loff_t udf_check_vsd(struct super_block *sb)
else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR03,
VSD_STD_ID_LEN))
nsr03 = sector;
- else if (!strncmp(vsd->stdIdent, VSD_STD_ID_BOOT2,
- VSD_STD_ID_LEN))
- ; /* nothing */
- else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CDW02,
- VSD_STD_ID_LEN))
- ; /* nothing */
- else {
- /* invalid id : end of volume recognition area */
- brelse(bh);
- break;
- }
brelse(bh);
}
@@ -781,8 +760,7 @@ static loff_t udf_check_vsd(struct super_block *sb)
return nsr03;
else if (nsr02)
return nsr02;
- else if (!bh && sector - (sbi->s_session << sb->s_blocksize_bits) ==
- VSD_FIRST_SECTOR_OFFSET)
+ else if (sector - (sbi->s_session << sb->s_blocksize_bits) == 32768)
return -1;
else
return 0;
@@ -1292,9 +1270,6 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
* PHYSICAL partitions are already set up
*/
type1_idx = i;
-#ifdef UDFFS_DEBUG
- map = NULL; /* supress 'maybe used uninitialized' warning */
-#endif
for (i = 0; i < sbi->s_partitions; i++) {
map = &sbi->s_partmaps[i];
@@ -1916,9 +1891,7 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
return 0;
}
if (nsr_off == -1)
- udf_debug("Failed to read sector at offset %d. "
- "Assuming open disc. Skipping validity "
- "check\n", VSD_FIRST_SECTOR_OFFSET);
+ udf_debug("Failed to read byte 32768. Assuming open disc. Skipping validity check\n");
if (!sbi->s_last_block)
sbi->s_last_block = udf_get_last_block(sb);
} else {
diff --git a/fs/utimes.c b/fs/utimes.c
index aa138d6..f4fb7ec 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -53,7 +53,6 @@ static int utimes_common(struct path *path, struct timespec *times)
int error;
struct iattr newattrs;
struct inode *inode = path->dentry->d_inode;
- struct inode *delegated_inode = NULL;
error = mnt_want_write(path->mnt);
if (error)
@@ -102,15 +101,9 @@ static int utimes_common(struct path *path, struct timespec *times)
goto mnt_drop_write_and_out;
}
}
-retry_deleg:
mutex_lock(&inode->i_mutex);
- error = notify_change(path->dentry, &newattrs, &delegated_inode);
+ error = notify_change(path->dentry, &newattrs);
mutex_unlock(&inode->i_mutex);
- if (delegated_inode) {
- error = break_deleg_wait(&delegated_inode);
- if (!error)
- goto retry_deleg;
- }
mnt_drop_write_and_out:
mnt_drop_write(path->mnt);
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index c21f435..0719e4d 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -66,14 +66,12 @@ xfs-y += xfs_alloc.o \
xfs_bmap_btree.o \
xfs_btree.o \
xfs_da_btree.o \
- xfs_da_format.o \
xfs_dir2.o \
xfs_dir2_block.o \
xfs_dir2_data.o \
xfs_dir2_leaf.o \
xfs_dir2_node.o \
xfs_dir2_sf.o \
- xfs_dquot_buf.o \
xfs_ialloc.o \
xfs_ialloc_btree.o \
xfs_icreate_item.o \
@@ -105,11 +103,7 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
xfs_qm_bhv.o \
xfs_qm.o \
xfs_quotaops.o
-
-# xfs_rtbitmap is shared with libxfs
-xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o \
- xfs_rtbitmap.o
-
+xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
xfs-$(CONFIG_PROC_FS) += xfs_stats.o
xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 66a36be..a02cfb9 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -63,6 +63,17 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
}
void *
+kmem_zalloc(size_t size, xfs_km_flags_t flags)
+{
+ void *ptr;
+
+ ptr = kmem_alloc(size, flags);
+ if (ptr)
+ memset((char *)ptr, 0, (int)size);
+ return ptr;
+}
+
+void *
kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
{
void *ptr;
@@ -117,3 +128,14 @@ kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags)
congestion_wait(BLK_RW_ASYNC, HZ/50);
} while (1);
}
+
+void *
+kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags)
+{
+ void *ptr;
+
+ ptr = kmem_zone_alloc(zone, flags);
+ if (ptr)
+ memset((char *)ptr, 0, kmem_cache_size(zone));
+ return ptr;
+}
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 64db0e5..3a7371c 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -32,7 +32,6 @@ typedef unsigned __bitwise xfs_km_flags_t;
#define KM_NOSLEEP ((__force xfs_km_flags_t)0x0002u)
#define KM_NOFS ((__force xfs_km_flags_t)0x0004u)
#define KM_MAYFAIL ((__force xfs_km_flags_t)0x0008u)
-#define KM_ZERO ((__force xfs_km_flags_t)0x0010u)
/*
* We use a special process flag to avoid recursive callbacks into
@@ -44,7 +43,7 @@ kmem_flags_convert(xfs_km_flags_t flags)
{
gfp_t lflags;
- BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_ZERO));
+ BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));
if (flags & KM_NOSLEEP) {
lflags = GFP_ATOMIC | __GFP_NOWARN;
@@ -53,14 +52,11 @@ kmem_flags_convert(xfs_km_flags_t flags)
if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
lflags &= ~__GFP_FS;
}
-
- if (flags & KM_ZERO)
- lflags |= __GFP_ZERO;
-
return lflags;
}
extern void *kmem_alloc(size_t, xfs_km_flags_t);
+extern void *kmem_zalloc(size_t, xfs_km_flags_t);
extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t);
extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t);
extern void kmem_free(const void *);
@@ -68,12 +64,6 @@ extern void kmem_free(const void *);
extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
-static inline void *
-kmem_zalloc(size_t size, xfs_km_flags_t flags)
-{
- return kmem_alloc(size, flags | KM_ZERO);
-}
-
/*
* Zone interfaces
*/
@@ -112,11 +102,6 @@ kmem_zone_destroy(kmem_zone_t *zone)
}
extern void *kmem_zone_alloc(kmem_zone_t *, xfs_km_flags_t);
-
-static inline void *
-kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags)
-{
- return kmem_zone_alloc(zone, flags | KM_ZERO);
-}
+extern void *kmem_zone_zalloc(kmem_zone_t *, xfs_km_flags_t);
#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 370eb3e..0e2f37e 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -16,15 +16,15 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
-#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
+#include "xfs_acl.h"
+#include "xfs_attr.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
#include "xfs_ag.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_inode.h"
-#include "xfs_acl.h"
-#include "xfs_attr.h"
#include "xfs_trace.h"
#include <linux/slab.h>
#include <linux/xattr.h>
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 3fc1098..1cb740a 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -128,6 +128,8 @@ typedef struct xfs_agf {
extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
+extern const struct xfs_buf_ops xfs_agf_buf_ops;
+
/*
* Size of the unlinked inode hash table in the agi.
*/
@@ -189,6 +191,8 @@ typedef struct xfs_agi {
extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, struct xfs_buf **bpp);
+extern const struct xfs_buf_ops xfs_agi_buf_ops;
+
/*
* The third a.g. block contains the a.g. freelist, an array
* of block pointers to blocks owned by the allocation btree code.
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 9eab2df..5a1393f 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -17,25 +17,25 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_shared.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_error.h"
#include "xfs_cksum.h"
#include "xfs_trace.h"
-#include "xfs_trans.h"
#include "xfs_buf_item.h"
-#include "xfs_log.h"
struct workqueue_struct *xfs_alloc_wq;
@@ -2294,8 +2294,6 @@ xfs_read_agf(
{
int error;
- trace_xfs_read_agf(mp, agno);
-
ASSERT(agno != NULLAGNUMBER);
error = xfs_trans_read_buf(
mp, tp, mp->m_ddev_targp,
@@ -2326,9 +2324,8 @@ xfs_alloc_read_agf(
struct xfs_perag *pag; /* per allocation group data */
int error;
- trace_xfs_alloc_read_agf(mp, agno);
-
ASSERT(agno != NULLAGNUMBER);
+
error = xfs_read_agf(mp, tp, agno,
(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
bpp);
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index feacb06..99d0a61 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -231,4 +231,7 @@ xfs_alloc_get_rec(
xfs_extlen_t *len, /* output: length of extent */
int *stat); /* output: success/failure */
+extern const struct xfs_buf_ops xfs_agf_buf_ops;
+extern const struct xfs_buf_ops xfs_agfl_buf_ops;
+
#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 1308542..cafc902 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -17,21 +17,23 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_btree.h"
+#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
-#include "xfs_trans.h"
STATIC struct xfs_btree_cur *
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h
index 45e189e..e3a3f74 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/xfs_alloc_btree.h
@@ -27,6 +27,39 @@ struct xfs_btree_cur;
struct xfs_mount;
/*
+ * There are two on-disk btrees, one sorted by blockno and one sorted
+ * by blockcount and blockno. All blocks look the same to make the code
+ * simpler; if we have time later, we'll make the optimizations.
+ */
+#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */
+#define XFS_ABTB_CRC_MAGIC 0x41423342 /* 'AB3B' */
+#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */
+#define XFS_ABTC_CRC_MAGIC 0x41423343 /* 'AB3C' */
+
+/*
+ * Data record/key structure
+ */
+typedef struct xfs_alloc_rec { /* big-endian (__be32) free-extent record: start block plus length */
+ __be32 ar_startblock; /* starting block number */
+ __be32 ar_blockcount; /* count of free blocks */
+} xfs_alloc_rec_t, xfs_alloc_key_t; /* one layout, typedef'd as both the record and the key type */
+
+typedef struct xfs_alloc_rec_incore { /* same two fields as xfs_alloc_rec, typed xfs_agblock_t/xfs_extlen_t instead of __be32 */
+ xfs_agblock_t ar_startblock; /* starting block number */
+ xfs_extlen_t ar_blockcount; /* count of free blocks */
+} xfs_alloc_rec_incore_t;
+
+/* btree pointer type */
+typedef __be32 xfs_alloc_ptr_t;
+
+/*
+ * Block numbers in the AG:
+ * SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3.
+ */
+#define XFS_BNO_BLOCK(mp) ((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1))
+#define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1))
+
+/*
* Btree block header size depends on a superblock flag.
*/
#define XFS_ALLOC_BLOCK_LEN(mp) \
@@ -62,4 +95,6 @@ extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *,
xfs_agnumber_t, xfs_btnum_t);
extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int);
+extern const struct xfs_buf_ops xfs_allocbt_buf_ops;
+
#endif /* __XFS_ALLOC_BTREE_H__ */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 71c8c9d..e51e581 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -16,15 +16,14 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
+#include "xfs_trans.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
@@ -32,8 +31,6 @@
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
#include <linux/aio.h>
#include <linux/gfp.h>
#include <linux/mpage.h>
@@ -336,7 +333,7 @@ xfs_map_blocks(
if (type == XFS_IO_DELALLOC &&
(!nimaps || isnullstartblock(imap->br_startblock))) {
- error = xfs_iomap_write_allocate(ip, offset, imap);
+ error = xfs_iomap_write_allocate(ip, offset, count, imap);
if (!error)
trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
return -XFS_ERROR(error);
@@ -1572,7 +1569,8 @@ xfs_vm_write_begin(
ASSERT(len <= PAGE_CACHE_SIZE);
- page = grab_cache_page_write_begin(mapping, index, flags);
+ page = grab_cache_page_write_begin(mapping, index,
+ flags | AOP_FLAG_NOFS);
if (!page)
return -ENOMEM;
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index b861270..ddcf226 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -17,24 +17,23 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
-#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_attr_remote.h"
@@ -42,7 +41,6 @@
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
-#include "xfs_dinode.h"
/*
* xfs_attr.c
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 09480c57..bb24b07 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -18,20 +18,22 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_da_btree.h"
-#include "xfs_inode.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
#include "xfs_alloc.h"
+#include "xfs_btree.h"
#include "xfs_attr_remote.h"
-#include "xfs_trans.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
@@ -39,8 +41,7 @@
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
-#include "xfs_dinode.h"
-#include "xfs_dir2.h"
+#include "xfs_trans_priv.h"
/*
* Look at all the extents for this logical region,
@@ -231,13 +232,13 @@ xfs_attr3_node_inactive(
}
node = bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&ichdr, node);
+ xfs_da3_node_hdr_from_disk(&ichdr, node);
parent_blkno = bp->b_bn;
if (!ichdr.count) {
xfs_trans_brelse(*trans, bp);
return 0;
}
- btree = dp->d_ops->node_tree_p(node);
+ btree = xfs_da3_node_tree_p(node);
child_fsb = be32_to_cpu(btree[0].before);
xfs_trans_brelse(*trans, bp); /* no locks for later trans */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 7b126f4..86db20a 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -18,31 +18,32 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_da_btree.h"
-#include "xfs_inode.h"
-#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap_btree.h"
-#include "xfs_bmap.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
#include "xfs_attr_sf.h"
#include "xfs_attr_remote.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
-#include "xfs_dinode.h"
-#include "xfs_dir2.h"
/*
@@ -917,8 +918,8 @@ xfs_attr3_leaf_to_node(
if (error)
goto out;
node = bp1->b_addr;
- dp->d_ops->node_hdr_from_disk(&icnodehdr, node);
- btree = dp->d_ops->node_tree_p(node);
+ xfs_da3_node_hdr_from_disk(&icnodehdr, node);
+ btree = xfs_da3_node_tree_p(node);
leaf = bp2->b_addr;
xfs_attr3_leaf_hdr_from_disk(&icleafhdr, leaf);
@@ -928,7 +929,7 @@ xfs_attr3_leaf_to_node(
btree[0].hashval = entries[icleafhdr.count - 1].hashval;
btree[0].before = cpu_to_be32(blkno);
icnodehdr.count = 1;
- dp->d_ops->node_hdr_to_disk(node, &icnodehdr);
+ xfs_da3_node_hdr_to_disk(node, &icnodehdr);
xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(mp) - 1);
error = 0;
out:
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 3ec5ec0..c102213 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -19,6 +19,16 @@
#ifndef __XFS_ATTR_LEAF_H__
#define __XFS_ATTR_LEAF_H__
+/*
+ * Attribute storage layout, internal structure, access macros, etc.
+ *
+ * Attribute lists are structured around Btrees where all the data
+ * elements are in the leaf nodes. Attribute names are hashed into an int,
+ * then that int is used as the index into the Btree. Since the hashval
+ * of an attribute name may not be unique, we may have duplicate keys. The
+ * internal links in the Btree are logical block offsets into the file.
+ */
+
struct attrlist;
struct attrlist_cursor_kern;
struct xfs_attr_list_context;
@@ -28,6 +38,226 @@ struct xfs_da_state_blk;
struct xfs_inode;
struct xfs_trans;
+/*========================================================================
+ * Attribute structure when equal to XFS_LBSIZE(mp) bytes.
+ *========================================================================*/
+
+/*
+ * This is the structure of the leaf nodes in the Btree.
+ *
+ * Struct leaf_entry's are packed from the top. Name/values grow from the
+ * bottom but are not packed. The freemap contains run-length-encoded entries
+ * for the free bytes after the leaf_entry's, but only the N largest such,
+ * smaller runs are dropped. When the freemap doesn't show enough space
+ * for an allocation, we compact the name/value area and try again. If we
+ * still don't have enough space, then we have to split the block. The
+ * name/value structs (both local and remote versions) must be 32bit aligned.
+ *
+ * Since we have duplicate hash keys, for each key that matches, compare
+ * the actual name string. The root and intermediate node search always
+ * takes the first-in-the-block key match found, so we should only have
+ * to work "forw"ard. If none matches, continue with the "forw"ard leaf
+ * nodes until the hash key changes or the attribute name is found.
+ *
+ * We store the fact that an attribute is a ROOT/USER/SECURE attribute in
+ * the leaf_entry. The namespaces are independent only because we also look
+ * at the namespace bit when we are looking for a matching attribute name.
+ *
+ * We also store an "incomplete" bit in the leaf_entry. It shows that an
+ * attribute is in the middle of being created and should not be shown to
+ * the user if we crash during the time that the bit is set. We clear the
+ * bit when we have finished setting up the attribute. We do this because
+ * we cannot create some large attributes inside a single transaction, and we
+ * need some indication that we weren't finished if we crash in the middle.
+ */
+#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */
+
+typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */
+ __be16 base; /* base of free region */
+ __be16 size; /* length of free region */
+} xfs_attr_leaf_map_t;
+
+typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */
+ xfs_da_blkinfo_t info; /* block type, links, etc. */
+ __be16 count; /* count of active leaf_entry's */
+ __be16 usedbytes; /* num bytes of names/values stored */
+ __be16 firstused; /* first used byte in name area */
+ __u8 holes; /* != 0 if blk needs compaction */
+ __u8 pad1;
+ xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE];
+ /* N largest free regions */
+} xfs_attr_leaf_hdr_t;
+
+typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */
+ __be32 hashval; /* hash value of name */
+ __be16 nameidx; /* index into buffer of name/value */
+ __u8 flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */
+ __u8 pad2; /* unused pad byte */
+} xfs_attr_leaf_entry_t;
+
+typedef struct xfs_attr_leaf_name_local {
+ __be16 valuelen; /* number of bytes in value */
+ __u8 namelen; /* length of name bytes */
+ __u8 nameval[1]; /* name/value bytes */
+} xfs_attr_leaf_name_local_t;
+
+typedef struct xfs_attr_leaf_name_remote {
+ __be32 valueblk; /* block number of value bytes */
+ __be32 valuelen; /* number of bytes in value */
+ __u8 namelen; /* length of name bytes */
+ __u8 name[1]; /* name bytes */
+} xfs_attr_leaf_name_remote_t;
+
+typedef struct xfs_attr_leafblock {
+ xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */
+ xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */
+ xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */
+ xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */
+} xfs_attr_leafblock_t;
+
+/*
+ * CRC enabled leaf structures. Called "version 3" structures to match the
+ * version number of the directory and dablk structures for this feature, and
+ * attr2 is already taken by the variable inode attribute fork size feature.
+ */
+struct xfs_attr3_leaf_hdr { /* field names match xfs_attr_leaf_hdr; presumably the same meanings — confirm against the from_disk/to_disk helpers */
+ struct xfs_da3_blkinfo info; /* v3 block info; XFS_ATTR3_LEAF_CRC_OFF is the offset of info.crc */
+ __be16 count;
+ __be16 usedbytes;
+ __be16 firstused;
+ __u8 holes;
+ __u8 pad1;
+ struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE]; /* XFS_ATTR_LEAF_MAPSIZE (3) base/size slots */
+ __be32 pad2; /* 64 bit alignment */
+};
+
+#define XFS_ATTR3_LEAF_CRC_OFF (offsetof(struct xfs_attr3_leaf_hdr, info.crc))
+
+struct xfs_attr3_leafblock {
+ struct xfs_attr3_leaf_hdr hdr;
+ struct xfs_attr_leaf_entry entries[1];
+
+ /*
+ * The rest of the block contains the following structures after the
+ * leaf entries, growing from the bottom up. The variables are never
+ * referenced, the locations accessed purely from helper functions.
+ *
+ * struct xfs_attr_leaf_name_local
+ * struct xfs_attr_leaf_name_remote
+ */
+};
+
+/*
+ * incore, neutral version of the attribute leaf header
+ */
+struct xfs_attr3_icleaf_hdr { /* parameter type of xfs_attr3_leaf_hdr_from_disk()/xfs_attr3_leaf_hdr_to_disk() */
+ __uint32_t forw; /* NOTE(review): presumably the forward sibling block link — confirm */
+ __uint32_t back; /* NOTE(review): presumably the backward sibling block link — confirm */
+ __uint16_t magic;
+ __uint16_t count; /* plain __uint16_t here, where the on-disk headers use __be16 */
+ __uint16_t usedbytes;
+ __uint16_t firstused;
+ __u8 holes; /* no pad members in this structure, unlike the on-disk headers */
+ struct {
+ __uint16_t base;
+ __uint16_t size;
+ } freemap[XFS_ATTR_LEAF_MAPSIZE]; /* same slot count as the on-disk freemap arrays */
+};
+
+/*
+ * Flags used in the leaf_entry[i].flags field.
+ * NOTE: the INCOMPLETE bit must not collide with the flags bits specified
+ * on the system call, they are "or"ed together for various operations.
+ */
+#define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */
+#define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */
+#define XFS_ATTR_SECURE_BIT 2 /* limit access to secure attrs */
+#define XFS_ATTR_INCOMPLETE_BIT 7 /* attr in middle of create/delete */
+#define XFS_ATTR_LOCAL (1 << XFS_ATTR_LOCAL_BIT)
+#define XFS_ATTR_ROOT (1 << XFS_ATTR_ROOT_BIT)
+#define XFS_ATTR_SECURE (1 << XFS_ATTR_SECURE_BIT)
+#define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT)
+
+/*
+ * Conversion macros for converting namespace bits from argument flags
+ * to ondisk flags.
+ */
+#define XFS_ATTR_NSP_ARGS_MASK (ATTR_ROOT | ATTR_SECURE)
+#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE)
+#define XFS_ATTR_NSP_ONDISK(flags) ((flags) & XFS_ATTR_NSP_ONDISK_MASK)
+#define XFS_ATTR_NSP_ARGS(flags) ((flags) & XFS_ATTR_NSP_ARGS_MASK)
+#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x) (((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\
+ ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0))
+#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x) (((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\
+ ((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0))
+
+/*
+ * Alignment for namelist and valuelist entries (since they are mixed
+ * there can be only one alignment value)
+ */
+#define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t))
+
+static inline int /* returns sizeof() of the leaf header variant that matches the block's magic */
+xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp)
+{
+ if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) /* v3 (CRC-enabled) leaf magic */
+ return sizeof(struct xfs_attr3_leaf_hdr);
+ return sizeof(struct xfs_attr_leaf_hdr); /* any other magic value: original header size */
+}
+
+static inline struct xfs_attr_leaf_entry * /* returns the address of entries[0] for this block's format */
+xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp)
+{
+ if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) /* v3 block: view it as xfs_attr3_leafblock, whose hdr is xfs_attr3_leaf_hdr */
+ return &((struct xfs_attr3_leafblock *)leafp)->entries[0];
+ return &leafp->entries[0]; /* otherwise use the xfs_attr_leafblock layout as passed in */
+}
+
+/*
+ * Cast typed pointers for "local" and "remote" name/value structs.
+ */
+static inline char * /* returns a pointer into the block at the position recorded in entry idx's nameidx */
+xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
+{
+ struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp); /* entry array start depends on the header variant */
+
+ return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)]; /* nameidx is used as a byte offset from leafp; idx is not range-checked here */
+}
+
+static inline xfs_attr_leaf_name_remote_t * /* same address as xfs_attr3_leaf_name(), typed as a "remote" name struct */
+xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx)
+{
+ return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx); /* pure cast; does not check that the entry is actually remote */
+}
+
+static inline xfs_attr_leaf_name_local_t * /* same address as xfs_attr3_leaf_name(), typed as a "local" name/value struct */
+xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
+{
+ return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx); /* pure cast; does not check the entry's XFS_ATTR_LOCAL flag */
+}
+
+/*
+ * Calculate total bytes used (including trailing pad for alignment) for
+ * a "local" name/value structure, a "remote" name/value structure, and
+ * a pointer which might be either.
+ */
+static inline int xfs_attr_leaf_entsize_remote(int nlen) /* bytes occupied by a "remote" entry whose name is nlen bytes long */
+{ /* "- 1" removes the name[1] placeholder byte that sizeof already counts; NOTE(review): the trailing backslash on the next line is a line continuation outside any macro — harmless, looks like a leftover */
+ return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \
+ XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); /* round up to a multiple of XFS_ATTR_LEAF_NAME_ALIGN (mask form assumes a power of two) */
+}
+
+static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen) /* bytes occupied by a "local" entry holding an nlen-byte name and a vlen-byte value */
+{ /* "- 1" removes the nameval[1] placeholder byte that sizeof already counts */
+ return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) +
+ XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); /* round up to a multiple of XFS_ATTR_LEAF_NAME_ALIGN (mask form assumes a power of two) */
+}
+
+static inline int xfs_attr_leaf_entsize_local_max(int bsize) /* size limit derived from the block size bsize */
+{
+ return (((bsize) >> 1) + ((bsize) >> 2)); /* bsize/2 + bsize/4, i.e. about three quarters of bsize */
+}
+
/*
* Used to keep a list of "remote value" extents when unlinking an inode.
*/
@@ -106,4 +336,6 @@ void xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to,
void xfs_attr3_leaf_hdr_to_disk(struct xfs_attr_leafblock *to,
struct xfs_attr3_icleaf_hdr *from);
+extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
+
#endif /* __XFS_ATTR_LEAF_H__ */
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 2d174b1..cbc80d4 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -18,29 +18,31 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
+#include "xfs_attr_sf.h"
+#include "xfs_attr_remote.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
-#include "xfs_attr_sf.h"
-#include "xfs_attr_remote.h"
#include "xfs_attr_leaf.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
-#include "xfs_dinode.h"
-#include "xfs_dir2.h"
STATIC int
xfs_attr_shortform_compare(const void *a, const void *b)
@@ -227,7 +229,6 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
struct xfs_da_node_entry *btree;
int error, i;
struct xfs_buf *bp;
- struct xfs_inode *dp = context->dp;
trace_xfs_attr_node_list(context);
@@ -241,7 +242,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
*/
bp = NULL;
if (cursor->blkno > 0) {
- error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1,
+ error = xfs_da3_node_read(NULL, context->dp, cursor->blkno, -1,
&bp, XFS_ATTR_FORK);
if ((error != 0) && (error != EFSCORRUPTED))
return(error);
@@ -291,7 +292,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
for (;;) {
__uint16_t magic;
- error = xfs_da3_node_read(NULL, dp,
+ error = xfs_da3_node_read(NULL, context->dp,
cursor->blkno, -1, &bp,
XFS_ATTR_FORK);
if (error)
@@ -311,8 +312,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
return XFS_ERROR(EFSCORRUPTED);
}
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
- btree = dp->d_ops->node_tree_p(node);
+ xfs_da3_node_hdr_from_disk(&nodehdr, node);
+ btree = xfs_da3_node_tree_p(node);
for (i = 0; i < nodehdr.count; btree++, i++) {
if (cursor->hashval
<= be32_to_cpu(btree->hashval)) {
@@ -348,7 +349,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
break;
cursor->blkno = leafhdr.forw;
xfs_trans_brelse(NULL, bp);
- error = xfs_attr3_leaf_read(NULL, dp, cursor->blkno, -1, &bp);
+ error = xfs_attr3_leaf_read(NULL, context->dp, cursor->blkno, -1,
+ &bp);
if (error)
return error;
}
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index 739e0a5..712a502 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -18,19 +18,20 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
+#include "xfs_error.h"
#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
-#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
@@ -41,7 +42,6 @@
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_buf_item.h"
-#include "xfs_error.h"
#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */
diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/xfs_attr_remote.h
index 5a9acfa..92a8fd7 100644
--- a/fs/xfs/xfs_attr_remote.h
+++ b/fs/xfs/xfs_attr_remote.h
@@ -18,6 +18,35 @@
#ifndef __XFS_ATTR_REMOTE_H__
#define __XFS_ATTR_REMOTE_H__
+#define XFS_ATTR3_RMT_MAGIC 0x5841524d /* XARM */
+
+/*
+ * There is one of these headers per filesystem block in a remote attribute.
+ * This is done to ensure there is a 1:1 mapping between the attribute value
+ * length and the number of blocks needed to store the attribute. This makes the
+ * verification of a buffer a little more complex, but greatly simplifies the
+ * allocation, reading and writing of these attributes as we don't have to guess
+ * the number of blocks needed to store the attribute data.
+ */
+struct xfs_attr3_rmt_hdr { /* its size is what XFS_ATTR3_RMT_BUF_SPACE() subtracts from the buffer size when the superblock has CRCs */
+ __be32 rm_magic; /* presumably XFS_ATTR3_RMT_MAGIC ('XARM') — confirm in the verifier */
+ __be32 rm_offset; /* NOTE(review): presumably this block's byte offset within the attribute value — confirm */
+ __be32 rm_bytes; /* NOTE(review): presumably the number of value bytes held in this block — confirm */
+ __be32 rm_crc; /* XFS_ATTR3_RMT_CRC_OFF is the offset of this field */
+ uuid_t rm_uuid; /* NOTE(review): presumably the filesystem UUID — confirm */
+ __be64 rm_owner; /* NOTE(review): presumably the owning inode number — confirm */
+ __be64 rm_blkno; /* NOTE(review): presumably the block's own disk address — confirm */
+ __be64 rm_lsn; /* NOTE(review): presumably the log sequence number of the last write — confirm */
+};
+
+#define XFS_ATTR3_RMT_CRC_OFF offsetof(struct xfs_attr3_rmt_hdr, rm_crc)
+
+#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize) \
+ ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
+ sizeof(struct xfs_attr3_rmt_hdr) : 0))
+
+extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
+
int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen);
int xfs_attr_rmtval_get(struct xfs_da_args *args);
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index 0e8885a..4822884 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -16,8 +16,10 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
-#include "xfs_log_format.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_buf_item.h"
/*
* XFS bit manipulation routines, used in non-realtime code.
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 3ef11b2..f47e65c 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -17,37 +17,39 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
#include "xfs_inum.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_trans.h"
+#include "xfs_mount.h"
+#include "xfs_itable.h"
#include "xfs_inode_item.h"
#include "xfs_extfree_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
+#include "xfs_attr_leaf.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_buf_item.h"
+#include "xfs_filestream.h"
#include "xfs_trace.h"
#include "xfs_symlink.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_dinode.h"
-#include "xfs_filestream.h"
kmem_zone_t *xfs_bmap_free_item_zone;
@@ -1137,7 +1139,6 @@ xfs_bmap_add_attrfork(
int committed; /* xaction was committed */
int logflags; /* logging flags */
int error; /* error return value */
- int cancel_flags = 0;
ASSERT(XFS_IFORK_Q(ip) == 0);
@@ -1148,20 +1149,19 @@ xfs_bmap_add_attrfork(
if (rsvd)
tp->t_flags |= XFS_TRANS_RESERVE;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- return error;
- }
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
+ if (error)
+ goto error0;
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
XFS_QMOPT_RES_REGBLKS);
- if (error)
- goto trans_cancel;
- cancel_flags |= XFS_TRANS_ABORT;
+ if (error) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
+ return error;
+ }
if (XFS_IFORK_Q(ip))
- goto trans_cancel;
+ goto error1;
if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
/*
* For inodes coming from pre-6.2 filesystems.
@@ -1171,7 +1171,7 @@ xfs_bmap_add_attrfork(
}
ASSERT(ip->i_d.di_anextents == 0);
- xfs_trans_ijoin(tp, ip, 0);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
switch (ip->i_d.di_format) {
@@ -1193,7 +1193,7 @@ xfs_bmap_add_attrfork(
default:
ASSERT(0);
error = XFS_ERROR(EINVAL);
- goto trans_cancel;
+ goto error1;
}
ASSERT(ip->i_afp == NULL);
@@ -1221,7 +1221,7 @@ xfs_bmap_add_attrfork(
if (logflags)
xfs_trans_log_inode(tp, ip, logflags);
if (error)
- goto bmap_cancel;
+ goto error2;
if (!xfs_sb_version_hasattr(&mp->m_sb) ||
(!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
__int64_t sbfields = 0;
@@ -1244,16 +1244,14 @@ xfs_bmap_add_attrfork(
error = xfs_bmap_finish(&tp, &flist, &committed);
if (error)
- goto bmap_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return error;
-
-bmap_cancel:
+ goto error2;
+ return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+error2:
xfs_bmap_cancel(&flist);
-trans_cancel:
- xfs_trans_cancel(tp, cancel_flags);
+error1:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
+error0:
+ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
return error;
}
@@ -1484,7 +1482,7 @@ xfs_bmap_search_extents(
xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
"Access to block zero in inode %llu "
"start_block: %llx start_off: %llx "
- "blkcnt: %llx extent-state: %x lastx: %x",
+ "blkcnt: %llx extent-state: %x lastx: %x\n",
(unsigned long long)ip->i_ino,
(unsigned long long)gotp->br_startblock,
(unsigned long long)gotp->br_startoff,
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 706bc3f..bb8de8e 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -17,26 +17,27 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
-#include "xfs_bmap_btree.h"
+#include "xfs_itable.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
-#include "xfs_dinode.h"
/*
* Determine the extent state.
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 6e42e1e..e367461 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -18,6 +18,9 @@
#ifndef __XFS_BMAP_BTREE_H__
#define __XFS_BMAP_BTREE_H__
+#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */
+#define XFS_BMAP_CRC_MAGIC 0x424d4133 /* 'BMA3' */
+
struct xfs_btree_cur;
struct xfs_btree_block;
struct xfs_mount;
@@ -25,6 +28,85 @@ struct xfs_inode;
struct xfs_trans;
/*
+ * Bmap root header, on-disk form only.
+ */
+typedef struct xfs_bmdr_block {
+ __be16 bb_level; /* 0 is a leaf */
+ __be16 bb_numrecs; /* current # of data records */
+} xfs_bmdr_block_t;
+
+/*
+ * Bmap btree record and extent descriptor.
+ * l0:63 is an extent flag (value 1 indicates non-normal).
+ * l0:9-62 are startoff.
+ * l0:0-8 and l1:21-63 are startblock.
+ * l1:0-20 are blockcount.
+ */
+#define BMBT_EXNTFLAG_BITLEN 1
+#define BMBT_STARTOFF_BITLEN 54
+#define BMBT_STARTBLOCK_BITLEN 52
+#define BMBT_BLOCKCOUNT_BITLEN 21
+
+typedef struct xfs_bmbt_rec { /* two big-endian 64-bit words packing flag, startoff, startblock and blockcount */
+ __be64 l0, l1; /* l0: flag bit 63, startoff bits 9-62, startblock bits 0-8; l1: startblock bits 21-63, blockcount bits 0-20 */
+} xfs_bmbt_rec_t;
+
+typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */
+typedef xfs_bmbt_rec_t xfs_bmdr_rec_t;
+
+typedef struct xfs_bmbt_rec_host { /* same two-word shape as xfs_bmbt_rec, with plain __uint64_t members instead of __be64 */
+ __uint64_t l0, l1;
+} xfs_bmbt_rec_host_t;
+
+/*
+ * Values and macros for delayed-allocation startblock fields.
+ */
+#define STARTBLOCKVALBITS 17
+#define STARTBLOCKMASKBITS (15 + XFS_BIG_BLKNOS * 20)
+#define DSTARTBLOCKMASKBITS (15 + 20)
+#define STARTBLOCKMASK \
+ (((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
+#define DSTARTBLOCKMASK \
+ (((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
+
+static inline int isnullstartblock(xfs_fsblock_t x) /* tests whether x has the delayed-allocation "null startblock" form */
+{
+ return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK; /* nonzero only when every STARTBLOCKMASK bit is set in x */
+}
+
+static inline int isnulldstartblock(xfs_dfsbno_t x) /* xfs_dfsbno_t counterpart of isnullstartblock(), using DSTARTBLOCKMASK */
+{
+ return ((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK; /* nonzero only when every DSTARTBLOCKMASK bit is set in x */
+}
+
+static inline xfs_fsblock_t nullstartblock(int k) /* builds a null-startblock value that carries k in its low bits */
+{
+ ASSERT(k < (1 << STARTBLOCKVALBITS)); /* k must fit in the STARTBLOCKVALBITS (17) bits below the mask */
+ return STARTBLOCKMASK | (k); /* all mask bits set, k OR'ed in; isnullstartblock() is true for the result */
+}
+
+static inline xfs_filblks_t startblockval(xfs_fsblock_t x) /* recovers the value that nullstartblock() stored alongside the mask */
+{
+ return (xfs_filblks_t)((x) & ~STARTBLOCKMASK); /* x with every STARTBLOCKMASK bit cleared */
+}
+
+/*
+ * Possible extent formats.
+ */
+typedef enum {
+ XFS_EXTFMT_NOSTATE = 0,
+ XFS_EXTFMT_HASSTATE
+} xfs_exntfmt_t;
+
+/*
+ * Possible extent states.
+ */
+typedef enum {
+ XFS_EXT_NORM, XFS_EXT_UNWRITTEN,
+ XFS_EXT_DMAPI_OFFLINE, XFS_EXT_INVALID
+} xfs_exntst_t;
+
+/*
* Extent state and extent format macros.
*/
#define XFS_EXTFMT_INODE(x) \
@@ -33,6 +115,27 @@ struct xfs_trans;
#define ISUNWRITTEN(x) ((x)->br_state == XFS_EXT_UNWRITTEN)
/*
+ * Incore version of above.
+ */
+typedef struct xfs_bmbt_irec
+{
+ xfs_fileoff_t br_startoff; /* starting file offset */
+ xfs_fsblock_t br_startblock; /* starting block number */
+ xfs_filblks_t br_blockcount; /* number of blocks */
+ xfs_exntst_t br_state; /* extent state */
+} xfs_bmbt_irec_t;
+
+/*
+ * Key structure for non-leaf levels of the tree.
+ */
+typedef struct xfs_bmbt_key {
+ __be64 br_startoff; /* starting file offset */
+} xfs_bmbt_key_t, xfs_bmdr_key_t;
+
+/* btree pointer type */
+typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
+
+/*
* Btree block header size depends on a superblock flag.
*/
#define XFS_BMBT_BLOCK_LEN(mp) \
@@ -140,4 +243,6 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
struct xfs_trans *, struct xfs_inode *, int);
+extern const struct xfs_buf_ops xfs_bmbt_buf_ops;
+
#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 5887e41..97f952c 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -18,31 +18,31 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_trans.h"
#include "xfs_extfree_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
-#include "xfs_log.h"
-#include "xfs_dinode.h"
/* Kernel only BMAP related definitions and functions */
@@ -965,12 +965,32 @@ xfs_free_eofblocks(
return error;
}
-int
+/*
+ * xfs_alloc_file_space()
+ * This routine allocates disk space for the given file.
+ *
+ * If alloc_type == 0, this request is for an ALLOCSP type
+ * request which will change the file size. In this case, no
+ * DMAPI event will be generated by the call. A TRUNCATE event
+ * will be generated later by xfs_setattr.
+ *
+ * If alloc_type != 0, this request is for a RESVSP type
+ * request, and a DMAPI DM_EVENT_WRITE will be generated if the
+ * lower block boundary byte address is less than the file's
+ * length.
+ *
+ * RETURNS:
+ * 0 on success
+ * errno on error
+ *
+ */
+STATIC int
xfs_alloc_file_space(
- struct xfs_inode *ip,
+ xfs_inode_t *ip,
xfs_off_t offset,
xfs_off_t len,
- int alloc_type)
+ int alloc_type,
+ int attr_flags)
{
xfs_mount_t *mp = ip->i_mount;
xfs_off_t count;
@@ -1212,11 +1232,24 @@ xfs_zero_remaining_bytes(
return error;
}
-int
+/*
+ * xfs_free_file_space()
+ * This routine frees disk space for the given file.
+ *
+ * This routine is only called by xfs_change_file_space
+ * for an UNRESVSP type call.
+ *
+ * RETURNS:
+ * 0 on success
+ * errno on error
+ *
+ */
+STATIC int
xfs_free_file_space(
- struct xfs_inode *ip,
+ xfs_inode_t *ip,
xfs_off_t offset,
- xfs_off_t len)
+ xfs_off_t len,
+ int attr_flags)
{
int committed;
int done;
@@ -1234,6 +1267,7 @@ xfs_free_file_space(
int rt;
xfs_fileoff_t startoffset_fsb;
xfs_trans_t *tp;
+ int need_iolock = 1;
mp = ip->i_mount;
@@ -1250,15 +1284,20 @@ xfs_free_file_space(
startoffset_fsb = XFS_B_TO_FSB(mp, offset);
endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
- /* wait for the completion of any pending DIOs */
- inode_dio_wait(VFS_I(ip));
+ if (attr_flags & XFS_ATTR_NOLOCK)
+ need_iolock = 0;
+ if (need_iolock) {
+ xfs_ilock(ip, XFS_IOLOCK_EXCL);
+ /* wait for the completion of any pending DIOs */
+ inode_dio_wait(VFS_I(ip));
+ }
rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
ioffset = offset & ~(rounding - 1);
error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
ioffset, -1);
if (error)
- goto out;
+ goto out_unlock_iolock;
truncate_pagecache_range(VFS_I(ip), ioffset, -1);
/*
@@ -1272,7 +1311,7 @@ xfs_free_file_space(
error = xfs_bmapi_read(ip, startoffset_fsb, 1,
&imap, &nimap, 0);
if (error)
- goto out;
+ goto out_unlock_iolock;
ASSERT(nimap == 0 || nimap == 1);
if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
xfs_daddr_t block;
@@ -1287,7 +1326,7 @@ xfs_free_file_space(
error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
&imap, &nimap, 0);
if (error)
- goto out;
+ goto out_unlock_iolock;
ASSERT(nimap == 0 || nimap == 1);
if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
@@ -1373,23 +1412,27 @@ xfs_free_file_space(
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
- out:
+ out_unlock_iolock:
+ if (need_iolock)
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return error;
error0:
xfs_bmap_cancel(&free_list);
error1:
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- goto out;
+ xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) :
+ XFS_ILOCK_EXCL);
+ return error;
}
-int
+STATIC int
xfs_zero_file_space(
struct xfs_inode *ip,
xfs_off_t offset,
- xfs_off_t len)
+ xfs_off_t len,
+ int attr_flags)
{
struct xfs_mount *mp = ip->i_mount;
uint granularity;
@@ -1410,6 +1453,9 @@ xfs_zero_file_space(
ASSERT(start_boundary >= offset);
ASSERT(end_boundary <= offset + len);
+ if (!(attr_flags & XFS_ATTR_NOLOCK))
+ xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
if (start_boundary < end_boundary - 1) {
/* punch out the page cache over the conversion range */
truncate_pagecache_range(VFS_I(ip), start_boundary,
@@ -1417,16 +1463,16 @@ xfs_zero_file_space(
/* convert the blocks */
error = xfs_alloc_file_space(ip, start_boundary,
end_boundary - start_boundary - 1,
- XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT);
+ XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT,
+ attr_flags);
if (error)
- goto out;
+ goto out_unlock;
/* We've handled the interior of the range, now for the edges */
- if (start_boundary != offset) {
+ if (start_boundary != offset)
error = xfs_iozero(ip, offset, start_boundary - offset);
- if (error)
- goto out;
- }
+ if (error)
+ goto out_unlock;
if (end_boundary != offset + len)
error = xfs_iozero(ip, end_boundary,
@@ -1440,12 +1486,197 @@ xfs_zero_file_space(
error = xfs_iozero(ip, offset, len);
}
-out:
+out_unlock:
+ if (!(attr_flags & XFS_ATTR_NOLOCK))
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return error;
}
/*
+ * xfs_change_file_space()
+ * This routine allocates or frees disk space for the given file.
+ * The user specified parameters are checked for alignment and size
+ * limitations.
+ *
+ * RETURNS:
+ * 0 on success
+ * errno on error
+ *
+ */
+int
+xfs_change_file_space(
+	xfs_inode_t	*ip,
+	int		cmd,
+	xfs_flock64_t	*bf,
+	xfs_off_t	offset,
+	int		attr_flags)
+{
+	xfs_mount_t	*mp = ip->i_mount;
+	int		clrprealloc;
+	int		error;
+	xfs_fsize_t	fsize;
+	int		setprealloc;
+	xfs_off_t	startoffset;
+	xfs_trans_t	*tp;
+	struct iattr	iattr;
+
+	/* Only regular files are accepted. */
+	if (!S_ISREG(ip->i_d.di_mode))
+		return XFS_ERROR(EINVAL);
+
+	/* Turn l_start into an absolute offset according to l_whence. */
+	switch (bf->l_whence) {
+	case 0: /*SEEK_SET*/
+		break;
+	case 1: /*SEEK_CUR*/
+		bf->l_start += offset;
+		break;
+	case 2: /*SEEK_END*/
+		bf->l_start += XFS_ISIZE(ip);
+		break;
+	default:
+		return XFS_ERROR(EINVAL);
+	}
+
+	/*
+	 * length of <= 0 for resv/unresv/zero is invalid.  length for
+	 * alloc/free is ignored completely and we have no idea what userspace
+	 * might have set it to, so set it to zero to allow range
+	 * checks to pass.
+	 */
+	switch (cmd) {
+	case XFS_IOC_ZERO_RANGE:
+	case XFS_IOC_RESVSP:
+	case XFS_IOC_RESVSP64:
+	case XFS_IOC_UNRESVSP:
+	case XFS_IOC_UNRESVSP64:
+		if (bf->l_len <= 0)
+			return XFS_ERROR(EINVAL);
+		break;
+	default:
+		bf->l_len = 0;
+		break;
+	}
+
+	/* Reject ranges that start or end outside [0, s_maxbytes). */
+	if (bf->l_start < 0 ||
+	    bf->l_start > mp->m_super->s_maxbytes ||
+	    bf->l_start + bf->l_len < 0 ||
+	    bf->l_start + bf->l_len >= mp->m_super->s_maxbytes)
+		return XFS_ERROR(EINVAL);
+
+	/* l_start is absolute from here on. */
+	bf->l_whence = 0;
+
+	startoffset = bf->l_start;
+	fsize = XFS_ISIZE(ip);
+
+	setprealloc = clrprealloc = 0;
+	switch (cmd) {
+	case XFS_IOC_ZERO_RANGE:
+		error = xfs_zero_file_space(ip, startoffset, bf->l_len,
+					    attr_flags);
+		if (error)
+			return error;
+		setprealloc = 1;
+		break;
+
+	case XFS_IOC_RESVSP:
+	case XFS_IOC_RESVSP64:
+		error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
+					     XFS_BMAPI_PREALLOC, attr_flags);
+		if (error)
+			return error;
+		setprealloc = 1;
+		break;
+
+	case XFS_IOC_UNRESVSP:
+	case XFS_IOC_UNRESVSP64:
+		error = xfs_free_file_space(ip, startoffset, bf->l_len,
+					    attr_flags);
+		if (error)
+			return error;
+		break;
+
+	case XFS_IOC_ALLOCSP:
+	case XFS_IOC_ALLOCSP64:
+	case XFS_IOC_FREESP:
+	case XFS_IOC_FREESP64:
+		/*
+		 * These operations actually do IO when extending the file, but
+		 * the allocation is done separately to the zeroing that is
+		 * done. This set of operations need to be serialised against
+		 * other IO operations, such as truncate and buffered IO. We
+		 * need to take the IOLOCK here to serialise the allocation and
+		 * zeroing IO to prevent other IOLOCK holders (e.g. getbmap,
+		 * truncate, direct IO) from racing against the transient
+		 * allocated but not written state we can have here.
+		 */
+		xfs_ilock(ip, XFS_IOLOCK_EXCL);
+		if (startoffset > fsize) {
+			error = xfs_alloc_file_space(ip, fsize,
+					startoffset - fsize, 0,
+					attr_flags | XFS_ATTR_NOLOCK);
+			if (error) {
+				/*
+				 * Return the failure to the caller.  Merely
+				 * leaving the switch would run the inode
+				 * update below, which overwrites "error" and
+				 * can report success.
+				 */
+				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+				return error;
+			}
+		}
+
+		/* Set the file size to startoffset while still holding the IOLOCK. */
+		iattr.ia_valid = ATTR_SIZE;
+		iattr.ia_size = startoffset;
+
+		error = xfs_setattr_size(ip, &iattr,
+					 attr_flags | XFS_ATTR_NOLOCK);
+		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+
+		if (error)
+			return error;
+
+		clrprealloc = 1;
+		break;
+
+	default:
+		ASSERT(0);
+		return XFS_ERROR(EINVAL);
+	}
+
+	/*
+	 * update the inode timestamp, mode, and prealloc flag bits
+	 */
+	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0);
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		return error;
+	}
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+	if ((attr_flags & XFS_ATTR_DMI) == 0) {
+		ip->i_d.di_mode &= ~S_ISUID;
+
+		/*
+		 * Note that we don't have to worry about mandatory
+		 * file locking being disabled here because we only
+		 * clear the S_ISGID bit if the Group execute bit is
+		 * on, but if it was on then mandatory locking wouldn't
+		 * have been enabled.
+		 */
+		if (ip->i_d.di_mode & S_IXGRP)
+			ip->i_d.di_mode &= ~S_ISGID;
+
+		xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+	}
+	if (setprealloc)
+		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
+	else if (clrprealloc)
+		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
+
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	if (attr_flags & XFS_ATTR_SYNC)
+		xfs_trans_set_sync(tp);
+	return xfs_trans_commit(tp, 0);
+}
+
+/*
* We need to check that the format of the data fork in the temporary inode is
* valid for the target inode before doing the swap. This is not a problem with
* attr1 because of the fixed fork offset, but attr2 has a dynamically sized
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 900747b..0612609 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -93,12 +93,9 @@ int xfs_bmap_last_extent(struct xfs_trans *tp, struct xfs_inode *ip,
int *is_empty);
/* preallocation and hole punch interface */
-int xfs_alloc_file_space(struct xfs_inode *ip, xfs_off_t offset,
- xfs_off_t len, int alloc_type);
-int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
- xfs_off_t len);
-int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
- xfs_off_t len);
+int xfs_change_file_space(struct xfs_inode *ip, int cmd,
+ xfs_flock64_t *bf, xfs_off_t offset,
+ int attr_flags);
/* EOF block manipulation functions */
bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 9adaae4..5690e10 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -17,16 +17,18 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_buf_item.h"
#include "xfs_btree.h"
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 91e34f2..06729b6 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -27,6 +27,73 @@ struct xfs_trans;
extern kmem_zone_t *xfs_btree_cur_zone;
/*
+ * This nonsense is to make -wlint happy.
+ */
+#define XFS_LOOKUP_EQ ((xfs_lookup_t)XFS_LOOKUP_EQi)
+#define XFS_LOOKUP_LE ((xfs_lookup_t)XFS_LOOKUP_LEi)
+#define XFS_LOOKUP_GE ((xfs_lookup_t)XFS_LOOKUP_GEi)
+
+#define XFS_BTNUM_BNO ((xfs_btnum_t)XFS_BTNUM_BNOi)
+#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi)
+#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi)
+#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
+
+/*
+ * Generic btree header.
+ *
+ * This is a combination of the actual format used on disk for short and long
+ * format btrees. The first three fields are shared by both formats, but the
+ * pointers are different and should be used with care.
+ *
+ * To get the size of the actual short or long form headers please use the size
+ * macros below. Never use sizeof(xfs_btree_block).
+ *
+ * The blkno, crc, lsn, owner and uuid fields are only available in filesystems
+ * with the crc feature bit, and all accesses to them must be conditional on
+ * that flag.
+ */
+struct xfs_btree_block {
+ __be32 bb_magic; /* magic number for block type */
+ __be16 bb_level; /* 0 is a leaf */
+ __be16 bb_numrecs; /* current # of data records */
+ union {
+ struct {
+ __be32 bb_leftsib;
+ __be32 bb_rightsib;
+
+ __be64 bb_blkno;
+ __be64 bb_lsn;
+ uuid_t bb_uuid;
+ __be32 bb_owner;
+ __le32 bb_crc;
+ } s; /* short form pointers */
+ struct {
+ __be64 bb_leftsib;
+ __be64 bb_rightsib;
+
+ __be64 bb_blkno;
+ __be64 bb_lsn;
+ uuid_t bb_uuid;
+ __be64 bb_owner;
+ __le32 bb_crc;
+ __be32 bb_pad; /* padding for alignment */
+ } l; /* long form pointers */
+ } bb_u; /* rest */
+};
+
+#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */
+#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */
+
+/* sizes of CRC enabled btree blocks */
+#define XFS_BTREE_SBLOCK_CRC_LEN (XFS_BTREE_SBLOCK_LEN + 40)
+#define XFS_BTREE_LBLOCK_CRC_LEN (XFS_BTREE_LBLOCK_LEN + 48)
+
+#define XFS_BTREE_SBLOCK_CRC_OFF \
+ offsetof(struct xfs_btree_block, bb_u.s.bb_crc)
+#define XFS_BTREE_LBLOCK_CRC_OFF \
+ offsetof(struct xfs_btree_block, bb_u.l.bb_crc)
+
+/*
* Generic key, ptr and record wrapper structures.
*
* These are disk format structures, and are converted where necessary
@@ -52,18 +119,6 @@ union xfs_btree_rec {
};
/*
- * This nonsense is to make -wlint happy.
- */
-#define XFS_LOOKUP_EQ ((xfs_lookup_t)XFS_LOOKUP_EQi)
-#define XFS_LOOKUP_LE ((xfs_lookup_t)XFS_LOOKUP_LEi)
-#define XFS_LOOKUP_GE ((xfs_lookup_t)XFS_LOOKUP_GEi)
-
-#define XFS_BTNUM_BNO ((xfs_btnum_t)XFS_BTNUM_BNOi)
-#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi)
-#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi)
-#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
-
-/*
* For logging record fields.
*/
#define XFS_BB_MAGIC (1 << 0)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index c7f0b77..2634700 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -34,13 +34,12 @@
#include <linux/backing-dev.h>
#include <linux/freezer.h>
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
#include "xfs_sb.h"
+#include "xfs_trans_resv.h"
+#include "xfs_log.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_trace.h"
-#include "xfs_log.h"
static kmem_zone_t *xfs_buf_zone;
@@ -591,7 +590,7 @@ found:
error = _xfs_buf_map_pages(bp, flags);
if (unlikely(error)) {
xfs_warn(target->bt_mount,
- "%s: failed to map pagesn", __func__);
+ "%s: failed to map pages\n", __func__);
xfs_buf_relse(bp);
return NULL;
}
@@ -810,7 +809,7 @@ xfs_buf_get_uncached(
error = _xfs_buf_map_pages(bp, 0);
if (unlikely(error)) {
xfs_warn(target->bt_mount,
- "%s: failed to map pages", __func__);
+ "%s: failed to map pages\n", __func__);
goto fail_free_mem;
}
@@ -1619,7 +1618,7 @@ xfs_setsize_buftarg_flags(
bdevname(btp->bt_bdev, name);
xfs_warn(btp->bt_mount,
- "Cannot set_blocksize to %u on device %s",
+ "Cannot set_blocksize to %u on device %s\n",
sectorsize, name);
return EINVAL;
}
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a64f67b..f1d85cf 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -17,18 +17,17 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_log.h"
kmem_zone_t *xfs_buf_item_zone;
@@ -809,7 +808,7 @@ xfs_buf_item_init(
* Mark bytes first through last inclusive as dirty in the buf
* item's bitmap.
*/
-static void
+void
xfs_buf_item_log_segment(
struct xfs_buf_log_item *bip,
uint first,
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 3f3455a..db63710 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -71,6 +71,10 @@ void xfs_buf_attach_iodone(struct xfs_buf *,
void xfs_buf_iodone_callbacks(struct xfs_buf *);
void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
+void xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *,
+ enum xfs_blft);
+void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, struct xfs_buf *src_bp);
+
extern kmem_zone_t *xfs_buf_item_zone;
#endif /* __XFS_BUF_ITEM_H__ */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 796272a..20bf8e8 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -18,20 +18,20 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
@@ -129,6 +129,56 @@ xfs_da_state_free(xfs_da_state_t *state)
kmem_zone_free(xfs_da_state_zone, state);
}
+/*
+ * Fill the in-core node header "to" from the on-disk node "from",
+ * converting each field to CPU byte order.  The magic number selects
+ * which on-disk header structure the fields are read through.
+ */
+void
+xfs_da3_node_hdr_from_disk(
+	struct xfs_da3_icnode_hdr	*to,
+	struct xfs_da_intnode		*from)
+{
+	int	is_v3;
+
+	ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
+	       from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
+
+	is_v3 = (from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
+
+	if (is_v3) {
+		struct xfs_da3_node_hdr *v3 = (struct xfs_da3_node_hdr *)from;
+
+		to->forw = be32_to_cpu(v3->info.hdr.forw);
+		to->back = be32_to_cpu(v3->info.hdr.back);
+		to->magic = be16_to_cpu(v3->info.hdr.magic);
+		to->count = be16_to_cpu(v3->__count);
+		to->level = be16_to_cpu(v3->__level);
+	} else {
+		to->forw = be32_to_cpu(from->hdr.info.forw);
+		to->back = be32_to_cpu(from->hdr.info.back);
+		to->magic = be16_to_cpu(from->hdr.info.magic);
+		to->count = be16_to_cpu(from->hdr.__count);
+		to->level = be16_to_cpu(from->hdr.__level);
+	}
+}
+
+/*
+ * Write the in-core node header "from" into the on-disk node "to",
+ * converting each field to big-endian.  The in-core magic number
+ * selects which on-disk header structure the fields are stored through.
+ */
+void
+xfs_da3_node_hdr_to_disk(
+	struct xfs_da_intnode		*to,
+	struct xfs_da3_icnode_hdr	*from)
+{
+	int	is_v3;
+
+	ASSERT(from->magic == XFS_DA_NODE_MAGIC ||
+	       from->magic == XFS_DA3_NODE_MAGIC);
+
+	is_v3 = (from->magic == XFS_DA3_NODE_MAGIC);
+
+	if (is_v3) {
+		struct xfs_da3_node_hdr *v3 = (struct xfs_da3_node_hdr *)to;
+
+		v3->info.hdr.forw = cpu_to_be32(from->forw);
+		v3->info.hdr.back = cpu_to_be32(from->back);
+		v3->info.hdr.magic = cpu_to_be16(from->magic);
+		v3->__count = cpu_to_be16(from->count);
+		v3->__level = cpu_to_be16(from->level);
+	} else {
+		to->hdr.info.forw = cpu_to_be32(from->forw);
+		to->hdr.info.back = cpu_to_be32(from->back);
+		to->hdr.info.magic = cpu_to_be16(from->magic);
+		to->hdr.__count = cpu_to_be16(from->count);
+		to->hdr.__level = cpu_to_be16(from->level);
+	}
+}
+
static bool
xfs_da3_node_verify(
struct xfs_buf *bp)
@@ -136,11 +186,8 @@ xfs_da3_node_verify(
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_da_intnode *hdr = bp->b_addr;
struct xfs_da3_icnode_hdr ichdr;
- const struct xfs_dir_ops *ops;
- ops = xfs_dir_get_ops(mp, NULL);
-
- ops->node_hdr_from_disk(&ichdr, hdr);
+ xfs_da3_node_hdr_from_disk(&ichdr, hdr);
if (xfs_sb_version_hascrc(&mp->m_sb)) {
struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
@@ -307,12 +354,11 @@ xfs_da3_node_create(
struct xfs_da3_icnode_hdr ichdr = {0};
struct xfs_buf *bp;
int error;
- struct xfs_inode *dp = args->dp;
trace_xfs_da_node_create(args);
ASSERT(level <= XFS_DA_NODE_MAXDEPTH);
- error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork);
+ error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
if (error)
return(error);
bp->b_ops = &xfs_da3_node_buf_ops;
@@ -331,9 +377,9 @@ xfs_da3_node_create(
}
ichdr.level = level;
- dp->d_ops->node_hdr_to_disk(node, &ichdr);
+ xfs_da3_node_hdr_to_disk(node, &ichdr);
xfs_trans_log_buf(tp, bp,
- XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
+ XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node)));
*bpp = bp;
return(0);
@@ -543,8 +589,8 @@ xfs_da3_root_split(
oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
struct xfs_da3_icnode_hdr nodehdr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot);
- btree = dp->d_ops->node_tree_p(oldroot);
+ xfs_da3_node_hdr_from_disk(&nodehdr, oldroot);
+ btree = xfs_da3_node_tree_p(oldroot);
size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot);
level = nodehdr.level;
@@ -558,8 +604,8 @@ xfs_da3_root_split(
struct xfs_dir2_leaf_entry *ents;
leaf = (xfs_dir2_leaf_t *)oldroot;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
@@ -603,14 +649,14 @@ xfs_da3_root_split(
return error;
node = bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
- btree = dp->d_ops->node_tree_p(node);
+ xfs_da3_node_hdr_from_disk(&nodehdr, node);
+ btree = xfs_da3_node_tree_p(node);
btree[0].hashval = cpu_to_be32(blk1->hashval);
btree[0].before = cpu_to_be32(blk1->blkno);
btree[1].hashval = cpu_to_be32(blk2->hashval);
btree[1].before = cpu_to_be32(blk2->blkno);
nodehdr.count = 2;
- dp->d_ops->node_hdr_to_disk(node, &nodehdr);
+ xfs_da3_node_hdr_to_disk(node, &nodehdr);
#ifdef DEBUG
if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
@@ -647,12 +693,11 @@ xfs_da3_node_split(
int newcount;
int error;
int useextra;
- struct xfs_inode *dp = state->args->dp;
trace_xfs_da_node_split(state->args);
node = oldblk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+ xfs_da3_node_hdr_from_disk(&nodehdr, node);
/*
* With V2 dirs the extra block is data or freespace.
@@ -699,7 +744,7 @@ xfs_da3_node_split(
* If we had double-split op below us, then add the extra block too.
*/
node = oldblk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+ xfs_da3_node_hdr_from_disk(&nodehdr, node);
if (oldblk->index <= nodehdr.count) {
oldblk->index++;
xfs_da3_node_add(state, oldblk, addblk);
@@ -748,16 +793,15 @@ xfs_da3_node_rebalance(
int count;
int tmp;
int swap = 0;
- struct xfs_inode *dp = state->args->dp;
trace_xfs_da_node_rebalance(state->args);
node1 = blk1->bp->b_addr;
node2 = blk2->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
- dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
- btree1 = dp->d_ops->node_tree_p(node1);
- btree2 = dp->d_ops->node_tree_p(node2);
+ xfs_da3_node_hdr_from_disk(&nodehdr1, node1);
+ xfs_da3_node_hdr_from_disk(&nodehdr2, node2);
+ btree1 = xfs_da3_node_tree_p(node1);
+ btree2 = xfs_da3_node_tree_p(node2);
/*
* Figure out how many entries need to move, and in which direction.
@@ -770,10 +814,10 @@ xfs_da3_node_rebalance(
tmpnode = node1;
node1 = node2;
node2 = tmpnode;
- dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
- dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
- btree1 = dp->d_ops->node_tree_p(node1);
- btree2 = dp->d_ops->node_tree_p(node2);
+ xfs_da3_node_hdr_from_disk(&nodehdr1, node1);
+ xfs_da3_node_hdr_from_disk(&nodehdr2, node2);
+ btree1 = xfs_da3_node_tree_p(node1);
+ btree2 = xfs_da3_node_tree_p(node2);
swap = 1;
}
@@ -835,14 +879,15 @@ xfs_da3_node_rebalance(
/*
* Log header of node 1 and all current bits of node 2.
*/
- dp->d_ops->node_hdr_to_disk(node1, &nodehdr1);
+ xfs_da3_node_hdr_to_disk(node1, &nodehdr1);
xfs_trans_log_buf(tp, blk1->bp,
- XFS_DA_LOGRANGE(node1, &node1->hdr, dp->d_ops->node_hdr_size));
+ XFS_DA_LOGRANGE(node1, &node1->hdr,
+ xfs_da3_node_hdr_size(node1)));
- dp->d_ops->node_hdr_to_disk(node2, &nodehdr2);
+ xfs_da3_node_hdr_to_disk(node2, &nodehdr2);
xfs_trans_log_buf(tp, blk2->bp,
XFS_DA_LOGRANGE(node2, &node2->hdr,
- dp->d_ops->node_hdr_size +
+ xfs_da3_node_hdr_size(node2) +
(sizeof(btree2[0]) * nodehdr2.count)));
/*
@@ -852,10 +897,10 @@ xfs_da3_node_rebalance(
if (swap) {
node1 = blk1->bp->b_addr;
node2 = blk2->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
- dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
- btree1 = dp->d_ops->node_tree_p(node1);
- btree2 = dp->d_ops->node_tree_p(node2);
+ xfs_da3_node_hdr_from_disk(&nodehdr1, node1);
+ xfs_da3_node_hdr_from_disk(&nodehdr2, node2);
+ btree1 = xfs_da3_node_tree_p(node1);
+ btree2 = xfs_da3_node_tree_p(node2);
}
blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);
blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval);
@@ -882,13 +927,12 @@ xfs_da3_node_add(
struct xfs_da3_icnode_hdr nodehdr;
struct xfs_da_node_entry *btree;
int tmp;
- struct xfs_inode *dp = state->args->dp;
trace_xfs_da_node_add(state->args);
node = oldblk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
- btree = dp->d_ops->node_tree_p(node);
+ xfs_da3_node_hdr_from_disk(&nodehdr, node);
+ btree = xfs_da3_node_tree_p(node);
ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
ASSERT(newblk->blkno != 0);
@@ -911,9 +955,9 @@ xfs_da3_node_add(
tmp + sizeof(*btree)));
nodehdr.count += 1;
- dp->d_ops->node_hdr_to_disk(node, &nodehdr);
+ xfs_da3_node_hdr_to_disk(node, &nodehdr);
xfs_trans_log_buf(state->args->trans, oldblk->bp,
- XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
+ XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node)));
/*
* Copy the last hash value from the oldblk to propagate upwards.
@@ -1050,7 +1094,6 @@ xfs_da3_root_join(
struct xfs_da3_icnode_hdr oldroothdr;
struct xfs_da_node_entry *btree;
int error;
- struct xfs_inode *dp = state->args->dp;
trace_xfs_da_root_join(state->args);
@@ -1058,7 +1101,7 @@ xfs_da3_root_join(
args = state->args;
oldroot = root_blk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&oldroothdr, oldroot);
+ xfs_da3_node_hdr_from_disk(&oldroothdr, oldroot);
ASSERT(oldroothdr.forw == 0);
ASSERT(oldroothdr.back == 0);
@@ -1072,10 +1115,10 @@ xfs_da3_root_join(
* Read in the (only) child block, then copy those bytes into
* the root block's buffer and free the original child block.
*/
- btree = dp->d_ops->node_tree_p(oldroot);
+ btree = xfs_da3_node_tree_p(oldroot);
child = be32_to_cpu(btree[0].before);
ASSERT(child != 0);
- error = xfs_da3_node_read(args->trans, dp, child, -1, &bp,
+ error = xfs_da3_node_read(args->trans, args->dp, child, -1, &bp,
args->whichfork);
if (error)
return error;
@@ -1125,7 +1168,6 @@ xfs_da3_node_toosmall(
int error;
int retval;
int i;
- struct xfs_inode *dp = state->args->dp;
trace_xfs_da_node_toosmall(state->args);
@@ -1137,7 +1179,7 @@ xfs_da3_node_toosmall(
blk = &state->path.blk[ state->path.active-1 ];
info = blk->bp->b_addr;
node = (xfs_da_intnode_t *)info;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+ xfs_da3_node_hdr_from_disk(&nodehdr, node);
if (nodehdr.count > (state->node_ents >> 1)) {
*action = 0; /* blk over 50%, don't try to join */
return(0); /* blk over 50%, don't try to join */
@@ -1189,13 +1231,13 @@ xfs_da3_node_toosmall(
blkno = nodehdr.back;
if (blkno == 0)
continue;
- error = xfs_da3_node_read(state->args->trans, dp,
+ error = xfs_da3_node_read(state->args->trans, state->args->dp,
blkno, -1, &bp, state->args->whichfork);
if (error)
return(error);
node = bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&thdr, node);
+ xfs_da3_node_hdr_from_disk(&thdr, node);
xfs_trans_brelse(state->args->trans, bp);
if (count - thdr.count >= 0)
@@ -1233,7 +1275,6 @@ xfs_da3_node_toosmall(
*/
STATIC uint
xfs_da3_node_lasthash(
- struct xfs_inode *dp,
struct xfs_buf *bp,
int *count)
{
@@ -1242,12 +1283,12 @@ xfs_da3_node_lasthash(
struct xfs_da3_icnode_hdr nodehdr;
node = bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+ xfs_da3_node_hdr_from_disk(&nodehdr, node);
if (count)
*count = nodehdr.count;
if (!nodehdr.count)
return 0;
- btree = dp->d_ops->node_tree_p(node);
+ btree = xfs_da3_node_tree_p(node);
return be32_to_cpu(btree[nodehdr.count - 1].hashval);
}
@@ -1266,7 +1307,6 @@ xfs_da3_fixhashpath(
xfs_dahash_t lasthash=0;
int level;
int count;
- struct xfs_inode *dp = state->args->dp;
trace_xfs_da_fixhashpath(state->args);
@@ -1279,12 +1319,12 @@ xfs_da3_fixhashpath(
return;
break;
case XFS_DIR2_LEAFN_MAGIC:
- lasthash = xfs_dir2_leafn_lasthash(dp, blk->bp, &count);
+ lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
if (count == 0)
return;
break;
case XFS_DA_NODE_MAGIC:
- lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count);
+ lasthash = xfs_da3_node_lasthash(blk->bp, &count);
if (count == 0)
return;
break;
@@ -1293,8 +1333,8 @@ xfs_da3_fixhashpath(
struct xfs_da3_icnode_hdr nodehdr;
node = blk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
- btree = dp->d_ops->node_tree_p(node);
+ xfs_da3_node_hdr_from_disk(&nodehdr, node);
+ btree = xfs_da3_node_tree_p(node);
if (be32_to_cpu(btree->hashval) == lasthash)
break;
blk->hashval = lasthash;
@@ -1320,12 +1360,11 @@ xfs_da3_node_remove(
struct xfs_da_node_entry *btree;
int index;
int tmp;
- struct xfs_inode *dp = state->args->dp;
trace_xfs_da_node_remove(state->args);
node = drop_blk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+ xfs_da3_node_hdr_from_disk(&nodehdr, node);
ASSERT(drop_blk->index < nodehdr.count);
ASSERT(drop_blk->index >= 0);
@@ -1333,7 +1372,7 @@ xfs_da3_node_remove(
* Copy over the offending entry, or just zero it out.
*/
index = drop_blk->index;
- btree = dp->d_ops->node_tree_p(node);
+ btree = xfs_da3_node_tree_p(node);
if (index < nodehdr.count - 1) {
tmp = nodehdr.count - index - 1;
tmp *= (uint)sizeof(xfs_da_node_entry_t);
@@ -1346,9 +1385,9 @@ xfs_da3_node_remove(
xfs_trans_log_buf(state->args->trans, drop_blk->bp,
XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
nodehdr.count -= 1;
- dp->d_ops->node_hdr_to_disk(node, &nodehdr);
+ xfs_da3_node_hdr_to_disk(node, &nodehdr);
xfs_trans_log_buf(state->args->trans, drop_blk->bp,
- XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
+ XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node)));
/*
* Copy the last hash value from the block to propagate upwards.
@@ -1375,16 +1414,15 @@ xfs_da3_node_unbalance(
struct xfs_trans *tp;
int sindex;
int tmp;
- struct xfs_inode *dp = state->args->dp;
trace_xfs_da_node_unbalance(state->args);
drop_node = drop_blk->bp->b_addr;
save_node = save_blk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&drop_hdr, drop_node);
- dp->d_ops->node_hdr_from_disk(&save_hdr, save_node);
- drop_btree = dp->d_ops->node_tree_p(drop_node);
- save_btree = dp->d_ops->node_tree_p(save_node);
+ xfs_da3_node_hdr_from_disk(&drop_hdr, drop_node);
+ xfs_da3_node_hdr_from_disk(&save_hdr, save_node);
+ drop_btree = xfs_da3_node_tree_p(drop_node);
+ save_btree = xfs_da3_node_tree_p(save_node);
tp = state->args->trans;
/*
@@ -1418,10 +1456,10 @@ xfs_da3_node_unbalance(
memcpy(&save_btree[sindex], &drop_btree[0], tmp);
save_hdr.count += drop_hdr.count;
- dp->d_ops->node_hdr_to_disk(save_node, &save_hdr);
+ xfs_da3_node_hdr_to_disk(save_node, &save_hdr);
xfs_trans_log_buf(tp, save_blk->bp,
XFS_DA_LOGRANGE(save_node, &save_node->hdr,
- dp->d_ops->node_hdr_size));
+ xfs_da3_node_hdr_size(save_node)));
/*
* Save the last hashval in the remaining block for upward propagation.
@@ -1463,7 +1501,6 @@ xfs_da3_node_lookup_int(
int max;
int error;
int retval;
- struct xfs_inode *dp = state->args->dp;
args = state->args;
@@ -1499,8 +1536,7 @@ xfs_da3_node_lookup_int(
if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
blk->magic == XFS_DIR3_LEAFN_MAGIC) {
blk->magic = XFS_DIR2_LEAFN_MAGIC;
- blk->hashval = xfs_dir2_leafn_lasthash(args->dp,
- blk->bp, NULL);
+ blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
break;
}
@@ -1511,8 +1547,8 @@ xfs_da3_node_lookup_int(
* Search an intermediate node for a match.
*/
node = blk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
- btree = dp->d_ops->node_tree_p(node);
+ xfs_da3_node_hdr_from_disk(&nodehdr, node);
+ btree = xfs_da3_node_tree_p(node);
max = nodehdr.count;
blk->hashval = be32_to_cpu(btree[max - 1].hashval);
@@ -1607,7 +1643,6 @@ xfs_da3_node_lookup_int(
*/
STATIC int
xfs_da3_node_order(
- struct xfs_inode *dp,
struct xfs_buf *node1_bp,
struct xfs_buf *node2_bp)
{
@@ -1620,10 +1655,10 @@ xfs_da3_node_order(
node1 = node1_bp->b_addr;
node2 = node2_bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&node1hdr, node1);
- dp->d_ops->node_hdr_from_disk(&node2hdr, node2);
- btree1 = dp->d_ops->node_tree_p(node1);
- btree2 = dp->d_ops->node_tree_p(node2);
+ xfs_da3_node_hdr_from_disk(&node1hdr, node1);
+ xfs_da3_node_hdr_from_disk(&node2hdr, node2);
+ btree1 = xfs_da3_node_tree_p(node1);
+ btree2 = xfs_da3_node_tree_p(node2);
if (node1hdr.count > 0 && node2hdr.count > 0 &&
((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
@@ -1650,7 +1685,6 @@ xfs_da3_blk_link(
struct xfs_buf *bp;
int before = 0;
int error;
- struct xfs_inode *dp = state->args->dp;
/*
* Set up environment.
@@ -1668,10 +1702,10 @@ xfs_da3_blk_link(
before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
break;
case XFS_DIR2_LEAFN_MAGIC:
- before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp);
+ before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
break;
case XFS_DA_NODE_MAGIC:
- before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp);
+ before = xfs_da3_node_order(old_blk->bp, new_blk->bp);
break;
}
@@ -1686,7 +1720,7 @@ xfs_da3_blk_link(
new_info->forw = cpu_to_be32(old_blk->blkno);
new_info->back = old_info->back;
if (old_info->back) {
- error = xfs_da3_node_read(args->trans, dp,
+ error = xfs_da3_node_read(args->trans, args->dp,
be32_to_cpu(old_info->back),
-1, &bp, args->whichfork);
if (error)
@@ -1707,7 +1741,7 @@ xfs_da3_blk_link(
new_info->forw = old_info->forw;
new_info->back = cpu_to_be32(old_blk->blkno);
if (old_info->forw) {
- error = xfs_da3_node_read(args->trans, dp,
+ error = xfs_da3_node_read(args->trans, args->dp,
be32_to_cpu(old_info->forw),
-1, &bp, args->whichfork);
if (error)
@@ -1827,7 +1861,6 @@ xfs_da3_path_shift(
xfs_dablk_t blkno = 0;
int level;
int error;
- struct xfs_inode *dp = state->args->dp;
trace_xfs_da_path_shift(state->args);
@@ -1843,8 +1876,8 @@ xfs_da3_path_shift(
level = (path->active-1) - 1; /* skip bottom layer in path */
for (blk = &path->blk[level]; level >= 0; blk--, level--) {
node = blk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
- btree = dp->d_ops->node_tree_p(node);
+ xfs_da3_node_hdr_from_disk(&nodehdr, node);
+ btree = xfs_da3_node_tree_p(node);
if (forward && (blk->index < nodehdr.count - 1)) {
blk->index++;
@@ -1878,7 +1911,7 @@ xfs_da3_path_shift(
* Read the next child block.
*/
blk->blkno = blkno;
- error = xfs_da3_node_read(args->trans, dp, blkno, -1,
+ error = xfs_da3_node_read(args->trans, args->dp, blkno, -1,
&blk->bp, args->whichfork);
if (error)
return(error);
@@ -1900,8 +1933,8 @@ xfs_da3_path_shift(
case XFS_DA3_NODE_MAGIC:
blk->magic = XFS_DA_NODE_MAGIC;
node = (xfs_da_intnode_t *)info;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
- btree = dp->d_ops->node_tree_p(node);
+ xfs_da3_node_hdr_from_disk(&nodehdr, node);
+ btree = xfs_da3_node_tree_p(node);
blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
if (forward)
blk->index = 0;
@@ -1914,15 +1947,16 @@ xfs_da3_path_shift(
blk->magic = XFS_ATTR_LEAF_MAGIC;
ASSERT(level == path->active-1);
blk->index = 0;
- blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
+ blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
+ NULL);
break;
case XFS_DIR2_LEAFN_MAGIC:
case XFS_DIR3_LEAFN_MAGIC:
blk->magic = XFS_DIR2_LEAFN_MAGIC;
ASSERT(level == path->active-1);
blk->index = 0;
- blk->hashval = xfs_dir2_leafn_lasthash(args->dp,
- blk->bp, NULL);
+ blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
+ NULL);
break;
default:
ASSERT(0);
@@ -2129,7 +2163,7 @@ xfs_da3_swap_lastblock(
struct xfs_dir2_leaf *dead_leaf2;
struct xfs_da_node_entry *btree;
struct xfs_da3_icnode_hdr par_hdr;
- struct xfs_inode *dp;
+ struct xfs_inode *ip;
struct xfs_trans *tp;
struct xfs_mount *mp;
struct xfs_buf *dead_buf;
@@ -2153,12 +2187,12 @@ xfs_da3_swap_lastblock(
dead_buf = *dead_bufp;
dead_blkno = *dead_blknop;
tp = args->trans;
- dp = args->dp;
+ ip = args->dp;
w = args->whichfork;
ASSERT(w == XFS_DATA_FORK);
- mp = dp->i_mount;
+ mp = ip->i_mount;
lastoff = mp->m_dirfreeblk;
- error = xfs_bmap_last_before(tp, dp, &lastoff, w);
+ error = xfs_bmap_last_before(tp, ip, &lastoff, w);
if (error)
return error;
if (unlikely(lastoff == 0)) {
@@ -2170,7 +2204,7 @@ xfs_da3_swap_lastblock(
* Read the last block in the btree space.
*/
last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
- error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w);
+ error = xfs_da3_node_read(tp, ip, last_blkno, -1, &last_buf, w);
if (error)
return error;
/*
@@ -2188,16 +2222,16 @@ xfs_da3_swap_lastblock(
struct xfs_dir2_leaf_entry *ents;
dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, dead_leaf2);
- ents = dp->d_ops->leaf_ents_p(dead_leaf2);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, dead_leaf2);
+ ents = xfs_dir3_leaf_ents_p(dead_leaf2);
dead_level = 0;
dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
} else {
struct xfs_da3_icnode_hdr deadhdr;
dead_node = (xfs_da_intnode_t *)dead_info;
- dp->d_ops->node_hdr_from_disk(&deadhdr, dead_node);
- btree = dp->d_ops->node_tree_p(dead_node);
+ xfs_da3_node_hdr_from_disk(&deadhdr, dead_node);
+ btree = xfs_da3_node_tree_p(dead_node);
dead_level = deadhdr.level;
dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval);
}
@@ -2206,7 +2240,7 @@ xfs_da3_swap_lastblock(
* If the moved block has a left sibling, fix up the pointers.
*/
if ((sib_blkno = be32_to_cpu(dead_info->back))) {
- error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w);
+ error = xfs_da3_node_read(tp, ip, sib_blkno, -1, &sib_buf, w);
if (error)
goto done;
sib_info = sib_buf->b_addr;
@@ -2228,7 +2262,7 @@ xfs_da3_swap_lastblock(
* If the moved block has a right sibling, fix up the pointers.
*/
if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
- error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w);
+ error = xfs_da3_node_read(tp, ip, sib_blkno, -1, &sib_buf, w);
if (error)
goto done;
sib_info = sib_buf->b_addr;
@@ -2252,11 +2286,11 @@ xfs_da3_swap_lastblock(
* Walk down the tree looking for the parent of the moved block.
*/
for (;;) {
- error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
+ error = xfs_da3_node_read(tp, ip, par_blkno, -1, &par_buf, w);
if (error)
goto done;
par_node = par_buf->b_addr;
- dp->d_ops->node_hdr_from_disk(&par_hdr, par_node);
+ xfs_da3_node_hdr_from_disk(&par_hdr, par_node);
if (level >= 0 && level != par_hdr.level + 1) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
XFS_ERRLEVEL_LOW, mp);
@@ -2264,7 +2298,7 @@ xfs_da3_swap_lastblock(
goto done;
}
level = par_hdr.level;
- btree = dp->d_ops->node_tree_p(par_node);
+ btree = xfs_da3_node_tree_p(par_node);
for (entno = 0;
entno < par_hdr.count &&
be32_to_cpu(btree[entno].hashval) < dead_hash;
@@ -2303,18 +2337,18 @@ xfs_da3_swap_lastblock(
error = XFS_ERROR(EFSCORRUPTED);
goto done;
}
- error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
+ error = xfs_da3_node_read(tp, ip, par_blkno, -1, &par_buf, w);
if (error)
goto done;
par_node = par_buf->b_addr;
- dp->d_ops->node_hdr_from_disk(&par_hdr, par_node);
+ xfs_da3_node_hdr_from_disk(&par_hdr, par_node);
if (par_hdr.level != level) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
XFS_ERRLEVEL_LOW, mp);
error = XFS_ERROR(EFSCORRUPTED);
goto done;
}
- btree = dp->d_ops->node_tree_p(par_node);
+ btree = xfs_da3_node_tree_p(par_node);
entno = 0;
}
/*
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 6e95ea7..b1f2679 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -23,7 +23,146 @@ struct xfs_bmap_free;
struct xfs_inode;
struct xfs_trans;
struct zone;
-struct xfs_dir_ops;
+
+/*========================================================================
+ * Directory Structure when greater than XFS_LBSIZE(mp) bytes.
+ *========================================================================*/
+
+/*
+ * This structure is common to both leaf nodes and non-leaf nodes in the Btree.
+ *
+ * It is used to manage a doubly linked list of all blocks at the same
+ * level in the Btree, and to identify which type of block this is.
+ */
+#define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */
+#define XFS_ATTR_LEAF_MAGIC 0xfbee /* magic number: attribute leaf blks */
+#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */
+#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */
+
+typedef struct xfs_da_blkinfo {
+ __be32 forw; /* following block in list */
+ __be32 back; /* previous block in list */
+ __be16 magic; /* validity check on block */
+ __be16 pad; /* unused */
+} xfs_da_blkinfo_t;
+
+/*
+ * CRC enabled directory structure types
+ *
+ * The headers change size for the additional verification information, but
+ * otherwise the tree layouts and contents are unchanged. Hence the da btree
+ * code can use the struct xfs_da_blkinfo for manipulating the tree links and
+ * magic numbers without modification for both v2 and v3 nodes.
+ */
+#define XFS_DA3_NODE_MAGIC 0x3ebe /* magic number: non-leaf blocks */
+#define XFS_ATTR3_LEAF_MAGIC 0x3bee /* magic number: attribute leaf blks */
+#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v3 dirlf single blks */
+#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v3 dirlf multi blks */
+
+struct xfs_da3_blkinfo {
+ /*
+ * the node link manipulation code relies on the fact that the first
+ * element of this structure is the struct xfs_da_blkinfo so it can
+ * ignore the differences in the rest of the structures.
+ */
+ struct xfs_da_blkinfo hdr;
+ __be32 crc; /* CRC of block */
+ __be64 blkno; /* first block of the buffer */
+ __be64 lsn; /* sequence number of last write */
+ uuid_t uuid; /* filesystem we belong to */
+ __be64 owner; /* inode that owns the block */
+};
+
+/*
+ * This is the structure of the root and intermediate nodes in the Btree.
+ * The leaf nodes are defined above.
+ *
+ * Entries are not packed.
+ *
+ * Since we have duplicate keys, use a binary search but always follow
+ * all matches in the block, not just the first match found.
+ */
+#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */
+
+typedef struct xfs_da_node_hdr {
+ struct xfs_da_blkinfo info; /* block type, links, etc. */
+ __be16 __count; /* count of active entries */
+ __be16 __level; /* level above leaves (leaf == 0) */
+} xfs_da_node_hdr_t;
+
+struct xfs_da3_node_hdr {
+ struct xfs_da3_blkinfo info; /* block type, links, etc. */
+ __be16 __count; /* count of active entries */
+ __be16 __level; /* level above leaves (leaf == 0) */
+ __be32 __pad32;
+};
+
+#define XFS_DA3_NODE_CRC_OFF (offsetof(struct xfs_da3_node_hdr, info.crc))
+
+typedef struct xfs_da_node_entry {
+ __be32 hashval; /* hash value for this descendant */
+ __be32 before; /* Btree block before this key */
+} xfs_da_node_entry_t;
+
+typedef struct xfs_da_intnode {
+ struct xfs_da_node_hdr hdr;
+ struct xfs_da_node_entry __btree[];
+} xfs_da_intnode_t;
+
+struct xfs_da3_intnode {
+ struct xfs_da3_node_hdr hdr;
+ struct xfs_da_node_entry __btree[];
+};
+
+/*
+ * In-core version of the node header to abstract the differences in the v2 and
+ * v3 disk format of the headers. Callers need to convert to/from disk format as
+ * appropriate.
+ */
+struct xfs_da3_icnode_hdr {
+ __uint32_t forw;
+ __uint32_t back;
+ __uint16_t magic;
+ __uint16_t count;
+ __uint16_t level;
+};
+
+extern void xfs_da3_node_hdr_from_disk(struct xfs_da3_icnode_hdr *to,
+ struct xfs_da_intnode *from);
+extern void xfs_da3_node_hdr_to_disk(struct xfs_da_intnode *to,
+ struct xfs_da3_icnode_hdr *from);
+
+static inline int
+__xfs_da3_node_hdr_size(bool v3)
+{
+ if (v3)
+ return sizeof(struct xfs_da3_node_hdr);
+ return sizeof(struct xfs_da_node_hdr);
+}
+static inline int
+xfs_da3_node_hdr_size(struct xfs_da_intnode *dap)
+{
+ bool v3 = dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC);
+
+ return __xfs_da3_node_hdr_size(v3);
+}
+
+static inline struct xfs_da_node_entry *
+xfs_da3_node_tree_p(struct xfs_da_intnode *dap)
+{
+ if (dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
+ struct xfs_da3_intnode *dap3 = (struct xfs_da3_intnode *)dap;
+ return dap3->__btree;
+ }
+ return dap->__btree;
+}
+
+extern void xfs_da3_intnode_from_disk(struct xfs_da3_icnode_hdr *to,
+ struct xfs_da_intnode *from);
+extern void xfs_da3_intnode_to_disk(struct xfs_da_intnode *to,
+ struct xfs_da3_icnode_hdr *from);
+
+#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize
/*========================================================================
* Btree searching and modification structure definitions.
@@ -170,6 +309,8 @@ int xfs_da3_node_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
struct xfs_buf **bpp, int which_fork);
+extern const struct xfs_buf_ops xfs_da3_node_buf_ops;
+
/*
* Utility routines.
*/
diff --git a/fs/xfs/xfs_da_format.c b/fs/xfs/xfs_da_format.c
deleted file mode 100644
index e6c83e1..0000000
--- a/fs/xfs/xfs_da_format.c
+++ /dev/null
@@ -1,907 +0,0 @@
-/*
- * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
- * Copyright (c) 2013 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_inode.h"
-#include "xfs_dir2.h"
-
-/*
- * Shortform directory ops
- */
-static int
-xfs_dir2_sf_entsize(
- struct xfs_dir2_sf_hdr *hdr,
- int len)
-{
- int count = sizeof(struct xfs_dir2_sf_entry); /* namelen + offset */
-
- count += len; /* name */
- count += hdr->i8count ? sizeof(xfs_dir2_ino8_t) :
- sizeof(xfs_dir2_ino4_t); /* ino # */
- return count;
-}
-
-static int
-xfs_dir3_sf_entsize(
- struct xfs_dir2_sf_hdr *hdr,
- int len)
-{
- return xfs_dir2_sf_entsize(hdr, len) + sizeof(__uint8_t);
-}
-
-static struct xfs_dir2_sf_entry *
-xfs_dir2_sf_nextentry(
- struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep)
-{
- return (struct xfs_dir2_sf_entry *)
- ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen));
-}
-
-static struct xfs_dir2_sf_entry *
-xfs_dir3_sf_nextentry(
- struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep)
-{
- return (struct xfs_dir2_sf_entry *)
- ((char *)sfep + xfs_dir3_sf_entsize(hdr, sfep->namelen));
-}
-
-
-/*
- * For filetype enabled shortform directories, the file type field is stored at
- * the end of the name. Because it's only a single byte, endian conversion is
- * not necessary. For non-filetype enable directories, the type is always
- * unknown and we never store the value.
- */
-static __uint8_t
-xfs_dir2_sfe_get_ftype(
- struct xfs_dir2_sf_entry *sfep)
-{
- return XFS_DIR3_FT_UNKNOWN;
-}
-
-static void
-xfs_dir2_sfe_put_ftype(
- struct xfs_dir2_sf_entry *sfep,
- __uint8_t ftype)
-{
- ASSERT(ftype < XFS_DIR3_FT_MAX);
-}
-
-static __uint8_t
-xfs_dir3_sfe_get_ftype(
- struct xfs_dir2_sf_entry *sfep)
-{
- __uint8_t ftype;
-
- ftype = sfep->name[sfep->namelen];
- if (ftype >= XFS_DIR3_FT_MAX)
- return XFS_DIR3_FT_UNKNOWN;
- return ftype;
-}
-
-static void
-xfs_dir3_sfe_put_ftype(
- struct xfs_dir2_sf_entry *sfep,
- __uint8_t ftype)
-{
- ASSERT(ftype < XFS_DIR3_FT_MAX);
-
- sfep->name[sfep->namelen] = ftype;
-}
-
-/*
- * Inode numbers in short-form directories can come in two versions,
- * either 4 bytes or 8 bytes wide. These helpers deal with the
- * two forms transparently by looking at the headers i8count field.
- *
- * For 64-bit inode number the most significant byte must be zero.
- */
-static xfs_ino_t
-xfs_dir2_sf_get_ino(
- struct xfs_dir2_sf_hdr *hdr,
- xfs_dir2_inou_t *from)
-{
- if (hdr->i8count)
- return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL;
- else
- return get_unaligned_be32(&from->i4.i);
-}
-
-static void
-xfs_dir2_sf_put_ino(
- struct xfs_dir2_sf_hdr *hdr,
- xfs_dir2_inou_t *to,
- xfs_ino_t ino)
-{
- ASSERT((ino & 0xff00000000000000ULL) == 0);
-
- if (hdr->i8count)
- put_unaligned_be64(ino, &to->i8.i);
- else
- put_unaligned_be32(ino, &to->i4.i);
-}
-
-static xfs_ino_t
-xfs_dir2_sf_get_parent_ino(
- struct xfs_dir2_sf_hdr *hdr)
-{
- return xfs_dir2_sf_get_ino(hdr, &hdr->parent);
-}
-
-static void
-xfs_dir2_sf_put_parent_ino(
- struct xfs_dir2_sf_hdr *hdr,
- xfs_ino_t ino)
-{
- xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino);
-}
-
-/*
- * In short-form directory entries the inode numbers are stored at variable
- * offset behind the entry name. If the entry stores a filetype value, then it
- * sits between the name and the inode number. Hence the inode numbers may only
- * be accessed through the helpers below.
- */
-static xfs_ino_t
-xfs_dir2_sfe_get_ino(
- struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep)
-{
- return xfs_dir2_sf_get_ino(hdr,
- (xfs_dir2_inou_t *)&sfep->name[sfep->namelen]);
-}
-
-static void
-xfs_dir2_sfe_put_ino(
- struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep,
- xfs_ino_t ino)
-{
- xfs_dir2_sf_put_ino(hdr,
- (xfs_dir2_inou_t *)&sfep->name[sfep->namelen], ino);
-}
-
-static xfs_ino_t
-xfs_dir3_sfe_get_ino(
- struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep)
-{
- return xfs_dir2_sf_get_ino(hdr,
- (xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1]);
-}
-
-static void
-xfs_dir3_sfe_put_ino(
- struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep,
- xfs_ino_t ino)
-{
- xfs_dir2_sf_put_ino(hdr,
- (xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1], ino);
-}
-
-
-/*
- * Directory data block operations
- */
-
-/*
- * For special situations, the dirent size ends up fixed because we always know
- * what the size of the entry is. That's true for the "." and "..", and
- * therefore we know that they are a fixed size and hence their offsets are
- * constant, as is the first entry.
- *
- * Hence, this calculation is written as a macro to be able to be calculated at
- * compile time and so certain offsets can be calculated directly in the
- * structure initaliser via the macro. There are two macros - one for dirents
- * with ftype and without so there are no unresolvable conditionals in the
- * calculations. We also use round_up() as XFS_DIR2_DATA_ALIGN is always a power
- * of 2 and the compiler doesn't reject it (unlike roundup()).
- */
-#define XFS_DIR2_DATA_ENTSIZE(n) \
- round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \
- sizeof(xfs_dir2_data_off_t)), XFS_DIR2_DATA_ALIGN)
-
-#define XFS_DIR3_DATA_ENTSIZE(n) \
- round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \
- sizeof(xfs_dir2_data_off_t) + sizeof(__uint8_t)), \
- XFS_DIR2_DATA_ALIGN)
-
-static int
-xfs_dir2_data_entsize(
- int n)
-{
- return XFS_DIR2_DATA_ENTSIZE(n);
-}
-
-static int
-xfs_dir3_data_entsize(
- int n)
-{
- return XFS_DIR3_DATA_ENTSIZE(n);
-}
-
-static __uint8_t
-xfs_dir2_data_get_ftype(
- struct xfs_dir2_data_entry *dep)
-{
- return XFS_DIR3_FT_UNKNOWN;
-}
-
-static void
-xfs_dir2_data_put_ftype(
- struct xfs_dir2_data_entry *dep,
- __uint8_t ftype)
-{
- ASSERT(ftype < XFS_DIR3_FT_MAX);
-}
-
-static __uint8_t
-xfs_dir3_data_get_ftype(
- struct xfs_dir2_data_entry *dep)
-{
- __uint8_t ftype = dep->name[dep->namelen];
-
- ASSERT(ftype < XFS_DIR3_FT_MAX);
- if (ftype >= XFS_DIR3_FT_MAX)
- return XFS_DIR3_FT_UNKNOWN;
- return ftype;
-}
-
-static void
-xfs_dir3_data_put_ftype(
- struct xfs_dir2_data_entry *dep,
- __uint8_t type)
-{
- ASSERT(type < XFS_DIR3_FT_MAX);
- ASSERT(dep->namelen != 0);
-
- dep->name[dep->namelen] = type;
-}
-
-/*
- * Pointer to an entry's tag word.
- */
-static __be16 *
-xfs_dir2_data_entry_tag_p(
- struct xfs_dir2_data_entry *dep)
-{
- return (__be16 *)((char *)dep +
- xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
-}
-
-static __be16 *
-xfs_dir3_data_entry_tag_p(
- struct xfs_dir2_data_entry *dep)
-{
- return (__be16 *)((char *)dep +
- xfs_dir3_data_entsize(dep->namelen) - sizeof(__be16));
-}
-
-/*
- * location of . and .. in data space (always block 0)
- */
-static struct xfs_dir2_data_entry *
-xfs_dir2_data_dot_entry_p(
- struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir2_data_hdr));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir2_data_dotdot_entry_p(
- struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
- XFS_DIR2_DATA_ENTSIZE(1));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir2_data_first_entry_p(
- struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
- XFS_DIR2_DATA_ENTSIZE(1) +
- XFS_DIR2_DATA_ENTSIZE(2));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir2_ftype_data_dotdot_entry_p(
- struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
- XFS_DIR3_DATA_ENTSIZE(1));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir2_ftype_data_first_entry_p(
- struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
- XFS_DIR3_DATA_ENTSIZE(1) +
- XFS_DIR3_DATA_ENTSIZE(2));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir3_data_dot_entry_p(
- struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir3_data_hdr));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir3_data_dotdot_entry_p(
- struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) +
- XFS_DIR3_DATA_ENTSIZE(1));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir3_data_first_entry_p(
- struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) +
- XFS_DIR3_DATA_ENTSIZE(1) +
- XFS_DIR3_DATA_ENTSIZE(2));
-}
-
-static struct xfs_dir2_data_free *
-xfs_dir2_data_bestfree_p(struct xfs_dir2_data_hdr *hdr)
-{
- return hdr->bestfree;
-}
-
-static struct xfs_dir2_data_free *
-xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr)
-{
- return ((struct xfs_dir3_data_hdr *)hdr)->best_free;
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir2_data_entry_p(struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir2_data_hdr));
-}
-
-static struct xfs_dir2_data_unused *
-xfs_dir2_data_unused_p(struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_unused *)
- ((char *)hdr + sizeof(struct xfs_dir2_data_hdr));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir3_data_hdr));
-}
-
-static struct xfs_dir2_data_unused *
-xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_unused *)
- ((char *)hdr + sizeof(struct xfs_dir3_data_hdr));
-}
-
-
-/*
- * Directory Leaf block operations
- */
-static int
-xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
-{
- return (mp->m_dirblksize - sizeof(struct xfs_dir2_leaf_hdr)) /
- (uint)sizeof(struct xfs_dir2_leaf_entry);
-}
-
-static struct xfs_dir2_leaf_entry *
-xfs_dir2_leaf_ents_p(struct xfs_dir2_leaf *lp)
-{
- return lp->__ents;
-}
-
-static int
-xfs_dir3_max_leaf_ents(struct xfs_mount *mp)
-{
- return (mp->m_dirblksize - sizeof(struct xfs_dir3_leaf_hdr)) /
- (uint)sizeof(struct xfs_dir2_leaf_entry);
-}
-
-static struct xfs_dir2_leaf_entry *
-xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp)
-{
- return ((struct xfs_dir3_leaf *)lp)->__ents;
-}
-
-static void
-xfs_dir2_leaf_hdr_from_disk(
- struct xfs_dir3_icleaf_hdr *to,
- struct xfs_dir2_leaf *from)
-{
- to->forw = be32_to_cpu(from->hdr.info.forw);
- to->back = be32_to_cpu(from->hdr.info.back);
- to->magic = be16_to_cpu(from->hdr.info.magic);
- to->count = be16_to_cpu(from->hdr.count);
- to->stale = be16_to_cpu(from->hdr.stale);
-
- ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC ||
- to->magic == XFS_DIR2_LEAFN_MAGIC);
-}
-
-static void
-xfs_dir2_leaf_hdr_to_disk(
- struct xfs_dir2_leaf *to,
- struct xfs_dir3_icleaf_hdr *from)
-{
- ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC ||
- from->magic == XFS_DIR2_LEAFN_MAGIC);
-
- to->hdr.info.forw = cpu_to_be32(from->forw);
- to->hdr.info.back = cpu_to_be32(from->back);
- to->hdr.info.magic = cpu_to_be16(from->magic);
- to->hdr.count = cpu_to_be16(from->count);
- to->hdr.stale = cpu_to_be16(from->stale);
-}
-
-static void
-xfs_dir3_leaf_hdr_from_disk(
- struct xfs_dir3_icleaf_hdr *to,
- struct xfs_dir2_leaf *from)
-{
- struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from;
-
- to->forw = be32_to_cpu(hdr3->info.hdr.forw);
- to->back = be32_to_cpu(hdr3->info.hdr.back);
- to->magic = be16_to_cpu(hdr3->info.hdr.magic);
- to->count = be16_to_cpu(hdr3->count);
- to->stale = be16_to_cpu(hdr3->stale);
-
- ASSERT(to->magic == XFS_DIR3_LEAF1_MAGIC ||
- to->magic == XFS_DIR3_LEAFN_MAGIC);
-}
-
-static void
-xfs_dir3_leaf_hdr_to_disk(
- struct xfs_dir2_leaf *to,
- struct xfs_dir3_icleaf_hdr *from)
-{
- struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to;
-
- ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC ||
- from->magic == XFS_DIR3_LEAFN_MAGIC);
-
- hdr3->info.hdr.forw = cpu_to_be32(from->forw);
- hdr3->info.hdr.back = cpu_to_be32(from->back);
- hdr3->info.hdr.magic = cpu_to_be16(from->magic);
- hdr3->count = cpu_to_be16(from->count);
- hdr3->stale = cpu_to_be16(from->stale);
-}
-
-
-/*
- * Directory/Attribute Node block operations
- */
-static struct xfs_da_node_entry *
-xfs_da2_node_tree_p(struct xfs_da_intnode *dap)
-{
- return dap->__btree;
-}
-
-static struct xfs_da_node_entry *
-xfs_da3_node_tree_p(struct xfs_da_intnode *dap)
-{
- return ((struct xfs_da3_intnode *)dap)->__btree;
-}
-
-static void
-xfs_da2_node_hdr_from_disk(
- struct xfs_da3_icnode_hdr *to,
- struct xfs_da_intnode *from)
-{
- ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
- to->forw = be32_to_cpu(from->hdr.info.forw);
- to->back = be32_to_cpu(from->hdr.info.back);
- to->magic = be16_to_cpu(from->hdr.info.magic);
- to->count = be16_to_cpu(from->hdr.__count);
- to->level = be16_to_cpu(from->hdr.__level);
-}
-
-static void
-xfs_da2_node_hdr_to_disk(
- struct xfs_da_intnode *to,
- struct xfs_da3_icnode_hdr *from)
-{
- ASSERT(from->magic == XFS_DA_NODE_MAGIC);
- to->hdr.info.forw = cpu_to_be32(from->forw);
- to->hdr.info.back = cpu_to_be32(from->back);
- to->hdr.info.magic = cpu_to_be16(from->magic);
- to->hdr.__count = cpu_to_be16(from->count);
- to->hdr.__level = cpu_to_be16(from->level);
-}
-
-static void
-xfs_da3_node_hdr_from_disk(
- struct xfs_da3_icnode_hdr *to,
- struct xfs_da_intnode *from)
-{
- struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from;
-
- ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
- to->forw = be32_to_cpu(hdr3->info.hdr.forw);
- to->back = be32_to_cpu(hdr3->info.hdr.back);
- to->magic = be16_to_cpu(hdr3->info.hdr.magic);
- to->count = be16_to_cpu(hdr3->__count);
- to->level = be16_to_cpu(hdr3->__level);
-}
-
-static void
-xfs_da3_node_hdr_to_disk(
- struct xfs_da_intnode *to,
- struct xfs_da3_icnode_hdr *from)
-{
- struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to;
-
- ASSERT(from->magic == XFS_DA3_NODE_MAGIC);
- hdr3->info.hdr.forw = cpu_to_be32(from->forw);
- hdr3->info.hdr.back = cpu_to_be32(from->back);
- hdr3->info.hdr.magic = cpu_to_be16(from->magic);
- hdr3->__count = cpu_to_be16(from->count);
- hdr3->__level = cpu_to_be16(from->level);
-}
-
-
-/*
- * Directory free space block operations
- */
-static int
-xfs_dir2_free_max_bests(struct xfs_mount *mp)
-{
- return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) /
- sizeof(xfs_dir2_data_off_t);
-}
-
-static __be16 *
-xfs_dir2_free_bests_p(struct xfs_dir2_free *free)
-{
- return (__be16 *)((char *)free + sizeof(struct xfs_dir2_free_hdr));
-}
-
-/*
- * Convert data space db to the corresponding free db.
- */
-static xfs_dir2_db_t
-xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
- return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp);
-}
-
-/*
- * Convert data space db to the corresponding index in a free db.
- */
-static int
-xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
- return db % xfs_dir2_free_max_bests(mp);
-}
-
-static int
-xfs_dir3_free_max_bests(struct xfs_mount *mp)
-{
- return (mp->m_dirblksize - sizeof(struct xfs_dir3_free_hdr)) /
- sizeof(xfs_dir2_data_off_t);
-}
-
-static __be16 *
-xfs_dir3_free_bests_p(struct xfs_dir2_free *free)
-{
- return (__be16 *)((char *)free + sizeof(struct xfs_dir3_free_hdr));
-}
-
-/*
- * Convert data space db to the corresponding free db.
- */
-static xfs_dir2_db_t
-xfs_dir3_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
- return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir3_free_max_bests(mp);
-}
-
-/*
- * Convert data space db to the corresponding index in a free db.
- */
-static int
-xfs_dir3_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
- return db % xfs_dir3_free_max_bests(mp);
-}
-
-static void
-xfs_dir2_free_hdr_from_disk(
- struct xfs_dir3_icfree_hdr *to,
- struct xfs_dir2_free *from)
-{
- to->magic = be32_to_cpu(from->hdr.magic);
- to->firstdb = be32_to_cpu(from->hdr.firstdb);
- to->nvalid = be32_to_cpu(from->hdr.nvalid);
- to->nused = be32_to_cpu(from->hdr.nused);
- ASSERT(to->magic == XFS_DIR2_FREE_MAGIC);
-}
-
-static void
-xfs_dir2_free_hdr_to_disk(
- struct xfs_dir2_free *to,
- struct xfs_dir3_icfree_hdr *from)
-{
- ASSERT(from->magic == XFS_DIR2_FREE_MAGIC);
-
- to->hdr.magic = cpu_to_be32(from->magic);
- to->hdr.firstdb = cpu_to_be32(from->firstdb);
- to->hdr.nvalid = cpu_to_be32(from->nvalid);
- to->hdr.nused = cpu_to_be32(from->nused);
-}
-
-static void
-xfs_dir3_free_hdr_from_disk(
- struct xfs_dir3_icfree_hdr *to,
- struct xfs_dir2_free *from)
-{
- struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from;
-
- to->magic = be32_to_cpu(hdr3->hdr.magic);
- to->firstdb = be32_to_cpu(hdr3->firstdb);
- to->nvalid = be32_to_cpu(hdr3->nvalid);
- to->nused = be32_to_cpu(hdr3->nused);
-
- ASSERT(to->magic == XFS_DIR3_FREE_MAGIC);
-}
-
-static void
-xfs_dir3_free_hdr_to_disk(
- struct xfs_dir2_free *to,
- struct xfs_dir3_icfree_hdr *from)
-{
- struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to;
-
- ASSERT(from->magic == XFS_DIR3_FREE_MAGIC);
-
- hdr3->hdr.magic = cpu_to_be32(from->magic);
- hdr3->firstdb = cpu_to_be32(from->firstdb);
- hdr3->nvalid = cpu_to_be32(from->nvalid);
- hdr3->nused = cpu_to_be32(from->nused);
-}
-
-static const struct xfs_dir_ops xfs_dir2_ops = {
- .sf_entsize = xfs_dir2_sf_entsize,
- .sf_nextentry = xfs_dir2_sf_nextentry,
- .sf_get_ftype = xfs_dir2_sfe_get_ftype,
- .sf_put_ftype = xfs_dir2_sfe_put_ftype,
- .sf_get_ino = xfs_dir2_sfe_get_ino,
- .sf_put_ino = xfs_dir2_sfe_put_ino,
- .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
- .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
-
- .data_entsize = xfs_dir2_data_entsize,
- .data_get_ftype = xfs_dir2_data_get_ftype,
- .data_put_ftype = xfs_dir2_data_put_ftype,
- .data_entry_tag_p = xfs_dir2_data_entry_tag_p,
- .data_bestfree_p = xfs_dir2_data_bestfree_p,
-
- .data_dot_offset = sizeof(struct xfs_dir2_data_hdr),
- .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) +
- XFS_DIR2_DATA_ENTSIZE(1),
- .data_first_offset = sizeof(struct xfs_dir2_data_hdr) +
- XFS_DIR2_DATA_ENTSIZE(1) +
- XFS_DIR2_DATA_ENTSIZE(2),
- .data_entry_offset = sizeof(struct xfs_dir2_data_hdr),
-
- .data_dot_entry_p = xfs_dir2_data_dot_entry_p,
- .data_dotdot_entry_p = xfs_dir2_data_dotdot_entry_p,
- .data_first_entry_p = xfs_dir2_data_first_entry_p,
- .data_entry_p = xfs_dir2_data_entry_p,
- .data_unused_p = xfs_dir2_data_unused_p,
-
- .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr),
- .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk,
- .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk,
- .leaf_max_ents = xfs_dir2_max_leaf_ents,
- .leaf_ents_p = xfs_dir2_leaf_ents_p,
-
- .node_hdr_size = sizeof(struct xfs_da_node_hdr),
- .node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
- .node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
- .node_tree_p = xfs_da2_node_tree_p,
-
- .free_hdr_size = sizeof(struct xfs_dir2_free_hdr),
- .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk,
- .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk,
- .free_max_bests = xfs_dir2_free_max_bests,
- .free_bests_p = xfs_dir2_free_bests_p,
- .db_to_fdb = xfs_dir2_db_to_fdb,
- .db_to_fdindex = xfs_dir2_db_to_fdindex,
-};
-
-static const struct xfs_dir_ops xfs_dir2_ftype_ops = {
- .sf_entsize = xfs_dir3_sf_entsize,
- .sf_nextentry = xfs_dir3_sf_nextentry,
- .sf_get_ftype = xfs_dir3_sfe_get_ftype,
- .sf_put_ftype = xfs_dir3_sfe_put_ftype,
- .sf_get_ino = xfs_dir3_sfe_get_ino,
- .sf_put_ino = xfs_dir3_sfe_put_ino,
- .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
- .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
-
- .data_entsize = xfs_dir3_data_entsize,
- .data_get_ftype = xfs_dir3_data_get_ftype,
- .data_put_ftype = xfs_dir3_data_put_ftype,
- .data_entry_tag_p = xfs_dir3_data_entry_tag_p,
- .data_bestfree_p = xfs_dir2_data_bestfree_p,
-
- .data_dot_offset = sizeof(struct xfs_dir2_data_hdr),
- .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) +
- XFS_DIR3_DATA_ENTSIZE(1),
- .data_first_offset = sizeof(struct xfs_dir2_data_hdr) +
- XFS_DIR3_DATA_ENTSIZE(1) +
- XFS_DIR3_DATA_ENTSIZE(2),
- .data_entry_offset = sizeof(struct xfs_dir2_data_hdr),
-
- .data_dot_entry_p = xfs_dir2_data_dot_entry_p,
- .data_dotdot_entry_p = xfs_dir2_ftype_data_dotdot_entry_p,
- .data_first_entry_p = xfs_dir2_ftype_data_first_entry_p,
- .data_entry_p = xfs_dir2_data_entry_p,
- .data_unused_p = xfs_dir2_data_unused_p,
-
- .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr),
- .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk,
- .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk,
- .leaf_max_ents = xfs_dir2_max_leaf_ents,
- .leaf_ents_p = xfs_dir2_leaf_ents_p,
-
- .node_hdr_size = sizeof(struct xfs_da_node_hdr),
- .node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
- .node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
- .node_tree_p = xfs_da2_node_tree_p,
-
- .free_hdr_size = sizeof(struct xfs_dir2_free_hdr),
- .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk,
- .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk,
- .free_max_bests = xfs_dir2_free_max_bests,
- .free_bests_p = xfs_dir2_free_bests_p,
- .db_to_fdb = xfs_dir2_db_to_fdb,
- .db_to_fdindex = xfs_dir2_db_to_fdindex,
-};
-
-static const struct xfs_dir_ops xfs_dir3_ops = {
- .sf_entsize = xfs_dir3_sf_entsize,
- .sf_nextentry = xfs_dir3_sf_nextentry,
- .sf_get_ftype = xfs_dir3_sfe_get_ftype,
- .sf_put_ftype = xfs_dir3_sfe_put_ftype,
- .sf_get_ino = xfs_dir3_sfe_get_ino,
- .sf_put_ino = xfs_dir3_sfe_put_ino,
- .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
- .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
-
- .data_entsize = xfs_dir3_data_entsize,
- .data_get_ftype = xfs_dir3_data_get_ftype,
- .data_put_ftype = xfs_dir3_data_put_ftype,
- .data_entry_tag_p = xfs_dir3_data_entry_tag_p,
- .data_bestfree_p = xfs_dir3_data_bestfree_p,
-
- .data_dot_offset = sizeof(struct xfs_dir3_data_hdr),
- .data_dotdot_offset = sizeof(struct xfs_dir3_data_hdr) +
- XFS_DIR3_DATA_ENTSIZE(1),
- .data_first_offset = sizeof(struct xfs_dir3_data_hdr) +
- XFS_DIR3_DATA_ENTSIZE(1) +
- XFS_DIR3_DATA_ENTSIZE(2),
- .data_entry_offset = sizeof(struct xfs_dir3_data_hdr),
-
- .data_dot_entry_p = xfs_dir3_data_dot_entry_p,
- .data_dotdot_entry_p = xfs_dir3_data_dotdot_entry_p,
- .data_first_entry_p = xfs_dir3_data_first_entry_p,
- .data_entry_p = xfs_dir3_data_entry_p,
- .data_unused_p = xfs_dir3_data_unused_p,
-
- .leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr),
- .leaf_hdr_to_disk = xfs_dir3_leaf_hdr_to_disk,
- .leaf_hdr_from_disk = xfs_dir3_leaf_hdr_from_disk,
- .leaf_max_ents = xfs_dir3_max_leaf_ents,
- .leaf_ents_p = xfs_dir3_leaf_ents_p,
-
- .node_hdr_size = sizeof(struct xfs_da3_node_hdr),
- .node_hdr_to_disk = xfs_da3_node_hdr_to_disk,
- .node_hdr_from_disk = xfs_da3_node_hdr_from_disk,
- .node_tree_p = xfs_da3_node_tree_p,
-
- .free_hdr_size = sizeof(struct xfs_dir3_free_hdr),
- .free_hdr_to_disk = xfs_dir3_free_hdr_to_disk,
- .free_hdr_from_disk = xfs_dir3_free_hdr_from_disk,
- .free_max_bests = xfs_dir3_free_max_bests,
- .free_bests_p = xfs_dir3_free_bests_p,
- .db_to_fdb = xfs_dir3_db_to_fdb,
- .db_to_fdindex = xfs_dir3_db_to_fdindex,
-};
-
-static const struct xfs_dir_ops xfs_dir2_nondir_ops = {
- .node_hdr_size = sizeof(struct xfs_da_node_hdr),
- .node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
- .node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
- .node_tree_p = xfs_da2_node_tree_p,
-};
-
-static const struct xfs_dir_ops xfs_dir3_nondir_ops = {
- .node_hdr_size = sizeof(struct xfs_da3_node_hdr),
- .node_hdr_to_disk = xfs_da3_node_hdr_to_disk,
- .node_hdr_from_disk = xfs_da3_node_hdr_from_disk,
- .node_tree_p = xfs_da3_node_tree_p,
-};
-
-/*
- * Return the ops structure according to the current config. If we are passed
- * an inode, then that overrides the default config we use which is based on
- * feature bits.
- */
-const struct xfs_dir_ops *
-xfs_dir_get_ops(
- struct xfs_mount *mp,
- struct xfs_inode *dp)
-{
- if (dp)
- return dp->d_ops;
- if (mp->m_dir_inode_ops)
- return mp->m_dir_inode_ops;
- if (xfs_sb_version_hascrc(&mp->m_sb))
- return &xfs_dir3_ops;
- if (xfs_sb_version_hasftype(&mp->m_sb))
- return &xfs_dir2_ftype_ops;
- return &xfs_dir2_ops;
-}
-
-const struct xfs_dir_ops *
-xfs_nondir_get_ops(
- struct xfs_mount *mp,
- struct xfs_inode *dp)
-{
- if (dp)
- return dp->d_ops;
- if (mp->m_nondir_inode_ops)
- return mp->m_nondir_inode_ops;
- if (xfs_sb_version_hascrc(&mp->m_sb))
- return &xfs_dir3_nondir_ops;
- return &xfs_dir2_nondir_ops;
-}
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index ce16ef0..edf203a 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -17,24 +17,25 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
#include "xfs_inum.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_dinode.h"
struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
@@ -95,17 +96,13 @@ xfs_dir_mount(
ASSERT(xfs_sb_version_hasdirv2(&mp->m_sb));
ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
XFS_MAX_BLOCKSIZE);
-
- mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL);
- mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL);
-
mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog);
mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog;
mp->m_dirdatablk = xfs_dir2_db_to_da(mp, XFS_DIR2_DATA_FIRSTDB(mp));
mp->m_dirleafblk = xfs_dir2_db_to_da(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
mp->m_dirfreeblk = xfs_dir2_db_to_da(mp, XFS_DIR2_FREE_FIRSTDB(mp));
- nodehdr_size = mp->m_dir_inode_ops->node_hdr_size;
+ nodehdr_size = __xfs_da3_node_hdr_size(xfs_sb_version_hascrc(&mp->m_sb));
mp->m_attr_node_ents = (mp->m_sb.sb_blocksize - nodehdr_size) /
(uint)sizeof(xfs_da_node_entry_t);
mp->m_dir_node_ents = (mp->m_dirblksize - nodehdr_size) /
@@ -116,7 +113,6 @@ xfs_dir_mount(
mp->m_dirnameops = &xfs_ascii_ci_nameops;
else
mp->m_dirnameops = &xfs_default_nameops;
-
}
/*
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index cec70e0..9910401 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -32,83 +32,6 @@ struct xfs_dir2_data_unused;
extern struct xfs_name xfs_name_dotdot;
/*
- * directory operations vector for encode/decode routines
- */
-struct xfs_dir_ops {
- int (*sf_entsize)(struct xfs_dir2_sf_hdr *hdr, int len);
- struct xfs_dir2_sf_entry *
- (*sf_nextentry)(struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep);
- __uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep);
- void (*sf_put_ftype)(struct xfs_dir2_sf_entry *sfep,
- __uint8_t ftype);
- xfs_ino_t (*sf_get_ino)(struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep);
- void (*sf_put_ino)(struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep,
- xfs_ino_t ino);
- xfs_ino_t (*sf_get_parent_ino)(struct xfs_dir2_sf_hdr *hdr);
- void (*sf_put_parent_ino)(struct xfs_dir2_sf_hdr *hdr,
- xfs_ino_t ino);
-
- int (*data_entsize)(int len);
- __uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep);
- void (*data_put_ftype)(struct xfs_dir2_data_entry *dep,
- __uint8_t ftype);
- __be16 * (*data_entry_tag_p)(struct xfs_dir2_data_entry *dep);
- struct xfs_dir2_data_free *
- (*data_bestfree_p)(struct xfs_dir2_data_hdr *hdr);
-
- xfs_dir2_data_aoff_t data_dot_offset;
- xfs_dir2_data_aoff_t data_dotdot_offset;
- xfs_dir2_data_aoff_t data_first_offset;
- size_t data_entry_offset;
-
- struct xfs_dir2_data_entry *
- (*data_dot_entry_p)(struct xfs_dir2_data_hdr *hdr);
- struct xfs_dir2_data_entry *
- (*data_dotdot_entry_p)(struct xfs_dir2_data_hdr *hdr);
- struct xfs_dir2_data_entry *
- (*data_first_entry_p)(struct xfs_dir2_data_hdr *hdr);
- struct xfs_dir2_data_entry *
- (*data_entry_p)(struct xfs_dir2_data_hdr *hdr);
- struct xfs_dir2_data_unused *
- (*data_unused_p)(struct xfs_dir2_data_hdr *hdr);
-
- int leaf_hdr_size;
- void (*leaf_hdr_to_disk)(struct xfs_dir2_leaf *to,
- struct xfs_dir3_icleaf_hdr *from);
- void (*leaf_hdr_from_disk)(struct xfs_dir3_icleaf_hdr *to,
- struct xfs_dir2_leaf *from);
- int (*leaf_max_ents)(struct xfs_mount *mp);
- struct xfs_dir2_leaf_entry *
- (*leaf_ents_p)(struct xfs_dir2_leaf *lp);
-
- int node_hdr_size;
- void (*node_hdr_to_disk)(struct xfs_da_intnode *to,
- struct xfs_da3_icnode_hdr *from);
- void (*node_hdr_from_disk)(struct xfs_da3_icnode_hdr *to,
- struct xfs_da_intnode *from);
- struct xfs_da_node_entry *
- (*node_tree_p)(struct xfs_da_intnode *dap);
-
- int free_hdr_size;
- void (*free_hdr_to_disk)(struct xfs_dir2_free *to,
- struct xfs_dir3_icfree_hdr *from);
- void (*free_hdr_from_disk)(struct xfs_dir3_icfree_hdr *to,
- struct xfs_dir2_free *from);
- int (*free_max_bests)(struct xfs_mount *mp);
- __be16 * (*free_bests_p)(struct xfs_dir2_free *free);
- xfs_dir2_db_t (*db_to_fdb)(struct xfs_mount *mp, xfs_dir2_db_t db);
- int (*db_to_fdindex)(struct xfs_mount *mp, xfs_dir2_db_t db);
-};
-
-extern const struct xfs_dir_ops *
- xfs_dir_get_ops(struct xfs_mount *mp, struct xfs_inode *dp);
-extern const struct xfs_dir_ops *
- xfs_nondir_get_ops(struct xfs_mount *mp, struct xfs_inode *dp);
-
-/*
* Generic directory interface routines
*/
extern void xfs_dir_startup(void);
@@ -142,30 +65,37 @@ extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
/*
* Interface routines used by userspace utilities
*/
+extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp);
+extern void xfs_dir2_sf_put_parent_ino(struct xfs_dir2_sf_hdr *sfp,
+ xfs_ino_t ino);
+extern xfs_ino_t xfs_dir3_sfe_get_ino(struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *sfp, struct xfs_dir2_sf_entry *sfep);
+extern void xfs_dir3_sfe_put_ino(struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *hdr, struct xfs_dir2_sf_entry *sfep,
+ xfs_ino_t ino);
+
extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
struct xfs_buf *bp);
-extern void xfs_dir2_data_freescan(struct xfs_inode *dp,
+extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
struct xfs_dir2_data_hdr *hdr, int *loghead);
-extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_inode *dp,
- struct xfs_buf *bp, struct xfs_dir2_data_entry *dep);
-extern void xfs_dir2_data_log_header(struct xfs_trans *tp, struct xfs_inode *dp,
+extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp,
+ struct xfs_dir2_data_entry *dep);
+extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
struct xfs_buf *bp);
extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp,
struct xfs_dir2_data_unused *dup);
-extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_inode *dp,
- struct xfs_buf *bp, xfs_dir2_data_aoff_t offset,
- xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
-extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_inode *dp,
- struct xfs_buf *bp, struct xfs_dir2_data_unused *dup,
+extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_buf *bp,
xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
int *needlogp, int *needscanp);
+extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp,
+ struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset,
+ xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
extern struct xfs_dir2_data_free *xfs_dir2_data_freefind(
- struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf,
- struct xfs_dir2_data_unused *dup);
+ struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_unused *dup);
extern const struct xfs_buf_ops xfs_dir3_block_buf_ops;
extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops;
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 90cdbf4..12dad18 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -18,25 +18,25 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_buf_item.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
-#include "xfs_dinode.h"
/*
* Local function prototypes.
@@ -168,7 +168,6 @@ xfs_dir3_block_init(
static void
xfs_dir2_block_need_space(
- struct xfs_inode *dp,
struct xfs_dir2_data_hdr *hdr,
struct xfs_dir2_block_tail *btp,
struct xfs_dir2_leaf_entry *blp,
@@ -184,7 +183,7 @@ xfs_dir2_block_need_space(
struct xfs_dir2_data_unused *enddup = NULL;
*compact = 0;
- bf = dp->d_ops->data_bestfree_p(hdr);
+ bf = xfs_dir3_data_bestfree_p(hdr);
/*
* If there are stale entries we'll use one for the leaf.
@@ -281,7 +280,6 @@ out:
static void
xfs_dir2_block_compact(
struct xfs_trans *tp,
- struct xfs_inode *dp,
struct xfs_buf *bp,
struct xfs_dir2_data_hdr *hdr,
struct xfs_dir2_block_tail *btp,
@@ -314,7 +312,7 @@ xfs_dir2_block_compact(
*lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1);
*lfloghigh -= be32_to_cpu(btp->stale) - 1;
be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1));
- xfs_dir2_data_make_free(tp, dp, bp,
+ xfs_dir2_data_make_free(tp, bp,
(xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
(xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
needlog, &needscan);
@@ -325,7 +323,7 @@ xfs_dir2_block_compact(
* This needs to happen before the next call to use_free.
*/
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, needlog);
+ xfs_dir2_data_freescan(tp->t_mountp, hdr, needlog);
}
/*
@@ -371,7 +369,7 @@ xfs_dir2_block_addname(
if (error)
return error;
- len = dp->d_ops->data_entsize(args->namelen);
+ len = xfs_dir3_data_entsize(mp, args->namelen);
/*
* Set up pointers to parts of the block.
@@ -384,7 +382,7 @@ xfs_dir2_block_addname(
* Find out if we can reuse stale entries or whether we need extra
* space for entry and new leaf.
*/
- xfs_dir2_block_need_space(dp, hdr, btp, blp, &tagp, &dup,
+ xfs_dir2_block_need_space(hdr, btp, blp, &tagp, &dup,
&enddup, &compact, len);
/*
@@ -420,7 +418,7 @@ xfs_dir2_block_addname(
* If need to compact the leaf entries, do it now.
*/
if (compact) {
- xfs_dir2_block_compact(tp, dp, bp, hdr, btp, blp, &needlog,
+ xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog,
&lfloghigh, &lfloglow);
/* recalculate blp post-compaction */
blp = xfs_dir2_block_leaf_p(btp);
@@ -455,7 +453,7 @@ xfs_dir2_block_addname(
/*
* Mark the space needed for the new leaf entry, now in use.
*/
- xfs_dir2_data_use_free(tp, dp, bp, enddup,
+ xfs_dir2_data_use_free(tp, bp, enddup,
(xfs_dir2_data_aoff_t)
((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) -
sizeof(*blp)),
@@ -470,7 +468,7 @@ xfs_dir2_block_addname(
* This needs to happen before the next call to use_free.
*/
if (needscan) {
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
needscan = 0;
}
/*
@@ -542,7 +540,7 @@ xfs_dir2_block_addname(
/*
* Mark space for the data entry used.
*/
- xfs_dir2_data_use_free(tp, dp, bp, dup,
+ xfs_dir2_data_use_free(tp, bp, dup,
(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
(xfs_dir2_data_aoff_t)len, &needlog, &needscan);
/*
@@ -551,18 +549,18 @@ xfs_dir2_block_addname(
dep->inumber = cpu_to_be64(args->inumber);
dep->namelen = args->namelen;
memcpy(dep->name, args->name, args->namelen);
- dp->d_ops->data_put_ftype(dep, args->filetype);
- tagp = dp->d_ops->data_entry_tag_p(dep);
+ xfs_dir3_dirent_put_ftype(mp, dep, args->filetype);
+ tagp = xfs_dir3_data_entry_tag_p(mp, dep);
*tagp = cpu_to_be16((char *)dep - (char *)hdr);
/*
* Clean up the bestfree array and log the header, tail, and entry.
*/
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
if (needlog)
- xfs_dir2_data_log_header(tp, dp, bp);
+ xfs_dir2_data_log_header(tp, bp);
xfs_dir2_block_log_tail(tp, bp);
- xfs_dir2_data_log_entry(tp, dp, bp, dep);
+ xfs_dir2_data_log_entry(tp, bp, dep);
xfs_dir3_data_check(dp, bp);
return 0;
}
@@ -644,7 +642,7 @@ xfs_dir2_block_lookup(
* Fill in inode number, CI name if appropriate, release the block.
*/
args->inumber = be64_to_cpu(dep->inumber);
- args->filetype = dp->d_ops->data_get_ftype(dep);
+ args->filetype = xfs_dir3_dirent_get_ftype(mp, dep);
error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
xfs_trans_brelse(args->trans, bp);
return XFS_ERROR(error);
@@ -801,9 +799,9 @@ xfs_dir2_block_removename(
* Mark the data entry's space free.
*/
needlog = needscan = 0;
- xfs_dir2_data_make_free(tp, dp, bp,
+ xfs_dir2_data_make_free(tp, bp,
(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
- dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
+ xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan);
/*
* Fix up the block tail.
*/
@@ -818,9 +816,9 @@ xfs_dir2_block_removename(
* Fix up bestfree, log the header if necessary.
*/
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
if (needlog)
- xfs_dir2_data_log_header(tp, dp, bp);
+ xfs_dir2_data_log_header(tp, bp);
xfs_dir3_data_check(dp, bp);
/*
* See if the size as a shortform is good enough.
@@ -877,8 +875,8 @@ xfs_dir2_block_replace(
* Change the inode number to the new value.
*/
dep->inumber = cpu_to_be64(args->inumber);
- dp->d_ops->data_put_ftype(dep, args->filetype);
- xfs_dir2_data_log_entry(args->trans, dp, bp, dep);
+ xfs_dir3_dirent_put_ftype(mp, dep, args->filetype);
+ xfs_dir2_data_log_entry(args->trans, bp, dep);
xfs_dir3_data_check(dp, bp);
return 0;
}
@@ -936,8 +934,8 @@ xfs_dir2_leaf_to_block(
tp = args->trans;
mp = dp->i_mount;
leaf = lbp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
@@ -951,7 +949,7 @@ xfs_dir2_leaf_to_block(
while (dp->i_d.di_size > mp->m_dirblksize) {
int hdrsz;
- hdrsz = dp->d_ops->data_entry_offset;
+ hdrsz = xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&mp->m_sb));
bestsp = xfs_dir2_leaf_bests_p(ltp);
if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
mp->m_dirblksize - hdrsz) {
@@ -1001,7 +999,7 @@ xfs_dir2_leaf_to_block(
/*
* Use up the space at the end of the block (blp/btp).
*/
- xfs_dir2_data_use_free(tp, dp, dbp, dup, mp->m_dirblksize - size, size,
+ xfs_dir2_data_use_free(tp, dbp, dup, mp->m_dirblksize - size, size,
&needlog, &needscan);
/*
* Initialize the block tail.
@@ -1025,9 +1023,9 @@ xfs_dir2_leaf_to_block(
* Scan the bestfree if we need it and log the data block header.
*/
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
if (needlog)
- xfs_dir2_data_log_header(tp, dp, dbp);
+ xfs_dir2_data_log_header(tp, dbp);
/*
* Pitch the old leaf block.
*/
@@ -1138,9 +1136,9 @@ xfs_dir2_sf_to_block(
* The whole thing is initialized to free by the init routine.
* Say we're using the leaf and tail area.
*/
- dup = dp->d_ops->data_unused_p(hdr);
+ dup = xfs_dir3_data_unused_p(hdr);
needlog = needscan = 0;
- xfs_dir2_data_use_free(tp, dp, bp, dup, mp->m_dirblksize - i, i, &needlog,
+ xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog,
&needscan);
ASSERT(needscan == 0);
/*
@@ -1154,38 +1152,38 @@ xfs_dir2_sf_to_block(
/*
* Remove the freespace, we'll manage it.
*/
- xfs_dir2_data_use_free(tp, dp, bp, dup,
+ xfs_dir2_data_use_free(tp, bp, dup,
(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
be16_to_cpu(dup->length), &needlog, &needscan);
/*
* Create entry for .
*/
- dep = dp->d_ops->data_dot_entry_p(hdr);
+ dep = xfs_dir3_data_dot_entry_p(mp, hdr);
dep->inumber = cpu_to_be64(dp->i_ino);
dep->namelen = 1;
dep->name[0] = '.';
- dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR);
- tagp = dp->d_ops->data_entry_tag_p(dep);
+ xfs_dir3_dirent_put_ftype(mp, dep, XFS_DIR3_FT_DIR);
+ tagp = xfs_dir3_data_entry_tag_p(mp, dep);
*tagp = cpu_to_be16((char *)dep - (char *)hdr);
- xfs_dir2_data_log_entry(tp, dp, bp, dep);
+ xfs_dir2_data_log_entry(tp, bp, dep);
blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
(char *)dep - (char *)hdr));
/*
* Create entry for ..
*/
- dep = dp->d_ops->data_dotdot_entry_p(hdr);
- dep->inumber = cpu_to_be64(dp->d_ops->sf_get_parent_ino(sfp));
+ dep = xfs_dir3_data_dotdot_entry_p(mp, hdr);
+ dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp));
dep->namelen = 2;
dep->name[0] = dep->name[1] = '.';
- dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR);
- tagp = dp->d_ops->data_entry_tag_p(dep);
+ xfs_dir3_dirent_put_ftype(mp, dep, XFS_DIR3_FT_DIR);
+ tagp = xfs_dir3_data_entry_tag_p(mp, dep);
*tagp = cpu_to_be16((char *)dep - (char *)hdr);
- xfs_dir2_data_log_entry(tp, dp, bp, dep);
+ xfs_dir2_data_log_entry(tp, bp, dep);
blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
(char *)dep - (char *)hdr));
- offset = dp->d_ops->data_first_offset;
+ offset = xfs_dir3_data_first_offset(mp);
/*
* Loop over existing entries, stuff them in.
*/
@@ -1216,9 +1214,7 @@ xfs_dir2_sf_to_block(
*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
((char *)dup - (char *)hdr));
xfs_dir2_data_log_unused(tp, bp, dup);
- xfs_dir2_data_freeinsert(hdr,
- dp->d_ops->data_bestfree_p(hdr),
- dup, &dummy);
+ xfs_dir2_data_freeinsert(hdr, dup, &dummy);
offset += be16_to_cpu(dup->length);
continue;
}
@@ -1226,13 +1222,14 @@ xfs_dir2_sf_to_block(
* Copy a real entry.
*/
dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset);
- dep->inumber = cpu_to_be64(dp->d_ops->sf_get_ino(sfp, sfep));
+ dep->inumber = cpu_to_be64(xfs_dir3_sfe_get_ino(mp, sfp, sfep));
dep->namelen = sfep->namelen;
- dp->d_ops->data_put_ftype(dep, dp->d_ops->sf_get_ftype(sfep));
+ xfs_dir3_dirent_put_ftype(mp, dep,
+ xfs_dir3_sfe_get_ftype(mp, sfp, sfep));
memcpy(dep->name, sfep->name, dep->namelen);
- tagp = dp->d_ops->data_entry_tag_p(dep);
+ tagp = xfs_dir3_data_entry_tag_p(mp, dep);
*tagp = cpu_to_be16((char *)dep - (char *)hdr);
- xfs_dir2_data_log_entry(tp, dp, bp, dep);
+ xfs_dir2_data_log_entry(tp, bp, dep);
name.name = sfep->name;
name.len = sfep->namelen;
blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
@@ -1243,7 +1240,7 @@ xfs_dir2_sf_to_block(
if (++i == sfp->count)
sfep = NULL;
else
- sfep = dp->d_ops->sf_nextentry(sfp, sfep);
+ sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
}
/* Done with the temporary buffer */
kmem_free(sfp);
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 70acff4..47e1326 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -18,19 +18,20 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_error.h"
-#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
@@ -62,18 +63,11 @@ __xfs_dir3_data_check(
char *p; /* current data position */
int stale; /* count of stale leaves */
struct xfs_name name;
- const struct xfs_dir_ops *ops;
mp = bp->b_target->bt_mount;
-
- /*
- * We can be passed a null dp here from a verifier, so we need to go the
- * hard way to get them.
- */
- ops = xfs_dir_get_ops(mp, dp);
-
hdr = bp->b_addr;
- p = (char *)ops->data_entry_p(hdr);
+ bf = xfs_dir3_data_bestfree_p(hdr);
+ p = (char *)xfs_dir3_data_entry_p(hdr);
switch (hdr->magic) {
case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
@@ -81,16 +75,6 @@ __xfs_dir3_data_check(
btp = xfs_dir2_block_tail_p(mp, hdr);
lep = xfs_dir2_block_leaf_p(btp);
endp = (char *)lep;
-
- /*
- * The number of leaf entries is limited by the size of the
- * block and the amount of space used by the data entries.
- * We don't know how much space is used by the data entries yet,
- * so just ensure that the count falls somewhere inside the
- * block right now.
- */
- XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) <
- ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry));
break;
case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
@@ -101,11 +85,10 @@ __xfs_dir3_data_check(
return EFSCORRUPTED;
}
+ count = lastfree = freeseen = 0;
/*
* Account for zero bestfree entries.
*/
- bf = ops->data_bestfree_p(hdr);
- count = lastfree = freeseen = 0;
if (!bf[0].length) {
XFS_WANT_CORRUPTED_RETURN(!bf[0].offset);
freeseen |= 1 << 0;
@@ -138,7 +121,7 @@ __xfs_dir3_data_check(
XFS_WANT_CORRUPTED_RETURN(
be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
(char *)dup - (char *)hdr);
- dfp = xfs_dir2_data_freefind(hdr, bf, dup);
+ dfp = xfs_dir2_data_freefind(hdr, dup);
if (dfp) {
i = (int)(dfp - bf);
XFS_WANT_CORRUPTED_RETURN(
@@ -164,10 +147,10 @@ __xfs_dir3_data_check(
XFS_WANT_CORRUPTED_RETURN(
!xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));
XFS_WANT_CORRUPTED_RETURN(
- be16_to_cpu(*ops->data_entry_tag_p(dep)) ==
+ be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep)) ==
(char *)dep - (char *)hdr);
XFS_WANT_CORRUPTED_RETURN(
- ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX);
+ xfs_dir3_dirent_get_ftype(mp, dep) < XFS_DIR3_FT_MAX);
count++;
lastfree = 0;
if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
@@ -185,7 +168,7 @@ __xfs_dir3_data_check(
}
XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count));
}
- p += ops->data_entsize(dep->namelen);
+ p += xfs_dir3_data_entsize(mp, dep->namelen);
}
/*
* Need to have seen all the entries and all the bestfree slots.
@@ -344,18 +327,19 @@ xfs_dir3_data_readahead(
*/
xfs_dir2_data_free_t *
xfs_dir2_data_freefind(
- struct xfs_dir2_data_hdr *hdr, /* data block header */
- struct xfs_dir2_data_free *bf, /* bestfree table pointer */
- struct xfs_dir2_data_unused *dup) /* unused space */
+ xfs_dir2_data_hdr_t *hdr, /* data block */
+ xfs_dir2_data_unused_t *dup) /* data unused entry */
{
xfs_dir2_data_free_t *dfp; /* bestfree entry */
xfs_dir2_data_aoff_t off; /* offset value needed */
+ struct xfs_dir2_data_free *bf;
#ifdef DEBUG
int matched; /* matched the value */
int seenzero; /* saw a 0 bestfree entry */
#endif
off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
+ bf = xfs_dir3_data_bestfree_p(hdr);
#ifdef DEBUG
/*
@@ -415,11 +399,11 @@ xfs_dir2_data_freefind(
*/
xfs_dir2_data_free_t * /* entry inserted */
xfs_dir2_data_freeinsert(
- struct xfs_dir2_data_hdr *hdr, /* data block pointer */
- struct xfs_dir2_data_free *dfp, /* bestfree table pointer */
- struct xfs_dir2_data_unused *dup, /* unused space */
+ xfs_dir2_data_hdr_t *hdr, /* data block pointer */
+ xfs_dir2_data_unused_t *dup, /* unused space */
int *loghead) /* log the data header (out) */
{
+ xfs_dir2_data_free_t *dfp; /* bestfree table pointer */
xfs_dir2_data_free_t new; /* new bestfree entry */
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
@@ -427,6 +411,7 @@ xfs_dir2_data_freeinsert(
hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
+ dfp = xfs_dir3_data_bestfree_p(hdr);
new.length = dup->length;
new.offset = cpu_to_be16((char *)dup - (char *)hdr);
@@ -459,11 +444,11 @@ xfs_dir2_data_freeinsert(
*/
STATIC void
xfs_dir2_data_freeremove(
- struct xfs_dir2_data_hdr *hdr, /* data block header */
- struct xfs_dir2_data_free *bf, /* bestfree table pointer */
- struct xfs_dir2_data_free *dfp, /* bestfree entry pointer */
+ xfs_dir2_data_hdr_t *hdr, /* data block header */
+ xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */
int *loghead) /* out: log data header */
{
+ struct xfs_dir2_data_free *bf;
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
@@ -473,6 +458,7 @@ xfs_dir2_data_freeremove(
/*
* It's the first entry, slide the next 2 up.
*/
+ bf = xfs_dir3_data_bestfree_p(hdr);
if (dfp == &bf[0]) {
bf[0] = bf[1];
bf[1] = bf[2];
@@ -500,9 +486,9 @@ xfs_dir2_data_freeremove(
*/
void
xfs_dir2_data_freescan(
- struct xfs_inode *dp,
- struct xfs_dir2_data_hdr *hdr,
- int *loghead)
+ xfs_mount_t *mp, /* filesystem mount point */
+ xfs_dir2_data_hdr_t *hdr, /* data block header */
+ int *loghead) /* out: log data header */
{
xfs_dir2_block_tail_t *btp; /* block tail */
xfs_dir2_data_entry_t *dep; /* active data entry */
@@ -519,19 +505,19 @@ xfs_dir2_data_freescan(
/*
* Start by clearing the table.
*/
- bf = dp->d_ops->data_bestfree_p(hdr);
+ bf = xfs_dir3_data_bestfree_p(hdr);
memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT);
*loghead = 1;
/*
* Set up pointers.
*/
- p = (char *)dp->d_ops->data_entry_p(hdr);
+ p = (char *)xfs_dir3_data_entry_p(hdr);
if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
- btp = xfs_dir2_block_tail_p(dp->i_mount, hdr);
+ btp = xfs_dir2_block_tail_p(mp, hdr);
endp = (char *)xfs_dir2_block_leaf_p(btp);
} else
- endp = (char *)hdr + dp->i_mount->m_dirblksize;
+ endp = (char *)hdr + mp->m_dirblksize;
/*
* Loop over the block's entries.
*/
@@ -543,7 +529,7 @@ xfs_dir2_data_freescan(
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
ASSERT((char *)dup - (char *)hdr ==
be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
- xfs_dir2_data_freeinsert(hdr, bf, dup, loghead);
+ xfs_dir2_data_freeinsert(hdr, dup, loghead);
p += be16_to_cpu(dup->length);
}
/*
@@ -552,8 +538,8 @@ xfs_dir2_data_freescan(
else {
dep = (xfs_dir2_data_entry_t *)p;
ASSERT((char *)dep - (char *)hdr ==
- be16_to_cpu(*dp->d_ops->data_entry_tag_p(dep)));
- p += dp->d_ops->data_entsize(dep->namelen);
+ be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep)));
+ p += xfs_dir3_data_entsize(mp, dep->namelen);
}
}
}
@@ -608,8 +594,8 @@ xfs_dir3_data_init(
} else
hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
- bf = dp->d_ops->data_bestfree_p(hdr);
- bf[0].offset = cpu_to_be16(dp->d_ops->data_entry_offset);
+ bf = xfs_dir3_data_bestfree_p(hdr);
+ bf[0].offset = cpu_to_be16(xfs_dir3_data_entry_offset(hdr));
for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
bf[i].length = 0;
bf[i].offset = 0;
@@ -618,17 +604,17 @@ xfs_dir3_data_init(
/*
* Set up an unused entry for the block's body.
*/
- dup = dp->d_ops->data_unused_p(hdr);
+ dup = xfs_dir3_data_unused_p(hdr);
dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
- t = mp->m_dirblksize - (uint)dp->d_ops->data_entry_offset;
+ t = mp->m_dirblksize - (uint)xfs_dir3_data_entry_offset(hdr);
bf[0].length = cpu_to_be16(t);
dup->length = cpu_to_be16(t);
*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
/*
* Log it and return it.
*/
- xfs_dir2_data_log_header(tp, dp, bp);
+ xfs_dir2_data_log_header(tp, bp);
xfs_dir2_data_log_unused(tp, bp, dup);
*bpp = bp;
return 0;
@@ -640,11 +626,11 @@ xfs_dir3_data_init(
void
xfs_dir2_data_log_entry(
struct xfs_trans *tp,
- struct xfs_inode *dp,
struct xfs_buf *bp,
xfs_dir2_data_entry_t *dep) /* data entry pointer */
{
struct xfs_dir2_data_hdr *hdr = bp->b_addr;
+ struct xfs_mount *mp = tp->t_mountp;
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
@@ -652,7 +638,7 @@ xfs_dir2_data_log_entry(
hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
- (uint)((char *)(dp->d_ops->data_entry_tag_p(dep) + 1) -
+ (uint)((char *)(xfs_dir3_data_entry_tag_p(mp, dep) + 1) -
(char *)hdr - 1));
}
@@ -662,19 +648,16 @@ xfs_dir2_data_log_entry(
void
xfs_dir2_data_log_header(
struct xfs_trans *tp,
- struct xfs_inode *dp,
struct xfs_buf *bp)
{
-#ifdef DEBUG
- struct xfs_dir2_data_hdr *hdr = bp->b_addr;
+ xfs_dir2_data_hdr_t *hdr = bp->b_addr;
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
-#endif
- xfs_trans_log_buf(tp, bp, 0, dp->d_ops->data_entry_offset - 1);
+ xfs_trans_log_buf(tp, bp, 0, xfs_dir3_data_entry_offset(hdr) - 1);
}
/*
@@ -715,7 +698,6 @@ xfs_dir2_data_log_unused(
void
xfs_dir2_data_make_free(
struct xfs_trans *tp,
- struct xfs_inode *dp,
struct xfs_buf *bp,
xfs_dir2_data_aoff_t offset, /* starting byte offset */
xfs_dir2_data_aoff_t len, /* length in bytes */
@@ -753,7 +735,7 @@ xfs_dir2_data_make_free(
* If this isn't the start of the block, then back up to
* the previous entry and see if it's free.
*/
- if (offset > dp->d_ops->data_entry_offset) {
+ if (offset > xfs_dir3_data_entry_offset(hdr)) {
__be16 *tagp; /* tag just before us */
tagp = (__be16 *)((char *)hdr + offset) - 1;
@@ -779,15 +761,15 @@ xfs_dir2_data_make_free(
* Previous and following entries are both free,
* merge everything into a single free entry.
*/
- bf = dp->d_ops->data_bestfree_p(hdr);
+ bf = xfs_dir3_data_bestfree_p(hdr);
if (prevdup && postdup) {
xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */
/*
* See if prevdup and/or postdup are in bestfree table.
*/
- dfp = xfs_dir2_data_freefind(hdr, bf, prevdup);
- dfp2 = xfs_dir2_data_freefind(hdr, bf, postdup);
+ dfp = xfs_dir2_data_freefind(hdr, prevdup);
+ dfp2 = xfs_dir2_data_freefind(hdr, postdup);
/*
* We need a rescan unless there are exactly 2 free entries
* namely our two. Then we know what's happening, otherwise
@@ -815,13 +797,12 @@ xfs_dir2_data_make_free(
ASSERT(dfp2 == dfp);
dfp2 = &bf[1];
}
- xfs_dir2_data_freeremove(hdr, bf, dfp2, needlogp);
- xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp2, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
/*
* Now insert the new entry.
*/
- dfp = xfs_dir2_data_freeinsert(hdr, bf, prevdup,
- needlogp);
+ dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
ASSERT(dfp == &bf[0]);
ASSERT(dfp->length == prevdup->length);
ASSERT(!dfp[1].length);
@@ -832,7 +813,7 @@ xfs_dir2_data_make_free(
* The entry before us is free, merge with it.
*/
else if (prevdup) {
- dfp = xfs_dir2_data_freefind(hdr, bf, prevdup);
+ dfp = xfs_dir2_data_freefind(hdr, prevdup);
be16_add_cpu(&prevdup->length, len);
*xfs_dir2_data_unused_tag_p(prevdup) =
cpu_to_be16((char *)prevdup - (char *)hdr);
@@ -843,8 +824,8 @@ xfs_dir2_data_make_free(
* the old one and add the new one.
*/
if (dfp) {
- xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
- xfs_dir2_data_freeinsert(hdr, bf, prevdup, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+ xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
}
/*
* Otherwise we need a scan if the new entry is big enough.
@@ -858,7 +839,7 @@ xfs_dir2_data_make_free(
* The following entry is free, merge with it.
*/
else if (postdup) {
- dfp = xfs_dir2_data_freefind(hdr, bf, postdup);
+ dfp = xfs_dir2_data_freefind(hdr, postdup);
newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
@@ -871,8 +852,8 @@ xfs_dir2_data_make_free(
* the old one and add the new one.
*/
if (dfp) {
- xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
- xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+ xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
}
/*
* Otherwise we need a scan if the new entry is big enough.
@@ -892,7 +873,7 @@ xfs_dir2_data_make_free(
*xfs_dir2_data_unused_tag_p(newdup) =
cpu_to_be16((char *)newdup - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, newdup);
- xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);
+ xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
}
*needscanp = needscan;
}
@@ -903,7 +884,6 @@ xfs_dir2_data_make_free(
void
xfs_dir2_data_use_free(
struct xfs_trans *tp,
- struct xfs_inode *dp,
struct xfs_buf *bp,
xfs_dir2_data_unused_t *dup, /* unused entry */
xfs_dir2_data_aoff_t offset, /* starting offset to use */
@@ -933,9 +913,9 @@ xfs_dir2_data_use_free(
/*
* Look up the entry in the bestfree table.
*/
+ dfp = xfs_dir2_data_freefind(hdr, dup);
oldlen = be16_to_cpu(dup->length);
- bf = dp->d_ops->data_bestfree_p(hdr);
- dfp = xfs_dir2_data_freefind(hdr, bf, dup);
+ bf = xfs_dir3_data_bestfree_p(hdr);
ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length));
/*
* Check for alignment with front and back of the entry.
@@ -952,8 +932,7 @@ xfs_dir2_data_use_free(
if (dfp) {
needscan = (bf[2].offset != 0);
if (!needscan)
- xfs_dir2_data_freeremove(hdr, bf, dfp,
- needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
}
}
/*
@@ -971,9 +950,8 @@ xfs_dir2_data_use_free(
* If it was in the table, remove it and add the new one.
*/
if (dfp) {
- xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
- dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
- needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+ dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
ASSERT(dfp != NULL);
ASSERT(dfp->length == newdup->length);
ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
@@ -999,9 +977,8 @@ xfs_dir2_data_use_free(
* If it was in the table, remove it and add the new one.
*/
if (dfp) {
- xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
- dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
- needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+ dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
ASSERT(dfp != NULL);
ASSERT(dfp->length == newdup->length);
ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
@@ -1040,11 +1017,9 @@ xfs_dir2_data_use_free(
if (dfp) {
needscan = (bf[2].length != 0);
if (!needscan) {
- xfs_dir2_data_freeremove(hdr, bf, dfp,
- needlogp);
- xfs_dir2_data_freeinsert(hdr, bf, newdup,
- needlogp);
- xfs_dir2_data_freeinsert(hdr, bf, newdup2,
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+ xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
+ xfs_dir2_data_freeinsert(hdr, newdup2,
needlogp);
}
}
diff --git a/fs/xfs/xfs_da_format.h b/fs/xfs/xfs_dir2_format.h
index a19d3f8..9cf6738 100644
--- a/fs/xfs/xfs_da_format.h
+++ b/fs/xfs/xfs_dir2_format.h
@@ -16,113 +16,8 @@
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#ifndef __XFS_DA_FORMAT_H__
-#define __XFS_DA_FORMAT_H__
-
-/*========================================================================
- * Directory Structure when greater than XFS_LBSIZE(mp) bytes.
- *========================================================================*/
-
-/*
- * This structure is common to both leaf nodes and non-leaf nodes in the Btree.
- *
- * It is used to manage a doubly linked list of all blocks at the same
- * level in the Btree, and to identify which type of block this is.
- */
-#define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */
-#define XFS_ATTR_LEAF_MAGIC 0xfbee /* magic number: attribute leaf blks */
-#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */
-#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */
-
-typedef struct xfs_da_blkinfo {
- __be32 forw; /* previous block in list */
- __be32 back; /* following block in list */
- __be16 magic; /* validity check on block */
- __be16 pad; /* unused */
-} xfs_da_blkinfo_t;
-
-/*
- * CRC enabled directory structure types
- *
- * The headers change size for the additional verification information, but
- * otherwise the tree layouts and contents are unchanged. Hence the da btree
- * code can use the struct xfs_da_blkinfo for manipulating the tree links and
- * magic numbers without modification for both v2 and v3 nodes.
- */
-#define XFS_DA3_NODE_MAGIC 0x3ebe /* magic number: non-leaf blocks */
-#define XFS_ATTR3_LEAF_MAGIC 0x3bee /* magic number: attribute leaf blks */
-#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v2 dirlf single blks */
-#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v2 dirlf multi blks */
-
-struct xfs_da3_blkinfo {
- /*
- * the node link manipulation code relies on the fact that the first
- * element of this structure is the struct xfs_da_blkinfo so it can
- * ignore the differences in the rest of the structures.
- */
- struct xfs_da_blkinfo hdr;
- __be32 crc; /* CRC of block */
- __be64 blkno; /* first block of the buffer */
- __be64 lsn; /* sequence number of last write */
- uuid_t uuid; /* filesystem we belong to */
- __be64 owner; /* inode that owns the block */
-};
-
-/*
- * This is the structure of the root and intermediate nodes in the Btree.
- * The leaf nodes are defined above.
- *
- * Entries are not packed.
- *
- * Since we have duplicate keys, use a binary search but always follow
- * all match in the block, not just the first match found.
- */
-#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */
-
-typedef struct xfs_da_node_hdr {
- struct xfs_da_blkinfo info; /* block type, links, etc. */
- __be16 __count; /* count of active entries */
- __be16 __level; /* level above leaves (leaf == 0) */
-} xfs_da_node_hdr_t;
-
-struct xfs_da3_node_hdr {
- struct xfs_da3_blkinfo info; /* block type, links, etc. */
- __be16 __count; /* count of active entries */
- __be16 __level; /* level above leaves (leaf == 0) */
- __be32 __pad32;
-};
-
-#define XFS_DA3_NODE_CRC_OFF (offsetof(struct xfs_da3_node_hdr, info.crc))
-
-typedef struct xfs_da_node_entry {
- __be32 hashval; /* hash value for this descendant */
- __be32 before; /* Btree block before this key */
-} xfs_da_node_entry_t;
-
-typedef struct xfs_da_intnode {
- struct xfs_da_node_hdr hdr;
- struct xfs_da_node_entry __btree[];
-} xfs_da_intnode_t;
-
-struct xfs_da3_intnode {
- struct xfs_da3_node_hdr hdr;
- struct xfs_da_node_entry __btree[];
-};
-
-/*
- * In-core version of the node header to abstract the differences in the v2 and
- * v3 disk format of the headers. Callers need to convert to/from disk format as
- * appropriate.
- */
-struct xfs_da3_icnode_hdr {
- __uint32_t forw;
- __uint32_t back;
- __uint16_t magic;
- __uint16_t count;
- __uint16_t level;
-};
-
-#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize
+#ifndef __XFS_DIR2_FORMAT_H__
+#define __XFS_DIR2_FORMAT_H__
/*
* Directory version 2.
@@ -294,6 +189,79 @@ xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count));
}
+static inline int
+xfs_dir3_sf_entsize(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *hdr,
+ int len)
+{
+ int count = sizeof(struct xfs_dir2_sf_entry); /* namelen + offset */
+
+ count += len; /* name */
+ count += hdr->i8count ? sizeof(xfs_dir2_ino8_t) :
+ sizeof(xfs_dir2_ino4_t); /* ino # */
+ if (xfs_sb_version_hasftype(&mp->m_sb))
+ count += sizeof(__uint8_t); /* file type */
+ return count;
+}
+
+static inline struct xfs_dir2_sf_entry *
+xfs_dir3_sf_nextentry(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep)
+{
+ return (struct xfs_dir2_sf_entry *)
+ ((char *)sfep + xfs_dir3_sf_entsize(mp, hdr, sfep->namelen));
+}
+
+/*
+ * in dir3 shortform directories, the file type field is stored at a variable
+ * offset after the inode number. Because it's only a single byte, endian
+ * conversion is not necessary.
+ */
+static inline __uint8_t *
+xfs_dir3_sfe_ftypep(
+ struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep)
+{
+ return (__uint8_t *)&sfep->name[sfep->namelen];
+}
+
+static inline __uint8_t
+xfs_dir3_sfe_get_ftype(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep)
+{
+ __uint8_t *ftp;
+
+ if (!xfs_sb_version_hasftype(&mp->m_sb))
+ return XFS_DIR3_FT_UNKNOWN;
+
+ ftp = xfs_dir3_sfe_ftypep(hdr, sfep);
+ if (*ftp >= XFS_DIR3_FT_MAX)
+ return XFS_DIR3_FT_UNKNOWN;
+ return *ftp;
+}
+
+static inline void
+xfs_dir3_sfe_put_ftype(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep,
+ __uint8_t ftype)
+{
+ __uint8_t *ftp;
+
+ ASSERT(ftype < XFS_DIR3_FT_MAX);
+
+ if (!xfs_sb_version_hasftype(&mp->m_sb))
+ return;
+ ftp = xfs_dir3_sfe_ftypep(hdr, sfep);
+ *ftp = ftype;
+}
+
/*
* Data block structures.
*
@@ -377,6 +345,17 @@ struct xfs_dir3_data_hdr {
#define XFS_DIR3_DATA_CRC_OFF offsetof(struct xfs_dir3_data_hdr, hdr.crc)
+static inline struct xfs_dir2_data_free *
+xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr)
+{
+ if (hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
+ struct xfs_dir3_data_hdr *hdr3 = (struct xfs_dir3_data_hdr *)hdr;
+ return hdr3->best_free;
+ }
+ return hdr->bestfree;
+}
+
/*
* Active entry in a data block.
*
@@ -410,6 +389,72 @@ typedef struct xfs_dir2_data_unused {
} xfs_dir2_data_unused_t;
/*
+ * Size of a data entry.
+ */
+static inline int
+__xfs_dir3_data_entsize(
+ bool ftype,
+ int n)
+{
+ int size = offsetof(struct xfs_dir2_data_entry, name[0]);
+
+ size += n;
+ size += sizeof(xfs_dir2_data_off_t);
+ if (ftype)
+ size += sizeof(__uint8_t);
+ return roundup(size, XFS_DIR2_DATA_ALIGN);
+}
+static inline int
+xfs_dir3_data_entsize(
+ struct xfs_mount *mp,
+ int n)
+{
+ bool ftype = xfs_sb_version_hasftype(&mp->m_sb) ? true : false;
+ return __xfs_dir3_data_entsize(ftype, n);
+}
+
+static inline __uint8_t
+xfs_dir3_dirent_get_ftype(
+ struct xfs_mount *mp,
+ struct xfs_dir2_data_entry *dep)
+{
+ if (xfs_sb_version_hasftype(&mp->m_sb)) {
+ __uint8_t type = dep->name[dep->namelen];
+
+ ASSERT(type < XFS_DIR3_FT_MAX);
+ if (type < XFS_DIR3_FT_MAX)
+ return type;
+
+ }
+ return XFS_DIR3_FT_UNKNOWN;
+}
+
+static inline void
+xfs_dir3_dirent_put_ftype(
+ struct xfs_mount *mp,
+ struct xfs_dir2_data_entry *dep,
+ __uint8_t type)
+{
+ ASSERT(type < XFS_DIR3_FT_MAX);
+ ASSERT(dep->namelen != 0);
+
+ if (xfs_sb_version_hasftype(&mp->m_sb))
+ dep->name[dep->namelen] = type;
+}
+
+/*
+ * Pointer to an entry's tag word.
+ */
+static inline __be16 *
+xfs_dir3_data_entry_tag_p(
+ struct xfs_mount *mp,
+ struct xfs_dir2_data_entry *dep)
+{
+ return (__be16 *)((char *)dep +
+ xfs_dir3_data_entsize(mp, dep->namelen) - sizeof(__be16));
+}
+
+/*
* Pointer to a freespace's tag word.
*/
static inline __be16 *
@@ -419,6 +464,93 @@ xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup)
be16_to_cpu(dup->length) - sizeof(__be16));
}
+static inline size_t
+xfs_dir3_data_hdr_size(bool dir3)
+{
+ if (dir3)
+ return sizeof(struct xfs_dir3_data_hdr);
+ return sizeof(struct xfs_dir2_data_hdr);
+}
+
+static inline size_t
+xfs_dir3_data_entry_offset(struct xfs_dir2_data_hdr *hdr)
+{
+ bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
+ return xfs_dir3_data_hdr_size(dir3);
+}
+
+static inline struct xfs_dir2_data_entry *
+xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr)
+{
+ return (struct xfs_dir2_data_entry *)
+ ((char *)hdr + xfs_dir3_data_entry_offset(hdr));
+}
+
+static inline struct xfs_dir2_data_unused *
+xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr)
+{
+ return (struct xfs_dir2_data_unused *)
+ ((char *)hdr + xfs_dir3_data_entry_offset(hdr));
+}
+
+/*
+ * Offsets of . and .. in data space (always block 0)
+ *
+ * XXX: there is scope for significant optimisation of the logic here. Right
+ * now we are checking for "dir3 format" over and over again. Ideally we should
+ * only do it once for each operation.
+ */
+static inline xfs_dir2_data_aoff_t
+xfs_dir3_data_dot_offset(struct xfs_mount *mp)
+{
+ return xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&mp->m_sb));
+}
+
+static inline xfs_dir2_data_aoff_t
+xfs_dir3_data_dotdot_offset(struct xfs_mount *mp)
+{
+ return xfs_dir3_data_dot_offset(mp) +
+ xfs_dir3_data_entsize(mp, 1);
+}
+
+static inline xfs_dir2_data_aoff_t
+xfs_dir3_data_first_offset(struct xfs_mount *mp)
+{
+ return xfs_dir3_data_dotdot_offset(mp) +
+ xfs_dir3_data_entsize(mp, 2);
+}
+
+/*
+ * location of . and .. in data space (always block 0)
+ */
+static inline struct xfs_dir2_data_entry *
+xfs_dir3_data_dot_entry_p(
+ struct xfs_mount *mp,
+ struct xfs_dir2_data_hdr *hdr)
+{
+ return (struct xfs_dir2_data_entry *)
+ ((char *)hdr + xfs_dir3_data_dot_offset(mp));
+}
+
+static inline struct xfs_dir2_data_entry *
+xfs_dir3_data_dotdot_entry_p(
+ struct xfs_mount *mp,
+ struct xfs_dir2_data_hdr *hdr)
+{
+ return (struct xfs_dir2_data_entry *)
+ ((char *)hdr + xfs_dir3_data_dotdot_offset(mp));
+}
+
+static inline struct xfs_dir2_data_entry *
+xfs_dir3_data_first_entry_p(
+ struct xfs_mount *mp,
+ struct xfs_dir2_data_hdr *hdr)
+{
+ return (struct xfs_dir2_data_entry *)
+ ((char *)hdr + xfs_dir3_data_first_offset(mp));
+}
+
/*
* Leaf block structures.
*
@@ -513,6 +645,39 @@ struct xfs_dir3_leaf {
#define XFS_DIR3_LEAF_CRC_OFF offsetof(struct xfs_dir3_leaf_hdr, info.crc)
+extern void xfs_dir3_leaf_hdr_from_disk(struct xfs_dir3_icleaf_hdr *to,
+ struct xfs_dir2_leaf *from);
+
+static inline int
+xfs_dir3_leaf_hdr_size(struct xfs_dir2_leaf *lp)
+{
+ if (lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
+ lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC))
+ return sizeof(struct xfs_dir3_leaf_hdr);
+ return sizeof(struct xfs_dir2_leaf_hdr);
+}
+
+static inline int
+xfs_dir3_max_leaf_ents(struct xfs_mount *mp, struct xfs_dir2_leaf *lp)
+{
+ return (mp->m_dirblksize - xfs_dir3_leaf_hdr_size(lp)) /
+ (uint)sizeof(struct xfs_dir2_leaf_entry);
+}
+
+/*
+ * Get address of the leaf entries array (v2 or v3 header layout) in a leaf block.
+ */
+static inline struct xfs_dir2_leaf_entry *
+xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp)
+{
+ if (lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
+ lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
+ struct xfs_dir3_leaf *lp3 = (struct xfs_dir3_leaf *)lp;
+ return lp3->__ents;
+ }
+ return lp->__ents;
+}
+
/*
* Get address of the bestcount field in the single-leaf block.
*/
@@ -704,6 +869,48 @@ struct xfs_dir3_icfree_hdr {
};
+void xfs_dir3_free_hdr_from_disk(struct xfs_dir3_icfree_hdr *to,
+ struct xfs_dir2_free *from);
+
+static inline int
+xfs_dir3_free_hdr_size(struct xfs_mount *mp)
+{
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ return sizeof(struct xfs_dir3_free_hdr);
+ return sizeof(struct xfs_dir2_free_hdr);
+}
+
+static inline int
+xfs_dir3_free_max_bests(struct xfs_mount *mp)
+{
+ return (mp->m_dirblksize - xfs_dir3_free_hdr_size(mp)) /
+ sizeof(xfs_dir2_data_off_t);
+}
+
+static inline __be16 *
+xfs_dir3_free_bests_p(struct xfs_mount *mp, struct xfs_dir2_free *free)
+{
+ return (__be16 *)((char *)free + xfs_dir3_free_hdr_size(mp));
+}
+
+/*
+ * Convert data space db to the corresponding free db.
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
+{
+ return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir3_free_max_bests(mp);
+}
+
+/*
+ * Convert data space db to the corresponding index in a free db.
+ */
+static inline int
+xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
+{
+ return db % xfs_dir3_free_max_bests(mp);
+}
+
/*
* Single block format.
*
@@ -754,262 +961,4 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
}
-
-/*
- * Attribute storage layout
- *
- * Attribute lists are structured around Btrees where all the data
- * elements are in the leaf nodes. Attribute names are hashed into an int,
- * then that int is used as the index into the Btree. Since the hashval
- * of an attribute name may not be unique, we may have duplicate keys. The
- * internal links in the Btree are logical block offsets into the file.
- *
- *========================================================================
- * Attribute structure when equal to XFS_LBSIZE(mp) bytes.
- *========================================================================
- *
- * Struct leaf_entry's are packed from the top. Name/values grow from the
- * bottom but are not packed. The freemap contains run-length-encoded entries
- * for the free bytes after the leaf_entry's, but only the N largest such,
- * smaller runs are dropped. When the freemap doesn't show enough space
- * for an allocation, we compact the name/value area and try again. If we
- * still don't have enough space, then we have to split the block. The
- * name/value structs (both local and remote versions) must be 32bit aligned.
- *
- * Since we have duplicate hash keys, for each key that matches, compare
- * the actual name string. The root and intermediate node search always
- * takes the first-in-the-block key match found, so we should only have
- * to work "forw"ard. If none matches, continue with the "forw"ard leaf
- * nodes until the hash key changes or the attribute name is found.
- *
- * We store the fact that an attribute is a ROOT/USER/SECURE attribute in
- * the leaf_entry. The namespaces are independent only because we also look
- * at the namespace bit when we are looking for a matching attribute name.
- *
- * We also store an "incomplete" bit in the leaf_entry. It shows that an
- * attribute is in the middle of being created and should not be shown to
- * the user if we crash during the time that the bit is set. We clear the
- * bit when we have finished setting up the attribute. We do this because
- * we cannot create some large attributes inside a single transaction, and we
- * need some indication that we weren't finished if we crash in the middle.
- */
-#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */
-
-typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */
- __be16 base; /* base of free region */
- __be16 size; /* length of free region */
-} xfs_attr_leaf_map_t;
-
-typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */
- xfs_da_blkinfo_t info; /* block type, links, etc. */
- __be16 count; /* count of active leaf_entry's */
- __be16 usedbytes; /* num bytes of names/values stored */
- __be16 firstused; /* first used byte in name area */
- __u8 holes; /* != 0 if blk needs compaction */
- __u8 pad1;
- xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE];
- /* N largest free regions */
-} xfs_attr_leaf_hdr_t;
-
-typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */
- __be32 hashval; /* hash value of name */
- __be16 nameidx; /* index into buffer of name/value */
- __u8 flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */
- __u8 pad2; /* unused pad byte */
-} xfs_attr_leaf_entry_t;
-
-typedef struct xfs_attr_leaf_name_local {
- __be16 valuelen; /* number of bytes in value */
- __u8 namelen; /* length of name bytes */
- __u8 nameval[1]; /* name/value bytes */
-} xfs_attr_leaf_name_local_t;
-
-typedef struct xfs_attr_leaf_name_remote {
- __be32 valueblk; /* block number of value bytes */
- __be32 valuelen; /* number of bytes in value */
- __u8 namelen; /* length of name bytes */
- __u8 name[1]; /* name bytes */
-} xfs_attr_leaf_name_remote_t;
-
-typedef struct xfs_attr_leafblock {
- xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */
- xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */
- xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */
- xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */
-} xfs_attr_leafblock_t;
-
-/*
- * CRC enabled leaf structures. Called "version 3" structures to match the
- * version number of the directory and dablk structures for this feature, and
- * attr2 is already taken by the variable inode attribute fork size feature.
- */
-struct xfs_attr3_leaf_hdr {
- struct xfs_da3_blkinfo info;
- __be16 count;
- __be16 usedbytes;
- __be16 firstused;
- __u8 holes;
- __u8 pad1;
- struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE];
- __be32 pad2; /* 64 bit alignment */
-};
-
-#define XFS_ATTR3_LEAF_CRC_OFF (offsetof(struct xfs_attr3_leaf_hdr, info.crc))
-
-struct xfs_attr3_leafblock {
- struct xfs_attr3_leaf_hdr hdr;
- struct xfs_attr_leaf_entry entries[1];
-
- /*
- * The rest of the block contains the following structures after the
- * leaf entries, growing from the bottom up. The variables are never
- * referenced, the locations accessed purely from helper functions.
- *
- * struct xfs_attr_leaf_name_local
- * struct xfs_attr_leaf_name_remote
- */
-};
-
-/*
- * incore, neutral version of the attribute leaf header
- */
-struct xfs_attr3_icleaf_hdr {
- __uint32_t forw;
- __uint32_t back;
- __uint16_t magic;
- __uint16_t count;
- __uint16_t usedbytes;
- __uint16_t firstused;
- __u8 holes;
- struct {
- __uint16_t base;
- __uint16_t size;
- } freemap[XFS_ATTR_LEAF_MAPSIZE];
-};
-
-/*
- * Flags used in the leaf_entry[i].flags field.
- * NOTE: the INCOMPLETE bit must not collide with the flags bits specified
- * on the system call, they are "or"ed together for various operations.
- */
-#define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */
-#define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */
-#define XFS_ATTR_SECURE_BIT 2 /* limit access to secure attrs */
-#define XFS_ATTR_INCOMPLETE_BIT 7 /* attr in middle of create/delete */
-#define XFS_ATTR_LOCAL (1 << XFS_ATTR_LOCAL_BIT)
-#define XFS_ATTR_ROOT (1 << XFS_ATTR_ROOT_BIT)
-#define XFS_ATTR_SECURE (1 << XFS_ATTR_SECURE_BIT)
-#define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT)
-
-/*
- * Conversion macros for converting namespace bits from argument flags
- * to ondisk flags.
- */
-#define XFS_ATTR_NSP_ARGS_MASK (ATTR_ROOT | ATTR_SECURE)
-#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE)
-#define XFS_ATTR_NSP_ONDISK(flags) ((flags) & XFS_ATTR_NSP_ONDISK_MASK)
-#define XFS_ATTR_NSP_ARGS(flags) ((flags) & XFS_ATTR_NSP_ARGS_MASK)
-#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x) (((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\
- ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0))
-#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x) (((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\
- ((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0))
-
-/*
- * Alignment for namelist and valuelist entries (since they are mixed
- * there can be only one alignment value)
- */
-#define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t))
-
-static inline int
-xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp)
-{
- if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
- return sizeof(struct xfs_attr3_leaf_hdr);
- return sizeof(struct xfs_attr_leaf_hdr);
-}
-
-static inline struct xfs_attr_leaf_entry *
-xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp)
-{
- if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
- return &((struct xfs_attr3_leafblock *)leafp)->entries[0];
- return &leafp->entries[0];
-}
-
-/*
- * Cast typed pointers for "local" and "remote" name/value structs.
- */
-static inline char *
-xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
-{
- struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp);
-
- return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)];
-}
-
-static inline xfs_attr_leaf_name_remote_t *
-xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx)
-{
- return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx);
-}
-
-static inline xfs_attr_leaf_name_local_t *
-xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
-{
- return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx);
-}
-
-/*
- * Calculate total bytes used (including trailing pad for alignment) for
- * a "local" name/value structure, a "remote" name/value structure, and
- * a pointer which might be either.
- */
-static inline int xfs_attr_leaf_entsize_remote(int nlen)
-{
- return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \
- XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
-}
-
-static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen)
-{
- return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) +
- XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
-}
-
-static inline int xfs_attr_leaf_entsize_local_max(int bsize)
-{
- return (((bsize) >> 1) + ((bsize) >> 2));
-}
-
-
-
-/*
- * Remote attribute block format definition
- *
- * There is one of these headers per filesystem block in a remote attribute.
- * This is done to ensure there is a 1:1 mapping between the attribute value
- * length and the number of blocks needed to store the attribute. This makes the
- * verification of a buffer a little more complex, but greatly simplifies the
- * allocation, reading and writing of these attributes as we don't have to guess
- * the number of blocks needed to store the attribute data.
- */
-#define XFS_ATTR3_RMT_MAGIC 0x5841524d /* XARM */
-
-struct xfs_attr3_rmt_hdr {
- __be32 rm_magic;
- __be32 rm_offset;
- __be32 rm_bytes;
- __be32 rm_crc;
- uuid_t rm_uuid;
- __be64 rm_owner;
- __be64 rm_blkno;
- __be64 rm_lsn;
-};
-
-#define XFS_ATTR3_RMT_CRC_OFF offsetof(struct xfs_attr3_rmt_hdr, rm_crc)
-
-#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize) \
- ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
- sizeof(struct xfs_attr3_rmt_hdr) : 0))
-
-#endif /* __XFS_DA_FORMAT_H__ */
+#endif /* __XFS_DIR2_FORMAT_H__ */
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index ae47ec6..1021c83 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -18,21 +18,23 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
@@ -50,21 +52,21 @@ static void xfs_dir3_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp);
* Pop an assert if something is wrong.
*/
#ifdef DEBUG
-#define xfs_dir3_leaf_check(dp, bp) \
+#define xfs_dir3_leaf_check(mp, bp) \
do { \
- if (!xfs_dir3_leaf1_check((dp), (bp))) \
+ if (!xfs_dir3_leaf1_check((mp), (bp))) \
ASSERT(0); \
} while (0);
STATIC bool
xfs_dir3_leaf1_check(
- struct xfs_inode *dp,
+ struct xfs_mount *mp,
struct xfs_buf *bp)
{
struct xfs_dir2_leaf *leaf = bp->b_addr;
struct xfs_dir3_icleaf_hdr leafhdr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
@@ -73,16 +75,71 @@ xfs_dir3_leaf1_check(
} else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC)
return false;
- return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);
+ return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf);
}
#else
-#define xfs_dir3_leaf_check(dp, bp)
+#define xfs_dir3_leaf_check(mp, bp)
#endif
+/*
+ * Decode the on-disk leaf block header at @from into the in-core header @to.
+ *
+ * The on-disk magic number selects the layout: a block carrying
+ * XFS_DIR2_LEAF1_MAGIC or XFS_DIR2_LEAFN_MAGIC is read through
+ * struct xfs_dir2_leaf, any other block is read through
+ * struct xfs_dir3_leaf_hdr.  Every field is converted from big-endian
+ * to CPU byte order on the way in.
+ */
+void
+xfs_dir3_leaf_hdr_from_disk(
+	struct xfs_dir3_icleaf_hdr	*to,
+	struct xfs_dir2_leaf		*from)
+{
+	if (from->hdr.info.magic != cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) &&
+	    from->hdr.info.magic != cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
+		/* Neither dir2 magic matched: overlay the dir3 header. */
+		struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from;
+
+		to->forw = be32_to_cpu(hdr3->info.hdr.forw);
+		to->back = be32_to_cpu(hdr3->info.hdr.back);
+		to->magic = be16_to_cpu(hdr3->info.hdr.magic);
+		to->count = be16_to_cpu(hdr3->count);
+		to->stale = be16_to_cpu(hdr3->stale);
+	} else {
+		/* dir2 leaf1/leafn block: fields live in the dir2 header. */
+		to->forw = be32_to_cpu(from->hdr.info.forw);
+		to->back = be32_to_cpu(from->hdr.info.back);
+		to->magic = be16_to_cpu(from->hdr.info.magic);
+		to->count = be16_to_cpu(from->hdr.count);
+		to->stale = be16_to_cpu(from->hdr.stale);
+	}
+
+	/* Whichever layout was used, the decoded magic must be a leaf magic. */
+	ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC ||
+	       to->magic == XFS_DIR3_LEAF1_MAGIC ||
+	       to->magic == XFS_DIR2_LEAFN_MAGIC ||
+	       to->magic == XFS_DIR3_LEAFN_MAGIC);
+}
+
+/*
+ * Encode the in-core leaf header @from into the on-disk block at @to.
+ *
+ * The in-core magic selects the on-disk layout: XFS_DIR2_LEAF1_MAGIC and
+ * XFS_DIR2_LEAFN_MAGIC are written through struct xfs_dir2_leaf, the
+ * remaining (dir3) magics through struct xfs_dir3_leaf_hdr.  Every field
+ * is converted from CPU to big-endian byte order on the way out.
+ */
+void
+xfs_dir3_leaf_hdr_to_disk(
+	struct xfs_dir2_leaf		*to,
+	struct xfs_dir3_icleaf_hdr	*from)
+{
+	/* Only the four known leaf magics may be written back. */
+	ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC ||
+	       from->magic == XFS_DIR3_LEAF1_MAGIC ||
+	       from->magic == XFS_DIR2_LEAFN_MAGIC ||
+	       from->magic == XFS_DIR3_LEAFN_MAGIC);
+
+	if (from->magic != XFS_DIR2_LEAF1_MAGIC &&
+	    from->magic != XFS_DIR2_LEAFN_MAGIC) {
+		/* Not a dir2 magic: overlay the dir3 header on the block. */
+		struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to;
+
+		hdr3->info.hdr.forw = cpu_to_be32(from->forw);
+		hdr3->info.hdr.back = cpu_to_be32(from->back);
+		hdr3->info.hdr.magic = cpu_to_be16(from->magic);
+		hdr3->count = cpu_to_be16(from->count);
+		hdr3->stale = cpu_to_be16(from->stale);
+	} else {
+		/* dir2 leaf1/leafn block: fill in the dir2 header fields. */
+		to->hdr.info.forw = cpu_to_be32(from->forw);
+		to->hdr.info.back = cpu_to_be32(from->back);
+		to->hdr.info.magic = cpu_to_be16(from->magic);
+		to->hdr.count = cpu_to_be16(from->count);
+		to->hdr.stale = cpu_to_be16(from->stale);
+	}
+}
+
bool
xfs_dir3_leaf_check_int(
struct xfs_mount *mp,
- struct xfs_inode *dp,
struct xfs_dir3_icleaf_hdr *hdr,
struct xfs_dir2_leaf *leaf)
{
@@ -90,21 +147,8 @@ xfs_dir3_leaf_check_int(
xfs_dir2_leaf_tail_t *ltp;
int stale;
int i;
- const struct xfs_dir_ops *ops;
- struct xfs_dir3_icleaf_hdr leafhdr;
- /*
- * we can be passed a null dp here from a verifier, so we need to go the
- * hard way to get them.
- */
- ops = xfs_dir_get_ops(mp, dp);
-
- if (!hdr) {
- ops->leaf_hdr_from_disk(&leafhdr, leaf);
- hdr = &leafhdr;
- }
-
- ents = ops->leaf_ents_p(leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
/*
@@ -112,7 +156,7 @@ xfs_dir3_leaf_check_int(
* Should factor in the size of the bests table as well.
* We can deduce a value for that from di_size.
*/
- if (hdr->count > ops->leaf_max_ents(mp))
+ if (hdr->count > xfs_dir3_max_leaf_ents(mp, leaf))
return false;
/* Leaves and bests don't overlap in leaf format. */
@@ -148,6 +192,7 @@ xfs_dir3_leaf_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_dir2_leaf *leaf = bp->b_addr;
+ struct xfs_dir3_icleaf_hdr leafhdr;
ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
@@ -169,7 +214,8 @@ xfs_dir3_leaf_verify(
return false;
}
- return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
+ return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf);
}
static void
@@ -355,7 +401,7 @@ xfs_dir3_leaf_get_buf(
return error;
xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic);
- xfs_dir3_leaf_log_header(tp, dp, bp);
+ xfs_dir3_leaf_log_header(tp, bp);
if (magic == XFS_DIR2_LEAF1_MAGIC)
xfs_dir3_leaf_log_tail(tp, bp);
*bpp = bp;
@@ -416,31 +462,31 @@ xfs_dir2_block_to_leaf(
xfs_dir3_data_check(dp, dbp);
btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
- bf = dp->d_ops->data_bestfree_p(hdr);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ bf = xfs_dir3_data_bestfree_p(hdr);
+ ents = xfs_dir3_leaf_ents_p(leaf);
/*
* Set the counts in the leaf header.
*/
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
leafhdr.count = be32_to_cpu(btp->count);
leafhdr.stale = be32_to_cpu(btp->stale);
- dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
- xfs_dir3_leaf_log_header(tp, dp, lbp);
+ xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
+ xfs_dir3_leaf_log_header(tp, lbp);
/*
* Could compact these but I think we always do the conversion
* after squeezing out stale entries.
*/
memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
- xfs_dir3_leaf_log_ents(tp, dp, lbp, 0, leafhdr.count - 1);
+ xfs_dir3_leaf_log_ents(tp, lbp, 0, leafhdr.count - 1);
needscan = 0;
needlog = 1;
/*
* Make the space formerly occupied by the leaf entries and block
* tail be free.
*/
- xfs_dir2_data_make_free(tp, dp, dbp,
+ xfs_dir2_data_make_free(tp, dbp,
(xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
(xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize -
(char *)blp),
@@ -456,7 +502,7 @@ xfs_dir2_block_to_leaf(
hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
/*
* Set up leaf tail and bests table.
*/
@@ -468,8 +514,8 @@ xfs_dir2_block_to_leaf(
* Log the data header and leaf bests table.
*/
if (needlog)
- xfs_dir2_data_log_header(tp, dp, dbp);
- xfs_dir3_leaf_check(dp, lbp);
+ xfs_dir2_data_log_header(tp, dbp);
+ xfs_dir3_leaf_check(mp, lbp);
xfs_dir3_data_check(dp, dbp);
xfs_dir3_leaf_log_bests(tp, lbp, 0, 0);
return 0;
@@ -653,10 +699,10 @@ xfs_dir2_leaf_addname(
index = xfs_dir2_leaf_search_hash(args, lbp);
leaf = lbp->b_addr;
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
bestsp = xfs_dir2_leaf_bests_p(ltp);
- length = dp->d_ops->data_entsize(args->namelen);
+ length = xfs_dir3_data_entsize(mp, args->namelen);
/*
* See if there are any entries with the same hash value
@@ -818,7 +864,7 @@ xfs_dir2_leaf_addname(
else
xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block);
hdr = dbp->b_addr;
- bf = dp->d_ops->data_bestfree_p(hdr);
+ bf = xfs_dir3_data_bestfree_p(hdr);
bestsp[use_block] = bf[0].length;
grown = 1;
} else {
@@ -834,7 +880,7 @@ xfs_dir2_leaf_addname(
return error;
}
hdr = dbp->b_addr;
- bf = dp->d_ops->data_bestfree_p(hdr);
+ bf = xfs_dir3_data_bestfree_p(hdr);
grown = 0;
}
/*
@@ -847,7 +893,7 @@ xfs_dir2_leaf_addname(
/*
* Mark the initial part of our freespace in use for the new entry.
*/
- xfs_dir2_data_use_free(tp, dp, dbp, dup,
+ xfs_dir2_data_use_free(tp, dbp, dup,
(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
&needlog, &needscan);
/*
@@ -857,20 +903,20 @@ xfs_dir2_leaf_addname(
dep->inumber = cpu_to_be64(args->inumber);
dep->namelen = args->namelen;
memcpy(dep->name, args->name, dep->namelen);
- dp->d_ops->data_put_ftype(dep, args->filetype);
- tagp = dp->d_ops->data_entry_tag_p(dep);
+ xfs_dir3_dirent_put_ftype(mp, dep, args->filetype);
+ tagp = xfs_dir3_data_entry_tag_p(mp, dep);
*tagp = cpu_to_be16((char *)dep - (char *)hdr);
/*
* Need to scan fix up the bestfree table.
*/
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
/*
* Need to log the data block's header.
*/
if (needlog)
- xfs_dir2_data_log_header(tp, dp, dbp);
- xfs_dir2_data_log_entry(tp, dp, dbp, dep);
+ xfs_dir2_data_log_header(tp, dbp);
+ xfs_dir2_data_log_entry(tp, dbp, dep);
/*
* If the bests table needs to be changed, do it.
* Log the change unless we've already done that.
@@ -893,10 +939,10 @@ xfs_dir2_leaf_addname(
/*
* Log the leaf fields and give up the buffers.
*/
- dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
- xfs_dir3_leaf_log_header(tp, dp, lbp);
- xfs_dir3_leaf_log_ents(tp, dp, lbp, lfloglow, lfloghigh);
- xfs_dir3_leaf_check(dp, lbp);
+ xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
+ xfs_dir3_leaf_log_header(tp, lbp);
+ xfs_dir3_leaf_log_ents(tp, lbp, lfloglow, lfloghigh);
+ xfs_dir3_leaf_check(mp, lbp);
xfs_dir3_data_check(dp, dbp);
return 0;
}
@@ -916,7 +962,6 @@ xfs_dir3_leaf_compact(
int loglow; /* first leaf entry to log */
int to; /* target leaf index */
struct xfs_dir2_leaf_entry *ents;
- struct xfs_inode *dp = args->dp;
leaf = bp->b_addr;
if (!leafhdr->stale)
@@ -925,7 +970,7 @@ xfs_dir3_leaf_compact(
/*
* Compress out the stale entries in place.
*/
- ents = dp->d_ops->leaf_ents_p(leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
for (from = to = 0, loglow = -1; from < leafhdr->count; from++) {
if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
continue;
@@ -946,10 +991,10 @@ xfs_dir3_leaf_compact(
leafhdr->count -= leafhdr->stale;
leafhdr->stale = 0;
- dp->d_ops->leaf_hdr_to_disk(leaf, leafhdr);
- xfs_dir3_leaf_log_header(args->trans, dp, bp);
+ xfs_dir3_leaf_hdr_to_disk(leaf, leafhdr);
+ xfs_dir3_leaf_log_header(args->trans, bp);
if (loglow != -1)
- xfs_dir3_leaf_log_ents(args->trans, dp, bp, loglow, to - 1);
+ xfs_dir3_leaf_log_ents(args->trans, bp, loglow, to - 1);
}
/*
@@ -1076,11 +1121,10 @@ xfs_dir3_leaf_log_bests(
*/
void
xfs_dir3_leaf_log_ents(
- struct xfs_trans *tp,
- struct xfs_inode *dp,
- struct xfs_buf *bp,
- int first,
- int last)
+ xfs_trans_t *tp, /* transaction pointer */
+ struct xfs_buf *bp, /* leaf buffer */
+ int first, /* first entry to log */
+ int last) /* last entry to log */
{
xfs_dir2_leaf_entry_t *firstlep; /* pointer to first entry */
xfs_dir2_leaf_entry_t *lastlep; /* pointer to last entry */
@@ -1092,7 +1136,7 @@ xfs_dir3_leaf_log_ents(
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
- ents = dp->d_ops->leaf_ents_p(leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
firstlep = &ents[first];
lastlep = &ents[last];
xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
@@ -1105,7 +1149,6 @@ xfs_dir3_leaf_log_ents(
void
xfs_dir3_leaf_log_header(
struct xfs_trans *tp,
- struct xfs_inode *dp,
struct xfs_buf *bp)
{
struct xfs_dir2_leaf *leaf = bp->b_addr;
@@ -1116,7 +1159,7 @@ xfs_dir3_leaf_log_header(
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
- dp->d_ops->leaf_hdr_size - 1);
+ xfs_dir3_leaf_hdr_size(leaf) - 1);
}
/*
@@ -1171,9 +1214,9 @@ xfs_dir2_leaf_lookup(
}
tp = args->trans;
dp = args->dp;
- xfs_dir3_leaf_check(dp, lbp);
+ xfs_dir3_leaf_check(dp->i_mount, lbp);
leaf = lbp->b_addr;
- ents = dp->d_ops->leaf_ents_p(leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
/*
* Get to the leaf entry and contained data entry address.
*/
@@ -1189,7 +1232,7 @@ xfs_dir2_leaf_lookup(
* Return the found inode number & CI name if appropriate
*/
args->inumber = be64_to_cpu(dep->inumber);
- args->filetype = dp->d_ops->data_get_ftype(dep);
+ args->filetype = xfs_dir3_dirent_get_ftype(dp->i_mount, dep);
error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
xfs_trans_brelse(tp, dbp);
xfs_trans_brelse(tp, lbp);
@@ -1236,9 +1279,9 @@ xfs_dir2_leaf_lookup_int(
*lbpp = lbp;
leaf = lbp->b_addr;
- xfs_dir3_leaf_check(dp, lbp);
- ents = dp->d_ops->leaf_ents_p(leaf);
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+ xfs_dir3_leaf_check(mp, lbp);
+ ents = xfs_dir3_leaf_ents_p(leaf);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
/*
* Look for the first leaf entry with our hash value.
@@ -1372,9 +1415,9 @@ xfs_dir2_leaf_removename(
leaf = lbp->b_addr;
hdr = dbp->b_addr;
xfs_dir3_data_check(dp, dbp);
- bf = dp->d_ops->data_bestfree_p(hdr);
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ bf = xfs_dir3_data_bestfree_p(hdr);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
/*
* Point to the leaf entry, use that to point to the data entry.
*/
@@ -1390,27 +1433,27 @@ xfs_dir2_leaf_removename(
/*
* Mark the former data entry unused.
*/
- xfs_dir2_data_make_free(tp, dp, dbp,
+ xfs_dir2_data_make_free(tp, dbp,
(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
- dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
+ xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan);
/*
* We just mark the leaf entry stale by putting a null in it.
*/
leafhdr.stale++;
- dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
- xfs_dir3_leaf_log_header(tp, dp, lbp);
+ xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
+ xfs_dir3_leaf_log_header(tp, lbp);
lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
- xfs_dir3_leaf_log_ents(tp, dp, lbp, index, index);
+ xfs_dir3_leaf_log_ents(tp, lbp, index, index);
/*
* Scan the freespace in the data block again if necessary,
* log the data block header if necessary.
*/
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
if (needlog)
- xfs_dir2_data_log_header(tp, dp, dbp);
+ xfs_dir2_data_log_header(tp, dbp);
/*
* If the longest freespace in the data block has changed,
* put the new value in the bests table and log that.
@@ -1424,7 +1467,7 @@ xfs_dir2_leaf_removename(
* If the data block is now empty then get rid of the data block.
*/
if (be16_to_cpu(bf[0].length) ==
- mp->m_dirblksize - dp->d_ops->data_entry_offset) {
+ mp->m_dirblksize - xfs_dir3_data_entry_offset(hdr)) {
ASSERT(db != mp->m_dirdatablk);
if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
/*
@@ -1435,7 +1478,7 @@ xfs_dir2_leaf_removename(
*/
if (error == ENOSPC && args->total == 0)
error = 0;
- xfs_dir3_leaf_check(dp, lbp);
+ xfs_dir3_leaf_check(mp, lbp);
return error;
}
dbp = NULL;
@@ -1469,7 +1512,7 @@ xfs_dir2_leaf_removename(
else if (db != mp->m_dirdatablk)
dbp = NULL;
- xfs_dir3_leaf_check(dp, lbp);
+ xfs_dir3_leaf_check(mp, lbp);
/*
* See if we can convert to block form.
*/
@@ -1504,7 +1547,7 @@ xfs_dir2_leaf_replace(
}
dp = args->dp;
leaf = lbp->b_addr;
- ents = dp->d_ops->leaf_ents_p(leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
/*
* Point to the leaf entry, get data address from it.
*/
@@ -1520,10 +1563,10 @@ xfs_dir2_leaf_replace(
* Put the new inode number in, log it.
*/
dep->inumber = cpu_to_be64(args->inumber);
- dp->d_ops->data_put_ftype(dep, args->filetype);
+ xfs_dir3_dirent_put_ftype(dp->i_mount, dep, args->filetype);
tp = args->trans;
- xfs_dir2_data_log_entry(tp, dp, dbp, dep);
- xfs_dir3_leaf_check(dp, lbp);
+ xfs_dir2_data_log_entry(tp, dbp, dep);
+ xfs_dir3_leaf_check(dp->i_mount, lbp);
xfs_trans_brelse(tp, lbp);
return 0;
}
@@ -1549,8 +1592,8 @@ xfs_dir2_leaf_search_hash(
struct xfs_dir3_icleaf_hdr leafhdr;
leaf = lbp->b_addr;
- ents = args->dp->d_ops->leaf_ents_p(leaf);
- args->dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
/*
* Note, the table cannot be empty, so we have to go through the loop.
@@ -1618,12 +1661,12 @@ xfs_dir2_leaf_trim_data(
#ifdef DEBUG
{
struct xfs_dir2_data_hdr *hdr = dbp->b_addr;
- struct xfs_dir2_data_free *bf = dp->d_ops->data_bestfree_p(hdr);
+ struct xfs_dir2_data_free *bf = xfs_dir3_data_bestfree_p(hdr);
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
ASSERT(be16_to_cpu(bf[0].length) ==
- mp->m_dirblksize - dp->d_ops->data_entry_offset);
+ mp->m_dirblksize - xfs_dir3_data_entry_offset(hdr));
ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
}
#endif
@@ -1739,7 +1782,7 @@ xfs_dir2_node_to_leaf(
return 0;
lbp = state->path.blk[0].bp;
leaf = lbp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
@@ -1751,7 +1794,7 @@ xfs_dir2_node_to_leaf(
if (error)
return error;
free = fbp->b_addr;
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
+ xfs_dir3_free_hdr_from_disk(&freehdr, free);
ASSERT(!freehdr.firstdb);
@@ -1785,14 +1828,14 @@ xfs_dir2_node_to_leaf(
/*
* Set up the leaf bests table.
*/
- memcpy(xfs_dir2_leaf_bests_p(ltp), dp->d_ops->free_bests_p(free),
+ memcpy(xfs_dir2_leaf_bests_p(ltp), xfs_dir3_free_bests_p(mp, free),
freehdr.nvalid * sizeof(xfs_dir2_data_off_t));
- dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
- xfs_dir3_leaf_log_header(tp, dp, lbp);
+ xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
+ xfs_dir3_leaf_log_header(tp, lbp);
xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
xfs_dir3_leaf_log_tail(tp, lbp);
- xfs_dir3_leaf_check(dp, lbp);
+ xfs_dir3_leaf_check(mp, lbp);
/*
* Get rid of the freespace block.
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 56369d4..4c3dba7 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -18,21 +18,22 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
@@ -54,21 +55,21 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
* Check internal consistency of a leafn block.
*/
#ifdef DEBUG
-#define xfs_dir3_leaf_check(dp, bp) \
+#define xfs_dir3_leaf_check(mp, bp) \
do { \
- if (!xfs_dir3_leafn_check((dp), (bp))) \
+ if (!xfs_dir3_leafn_check((mp), (bp))) \
ASSERT(0); \
} while (0);
static bool
xfs_dir3_leafn_check(
- struct xfs_inode *dp,
+ struct xfs_mount *mp,
struct xfs_buf *bp)
{
struct xfs_dir2_leaf *leaf = bp->b_addr;
struct xfs_dir3_icleaf_hdr leafhdr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) {
struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
@@ -77,10 +78,10 @@ xfs_dir3_leafn_check(
} else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC)
return false;
- return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);
+ return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf);
}
#else
-#define xfs_dir3_leaf_check(dp, bp)
+#define xfs_dir3_leaf_check(mp, bp)
#endif
static bool
@@ -192,6 +193,53 @@ xfs_dir2_free_try_read(
return __xfs_dir3_free_read(tp, dp, fbno, -2, bpp);
}
+
+/*
+ * Decode the on-disk free block header at @from into the in-core header @to.
+ *
+ * A block whose magic is XFS_DIR2_FREE_MAGIC is read through
+ * struct xfs_dir2_free; any other block is read through
+ * struct xfs_dir3_free_hdr.  Every field is converted from big-endian
+ * to CPU byte order on the way in.
+ */
+void
+xfs_dir3_free_hdr_from_disk(
+	struct xfs_dir3_icfree_hdr	*to,
+	struct xfs_dir2_free		*from)
+{
+	if (from->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)) {
+		/* Not the dir2 magic: overlay the dir3 free header. */
+		struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from;
+
+		to->magic = be32_to_cpu(hdr3->hdr.magic);
+		to->firstdb = be32_to_cpu(hdr3->firstdb);
+		to->nvalid = be32_to_cpu(hdr3->nvalid);
+		to->nused = be32_to_cpu(hdr3->nused);
+	} else {
+		/* dir2 free block: fields live directly in from->hdr. */
+		to->magic = be32_to_cpu(from->hdr.magic);
+		to->firstdb = be32_to_cpu(from->hdr.firstdb);
+		to->nvalid = be32_to_cpu(from->hdr.nvalid);
+		to->nused = be32_to_cpu(from->hdr.nused);
+	}
+
+	/* Whichever layout was used, the decoded magic must be a free magic. */
+	ASSERT(to->magic == XFS_DIR2_FREE_MAGIC ||
+	       to->magic == XFS_DIR3_FREE_MAGIC);
+}
+
+/*
+ * Encode the in-core free block header @from into the on-disk block at @to.
+ *
+ * An in-core magic of XFS_DIR2_FREE_MAGIC is written through
+ * struct xfs_dir2_free; otherwise the block is written through
+ * struct xfs_dir3_free_hdr.  Every field is converted from CPU to
+ * big-endian byte order on the way out.
+ */
+static void
+xfs_dir3_free_hdr_to_disk(
+	struct xfs_dir2_free		*to,
+	struct xfs_dir3_icfree_hdr	*from)
+{
+	/* Only the two known free block magics may be written back. */
+	ASSERT(from->magic == XFS_DIR2_FREE_MAGIC ||
+	       from->magic == XFS_DIR3_FREE_MAGIC);
+
+	if (from->magic != XFS_DIR2_FREE_MAGIC) {
+		/* Not the dir2 magic: overlay the dir3 free header. */
+		struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to;
+
+		hdr3->hdr.magic = cpu_to_be32(from->magic);
+		hdr3->firstdb = cpu_to_be32(from->firstdb);
+		hdr3->nvalid = cpu_to_be32(from->nvalid);
+		hdr3->nused = cpu_to_be32(from->nused);
+	} else {
+		/* dir2 free block: fill in to->hdr directly. */
+		to->hdr.magic = cpu_to_be32(from->magic);
+		to->hdr.firstdb = cpu_to_be32(from->firstdb);
+		to->hdr.nvalid = cpu_to_be32(from->nvalid);
+		to->hdr.nused = cpu_to_be32(from->nused);
+	}
+}
+
static int
xfs_dir3_free_get_buf(
struct xfs_trans *tp,
@@ -229,7 +277,7 @@ xfs_dir3_free_get_buf(
uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid);
} else
hdr.magic = XFS_DIR2_FREE_MAGIC;
- dp->d_ops->free_hdr_to_disk(bp->b_addr, &hdr);
+ xfs_dir3_free_hdr_to_disk(bp->b_addr, &hdr);
*bpp = bp;
return 0;
}
@@ -240,7 +288,6 @@ xfs_dir3_free_get_buf(
STATIC void
xfs_dir2_free_log_bests(
struct xfs_trans *tp,
- struct xfs_inode *dp,
struct xfs_buf *bp,
int first, /* first entry to log */
int last) /* last entry to log */
@@ -249,7 +296,7 @@ xfs_dir2_free_log_bests(
__be16 *bests;
free = bp->b_addr;
- bests = dp->d_ops->free_bests_p(free);
+ bests = xfs_dir3_free_bests_p(tp->t_mountp, free);
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
xfs_trans_log_buf(tp, bp,
@@ -264,7 +311,6 @@ xfs_dir2_free_log_bests(
static void
xfs_dir2_free_log_header(
struct xfs_trans *tp,
- struct xfs_inode *dp,
struct xfs_buf *bp)
{
#ifdef DEBUG
@@ -274,7 +320,7 @@ xfs_dir2_free_log_header(
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
#endif
- xfs_trans_log_buf(tp, bp, 0, dp->d_ops->free_hdr_size - 1);
+ xfs_trans_log_buf(tp, bp, 0, xfs_dir3_free_hdr_size(tp->t_mountp) - 1);
}
/*
@@ -323,7 +369,7 @@ xfs_dir2_leaf_to_node(
return error;
free = fbp->b_addr;
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
+ xfs_dir3_free_hdr_from_disk(&freehdr, free);
leaf = lbp->b_addr;
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
ASSERT(be32_to_cpu(ltp->bestcount) <=
@@ -334,7 +380,7 @@ xfs_dir2_leaf_to_node(
* Count active entries.
*/
from = xfs_dir2_leaf_bests_p(ltp);
- to = dp->d_ops->free_bests_p(free);
+ to = xfs_dir3_free_bests_p(mp, free);
for (i = n = 0; i < be32_to_cpu(ltp->bestcount); i++, from++, to++) {
if ((off = be16_to_cpu(*from)) != NULLDATAOFF)
n++;
@@ -347,9 +393,9 @@ xfs_dir2_leaf_to_node(
freehdr.nused = n;
freehdr.nvalid = be32_to_cpu(ltp->bestcount);
- dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr);
- xfs_dir2_free_log_bests(tp, dp, fbp, 0, freehdr.nvalid - 1);
- xfs_dir2_free_log_header(tp, dp, fbp);
+ xfs_dir3_free_hdr_to_disk(fbp->b_addr, &freehdr);
+ xfs_dir2_free_log_bests(tp, fbp, 0, freehdr.nvalid - 1);
+ xfs_dir2_free_log_header(tp, fbp);
/*
* Converting the leaf to a leafnode is just a matter of changing the
@@ -363,8 +409,8 @@ xfs_dir2_leaf_to_node(
leaf->hdr.info.magic = cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
lbp->b_ops = &xfs_dir3_leafn_buf_ops;
xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAFN_BUF);
- xfs_dir3_leaf_log_header(tp, dp, lbp);
- xfs_dir3_leaf_check(dp, lbp);
+ xfs_dir3_leaf_log_header(tp, lbp);
+ xfs_dir3_leaf_check(mp, lbp);
return 0;
}
@@ -397,8 +443,8 @@ xfs_dir2_leafn_add(
mp = dp->i_mount;
tp = args->trans;
leaf = bp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
/*
* Quick check just to make sure we are not going to index
@@ -414,7 +460,7 @@ xfs_dir2_leafn_add(
* a compact.
*/
- if (leafhdr.count == dp->d_ops->leaf_max_ents(mp)) {
+ if (leafhdr.count == xfs_dir3_max_leaf_ents(mp, leaf)) {
if (!leafhdr.stale)
return XFS_ERROR(ENOSPC);
compact = leafhdr.stale > 1;
@@ -452,30 +498,30 @@ xfs_dir2_leafn_add(
lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
args->blkno, args->index));
- dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
- xfs_dir3_leaf_log_header(tp, dp, bp);
- xfs_dir3_leaf_log_ents(tp, dp, bp, lfloglow, lfloghigh);
- xfs_dir3_leaf_check(dp, bp);
+ xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
+ xfs_dir3_leaf_log_header(tp, bp);
+ xfs_dir3_leaf_log_ents(tp, bp, lfloglow, lfloghigh);
+ xfs_dir3_leaf_check(mp, bp);
return 0;
}
#ifdef DEBUG
static void
xfs_dir2_free_hdr_check(
- struct xfs_inode *dp,
+ struct xfs_mount *mp,
struct xfs_buf *bp,
xfs_dir2_db_t db)
{
struct xfs_dir3_icfree_hdr hdr;
- dp->d_ops->free_hdr_from_disk(&hdr, bp->b_addr);
+ xfs_dir3_free_hdr_from_disk(&hdr, bp->b_addr);
- ASSERT((hdr.firstdb % dp->d_ops->free_max_bests(dp->i_mount)) == 0);
+ ASSERT((hdr.firstdb % xfs_dir3_free_max_bests(mp)) == 0);
ASSERT(hdr.firstdb <= db);
ASSERT(db < hdr.firstdb + hdr.nvalid);
}
#else
-#define xfs_dir2_free_hdr_check(dp, bp, db)
+#define xfs_dir2_free_hdr_check(mp, dp, db)
#endif /* DEBUG */
/*
@@ -484,7 +530,6 @@ xfs_dir2_free_hdr_check(
*/
xfs_dahash_t /* hash value */
xfs_dir2_leafn_lasthash(
- struct xfs_inode *dp,
struct xfs_buf *bp, /* leaf buffer */
int *count) /* count of entries in leaf */
{
@@ -492,7 +537,7 @@ xfs_dir2_leafn_lasthash(
struct xfs_dir2_leaf_entry *ents;
struct xfs_dir3_icleaf_hdr leafhdr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
@@ -502,7 +547,7 @@ xfs_dir2_leafn_lasthash(
if (!leafhdr.count)
return 0;
- ents = dp->d_ops->leaf_ents_p(leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
return be32_to_cpu(ents[leafhdr.count - 1].hashval);
}
@@ -539,10 +584,10 @@ xfs_dir2_leafn_lookup_for_addname(
tp = args->trans;
mp = dp->i_mount;
leaf = bp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
- xfs_dir3_leaf_check(dp, bp);
+ xfs_dir3_leaf_check(mp, bp);
ASSERT(leafhdr.count > 0);
/*
@@ -560,7 +605,7 @@ xfs_dir2_leafn_lookup_for_addname(
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
}
- length = dp->d_ops->data_entsize(args->namelen);
+ length = xfs_dir3_data_entsize(mp, args->namelen);
/*
* Loop over leaf entries with the right hash value.
*/
@@ -592,7 +637,7 @@ xfs_dir2_leafn_lookup_for_addname(
* Convert the data block to the free block
* holding its freespace information.
*/
- newfdb = dp->d_ops->db_to_fdb(mp, newdb);
+ newfdb = xfs_dir2_db_to_fdb(mp, newdb);
/*
* If it's not the one we have in hand, read it in.
*/
@@ -610,16 +655,16 @@ xfs_dir2_leafn_lookup_for_addname(
return error;
free = curbp->b_addr;
- xfs_dir2_free_hdr_check(dp, curbp, curdb);
+ xfs_dir2_free_hdr_check(mp, curbp, curdb);
}
/*
* Get the index for our entry.
*/
- fi = dp->d_ops->db_to_fdindex(mp, curdb);
+ fi = xfs_dir2_db_to_fdindex(mp, curdb);
/*
* If it has room, return it.
*/
- bests = dp->d_ops->free_bests_p(free);
+ bests = xfs_dir3_free_bests_p(mp, free);
if (unlikely(bests[fi] == cpu_to_be16(NULLDATAOFF))) {
XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
XFS_ERRLEVEL_LOW, mp);
@@ -689,10 +734,10 @@ xfs_dir2_leafn_lookup_for_entry(
tp = args->trans;
mp = dp->i_mount;
leaf = bp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
- xfs_dir3_leaf_check(dp, bp);
+ xfs_dir3_leaf_check(mp, bp);
ASSERT(leafhdr.count > 0);
/*
@@ -771,7 +816,7 @@ xfs_dir2_leafn_lookup_for_entry(
xfs_trans_brelse(tp, state->extrablk.bp);
args->cmpresult = cmp;
args->inumber = be64_to_cpu(dep->inumber);
- args->filetype = dp->d_ops->data_get_ftype(dep);
+ args->filetype = xfs_dir3_dirent_get_ftype(mp, dep);
*indexp = index;
state->extravalid = 1;
state->extrablk.bp = curbp;
@@ -862,7 +907,7 @@ xfs_dir3_leafn_moveents(
if (start_d < dhdr->count) {
memmove(&dents[start_d + count], &dents[start_d],
(dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t));
- xfs_dir3_leaf_log_ents(tp, args->dp, bp_d, start_d + count,
+ xfs_dir3_leaf_log_ents(tp, bp_d, start_d + count,
count + dhdr->count - 1);
}
/*
@@ -884,8 +929,7 @@ xfs_dir3_leafn_moveents(
*/
memcpy(&dents[start_d], &sents[start_s],
count * sizeof(xfs_dir2_leaf_entry_t));
- xfs_dir3_leaf_log_ents(tp, args->dp, bp_d,
- start_d, start_d + count - 1);
+ xfs_dir3_leaf_log_ents(tp, bp_d, start_d, start_d + count - 1);
/*
* If there are source entries after the ones we copied,
@@ -894,8 +938,7 @@ xfs_dir3_leafn_moveents(
if (start_s + count < shdr->count) {
memmove(&sents[start_s], &sents[start_s + count],
count * sizeof(xfs_dir2_leaf_entry_t));
- xfs_dir3_leaf_log_ents(tp, args->dp, bp_s,
- start_s, start_s + count - 1);
+ xfs_dir3_leaf_log_ents(tp, bp_s, start_s, start_s + count - 1);
}
/*
@@ -913,7 +956,6 @@ xfs_dir3_leafn_moveents(
*/
int /* sort order */
xfs_dir2_leafn_order(
- struct xfs_inode *dp,
struct xfs_buf *leaf1_bp, /* leaf1 buffer */
struct xfs_buf *leaf2_bp) /* leaf2 buffer */
{
@@ -924,10 +966,10 @@ xfs_dir2_leafn_order(
struct xfs_dir3_icleaf_hdr hdr1;
struct xfs_dir3_icleaf_hdr hdr2;
- dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1);
- dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2);
- ents1 = dp->d_ops->leaf_ents_p(leaf1);
- ents2 = dp->d_ops->leaf_ents_p(leaf2);
+ xfs_dir3_leaf_hdr_from_disk(&hdr1, leaf1);
+ xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf2);
+ ents1 = xfs_dir3_leaf_ents_p(leaf1);
+ ents2 = xfs_dir3_leaf_ents_p(leaf2);
if (hdr1.count > 0 && hdr2.count > 0 &&
(be32_to_cpu(ents2[0].hashval) < be32_to_cpu(ents1[0].hashval) ||
@@ -965,13 +1007,12 @@ xfs_dir2_leafn_rebalance(
struct xfs_dir2_leaf_entry *ents2;
struct xfs_dir3_icleaf_hdr hdr1;
struct xfs_dir3_icleaf_hdr hdr2;
- struct xfs_inode *dp = state->args->dp;
args = state->args;
/*
* If the block order is wrong, swap the arguments.
*/
- if ((swap = xfs_dir2_leafn_order(dp, blk1->bp, blk2->bp))) {
+ if ((swap = xfs_dir2_leafn_order(blk1->bp, blk2->bp))) {
xfs_da_state_blk_t *tmp; /* temp for block swap */
tmp = blk1;
@@ -980,10 +1021,10 @@ xfs_dir2_leafn_rebalance(
}
leaf1 = blk1->bp->b_addr;
leaf2 = blk2->bp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1);
- dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2);
- ents1 = dp->d_ops->leaf_ents_p(leaf1);
- ents2 = dp->d_ops->leaf_ents_p(leaf2);
+ xfs_dir3_leaf_hdr_from_disk(&hdr1, leaf1);
+ xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf2);
+ ents1 = xfs_dir3_leaf_ents_p(leaf1);
+ ents2 = xfs_dir3_leaf_ents_p(leaf2);
oldsum = hdr1.count + hdr2.count;
#if defined(DEBUG) || defined(XFS_WARN)
@@ -1029,13 +1070,13 @@ xfs_dir2_leafn_rebalance(
ASSERT(hdr1.stale + hdr2.stale == oldstale);
/* log the changes made when moving the entries */
- dp->d_ops->leaf_hdr_to_disk(leaf1, &hdr1);
- dp->d_ops->leaf_hdr_to_disk(leaf2, &hdr2);
- xfs_dir3_leaf_log_header(args->trans, dp, blk1->bp);
- xfs_dir3_leaf_log_header(args->trans, dp, blk2->bp);
+ xfs_dir3_leaf_hdr_to_disk(leaf1, &hdr1);
+ xfs_dir3_leaf_hdr_to_disk(leaf2, &hdr2);
+ xfs_dir3_leaf_log_header(args->trans, blk1->bp);
+ xfs_dir3_leaf_log_header(args->trans, blk2->bp);
- xfs_dir3_leaf_check(dp, blk1->bp);
- xfs_dir3_leaf_check(dp, blk2->bp);
+ xfs_dir3_leaf_check(args->dp->i_mount, blk1->bp);
+ xfs_dir3_leaf_check(args->dp->i_mount, blk2->bp);
/*
* Mark whether we're inserting into the old or new leaf.
@@ -1056,11 +1097,11 @@ xfs_dir2_leafn_rebalance(
* Finally sanity check just to make sure we are not returning a
* negative index
*/
- if (blk2->index < 0) {
+ if(blk2->index < 0) {
state->inleaf = 1;
blk2->index = 0;
- xfs_alert(dp->i_mount,
- "%s: picked the wrong leaf? reverting original leaf: blk1->index %d",
+ xfs_alert(args->dp->i_mount,
+ "%s: picked the wrong leaf? reverting original leaf: blk1->index %d\n",
__func__, blk1->index);
}
}
@@ -1079,17 +1120,17 @@ xfs_dir3_data_block_free(
int logfree = 0;
__be16 *bests;
struct xfs_dir3_icfree_hdr freehdr;
- struct xfs_inode *dp = args->dp;
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
- bests = dp->d_ops->free_bests_p(free);
+ xfs_dir3_free_hdr_from_disk(&freehdr, free);
+
+ bests = xfs_dir3_free_bests_p(tp->t_mountp, free);
if (hdr) {
/*
* Data block is not empty, just set the free entry to the new
* value.
*/
bests[findex] = cpu_to_be16(longest);
- xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex);
+ xfs_dir2_free_log_bests(tp, fbp, findex, findex);
return 0;
}
@@ -1116,8 +1157,8 @@ xfs_dir3_data_block_free(
logfree = 1;
}
- dp->d_ops->free_hdr_to_disk(free, &freehdr);
- xfs_dir2_free_log_header(tp, dp, fbp);
+ xfs_dir3_free_hdr_to_disk(free, &freehdr);
+ xfs_dir2_free_log_header(tp, fbp);
/*
* If there are no useful entries left in the block, get rid of the
@@ -1141,7 +1182,7 @@ xfs_dir3_data_block_free(
/* Log the free entry that changed, unless we got rid of it. */
if (logfree)
- xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex);
+ xfs_dir2_free_log_bests(tp, fbp, findex, findex);
return 0;
}
@@ -1181,8 +1222,8 @@ xfs_dir2_leafn_remove(
tp = args->trans;
mp = dp->i_mount;
leaf = bp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
/*
* Point to the entry we're removing.
@@ -1202,11 +1243,11 @@ xfs_dir2_leafn_remove(
* Log the leaf block changes.
*/
leafhdr.stale++;
- dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
- xfs_dir3_leaf_log_header(tp, dp, bp);
+ xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
+ xfs_dir3_leaf_log_header(tp, bp);
lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
- xfs_dir3_leaf_log_ents(tp, dp, bp, index, index);
+ xfs_dir3_leaf_log_ents(tp, bp, index, index);
/*
* Make the data entry free. Keep track of the longest freespace
@@ -1215,19 +1256,19 @@ xfs_dir2_leafn_remove(
dbp = dblk->bp;
hdr = dbp->b_addr;
dep = (xfs_dir2_data_entry_t *)((char *)hdr + off);
- bf = dp->d_ops->data_bestfree_p(hdr);
+ bf = xfs_dir3_data_bestfree_p(hdr);
longest = be16_to_cpu(bf[0].length);
needlog = needscan = 0;
- xfs_dir2_data_make_free(tp, dp, dbp, off,
- dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
+ xfs_dir2_data_make_free(tp, dbp, off,
+ xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan);
/*
* Rescan the data block freespaces for bestfree.
* Log the data block header if needed.
*/
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
if (needlog)
- xfs_dir2_data_log_header(tp, dp, dbp);
+ xfs_dir2_data_log_header(tp, dbp);
xfs_dir3_data_check(dp, dbp);
/*
* If the longest data block freespace changes, need to update
@@ -1244,7 +1285,7 @@ xfs_dir2_leafn_remove(
* Convert the data block number to a free block,
* read in the free block.
*/
- fdb = dp->d_ops->db_to_fdb(mp, db);
+ fdb = xfs_dir2_db_to_fdb(mp, db);
error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(mp, fdb),
&fbp);
if (error)
@@ -1253,22 +1294,22 @@ xfs_dir2_leafn_remove(
#ifdef DEBUG
{
struct xfs_dir3_icfree_hdr freehdr;
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
- ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(mp) *
+ xfs_dir3_free_hdr_from_disk(&freehdr, free);
+ ASSERT(freehdr.firstdb == xfs_dir3_free_max_bests(mp) *
(fdb - XFS_DIR2_FREE_FIRSTDB(mp)));
}
#endif
/*
* Calculate which entry we need to fix.
*/
- findex = dp->d_ops->db_to_fdindex(mp, db);
+ findex = xfs_dir2_db_to_fdindex(mp, db);
longest = be16_to_cpu(bf[0].length);
/*
* If the data block is now empty we can get rid of it
* (usually).
*/
if (longest == mp->m_dirblksize -
- dp->d_ops->data_entry_offset) {
+ xfs_dir3_data_entry_offset(hdr)) {
/*
* Try to punch out the data block.
*/
@@ -1295,12 +1336,12 @@ xfs_dir2_leafn_remove(
return error;
}
- xfs_dir3_leaf_check(dp, bp);
+ xfs_dir3_leaf_check(mp, bp);
/*
* Return indication of whether this leaf block is empty enough
* to justify trying to join it with a neighbor.
*/
- *rval = (dp->d_ops->leaf_hdr_size +
+ *rval = (xfs_dir3_leaf_hdr_size(leaf) +
(uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) <
mp->m_dir_magicpct;
return 0;
@@ -1319,14 +1360,13 @@ xfs_dir2_leafn_split(
xfs_dablk_t blkno; /* new leaf block number */
int error; /* error return value */
xfs_mount_t *mp; /* filesystem mount point */
- struct xfs_inode *dp;
/*
* Allocate space for a new leaf node.
*/
args = state->args;
- dp = args->dp;
- mp = dp->i_mount;
+ mp = args->dp->i_mount;
+ ASSERT(args != NULL);
ASSERT(oldblk->magic == XFS_DIR2_LEAFN_MAGIC);
error = xfs_da_grow_inode(args, &blkno);
if (error) {
@@ -1361,10 +1401,10 @@ xfs_dir2_leafn_split(
/*
* Update last hashval in each block since we added the name.
*/
- oldblk->hashval = xfs_dir2_leafn_lasthash(dp, oldblk->bp, NULL);
- newblk->hashval = xfs_dir2_leafn_lasthash(dp, newblk->bp, NULL);
- xfs_dir3_leaf_check(dp, oldblk->bp);
- xfs_dir3_leaf_check(dp, newblk->bp);
+ oldblk->hashval = xfs_dir2_leafn_lasthash(oldblk->bp, NULL);
+ newblk->hashval = xfs_dir2_leafn_lasthash(newblk->bp, NULL);
+ xfs_dir3_leaf_check(mp, oldblk->bp);
+ xfs_dir3_leaf_check(mp, newblk->bp);
return error;
}
@@ -1394,7 +1434,6 @@ xfs_dir2_leafn_toosmall(
int rval; /* result from path_shift */
struct xfs_dir3_icleaf_hdr leafhdr;
struct xfs_dir2_leaf_entry *ents;
- struct xfs_inode *dp = state->args->dp;
/*
* Check for the degenerate case of the block being over 50% full.
@@ -1403,12 +1442,12 @@ xfs_dir2_leafn_toosmall(
*/
blk = &state->path.blk[state->path.active - 1];
leaf = blk->bp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
- xfs_dir3_leaf_check(dp, blk->bp);
+ xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
+ xfs_dir3_leaf_check(state->args->dp->i_mount, blk->bp);
count = leafhdr.count - leafhdr.stale;
- bytes = dp->d_ops->leaf_hdr_size + count * sizeof(ents[0]);
+ bytes = xfs_dir3_leaf_hdr_size(leaf) + count * sizeof(ents[0]);
if (bytes > (state->blocksize >> 1)) {
/*
* Blk over 50%, don't try to join.
@@ -1453,7 +1492,7 @@ xfs_dir2_leafn_toosmall(
/*
* Read the sibling leaf block.
*/
- error = xfs_dir3_leafn_read(state->args->trans, dp,
+ error = xfs_dir3_leafn_read(state->args->trans, state->args->dp,
blkno, -1, &bp);
if (error)
return error;
@@ -1465,8 +1504,8 @@ xfs_dir2_leafn_toosmall(
bytes = state->blocksize - (state->blocksize >> 2);
leaf = bp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
count += hdr2.count - hdr2.stale;
bytes -= count * sizeof(ents[0]);
@@ -1520,7 +1559,6 @@ xfs_dir2_leafn_unbalance(
struct xfs_dir3_icleaf_hdr drophdr;
struct xfs_dir2_leaf_entry *sents;
struct xfs_dir2_leaf_entry *dents;
- struct xfs_inode *dp = state->args->dp;
args = state->args;
ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
@@ -1528,10 +1566,10 @@ xfs_dir2_leafn_unbalance(
drop_leaf = drop_blk->bp->b_addr;
save_leaf = save_blk->bp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&savehdr, save_leaf);
- dp->d_ops->leaf_hdr_from_disk(&drophdr, drop_leaf);
- sents = dp->d_ops->leaf_ents_p(save_leaf);
- dents = dp->d_ops->leaf_ents_p(drop_leaf);
+ xfs_dir3_leaf_hdr_from_disk(&savehdr, save_leaf);
+ xfs_dir3_leaf_hdr_from_disk(&drophdr, drop_leaf);
+ sents = xfs_dir3_leaf_ents_p(save_leaf);
+ dents = xfs_dir3_leaf_ents_p(drop_leaf);
/*
* If there are any stale leaf entries, take this opportunity
@@ -1546,7 +1584,7 @@ xfs_dir2_leafn_unbalance(
* Move the entries from drop to the appropriate end of save.
*/
drop_blk->hashval = be32_to_cpu(dents[drophdr.count - 1].hashval);
- if (xfs_dir2_leafn_order(dp, save_blk->bp, drop_blk->bp))
+ if (xfs_dir2_leafn_order(save_blk->bp, drop_blk->bp))
xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0,
save_blk->bp, &savehdr, sents, 0,
drophdr.count);
@@ -1557,13 +1595,13 @@ xfs_dir2_leafn_unbalance(
save_blk->hashval = be32_to_cpu(sents[savehdr.count - 1].hashval);
/* log the changes made when moving the entries */
- dp->d_ops->leaf_hdr_to_disk(save_leaf, &savehdr);
- dp->d_ops->leaf_hdr_to_disk(drop_leaf, &drophdr);
- xfs_dir3_leaf_log_header(args->trans, dp, save_blk->bp);
- xfs_dir3_leaf_log_header(args->trans, dp, drop_blk->bp);
+ xfs_dir3_leaf_hdr_to_disk(save_leaf, &savehdr);
+ xfs_dir3_leaf_hdr_to_disk(drop_leaf, &drophdr);
+ xfs_dir3_leaf_log_header(args->trans, save_blk->bp);
+ xfs_dir3_leaf_log_header(args->trans, drop_blk->bp);
- xfs_dir3_leaf_check(dp, save_blk->bp);
- xfs_dir3_leaf_check(dp, drop_blk->bp);
+ xfs_dir3_leaf_check(args->dp->i_mount, save_blk->bp);
+ xfs_dir3_leaf_check(args->dp->i_mount, drop_blk->bp);
}
/*
@@ -1674,7 +1712,7 @@ xfs_dir2_node_addname_int(
dp = args->dp;
mp = dp->i_mount;
tp = args->trans;
- length = dp->d_ops->data_entsize(args->namelen);
+ length = xfs_dir3_data_entsize(mp, args->namelen);
/*
* If we came in with a freespace block that means that lookup
* found an entry with our hash value. This is the freespace
@@ -1688,8 +1726,8 @@ xfs_dir2_node_addname_int(
ifbno = fblk->blkno;
free = fbp->b_addr;
findex = fblk->index;
- bests = dp->d_ops->free_bests_p(free);
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
+ bests = xfs_dir3_free_bests_p(mp, free);
+ xfs_dir3_free_hdr_from_disk(&freehdr, free);
/*
* This means the free entry showed that the data block had
@@ -1781,8 +1819,8 @@ xfs_dir2_node_addname_int(
* and the freehdr are actually initialised if they are placed
* there, so we have to do it here to avoid warnings. Blech.
*/
- bests = dp->d_ops->free_bests_p(free);
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
+ bests = xfs_dir3_free_bests_p(mp, free);
+ xfs_dir3_free_hdr_from_disk(&freehdr, free);
if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
be16_to_cpu(bests[findex]) >= length)
dbno = freehdr.firstdb + findex;
@@ -1833,7 +1871,7 @@ xfs_dir2_node_addname_int(
* Get the freespace block corresponding to the data block
* that was just allocated.
*/
- fbno = dp->d_ops->db_to_fdb(mp, dbno);
+ fbno = xfs_dir2_db_to_fdb(mp, dbno);
error = xfs_dir2_free_try_read(tp, dp,
xfs_dir2_db_to_da(mp, fbno),
&fbp);
@@ -1850,12 +1888,12 @@ xfs_dir2_node_addname_int(
if (error)
return error;
- if (unlikely(dp->d_ops->db_to_fdb(mp, dbno) != fbno)) {
+ if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) {
xfs_alert(mp,
"%s: dir ino %llu needed freesp block %lld for\n"
" data block %lld, got %lld ifbno %llu lastfbno %d",
__func__, (unsigned long long)dp->i_ino,
- (long long)dp->d_ops->db_to_fdb(mp, dbno),
+ (long long)xfs_dir2_db_to_fdb(mp, dbno),
(long long)dbno, (long long)fbno,
(unsigned long long)ifbno, lastfbno);
if (fblk) {
@@ -1880,30 +1918,30 @@ xfs_dir2_node_addname_int(
if (error)
return error;
free = fbp->b_addr;
- bests = dp->d_ops->free_bests_p(free);
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
+ bests = xfs_dir3_free_bests_p(mp, free);
+ xfs_dir3_free_hdr_from_disk(&freehdr, free);
/*
* Remember the first slot as our empty slot.
*/
freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
- dp->d_ops->free_max_bests(mp);
+ xfs_dir3_free_max_bests(mp);
} else {
free = fbp->b_addr;
- bests = dp->d_ops->free_bests_p(free);
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
+ bests = xfs_dir3_free_bests_p(mp, free);
+ xfs_dir3_free_hdr_from_disk(&freehdr, free);
}
/*
* Set the freespace block index from the data block number.
*/
- findex = dp->d_ops->db_to_fdindex(mp, dbno);
+ findex = xfs_dir2_db_to_fdindex(mp, dbno);
/*
* If it's after the end of the current entries in the
* freespace block, extend that table.
*/
if (findex >= freehdr.nvalid) {
- ASSERT(findex < dp->d_ops->free_max_bests(mp));
+ ASSERT(findex < xfs_dir3_free_max_bests(mp));
freehdr.nvalid = findex + 1;
/*
* Tag new entry so nused will go up.
@@ -1916,8 +1954,8 @@ xfs_dir2_node_addname_int(
*/
if (bests[findex] == cpu_to_be16(NULLDATAOFF)) {
freehdr.nused++;
- dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr);
- xfs_dir2_free_log_header(tp, dp, fbp);
+ xfs_dir3_free_hdr_to_disk(fbp->b_addr, &freehdr);
+ xfs_dir2_free_log_header(tp, fbp);
}
/*
* Update the real value in the table.
@@ -1925,7 +1963,7 @@ xfs_dir2_node_addname_int(
* change again.
*/
hdr = dbp->b_addr;
- bf = dp->d_ops->data_bestfree_p(hdr);
+ bf = xfs_dir3_data_bestfree_p(hdr);
bests[findex] = bf[0].length;
logfree = 1;
}
@@ -1947,7 +1985,7 @@ xfs_dir2_node_addname_int(
if (error)
return error;
hdr = dbp->b_addr;
- bf = dp->d_ops->data_bestfree_p(hdr);
+ bf = xfs_dir3_data_bestfree_p(hdr);
logfree = 0;
}
ASSERT(be16_to_cpu(bf[0].length) >= length);
@@ -1960,7 +1998,7 @@ xfs_dir2_node_addname_int(
/*
* Mark the first part of the unused space, inuse for us.
*/
- xfs_dir2_data_use_free(tp, dp, dbp, dup,
+ xfs_dir2_data_use_free(tp, dbp, dup,
(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
&needlog, &needscan);
/*
@@ -1970,24 +2008,24 @@ xfs_dir2_node_addname_int(
dep->inumber = cpu_to_be64(args->inumber);
dep->namelen = args->namelen;
memcpy(dep->name, args->name, dep->namelen);
- dp->d_ops->data_put_ftype(dep, args->filetype);
- tagp = dp->d_ops->data_entry_tag_p(dep);
+ xfs_dir3_dirent_put_ftype(mp, dep, args->filetype);
+ tagp = xfs_dir3_data_entry_tag_p(mp, dep);
*tagp = cpu_to_be16((char *)dep - (char *)hdr);
- xfs_dir2_data_log_entry(tp, dp, dbp, dep);
+ xfs_dir2_data_log_entry(tp, dbp, dep);
/*
* Rescan the block for bestfree if needed.
*/
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
/*
* Log the data block header if needed.
*/
if (needlog)
- xfs_dir2_data_log_header(tp, dp, dbp);
+ xfs_dir2_data_log_header(tp, dbp);
/*
* If the freespace entry is now wrong, update it.
*/
- bests = dp->d_ops->free_bests_p(free); /* gcc is so stupid */
+ bests = xfs_dir3_free_bests_p(mp, free); /* gcc is so stupid */
if (be16_to_cpu(bests[findex]) != be16_to_cpu(bf[0].length)) {
bests[findex] = bf[0].length;
logfree = 1;
@@ -1996,7 +2034,7 @@ xfs_dir2_node_addname_int(
* Log the freespace entry if needed.
*/
if (logfree)
- xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex);
+ xfs_dir2_free_log_bests(tp, fbp, findex, findex);
/*
* Return the data block and offset in args, then drop the data block.
*/
@@ -2174,7 +2212,7 @@ xfs_dir2_node_replace(
blk = &state->path.blk[state->path.active - 1];
ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
leaf = blk->bp->b_addr;
- ents = args->dp->d_ops->leaf_ents_p(leaf);
+ ents = xfs_dir3_leaf_ents_p(leaf);
lep = &ents[blk->index];
ASSERT(state->extravalid);
/*
@@ -2191,9 +2229,8 @@ xfs_dir2_node_replace(
* Fill in the new inode number and log the entry.
*/
dep->inumber = cpu_to_be64(inum);
- args->dp->d_ops->data_put_ftype(dep, args->filetype);
- xfs_dir2_data_log_entry(args->trans, args->dp,
- state->extrablk.bp, dep);
+ xfs_dir3_dirent_put_ftype(state->mp, dep, args->filetype);
+ xfs_dir2_data_log_entry(args->trans, state->extrablk.bp, dep);
rval = 0;
}
/*
@@ -2248,7 +2285,7 @@ xfs_dir2_node_trim_free(
if (!bp)
return 0;
free = bp->b_addr;
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
+ xfs_dir3_free_hdr_from_disk(&freehdr, free);
/*
* If there are used entries, there's nothing to do.
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h
index 8b9d228..1bad84c 100644
--- a/fs/xfs/xfs_dir2_priv.h
+++ b/fs/xfs/xfs_dir2_priv.h
@@ -59,8 +59,7 @@ extern int xfs_dir3_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp,
extern struct xfs_dir2_data_free *
xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
- struct xfs_dir2_data_free *bf, struct xfs_dir2_data_unused *dup,
- int *loghead);
+ struct xfs_dir2_data_unused *dup, int *loghead);
extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
struct xfs_buf **bpp);
@@ -77,9 +76,9 @@ extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,
int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,
struct xfs_buf **bpp, __uint16_t magic);
-extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_inode *dp,
- struct xfs_buf *bp, int first, int last);
-extern void xfs_dir3_leaf_log_header(struct xfs_trans *tp, struct xfs_inode *dp,
+extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp,
+ int first, int last);
+extern void xfs_dir3_leaf_log_header(struct xfs_trans *tp,
struct xfs_buf *bp);
extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
@@ -94,18 +93,21 @@ xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr,
int lowstale, int highstale, int *lfloglow, int *lfloghigh);
extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
-extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp,
+extern void xfs_dir3_leaf_hdr_from_disk(struct xfs_dir3_icleaf_hdr *to,
+ struct xfs_dir2_leaf *from);
+extern void xfs_dir3_leaf_hdr_to_disk(struct xfs_dir2_leaf *to,
+ struct xfs_dir3_icleaf_hdr *from);
+extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp,
struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf);
/* xfs_dir2_node.c */
extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
struct xfs_buf *lbp);
-extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_inode *dp,
- struct xfs_buf *bp, int *count);
+extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_buf *bp, int *count);
extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp,
struct xfs_da_args *args, int *indexp,
struct xfs_da_state *state);
-extern int xfs_dir2_leafn_order(struct xfs_inode *dp, struct xfs_buf *leaf1_bp,
+extern int xfs_dir2_leafn_order(struct xfs_buf *leaf1_bp,
struct xfs_buf *leaf2_bp);
extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk);
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index c4e50c6..8f84153 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -18,23 +18,23 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
-#include "xfs_trans.h"
-#include "xfs_dinode.h"
/*
* Directory file type support functions
@@ -119,9 +119,9 @@ xfs_dir2_sf_getdents(
* mp->m_dirdatablk.
*/
dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
- dp->d_ops->data_dot_offset);
+ xfs_dir3_data_dot_offset(mp));
dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
- dp->d_ops->data_dotdot_offset);
+ xfs_dir3_data_dotdot_offset(mp));
/*
* Put . entry unless we're starting past it.
@@ -136,7 +136,7 @@ xfs_dir2_sf_getdents(
* Put .. entry unless we're starting past it.
*/
if (ctx->pos <= dotdot_offset) {
- ino = dp->d_ops->sf_get_parent_ino(sfp);
+ ino = xfs_dir2_sf_get_parent_ino(sfp);
ctx->pos = dotdot_offset & 0x7fffffff;
if (!dir_emit(ctx, "..", 2, ino, DT_DIR))
return 0;
@@ -153,17 +153,17 @@ xfs_dir2_sf_getdents(
xfs_dir2_sf_get_offset(sfep));
if (ctx->pos > off) {
- sfep = dp->d_ops->sf_nextentry(sfp, sfep);
+ sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
continue;
}
- ino = dp->d_ops->sf_get_ino(sfp, sfep);
- filetype = dp->d_ops->sf_get_ftype(sfep);
+ ino = xfs_dir3_sfe_get_ino(mp, sfp, sfep);
+ filetype = xfs_dir3_sfe_get_ftype(mp, sfp, sfep);
ctx->pos = off & 0x7fffffff;
if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino,
xfs_dir3_get_dtype(mp, filetype)))
return 0;
- sfep = dp->d_ops->sf_nextentry(sfp, sfep);
+ sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
}
ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
@@ -213,7 +213,7 @@ xfs_dir2_block_getdents(
* Set up values for the loop.
*/
btp = xfs_dir2_block_tail_p(mp, hdr);
- ptr = (char *)dp->d_ops->data_entry_p(hdr);
+ ptr = (char *)xfs_dir3_data_entry_p(hdr);
endptr = (char *)xfs_dir2_block_leaf_p(btp);
/*
@@ -237,7 +237,7 @@ xfs_dir2_block_getdents(
/*
* Bump pointer for the next iteration.
*/
- ptr += dp->d_ops->data_entsize(dep->namelen);
+ ptr += xfs_dir3_data_entsize(mp, dep->namelen);
/*
* The entry is before the desired starting point, skip it.
*/
@@ -248,7 +248,7 @@ xfs_dir2_block_getdents(
(char *)dep - (char *)hdr);
ctx->pos = cook & 0x7fffffff;
- filetype = dp->d_ops->data_get_ftype(dep);
+ filetype = xfs_dir3_dirent_get_ftype(mp, dep);
/*
* If it didn't fit, set the final offset to here & return.
*/
@@ -578,13 +578,13 @@ xfs_dir2_leaf_getdents(
/*
* Find our position in the block.
*/
- ptr = (char *)dp->d_ops->data_entry_p(hdr);
+ ptr = (char *)xfs_dir3_data_entry_p(hdr);
byteoff = xfs_dir2_byte_to_off(mp, curoff);
/*
* Skip past the header.
*/
if (byteoff == 0)
- curoff += dp->d_ops->data_entry_offset;
+ curoff += xfs_dir3_data_entry_offset(hdr);
/*
* Skip past entries until we reach our offset.
*/
@@ -601,7 +601,7 @@ xfs_dir2_leaf_getdents(
}
dep = (xfs_dir2_data_entry_t *)ptr;
length =
- dp->d_ops->data_entsize(dep->namelen);
+ xfs_dir3_data_entsize(mp, dep->namelen);
ptr += length;
}
/*
@@ -632,8 +632,8 @@ xfs_dir2_leaf_getdents(
}
dep = (xfs_dir2_data_entry_t *)ptr;
- length = dp->d_ops->data_entsize(dep->namelen);
- filetype = dp->d_ops->data_get_ftype(dep);
+ length = xfs_dir3_data_entsize(mp, dep->namelen);
+ filetype = xfs_dir3_dirent_get_ftype(mp, dep);
ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index aafc6e4..3ef6d40 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -17,22 +17,22 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_error.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_trace.h"
-#include "xfs_dinode.h"
/*
* Prototypes for internal functions.
@@ -57,6 +57,89 @@ static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
#endif /* XFS_BIG_INUMS */
/*
+ * Inode numbers in short-form directories can come in two versions,
+ * either 4 bytes or 8 bytes wide. These helpers deal with the
+ * two forms transparently by looking at the headers i8count field.
+ *
+ * For 64-bit inode number the most significant byte must be zero.
+ */
+static xfs_ino_t
+xfs_dir2_sf_get_ino(
+ struct xfs_dir2_sf_hdr *hdr,
+ xfs_dir2_inou_t *from)
+{
+ if (hdr->i8count)
+ return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL;
+ else
+ return get_unaligned_be32(&from->i4.i);
+}
+
+static void
+xfs_dir2_sf_put_ino(
+ struct xfs_dir2_sf_hdr *hdr,
+ xfs_dir2_inou_t *to,
+ xfs_ino_t ino)
+{
+ ASSERT((ino & 0xff00000000000000ULL) == 0);
+
+ if (hdr->i8count)
+ put_unaligned_be64(ino, &to->i8.i);
+ else
+ put_unaligned_be32(ino, &to->i4.i);
+}
+
+xfs_ino_t
+xfs_dir2_sf_get_parent_ino(
+ struct xfs_dir2_sf_hdr *hdr)
+{
+ return xfs_dir2_sf_get_ino(hdr, &hdr->parent);
+}
+
+void
+xfs_dir2_sf_put_parent_ino(
+ struct xfs_dir2_sf_hdr *hdr,
+ xfs_ino_t ino)
+{
+ xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino);
+}
+
+/*
+ * In short-form directory entries the inode numbers are stored at variable
+ * offset behind the entry name. If the entry stores a filetype value, then it
+ * sits between the name and the inode number. Hence the inode numbers may only
+ * be accessed through the helpers below.
+ */
+static xfs_dir2_inou_t *
+xfs_dir3_sfe_inop(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_entry *sfep)
+{
+ __uint8_t *ptr = &sfep->name[sfep->namelen];
+ if (xfs_sb_version_hasftype(&mp->m_sb))
+ ptr++;
+ return (xfs_dir2_inou_t *)ptr;
+}
+
+xfs_ino_t
+xfs_dir3_sfe_get_ino(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep)
+{
+ return xfs_dir2_sf_get_ino(hdr, xfs_dir3_sfe_inop(mp, sfep));
+}
+
+void
+xfs_dir3_sfe_put_ino(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep,
+ xfs_ino_t ino)
+{
+ xfs_dir2_sf_put_ino(hdr, xfs_dir3_sfe_inop(mp, sfep), ino);
+}
+
+/*
* Given a block directory (dp/block), calculate its size as a shortform (sf)
* directory and a header for the sf directory, if it will fit it the
* space currently present in the inode. If it won't fit, the output
@@ -143,7 +226,7 @@ xfs_dir2_block_sfsize(
*/
sfhp->count = count;
sfhp->i8count = i8count;
- dp->d_ops->sf_put_parent_ino(sfhp, parent);
+ xfs_dir2_sf_put_parent_ino(sfhp, parent);
return size;
}
@@ -210,7 +293,7 @@ xfs_dir2_block_to_sf(
* Set up to loop over the block's entries.
*/
btp = xfs_dir2_block_tail_p(mp, hdr);
- ptr = (char *)dp->d_ops->data_entry_p(hdr);
+ ptr = (char *)xfs_dir3_data_entry_p(hdr);
endptr = (char *)xfs_dir2_block_leaf_p(btp);
sfep = xfs_dir2_sf_firstentry(sfp);
/*
@@ -238,7 +321,7 @@ xfs_dir2_block_to_sf(
else if (dep->namelen == 2 &&
dep->name[0] == '.' && dep->name[1] == '.')
ASSERT(be64_to_cpu(dep->inumber) ==
- dp->d_ops->sf_get_parent_ino(sfp));
+ xfs_dir2_sf_get_parent_ino(sfp));
/*
* Normal entry, copy it into shortform.
*/
@@ -248,14 +331,14 @@ xfs_dir2_block_to_sf(
(xfs_dir2_data_aoff_t)
((char *)dep - (char *)hdr));
memcpy(sfep->name, dep->name, dep->namelen);
- dp->d_ops->sf_put_ino(sfp, sfep,
- be64_to_cpu(dep->inumber));
- dp->d_ops->sf_put_ftype(sfep,
- dp->d_ops->data_get_ftype(dep));
+ xfs_dir3_sfe_put_ino(mp, sfp, sfep,
+ be64_to_cpu(dep->inumber));
+ xfs_dir3_sfe_put_ftype(mp, sfp, sfep,
+ xfs_dir3_dirent_get_ftype(mp, dep));
- sfep = dp->d_ops->sf_nextentry(sfp, sfep);
+ sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
}
- ptr += dp->d_ops->data_entsize(dep->namelen);
+ ptr += xfs_dir3_data_entsize(mp, dep->namelen);
}
ASSERT((char *)sfep - (char *)sfp == size);
xfs_dir2_sf_check(args);
@@ -306,7 +389,7 @@ xfs_dir2_sf_addname(
/*
* Compute entry (and change in) size.
*/
- add_entsize = dp->d_ops->sf_entsize(sfp, args->namelen);
+ add_entsize = xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen);
incr_isize = add_entsize;
objchange = 0;
#if XFS_BIG_INUMS
@@ -400,7 +483,8 @@ xfs_dir2_sf_addname_easy(
/*
* Grow the in-inode space.
*/
- xfs_idata_realloc(dp, dp->d_ops->sf_entsize(sfp, args->namelen),
+ xfs_idata_realloc(dp,
+ xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen),
XFS_DATA_FORK);
/*
* Need to set up again due to realloc of the inode data.
@@ -413,8 +497,8 @@ xfs_dir2_sf_addname_easy(
sfep->namelen = args->namelen;
xfs_dir2_sf_put_offset(sfep, offset);
memcpy(sfep->name, args->name, sfep->namelen);
- dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
- dp->d_ops->sf_put_ftype(sfep, args->filetype);
+ xfs_dir3_sfe_put_ino(dp->i_mount, sfp, sfep, args->inumber);
+ xfs_dir3_sfe_put_ftype(dp->i_mount, sfp, sfep, args->filetype);
/*
* Update the header and inode.
@@ -473,13 +557,13 @@ xfs_dir2_sf_addname_hard(
* to insert the new entry.
* If it's going to end up at the end then oldsfep will point there.
*/
- for (offset = dp->d_ops->data_first_offset,
+ for (offset = xfs_dir3_data_first_offset(mp),
oldsfep = xfs_dir2_sf_firstentry(oldsfp),
- add_datasize = dp->d_ops->data_entsize(args->namelen),
+ add_datasize = xfs_dir3_data_entsize(mp, args->namelen),
eof = (char *)oldsfep == &buf[old_isize];
!eof;
- offset = new_offset + dp->d_ops->data_entsize(oldsfep->namelen),
- oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep),
+ offset = new_offset + xfs_dir3_data_entsize(mp, oldsfep->namelen),
+ oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep),
eof = (char *)oldsfep == &buf[old_isize]) {
new_offset = xfs_dir2_sf_get_offset(oldsfep);
if (offset + add_datasize <= new_offset)
@@ -508,8 +592,8 @@ xfs_dir2_sf_addname_hard(
sfep->namelen = args->namelen;
xfs_dir2_sf_put_offset(sfep, offset);
memcpy(sfep->name, args->name, sfep->namelen);
- dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
- dp->d_ops->sf_put_ftype(sfep, args->filetype);
+ xfs_dir3_sfe_put_ino(mp, sfp, sfep, args->inumber);
+ xfs_dir3_sfe_put_ftype(mp, sfp, sfep, args->filetype);
sfp->count++;
#if XFS_BIG_INUMS
if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
@@ -519,7 +603,7 @@ xfs_dir2_sf_addname_hard(
* If there's more left to copy, do that.
*/
if (!eof) {
- sfep = dp->d_ops->sf_nextentry(sfp, sfep);
+ sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
memcpy(sfep, oldsfep, old_isize - nbytes);
}
kmem_free(buf);
@@ -555,8 +639,8 @@ xfs_dir2_sf_addname_pick(
mp = dp->i_mount;
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
- size = dp->d_ops->data_entsize(args->namelen);
- offset = dp->d_ops->data_first_offset;
+ size = xfs_dir3_data_entsize(mp, args->namelen);
+ offset = xfs_dir3_data_first_offset(mp);
sfep = xfs_dir2_sf_firstentry(sfp);
holefit = 0;
/*
@@ -568,8 +652,8 @@ xfs_dir2_sf_addname_pick(
if (!holefit)
holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
offset = xfs_dir2_sf_get_offset(sfep) +
- dp->d_ops->data_entsize(sfep->namelen);
- sfep = dp->d_ops->sf_nextentry(sfp, sfep);
+ xfs_dir3_data_entsize(mp, sfep->namelen);
+ sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
}
/*
* Calculate data bytes used excluding the new entry, if this
@@ -629,20 +713,21 @@ xfs_dir2_sf_check(
mp = dp->i_mount;
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
- offset = dp->d_ops->data_first_offset;
- ino = dp->d_ops->sf_get_parent_ino(sfp);
+ offset = xfs_dir3_data_first_offset(mp);
+ ino = xfs_dir2_sf_get_parent_ino(sfp);
i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
i < sfp->count;
- i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
+ i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep)) {
ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
- ino = dp->d_ops->sf_get_ino(sfp, sfep);
+ ino = xfs_dir3_sfe_get_ino(mp, sfp, sfep);
i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
offset =
xfs_dir2_sf_get_offset(sfep) +
- dp->d_ops->data_entsize(sfep->namelen);
- ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX);
+ xfs_dir3_data_entsize(mp, sfep->namelen);
+ ASSERT(xfs_dir3_sfe_get_ftype(mp, sfp, sfep) <
+ XFS_DIR3_FT_MAX);
}
ASSERT(i8count == sfp->i8count);
ASSERT(XFS_BIG_INUMS || i8count == 0);
@@ -698,7 +783,7 @@ xfs_dir2_sf_create(
/*
* Now can put in the inode number, since i8count is set.
*/
- dp->d_ops->sf_put_parent_ino(sfp, pino);
+ xfs_dir2_sf_put_parent_ino(sfp, pino);
sfp->count = 0;
dp->i_d.di_size = size;
xfs_dir2_sf_check(args);
@@ -753,7 +838,7 @@ xfs_dir2_sf_lookup(
*/
if (args->namelen == 2 &&
args->name[0] == '.' && args->name[1] == '.') {
- args->inumber = dp->d_ops->sf_get_parent_ino(sfp);
+ args->inumber = xfs_dir2_sf_get_parent_ino(sfp);
args->cmpresult = XFS_CMP_EXACT;
args->filetype = XFS_DIR3_FT_DIR;
return XFS_ERROR(EEXIST);
@@ -763,7 +848,7 @@ xfs_dir2_sf_lookup(
*/
ci_sfep = NULL;
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
- i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
+ i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) {
/*
* Compare name and if it's an exact match, return the inode
* number. If it's the first case-insensitive match, store the
@@ -773,8 +858,10 @@ xfs_dir2_sf_lookup(
sfep->namelen);
if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
args->cmpresult = cmp;
- args->inumber = dp->d_ops->sf_get_ino(sfp, sfep);
- args->filetype = dp->d_ops->sf_get_ftype(sfep);
+ args->inumber = xfs_dir3_sfe_get_ino(dp->i_mount,
+ sfp, sfep);
+ args->filetype = xfs_dir3_sfe_get_ftype(dp->i_mount,
+ sfp, sfep);
if (cmp == XFS_CMP_EXACT)
return XFS_ERROR(EEXIST);
ci_sfep = sfep;
@@ -830,10 +917,10 @@ xfs_dir2_sf_removename(
* Find the one we're deleting.
*/
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
- i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
+ i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) {
if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
XFS_CMP_EXACT) {
- ASSERT(dp->d_ops->sf_get_ino(sfp, sfep) ==
+ ASSERT(xfs_dir3_sfe_get_ino(dp->i_mount, sfp, sfep) ==
args->inumber);
break;
}
@@ -847,7 +934,7 @@ xfs_dir2_sf_removename(
* Calculate sizes.
*/
byteoff = (int)((char *)sfep - (char *)sfp);
- entsize = dp->d_ops->sf_entsize(sfp, args->namelen);
+ entsize = xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen);
newsize = oldsize - entsize;
/*
* Copy the part if any after the removed entry, sliding it down.
@@ -954,25 +1041,28 @@ xfs_dir2_sf_replace(
if (args->namelen == 2 &&
args->name[0] == '.' && args->name[1] == '.') {
#if XFS_BIG_INUMS || defined(DEBUG)
- ino = dp->d_ops->sf_get_parent_ino(sfp);
+ ino = xfs_dir2_sf_get_parent_ino(sfp);
ASSERT(args->inumber != ino);
#endif
- dp->d_ops->sf_put_parent_ino(sfp, args->inumber);
+ xfs_dir2_sf_put_parent_ino(sfp, args->inumber);
}
/*
* Normal entry, look for the name.
*/
else {
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
- i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
+ i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) {
if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
XFS_CMP_EXACT) {
#if XFS_BIG_INUMS || defined(DEBUG)
- ino = dp->d_ops->sf_get_ino(sfp, sfep);
+ ino = xfs_dir3_sfe_get_ino(dp->i_mount,
+ sfp, sfep);
ASSERT(args->inumber != ino);
#endif
- dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
- dp->d_ops->sf_put_ftype(sfep, args->filetype);
+ xfs_dir3_sfe_put_ino(dp->i_mount, sfp, sfep,
+ args->inumber);
+ xfs_dir3_sfe_put_ftype(dp->i_mount, sfp, sfep,
+ args->filetype);
break;
}
}
@@ -1075,21 +1165,22 @@ xfs_dir2_sf_toino4(
*/
sfp->count = oldsfp->count;
sfp->i8count = 0;
- dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp));
+ xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp));
/*
* Copy the entries field by field.
*/
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
oldsfep = xfs_dir2_sf_firstentry(oldsfp);
i < sfp->count;
- i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep),
- oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) {
+ i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep),
+ oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep)) {
sfep->namelen = oldsfep->namelen;
sfep->offset = oldsfep->offset;
memcpy(sfep->name, oldsfep->name, sfep->namelen);
- dp->d_ops->sf_put_ino(sfp, sfep,
- dp->d_ops->sf_get_ino(oldsfp, oldsfep));
- dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep));
+ xfs_dir3_sfe_put_ino(mp, sfp, sfep,
+ xfs_dir3_sfe_get_ino(mp, oldsfp, oldsfep));
+ xfs_dir3_sfe_put_ftype(mp, sfp, sfep,
+ xfs_dir3_sfe_get_ftype(mp, oldsfp, oldsfep));
}
/*
* Clean up the inode.
@@ -1153,21 +1244,22 @@ xfs_dir2_sf_toino8(
*/
sfp->count = oldsfp->count;
sfp->i8count = 1;
- dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp));
+ xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp));
/*
* Copy the entries field by field.
*/
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
oldsfep = xfs_dir2_sf_firstentry(oldsfp);
i < sfp->count;
- i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep),
- oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) {
+ i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep),
+ oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep)) {
sfep->namelen = oldsfep->namelen;
sfep->offset = oldsfep->offset;
memcpy(sfep->name, oldsfep->name, sfep->namelen);
- dp->d_ops->sf_put_ino(sfp, sfep,
- dp->d_ops->sf_get_ino(oldsfp, oldsfep));
- dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep));
+ xfs_dir3_sfe_put_ino(mp, sfp, sfep,
+ xfs_dir3_sfe_get_ino(mp, oldsfp, oldsfep));
+ xfs_dir3_sfe_put_ftype(mp, sfp, sfep,
+ xfs_dir3_sfe_get_ftype(mp, oldsfp, oldsfep));
}
/*
* Clean up the inode.
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 8367d6d..45560ee 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -17,21 +17,22 @@
*/
#include "xfs.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_quota.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_extent_busy.h"
#include "xfs_discard.h"
#include "xfs_trace.h"
-#include "xfs_log.h"
STATIC int
xfs_trim_extents(
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 6b1e695..1ee776d 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -18,28 +18,28 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_shared.h"
-#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
+#include "xfs_rtalloc.h"
#include "xfs_error.h"
-#include "xfs_trans.h"
+#include "xfs_itable.h"
+#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
#include "xfs_trans_priv.h"
#include "xfs_qm.h"
#include "xfs_cksum.h"
#include "xfs_trace.h"
-#include "xfs_log.h"
-#include "xfs_bmap_btree.h"
/*
* Lock order:
@@ -292,6 +292,118 @@ xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
}
+STATIC bool
+xfs_dquot_buf_verify_crc(
+ struct xfs_mount *mp,
+ struct xfs_buf *bp)
+{
+ struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
+ int ndquots;
+ int i;
+
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return true;
+
+ /*
+ * if we are in log recovery, the quota subsystem has not been
+ * initialised so we have no quotainfo structure. In that case, we need
+ * to manually calculate the number of dquots in the buffer.
+ */
+ if (mp->m_quotainfo)
+ ndquots = mp->m_quotainfo->qi_dqperchunk;
+ else
+ ndquots = xfs_qm_calc_dquots_per_chunk(mp,
+ XFS_BB_TO_FSB(mp, bp->b_length));
+
+ for (i = 0; i < ndquots; i++, d++) {
+ if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
+ XFS_DQUOT_CRC_OFF))
+ return false;
+ if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid))
+ return false;
+ }
+ return true;
+}
+
+STATIC bool
+xfs_dquot_buf_verify(
+ struct xfs_mount *mp,
+ struct xfs_buf *bp)
+{
+ struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
+ xfs_dqid_t id = 0;
+ int ndquots;
+ int i;
+
+ /*
+ * if we are in log recovery, the quota subsystem has not been
+ * initialised so we have no quotainfo structure. In that case, we need
+ * to manually calculate the number of dquots in the buffer.
+ */
+ if (mp->m_quotainfo)
+ ndquots = mp->m_quotainfo->qi_dqperchunk;
+ else
+ ndquots = xfs_qm_calc_dquots_per_chunk(mp, bp->b_length);
+
+ /*
+ * On the first read of the buffer, verify that each dquot is valid.
+ * We don't know what the id of the dquot is supposed to be, just that
+ * they should be increasing monotonically within the buffer. If the
+ * first id is corrupt, then it will fail on the second dquot in the
+ * buffer so corruptions could point to the wrong dquot in this case.
+ */
+ for (i = 0; i < ndquots; i++) {
+ struct xfs_disk_dquot *ddq;
+ int error;
+
+ ddq = &d[i].dd_diskdq;
+
+ if (i == 0)
+ id = be32_to_cpu(ddq->d_id);
+
+ error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
+ "xfs_dquot_buf_verify");
+ if (error)
+ return false;
+ }
+ return true;
+}
+
+static void
+xfs_dquot_buf_read_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+ }
+}
+
+/*
+ * we don't calculate the CRC here as that is done when the dquot is flushed to
+ * the buffer after the update is done. This ensures that the dquot in the
+ * buffer always has an up-to-date CRC value.
+ */
+void
+xfs_dquot_buf_write_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ if (!xfs_dquot_buf_verify(mp, bp)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+ return;
+ }
+}
+
+const struct xfs_buf_ops xfs_dquot_buf_ops = {
+ .verify_read = xfs_dquot_buf_read_verify,
+ .verify_write = xfs_dquot_buf_write_verify,
+};
+
/*
* Allocate a block and fill it with dquots.
* This is called when the bmapi finds a hole.
@@ -402,7 +514,6 @@ xfs_qm_dqalloc(
return (error);
}
-
STATIC int
xfs_qm_dqrepair(
struct xfs_mount *mp,
@@ -436,7 +547,7 @@ xfs_qm_dqrepair(
/* Do the actual repair of dquots in this buffer */
for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) {
ddq = &d[i].dd_diskdq;
- error = xfs_dqcheck(mp, ddq, firstid + i,
+ error = xfs_qm_dqcheck(mp, ddq, firstid + i,
dqp->dq_flags & XFS_DQ_ALLTYPES,
XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair");
if (error) {
@@ -1022,7 +1133,7 @@ xfs_qm_dqflush(
/*
* A simple sanity check in case we got a corrupted dquot..
*/
- error = xfs_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
+ error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
XFS_QMOPT_DOWARN, "dqflush (incore copy)");
if (error) {
xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index d22ed00..55abbca 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -172,4 +172,6 @@ static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
return dqp;
}
+extern const struct xfs_buf_ops xfs_dquot_buf_ops;
+
#endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/xfs_dquot_buf.c
deleted file mode 100644
index d401457..0000000
--- a/fs/xfs/xfs_dquot_buf.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * Copyright (c) 2013 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_inode.h"
-#include "xfs_quota.h"
-#include "xfs_trans.h"
-#include "xfs_qm.h"
-#include "xfs_error.h"
-#include "xfs_cksum.h"
-#include "xfs_trace.h"
-
-int
-xfs_calc_dquots_per_chunk(
- struct xfs_mount *mp,
- unsigned int nbblks) /* basic block units */
-{
- unsigned int ndquots;
-
- ASSERT(nbblks > 0);
- ndquots = BBTOB(nbblks);
- do_div(ndquots, sizeof(xfs_dqblk_t));
-
- return ndquots;
-}
-
-/*
- * Do some primitive error checking on ondisk dquot data structures.
- */
-int
-xfs_dqcheck(
- struct xfs_mount *mp,
- xfs_disk_dquot_t *ddq,
- xfs_dqid_t id,
- uint type, /* used only when IO_dorepair is true */
- uint flags,
- char *str)
-{
- xfs_dqblk_t *d = (xfs_dqblk_t *)ddq;
- int errs = 0;
-
- /*
- * We can encounter an uninitialized dquot buffer for 2 reasons:
- * 1. If we crash while deleting the quotainode(s), and those blks got
- * used for user data. This is because we take the path of regular
- * file deletion; however, the size field of quotainodes is never
- * updated, so all the tricks that we play in itruncate_finish
- * don't quite matter.
- *
- * 2. We don't play the quota buffers when there's a quotaoff logitem.
- * But the allocation will be replayed so we'll end up with an
- * uninitialized quota block.
- *
- * This is all fine; things are still consistent, and we haven't lost
- * any quota information. Just don't complain about bad dquot blks.
- */
- if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) {
- if (flags & XFS_QMOPT_DOWARN)
- xfs_alert(mp,
- "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
- str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC);
- errs++;
- }
- if (ddq->d_version != XFS_DQUOT_VERSION) {
- if (flags & XFS_QMOPT_DOWARN)
- xfs_alert(mp,
- "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
- str, id, ddq->d_version, XFS_DQUOT_VERSION);
- errs++;
- }
-
- if (ddq->d_flags != XFS_DQ_USER &&
- ddq->d_flags != XFS_DQ_PROJ &&
- ddq->d_flags != XFS_DQ_GROUP) {
- if (flags & XFS_QMOPT_DOWARN)
- xfs_alert(mp,
- "%s : XFS dquot ID 0x%x, unknown flags 0x%x",
- str, id, ddq->d_flags);
- errs++;
- }
-
- if (id != -1 && id != be32_to_cpu(ddq->d_id)) {
- if (flags & XFS_QMOPT_DOWARN)
- xfs_alert(mp,
- "%s : ondisk-dquot 0x%p, ID mismatch: "
- "0x%x expected, found id 0x%x",
- str, ddq, id, be32_to_cpu(ddq->d_id));
- errs++;
- }
-
- if (!errs && ddq->d_id) {
- if (ddq->d_blk_softlimit &&
- be64_to_cpu(ddq->d_bcount) >
- be64_to_cpu(ddq->d_blk_softlimit)) {
- if (!ddq->d_btimer) {
- if (flags & XFS_QMOPT_DOWARN)
- xfs_alert(mp,
- "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED",
- str, (int)be32_to_cpu(ddq->d_id), ddq);
- errs++;
- }
- }
- if (ddq->d_ino_softlimit &&
- be64_to_cpu(ddq->d_icount) >
- be64_to_cpu(ddq->d_ino_softlimit)) {
- if (!ddq->d_itimer) {
- if (flags & XFS_QMOPT_DOWARN)
- xfs_alert(mp,
- "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED",
- str, (int)be32_to_cpu(ddq->d_id), ddq);
- errs++;
- }
- }
- if (ddq->d_rtb_softlimit &&
- be64_to_cpu(ddq->d_rtbcount) >
- be64_to_cpu(ddq->d_rtb_softlimit)) {
- if (!ddq->d_rtbtimer) {
- if (flags & XFS_QMOPT_DOWARN)
- xfs_alert(mp,
- "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED",
- str, (int)be32_to_cpu(ddq->d_id), ddq);
- errs++;
- }
- }
- }
-
- if (!errs || !(flags & XFS_QMOPT_DQREPAIR))
- return errs;
-
- if (flags & XFS_QMOPT_DOWARN)
- xfs_notice(mp, "Re-initializing dquot ID 0x%x", id);
-
- /*
- * Typically, a repair is only requested by quotacheck.
- */
- ASSERT(id != -1);
- ASSERT(flags & XFS_QMOPT_DQREPAIR);
- memset(d, 0, sizeof(xfs_dqblk_t));
-
- d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
- d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
- d->dd_diskdq.d_flags = type;
- d->dd_diskdq.d_id = cpu_to_be32(id);
-
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
- uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
- xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
- XFS_DQUOT_CRC_OFF);
- }
-
- return errs;
-}
-
-STATIC bool
-xfs_dquot_buf_verify_crc(
- struct xfs_mount *mp,
- struct xfs_buf *bp)
-{
- struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
- int ndquots;
- int i;
-
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return true;
-
- /*
- * if we are in log recovery, the quota subsystem has not been
- * initialised so we have no quotainfo structure. In that case, we need
- * to manually calculate the number of dquots in the buffer.
- */
- if (mp->m_quotainfo)
- ndquots = mp->m_quotainfo->qi_dqperchunk;
- else
- ndquots = xfs_calc_dquots_per_chunk(mp,
- XFS_BB_TO_FSB(mp, bp->b_length));
-
- for (i = 0; i < ndquots; i++, d++) {
- if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
- XFS_DQUOT_CRC_OFF))
- return false;
- if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid))
- return false;
- }
- return true;
-}
-
-STATIC bool
-xfs_dquot_buf_verify(
- struct xfs_mount *mp,
- struct xfs_buf *bp)
-{
- struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
- xfs_dqid_t id = 0;
- int ndquots;
- int i;
-
- /*
- * if we are in log recovery, the quota subsystem has not been
- * initialised so we have no quotainfo structure. In that case, we need
- * to manually calculate the number of dquots in the buffer.
- */
- if (mp->m_quotainfo)
- ndquots = mp->m_quotainfo->qi_dqperchunk;
- else
- ndquots = xfs_calc_dquots_per_chunk(mp, bp->b_length);
-
- /*
- * On the first read of the buffer, verify that each dquot is valid.
- * We don't know what the id of the dquot is supposed to be, just that
- * they should be increasing monotonically within the buffer. If the
- * first id is corrupt, then it will fail on the second dquot in the
- * buffer so corruptions could point to the wrong dquot in this case.
- */
- for (i = 0; i < ndquots; i++) {
- struct xfs_disk_dquot *ddq;
- int error;
-
- ddq = &d[i].dd_diskdq;
-
- if (i == 0)
- id = be32_to_cpu(ddq->d_id);
-
- error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
- "xfs_dquot_buf_verify");
- if (error)
- return false;
- }
- return true;
-}
-
-static void
-xfs_dquot_buf_read_verify(
- struct xfs_buf *bp)
-{
- struct xfs_mount *mp = bp->b_target->bt_mount;
-
- if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
-}
-
-/*
- * we don't calculate the CRC here as that is done when the dquot is flushed to
- * the buffer after the update is done. This ensures that the dquot in the
- * buffer always has an up-to-date CRC value.
- */
-static void
-xfs_dquot_buf_write_verify(
- struct xfs_buf *bp)
-{
- struct xfs_mount *mp = bp->b_target->bt_mount;
-
- if (!xfs_dquot_buf_verify(mp, bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- return;
- }
-}
-
-const struct xfs_buf_ops xfs_dquot_buf_ops = {
- .verify_read = xfs_dquot_buf_read_verify,
- .verify_write = xfs_dquot_buf_write_verify,
-};
-
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 92e5f62..e838d84 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -18,19 +18,23 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
-#include "xfs_quota.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
#include "xfs_error.h"
-#include "xfs_trans.h"
+#include "xfs_itable.h"
+#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
#include "xfs_qm.h"
-#include "xfs_log.h"
static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
{
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 9995b80..1123d93f 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -16,13 +16,16 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
-#include "xfs_format.h"
#include "xfs_fs.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
#include "xfs_error.h"
#ifdef DEBUG
@@ -156,7 +159,7 @@ xfs_error_report(
{
if (level <= xfs_error_level) {
xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
- "Internal error %s at line %d of file %s. Caller 0x%p",
+ "Internal error %s at line %d of file %s. Caller 0x%p\n",
tag, linenum, filename, ra);
xfs_stack_trace();
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 1399e18..066df42 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -16,21 +16,21 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
#include "xfs_export.h"
+#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
-#include "xfs_log.h"
/*
* Note that we only accept fileids which are long enough rather than allow
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index fd22f69..e43708e 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -19,18 +19,17 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_shared.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
#include "xfs_alloc.h"
+#include "xfs_inode.h"
#include "xfs_extent_busy.h"
#include "xfs_trace.h"
-#include "xfs_trans.h"
-#include "xfs_log.h"
void
xfs_extent_busy_insert(
diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h
index bfff284..985412d 100644
--- a/fs/xfs/xfs_extent_busy.h
+++ b/fs/xfs/xfs_extent_busy.h
@@ -20,10 +20,6 @@
#ifndef __XFS_EXTENT_BUSY_H__
#define __XFS_EXTENT_BUSY_H__
-struct xfs_mount;
-struct xfs_trans;
-struct xfs_alloc_arg;
-
/*
* Busy block/extent entry. Indexed by a rbtree in perag to mark blocks that
* have been freed but whose transactions aren't committed to disk yet.
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 3680d04..dc53e8f 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -17,14 +17,14 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_buf_item.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_trans.h"
#include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
#include "xfs_extfree_item.h"
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 52c91e1..4c749ab 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -17,27 +17,25 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
+#include "xfs_trans.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_error.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_ioctl.h"
#include "xfs_trace.h"
-#include "xfs_log.h"
-#include "xfs_dinode.h"
#include <linux/aio.h>
#include <linux/dcache.h>
@@ -807,64 +805,44 @@ out:
STATIC long
xfs_file_fallocate(
- struct file *file,
- int mode,
- loff_t offset,
- loff_t len)
+ struct file *file,
+ int mode,
+ loff_t offset,
+ loff_t len)
{
- struct inode *inode = file_inode(file);
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_trans *tp;
- long error;
- loff_t new_size = 0;
+ struct inode *inode = file_inode(file);
+ long error;
+ loff_t new_size = 0;
+ xfs_flock64_t bf;
+ xfs_inode_t *ip = XFS_I(inode);
+ int cmd = XFS_IOC_RESVSP;
+ int attr_flags = XFS_ATTR_NOLOCK;
- if (!S_ISREG(inode->i_mode))
- return -EINVAL;
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
+ bf.l_whence = 0;
+ bf.l_start = offset;
+ bf.l_len = len;
+
xfs_ilock(ip, XFS_IOLOCK_EXCL);
- if (mode & FALLOC_FL_PUNCH_HOLE) {
- error = xfs_free_file_space(ip, offset, len);
- if (error)
- goto out_unlock;
- } else {
- if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- offset + len > i_size_read(inode)) {
- new_size = offset + len;
- error = -inode_newsize_ok(inode, new_size);
- if (error)
- goto out_unlock;
- }
- error = xfs_alloc_file_space(ip, offset, len,
- XFS_BMAPI_PREALLOC);
+ if (mode & FALLOC_FL_PUNCH_HOLE)
+ cmd = XFS_IOC_UNRESVSP;
+
+ /* check the new inode size is valid before allocating */
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ offset + len > i_size_read(inode)) {
+ new_size = offset + len;
+ error = inode_newsize_ok(inode, new_size);
if (error)
goto out_unlock;
}
- tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID);
- error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- goto out_unlock;
- }
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- ip->i_d.di_mode &= ~S_ISUID;
- if (ip->i_d.di_mode & S_IXGRP)
- ip->i_d.di_mode &= ~S_ISGID;
-
- if (!(mode & FALLOC_FL_PUNCH_HOLE))
- ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
-
- xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
if (file->f_flags & O_DSYNC)
- xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
+ attr_flags |= XFS_ATTR_SYNC;
+
+ error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
if (error)
goto out_unlock;
@@ -874,12 +852,12 @@ xfs_file_fallocate(
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = new_size;
- error = xfs_setattr_size(ip, &iattr);
+ error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK);
}
out_unlock:
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return -error;
+ return error;
}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 12b6e77..ce78e65 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -16,19 +16,19 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inum.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
#include "xfs_ag.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_inum.h"
-#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_alloc.h"
#include "xfs_mru_cache.h"
-#include "xfs_dinode.h"
#include "xfs_filestream.h"
#include "xfs_trace.h"
diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h
index b6ab5a3..35c08ff 100644
--- a/fs/xfs/xfs_format.h
+++ b/fs/xfs/xfs_format.h
@@ -156,259 +156,14 @@ struct xfs_dsymlink_hdr {
((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
sizeof(struct xfs_dsymlink_hdr) : 0))
-
-/*
- * Allocation Btree format definitions
- *
- * There are two on-disk btrees, one sorted by blockno and one sorted
- * by blockcount and blockno. All blocks look the same to make the code
- * simpler; if we have time later, we'll make the optimizations.
- */
-#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */
-#define XFS_ABTB_CRC_MAGIC 0x41423342 /* 'AB3B' */
-#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */
-#define XFS_ABTC_CRC_MAGIC 0x41423343 /* 'AB3C' */
-
-/*
- * Data record/key structure
- */
-typedef struct xfs_alloc_rec {
- __be32 ar_startblock; /* starting block number */
- __be32 ar_blockcount; /* count of free blocks */
-} xfs_alloc_rec_t, xfs_alloc_key_t;
-
-typedef struct xfs_alloc_rec_incore {
- xfs_agblock_t ar_startblock; /* starting block number */
- xfs_extlen_t ar_blockcount; /* count of free blocks */
-} xfs_alloc_rec_incore_t;
-
-/* btree pointer type */
-typedef __be32 xfs_alloc_ptr_t;
-
-/*
- * Block numbers in the AG:
- * SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3.
- */
-#define XFS_BNO_BLOCK(mp) ((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1))
-#define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1))
-
-
-/*
- * Inode Allocation Btree format definitions
- *
- * There is a btree for the inode map per allocation group.
- */
-#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */
-#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */
-
-typedef __uint64_t xfs_inofree_t;
-#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t))
-#define XFS_INODES_PER_CHUNK_LOG (XFS_NBBYLOG + 3)
-#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1)
-#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i))
-
-static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
-{
- return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i;
-}
-
-/*
- * Data record structure
- */
-typedef struct xfs_inobt_rec {
- __be32 ir_startino; /* starting inode number */
- __be32 ir_freecount; /* count of free inodes (set bits) */
- __be64 ir_free; /* free inode mask */
-} xfs_inobt_rec_t;
-
-typedef struct xfs_inobt_rec_incore {
- xfs_agino_t ir_startino; /* starting inode number */
- __int32_t ir_freecount; /* count of free inodes (set bits) */
- xfs_inofree_t ir_free; /* free inode mask */
-} xfs_inobt_rec_incore_t;
-
-
-/*
- * Key structure
- */
-typedef struct xfs_inobt_key {
- __be32 ir_startino; /* starting inode number */
-} xfs_inobt_key_t;
-
-/* btree pointer type */
-typedef __be32 xfs_inobt_ptr_t;
-
-/*
- * block numbers in the AG.
- */
-#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
-#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
-
-
-
-/*
- * BMAP Btree format definitions
- *
- * This includes both the root block definition that sits inside an inode fork
- * and the record/pointer formats for the leaf/node in the blocks.
- */
-#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */
-#define XFS_BMAP_CRC_MAGIC 0x424d4133 /* 'BMA3' */
-
-/*
- * Bmap root header, on-disk form only.
- */
-typedef struct xfs_bmdr_block {
- __be16 bb_level; /* 0 is a leaf */
- __be16 bb_numrecs; /* current # of data records */
-} xfs_bmdr_block_t;
-
-/*
- * Bmap btree record and extent descriptor.
- * l0:63 is an extent flag (value 1 indicates non-normal).
- * l0:9-62 are startoff.
- * l0:0-8 and l1:21-63 are startblock.
- * l1:0-20 are blockcount.
- */
-#define BMBT_EXNTFLAG_BITLEN 1
-#define BMBT_STARTOFF_BITLEN 54
-#define BMBT_STARTBLOCK_BITLEN 52
-#define BMBT_BLOCKCOUNT_BITLEN 21
-
-typedef struct xfs_bmbt_rec {
- __be64 l0, l1;
-} xfs_bmbt_rec_t;
-
-typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */
-typedef xfs_bmbt_rec_t xfs_bmdr_rec_t;
-
-typedef struct xfs_bmbt_rec_host {
- __uint64_t l0, l1;
-} xfs_bmbt_rec_host_t;
-
-/*
- * Values and macros for delayed-allocation startblock fields.
- */
-#define STARTBLOCKVALBITS 17
-#define STARTBLOCKMASKBITS (15 + XFS_BIG_BLKNOS * 20)
-#define DSTARTBLOCKMASKBITS (15 + 20)
-#define STARTBLOCKMASK \
- (((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
-#define DSTARTBLOCKMASK \
- (((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
-
-static inline int isnullstartblock(xfs_fsblock_t x)
-{
- return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK;
-}
-
-static inline int isnulldstartblock(xfs_dfsbno_t x)
-{
- return ((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK;
-}
-
-static inline xfs_fsblock_t nullstartblock(int k)
-{
- ASSERT(k < (1 << STARTBLOCKVALBITS));
- return STARTBLOCKMASK | (k);
-}
-
-static inline xfs_filblks_t startblockval(xfs_fsblock_t x)
-{
- return (xfs_filblks_t)((x) & ~STARTBLOCKMASK);
-}
-
-/*
- * Possible extent formats.
- */
-typedef enum {
- XFS_EXTFMT_NOSTATE = 0,
- XFS_EXTFMT_HASSTATE
-} xfs_exntfmt_t;
-
-/*
- * Possible extent states.
- */
-typedef enum {
- XFS_EXT_NORM, XFS_EXT_UNWRITTEN,
- XFS_EXT_DMAPI_OFFLINE, XFS_EXT_INVALID
-} xfs_exntst_t;
-
-/*
- * Incore version of above.
- */
-typedef struct xfs_bmbt_irec
-{
- xfs_fileoff_t br_startoff; /* starting file offset */
- xfs_fsblock_t br_startblock; /* starting block number */
- xfs_filblks_t br_blockcount; /* number of blocks */
- xfs_exntst_t br_state; /* extent state */
-} xfs_bmbt_irec_t;
-
-/*
- * Key structure for non-leaf levels of the tree.
- */
-typedef struct xfs_bmbt_key {
- __be64 br_startoff; /* starting file offset */
-} xfs_bmbt_key_t, xfs_bmdr_key_t;
-
-/* btree pointer type */
-typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
-
-
-/*
- * Generic Btree block format definitions
- *
- * This is a combination of the actual format used on disk for short and long
- * format btrees. The first three fields are shared by both format, but the
- * pointers are different and should be used with care.
- *
- * To get the size of the actual short or long form headers please use the size
- * macros below. Never use sizeof(xfs_btree_block).
- *
- * The blkno, crc, lsn, owner and uuid fields are only available in filesystems
- * with the crc feature bit, and all accesses to them must be conditional on
- * that flag.
- */
-struct xfs_btree_block {
- __be32 bb_magic; /* magic number for block type */
- __be16 bb_level; /* 0 is a leaf */
- __be16 bb_numrecs; /* current # of data records */
- union {
- struct {
- __be32 bb_leftsib;
- __be32 bb_rightsib;
-
- __be64 bb_blkno;
- __be64 bb_lsn;
- uuid_t bb_uuid;
- __be32 bb_owner;
- __le32 bb_crc;
- } s; /* short form pointers */
- struct {
- __be64 bb_leftsib;
- __be64 bb_rightsib;
-
- __be64 bb_blkno;
- __be64 bb_lsn;
- uuid_t bb_uuid;
- __be64 bb_owner;
- __le32 bb_crc;
- __be32 bb_pad; /* padding for alignment */
- } l; /* long form pointers */
- } bb_u; /* rest */
-};
-
-#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */
-#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */
-
-/* sizes of CRC enabled btree blocks */
-#define XFS_BTREE_SBLOCK_CRC_LEN (XFS_BTREE_SBLOCK_LEN + 40)
-#define XFS_BTREE_LBLOCK_CRC_LEN (XFS_BTREE_LBLOCK_LEN + 48)
-
-#define XFS_BTREE_SBLOCK_CRC_OFF \
- offsetof(struct xfs_btree_block, bb_u.s.bb_crc)
-#define XFS_BTREE_LBLOCK_CRC_OFF \
- offsetof(struct xfs_btree_block, bb_u.l.bb_crc)
+int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen);
+int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
+ uint32_t size, struct xfs_buf *bp);
+bool xfs_symlink_hdr_ok(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
+ uint32_t size, struct xfs_buf *bp);
+void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
+ struct xfs_inode *ip, struct xfs_ifork *ifp);
+
+extern const struct xfs_buf_ops xfs_symlink_buf_ops;
#endif /* __XFS_FORMAT_H__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index c5fc116..18272c7 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -233,11 +233,11 @@ typedef struct xfs_fsop_resblks {
#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */
#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */
#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */
-#define XFS_FSOP_GEOM_FLAGS_PROJID32 0x0800 /* 32-bit project IDs */
+#define XFS_FSOP_GEOM_FLAGS_PROJID32 0x0800 /* 32-bit project IDs */
#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */
#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */
-#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */
+
/*
* Minimum and maximum sizes need for growth checks.
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index a6e54b3..e64ee52 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -17,29 +17,28 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
#include "xfs_inode_item.h"
-#include "xfs_error.h"
#include "xfs_btree.h"
-#include "xfs_alloc_btree.h"
+#include "xfs_error.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_fsops.h"
#include "xfs_itable.h"
#include "xfs_trans_space.h"
#include "xfs_rtalloc.h"
-#include "xfs_trace.h"
-#include "xfs_log.h"
-#include "xfs_dinode.h"
#include "xfs_filestream.h"
+#include "xfs_trace.h"
/*
* File system operations
@@ -102,9 +101,7 @@ xfs_fs_geometry(
(xfs_sb_version_hasprojid32bit(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) |
(xfs_sb_version_hascrc(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_V5SB : 0) |
- (xfs_sb_version_hasftype(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_FTYPE : 0);
+ XFS_FSOP_GEOM_FLAGS_V5SB : 0);
geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
mp->m_sb.sb_logsectsize : BBSIZE;
geo->rtsectsize = mp->m_sb.sb_blocksize;
@@ -156,7 +153,7 @@ xfs_growfs_data_private(
xfs_buf_t *bp;
int bucket;
int dpct;
- int error, saved_error = 0;
+ int error;
xfs_agnumber_t nagcount;
xfs_agnumber_t nagimax = 0;
xfs_rfsblock_t nb, nb_mod;
@@ -499,33 +496,29 @@ xfs_growfs_data_private(
error = ENOMEM;
}
- /*
- * If we get an error reading or writing alternate superblocks,
- * continue. xfs_repair chooses the "best" superblock based
- * on most matches; if we break early, we'll leave more
- * superblocks un-updated than updated, and xfs_repair may
- * pick them over the properly-updated primary.
- */
if (error) {
xfs_warn(mp,
"error %d reading secondary superblock for ag %d",
error, agno);
- saved_error = error;
- continue;
+ break;
}
xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, XFS_SB_ALL_BITS);
+ /*
+ * If we get an error writing out the alternate superblocks,
+ * just issue a warning and continue. The real work is
+ * already done and committed.
+ */
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
if (error) {
xfs_warn(mp,
"write error %d updating secondary superblock for ag %d",
error, agno);
- saved_error = error;
- continue;
+ break; /* no point in continuing */
}
}
- return saved_error ? saved_error : error;
+ return error;
error0:
xfs_trans_cancel(tp, XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index e87719c..ccf2fb1 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -17,30 +17,29 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
#include "xfs_inum.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
-#include "xfs_ialloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_bmap.h"
#include "xfs_cksum.h"
-#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_icreate_item.h"
#include "xfs_icache.h"
-#include "xfs_dinode.h"
-#include "xfs_trace.h"
/*
@@ -1628,9 +1627,8 @@ xfs_read_agi(
{
int error;
- trace_xfs_read_agi(mp, agno);
-
ASSERT(agno != NULLAGNUMBER);
+
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops);
@@ -1653,8 +1651,6 @@ xfs_ialloc_read_agi(
struct xfs_perag *pag; /* per allocation group data */
int error;
- trace_xfs_ialloc_read_agi(mp, agno);
-
error = xfs_read_agi(mp, tp, agno, bpp);
if (error)
return error;
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index a8f76a5..68c0732 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -23,7 +23,6 @@ struct xfs_dinode;
struct xfs_imap;
struct xfs_mount;
struct xfs_trans;
-struct xfs_btree_cur;
/*
* Allocation parameters for inode allocation.
@@ -43,7 +42,7 @@ struct xfs_btree_cur;
static inline struct xfs_dinode *
xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o)
{
- return (struct xfs_dinode *)
+ return (xfs_dinode_t *)
(xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog));
}
@@ -159,4 +158,6 @@ int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, xfs_agblock_t agbno,
xfs_agblock_t length, unsigned int gen);
+extern const struct xfs_buf_ops xfs_agi_buf_ops;
+
#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index c8fa5bb..5448eb6 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -17,23 +17,24 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
-#include "xfs_ialloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
-#include "xfs_trans.h"
STATIC int
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index f38b220..3ac36b76 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -27,6 +27,55 @@ struct xfs_btree_cur;
struct xfs_mount;
/*
+ * There is a btree for the inode map per allocation group.
+ */
+#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */
+#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */
+
+typedef __uint64_t xfs_inofree_t;
+#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t))
+#define XFS_INODES_PER_CHUNK_LOG (XFS_NBBYLOG + 3)
+#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1)
+#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i))
+
+static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
+{
+ return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i;
+}
+
+/*
+ * Data record structure
+ */
+typedef struct xfs_inobt_rec {
+ __be32 ir_startino; /* starting inode number */
+ __be32 ir_freecount; /* count of free inodes (set bits) */
+ __be64 ir_free; /* free inode mask */
+} xfs_inobt_rec_t;
+
+typedef struct xfs_inobt_rec_incore {
+ xfs_agino_t ir_startino; /* starting inode number */
+ __int32_t ir_freecount; /* count of free inodes (set bits) */
+ xfs_inofree_t ir_free; /* free inode mask */
+} xfs_inobt_rec_incore_t;
+
+
+/*
+ * Key structure
+ */
+typedef struct xfs_inobt_key {
+ __be32 ir_startino; /* starting inode number */
+} xfs_inobt_key_t;
+
+/* btree pointer type */
+typedef __be32 xfs_inobt_ptr_t;
+
+/*
+ * block numbers in the AG.
+ */
+#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
+#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
+
+/*
* Btree block header size depends on a superblock flag.
*/
#define XFS_INOBT_BLOCK_LEN(mp) \
@@ -61,4 +110,6 @@ extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t);
extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
+extern const struct xfs_buf_ops xfs_inobt_buf_ops;
+
#endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 98d3524..474807a 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -18,19 +18,24 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_log_priv.h"
#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
+#include "xfs_dinode.h"
#include "xfs_error.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
+#include "xfs_filestream.h"
#include "xfs_inode_item.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
+#include "xfs_fsops.h"
#include "xfs_icache.h"
#include "xfs_bmap_util.h"
@@ -495,6 +500,11 @@ xfs_inode_ag_walk_grab(
if (!igrab(inode))
return ENOENT;
+ if (is_bad_inode(inode)) {
+ IRELE(ip);
+ return ENOENT;
+ }
+
/* inode is valid */
return 0;
@@ -908,6 +918,8 @@ restart:
xfs_iflock(ip);
}
+ if (is_bad_inode(VFS_I(ip)))
+ goto reclaim;
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
xfs_iunpin_wait(ip);
xfs_iflush_abort(ip, false);
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
index d2eaccf..5a5a593 100644
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -17,14 +17,13 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_error.h"
#include "xfs_icreate_item.h"
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 001aa89..e3d7538 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -19,38 +19,39 @@
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_trans_space.h"
+#include "xfs_trans_priv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_inode.h"
-#include "xfs_da_format.h"
#include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
#include "xfs_attr_sf.h"
#include "xfs_attr.h"
-#include "xfs_trans_space.h"
-#include "xfs_trans.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
+#include "xfs_btree.h"
+#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_error.h"
#include "xfs_quota.h"
-#include "xfs_dinode.h"
#include "xfs_filestream.h"
#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_symlink.h"
-#include "xfs_trans_priv.h"
-#include "xfs_log.h"
-#include "xfs_bmap_btree.h"
kmem_zone_t *xfs_inode_zone;
@@ -1662,126 +1663,6 @@ xfs_release(
}
/*
- * xfs_inactive_truncate
- *
- * Called to perform a truncate when an inode becomes unlinked.
- */
-STATIC int
-xfs_inactive_truncate(
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_trans *tp;
- int error;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
- if (error) {
- ASSERT(XFS_FORCED_SHUTDOWN(mp));
- xfs_trans_cancel(tp, 0);
- return error;
- }
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, 0);
-
- /*
- * Log the inode size first to prevent stale data exposure in the event
- * of a system crash before the truncate completes. See the related
- * comment in xfs_setattr_size() for details.
- */
- ip->i_d.di_size = 0;
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
- error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
- if (error)
- goto error_trans_cancel;
-
- ASSERT(ip->i_d.di_nextents == 0);
-
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
- if (error)
- goto error_unlock;
-
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return 0;
-
-error_trans_cancel:
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
-error_unlock:
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return error;
-}
-
-/*
- * xfs_inactive_ifree()
- *
- * Perform the inode free when an inode is unlinked.
- */
-STATIC int
-xfs_inactive_ifree(
- struct xfs_inode *ip)
-{
- xfs_bmap_free_t free_list;
- xfs_fsblock_t first_block;
- int committed;
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_trans *tp;
- int error;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0);
- if (error) {
- ASSERT(XFS_FORCED_SHUTDOWN(mp));
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
- return error;
- }
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, 0);
-
- xfs_bmap_init(&free_list, &first_block);
- error = xfs_ifree(tp, ip, &free_list);
- if (error) {
- /*
- * If we fail to free the inode, shut down. The cancel
- * might do that, we need to make sure. Otherwise the
- * inode might be lost for a long time or forever.
- */
- if (!XFS_FORCED_SHUTDOWN(mp)) {
- xfs_notice(mp, "%s: xfs_ifree returned error %d",
- __func__, error);
- xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
- }
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return error;
- }
-
- /*
- * Credit the quota account(s). The inode is gone.
- */
- xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
-
- /*
- * Just ignore errors at this point. There is nothing we can
- * do except to try to keep going. Make sure it's not a silent
- * error.
- */
- error = xfs_bmap_finish(&tp, &free_list, &committed);
- if (error)
- xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
- __func__, error);
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
- if (error)
- xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
- __func__, error);
-
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return 0;
-}
-
-/*
* xfs_inactive
*
* This is called when the vnode reference count for the vnode
@@ -1789,11 +1670,16 @@ xfs_inactive_ifree(
* now be truncated. Also, we clear all of the read-ahead state
* kept for the inode here since the file is now closed.
*/
-void
+int
xfs_inactive(
xfs_inode_t *ip)
{
+ xfs_bmap_free_t free_list;
+ xfs_fsblock_t first_block;
+ int committed;
+ struct xfs_trans *tp;
struct xfs_mount *mp;
+ struct xfs_trans_res *resp;
int error;
int truncate = 0;
@@ -1801,17 +1687,19 @@ xfs_inactive(
* If the inode is already free, then there can be nothing
* to clean up here.
*/
- if (ip->i_d.di_mode == 0) {
+ if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) {
ASSERT(ip->i_df.if_real_bytes == 0);
ASSERT(ip->i_df.if_broot_bytes == 0);
- return;
+ return VN_INACTIVE_CACHE;
}
mp = ip->i_mount;
+ error = 0;
+
/* If this is a read-only mount, don't do this (would generate I/O) */
if (mp->m_flags & XFS_MOUNT_RDONLY)
- return;
+ goto out;
if (ip->i_d.di_nlink != 0) {
/*
@@ -1819,10 +1707,12 @@ xfs_inactive(
* cache. Post-eof blocks must be freed, lest we end up with
* broken free space accounting.
*/
- if (xfs_can_free_eofblocks(ip, true))
- xfs_free_eofblocks(mp, ip, false);
-
- return;
+ if (xfs_can_free_eofblocks(ip, true)) {
+ error = xfs_free_eofblocks(mp, ip, false);
+ if (error)
+ return VN_INACTIVE_CACHE;
+ }
+ goto out;
}
if (S_ISREG(ip->i_d.di_mode) &&
@@ -1832,14 +1722,36 @@ xfs_inactive(
error = xfs_qm_dqattach(ip, 0);
if (error)
- return;
+ return VN_INACTIVE_CACHE;
- if (S_ISLNK(ip->i_d.di_mode))
- error = xfs_inactive_symlink(ip);
- else if (truncate)
- error = xfs_inactive_truncate(ip);
- if (error)
- return;
+ tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
+ resp = (truncate || S_ISLNK(ip->i_d.di_mode)) ?
+ &M_RES(mp)->tr_itruncate : &M_RES(mp)->tr_ifree;
+
+ error = xfs_trans_reserve(tp, resp, 0, 0);
+ if (error) {
+ ASSERT(XFS_FORCED_SHUTDOWN(mp));
+ xfs_trans_cancel(tp, 0);
+ return VN_INACTIVE_CACHE;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, 0);
+
+ if (S_ISLNK(ip->i_d.di_mode)) {
+ error = xfs_inactive_symlink(ip, &tp);
+ if (error)
+ goto out_cancel;
+ } else if (truncate) {
+ ip->i_d.di_size = 0;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
+ if (error)
+ goto out_cancel;
+
+ ASSERT(ip->i_d.di_nextents == 0);
+ }
/*
* If there are attributes associated with the file then blow them away
@@ -1850,9 +1762,25 @@ xfs_inactive(
if (ip->i_d.di_anextents > 0) {
ASSERT(ip->i_d.di_forkoff != 0);
+ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ if (error)
+ goto out_unlock;
+
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
error = xfs_attr_inactive(ip);
if (error)
- return;
+ goto out;
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ goto out;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, 0);
}
if (ip->i_afp)
@@ -1863,14 +1791,52 @@ xfs_inactive(
/*
* Free the inode.
*/
- error = xfs_inactive_ifree(ip);
- if (error)
- return;
+ xfs_bmap_init(&free_list, &first_block);
+ error = xfs_ifree(tp, ip, &free_list);
+ if (error) {
+ /*
+ * If we fail to free the inode, shut down. The cancel
+ * might do that, we need to make sure. Otherwise the
+ * inode might be lost for a long time or forever.
+ */
+ if (!XFS_FORCED_SHUTDOWN(mp)) {
+ xfs_notice(mp, "%s: xfs_ifree returned error %d",
+ __func__, error);
+ xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+ }
+ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+ } else {
+ /*
+ * Credit the quota account(s). The inode is gone.
+ */
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
+
+ /*
+ * Just ignore errors at this point. There is nothing we can
+ * do except to try to keep going. Make sure it's not a silent
+ * error.
+ */
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
+ if (error)
+ xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
+ __func__, error);
+ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ if (error)
+ xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
+ __func__, error);
+ }
/*
* Release the dquots held by inode, if any.
*/
xfs_qm_dqdetach(ip);
+out_unlock:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out:
+ return VN_INACTIVE_CACHE;
+out_cancel:
+ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ goto out_unlock;
}
/*
@@ -2404,33 +2370,6 @@ xfs_iunpin_wait(
__xfs_iunpin_wait(ip);
}
-/*
- * Removing an inode from the namespace involves removing the directory entry
- * and dropping the link count on the inode. Removing the directory entry can
- * result in locking an AGF (directory blocks were freed) and removing a link
- * count can result in placing the inode on an unlinked list which results in
- * locking an AGI.
- *
- * The big problem here is that we have an ordering constraint on AGF and AGI
- * locking - inode allocation locks the AGI, then can allocate a new extent for
- * new inodes, locking the AGF after the AGI. Similarly, freeing the inode
- * removes the inode from the unlinked list, requiring that we lock the AGI
- * first, and then freeing the inode can result in an inode chunk being freed
- * and hence freeing disk space requiring that we lock an AGF.
- *
- * Hence the ordering that is imposed by other parts of the code is AGI before
- * AGF. This means we cannot remove the directory entry before we drop the inode
- * reference count and put it on the unlinked list as this results in a lock
- * order of AGF then AGI, and this can deadlock against inode allocation and
- * freeing. Therefore we must drop the link counts before we remove the
- * directory entry.
- *
- * This is still safe from a transactional point of view - it is not until we
- * get to xfs_bmap_finish() that we have the possibility of multiple
- * transactions in this operation. Hence as long as we remove the directory
- * entry and drop the link count in the first transaction of the remove
- * operation, there are no transactional constraints on the ordering here.
- */
int
xfs_remove(
xfs_inode_t *dp,
@@ -2500,7 +2439,6 @@ xfs_remove(
/*
* If we're removing a directory perform some additional validation.
*/
- cancel_flags |= XFS_TRANS_ABORT;
if (is_dir) {
ASSERT(ip->i_d.di_nlink >= 2);
if (ip->i_d.di_nlink != 2) {
@@ -2511,16 +2449,31 @@ xfs_remove(
error = XFS_ERROR(ENOTEMPTY);
goto out_trans_cancel;
}
+ }
- /* Drop the link from ip's "..". */
+ xfs_bmap_init(&free_list, &first_block);
+ error = xfs_dir_removename(tp, dp, name, ip->i_ino,
+ &first_block, &free_list, resblks);
+ if (error) {
+ ASSERT(error != ENOENT);
+ goto out_bmap_cancel;
+ }
+ xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+ if (is_dir) {
+ /*
+ * Drop the link from ip's "..".
+ */
error = xfs_droplink(tp, dp);
if (error)
- goto out_trans_cancel;
+ goto out_bmap_cancel;
- /* Drop the "." link from ip to self. */
+ /*
+ * Drop the "." link from ip to self.
+ */
error = xfs_droplink(tp, ip);
if (error)
- goto out_trans_cancel;
+ goto out_bmap_cancel;
} else {
/*
* When removing a non-directory we need to log the parent
@@ -2529,24 +2482,20 @@ xfs_remove(
*/
xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
}
- xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- /* Drop the link from dp to ip. */
+ /*
+ * Drop the link from dp to ip.
+ */
error = xfs_droplink(tp, ip);
if (error)
- goto out_trans_cancel;
+ goto out_bmap_cancel;
- /* Determine if this is the last link while the inode is locked */
+ /*
+ * Determine if this is the last link while
+ * we are in the transaction.
+ */
link_zero = (ip->i_d.di_nlink == 0);
- xfs_bmap_init(&free_list, &first_block);
- error = xfs_dir_removename(tp, dp, name, ip->i_ino,
- &first_block, &free_list, resblks);
- if (error) {
- ASSERT(error != ENOENT);
- goto out_bmap_cancel;
- }
-
/*
* If this is a synchronous mount, make sure that the
* remove transaction goes to disk before returning to
@@ -2576,6 +2525,7 @@ xfs_remove(
out_bmap_cancel:
xfs_bmap_cancel(&free_list);
+ cancel_flags |= XFS_TRANS_ABORT;
out_trans_cancel:
xfs_trans_cancel(tp, cancel_flags);
std_return:
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 9e6efccb..4a91358 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -24,6 +24,7 @@
/*
* Kernel only inode definitions
*/
+
struct xfs_dinode;
struct xfs_inode;
struct xfs_buf;
@@ -49,9 +50,6 @@ typedef struct xfs_inode {
xfs_ifork_t *i_afp; /* attribute fork pointer */
xfs_ifork_t i_df; /* data fork */
- /* operations vectors */
- const struct xfs_dir_ops *d_ops; /* directory ops vector */
-
/* Transaction and locking information. */
struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */
@@ -318,7 +316,7 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
int xfs_release(struct xfs_inode *ip);
-void xfs_inactive(struct xfs_inode *ip);
+int xfs_inactive(struct xfs_inode *ip);
int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
struct xfs_inode **ipp, struct xfs_name *ci_name);
int xfs_create(struct xfs_inode *dp, struct xfs_name *name,
diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c
index 4fc9f39..63382d3 100644
--- a/fs/xfs/xfs_inode_buf.c
+++ b/fs/xfs/xfs_inode_buf.c
@@ -17,20 +17,20 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_error.h"
#include "xfs_cksum.h"
#include "xfs_icache.h"
-#include "xfs_trans.h"
#include "xfs_ialloc.h"
-#include "xfs_dinode.h"
/*
* Check that none of the inode's in the buffer have a next
diff --git a/fs/xfs/xfs_inode_buf.h b/fs/xfs/xfs_inode_buf.h
index 9308c47..abba0ae 100644
--- a/fs/xfs/xfs_inode_buf.h
+++ b/fs/xfs/xfs_inode_buf.h
@@ -47,4 +47,7 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
#define xfs_inobp_check(mp, bp)
#endif /* DEBUG */
+extern const struct xfs_buf_ops xfs_inode_buf_ops;
+extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
+
#endif /* __XFS_INODE_BUF_H__ */
diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/xfs_inode_fork.c
index cfee14a..02f1083 100644
--- a/fs/xfs/xfs_inode_fork.c
+++ b/fs/xfs/xfs_inode_fork.c
@@ -20,21 +20,31 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
+#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
-#include "xfs_bmap_btree.h"
+#include "xfs_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
+#include "xfs_quota.h"
+#include "xfs_filestream.h"
+#include "xfs_cksum.h"
#include "xfs_trace.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
+#include "xfs_icache.h"
kmem_zone_t *xfs_ifork_zone;
@@ -1021,14 +1031,15 @@ xfs_iext_add(
* the next index needed in the indirection array.
*/
else {
- uint count = ext_diff;
+ int count = ext_diff;
while (count) {
erp = xfs_iext_irec_new(ifp, erp_idx);
- erp->er_extcount = min(count, XFS_LINEAR_EXTS);
- count -= erp->er_extcount;
- if (count)
+ erp->er_extcount = count;
+ count -= MIN(count, (int)XFS_LINEAR_EXTS);
+ if (count) {
erp_idx++;
+ }
}
}
}
@@ -1348,7 +1359,7 @@ xfs_iext_remove_indirect(
void
xfs_iext_realloc_direct(
xfs_ifork_t *ifp, /* inode fork pointer */
- int new_size) /* new size of extents after adding */
+ int new_size) /* new size of extents */
{
int rnew_size; /* real new size of extents */
@@ -1386,8 +1397,13 @@ xfs_iext_realloc_direct(
rnew_size - ifp->if_real_bytes);
}
}
- /* Switch from the inline extent buffer to a direct extent list */
+ /*
+ * Switch from the inline extent buffer to a direct
+ * extent list. Be sure to include the inline extent
+ * bytes in new_size.
+ */
else {
+ new_size += ifp->if_bytes;
if (!is_power_of_2(new_size)) {
rnew_size = roundup_pow_of_two(new_size);
}
diff --git a/fs/xfs/xfs_inode_fork.h b/fs/xfs/xfs_inode_fork.h
index eb329a1..28661a0 100644
--- a/fs/xfs/xfs_inode_fork.h
+++ b/fs/xfs/xfs_inode_fork.h
@@ -19,7 +19,6 @@
#define __XFS_INODE_FORK_H__
struct xfs_inode_log_item;
-struct xfs_dinode;
/*
* The following xfs_ext_irec_t struct introduces a second (top) level
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 7c0d391f..3780811 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -17,19 +17,19 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_trans_priv.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_trans_priv.h"
-#include "xfs_dinode.h"
kmem_zone_t *xfs_ili_zone; /* inode log item zone */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 4d61340..668e8f4 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -17,31 +17,32 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
+#include "xfs_alloc.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_ioctl.h"
-#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_itable.h"
#include "xfs_error.h"
#include "xfs_attr.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
+#include "xfs_buf_item.h"
#include "xfs_fsops.h"
#include "xfs_discard.h"
#include "xfs_quota.h"
+#include "xfs_inode_item.h"
#include "xfs_export.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_symlink.h"
-#include "xfs_dinode.h"
-#include "xfs_trans.h"
#include <linux/capability.h>
#include <linux/dcache.h>
@@ -640,11 +641,7 @@ xfs_ioc_space(
unsigned int cmd,
xfs_flock64_t *bf)
{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_trans *tp;
- struct iattr iattr;
- bool setprealloc = false;
- bool clrprealloc = false;
+ int attr_flags = 0;
int error;
/*
@@ -664,128 +661,19 @@ xfs_ioc_space(
if (!S_ISREG(inode->i_mode))
return -XFS_ERROR(EINVAL);
- error = mnt_want_write_file(filp);
- if (error)
- return error;
-
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
- switch (bf->l_whence) {
- case 0: /*SEEK_SET*/
- break;
- case 1: /*SEEK_CUR*/
- bf->l_start += filp->f_pos;
- break;
- case 2: /*SEEK_END*/
- bf->l_start += XFS_ISIZE(ip);
- break;
- default:
- error = XFS_ERROR(EINVAL);
- goto out_unlock;
- }
-
- /*
- * length of <= 0 for resv/unresv/zero is invalid. length for
- * alloc/free is ignored completely and we have no idea what userspace
- * might have set it to, so set it to zero to allow range
- * checks to pass.
- */
- switch (cmd) {
- case XFS_IOC_ZERO_RANGE:
- case XFS_IOC_RESVSP:
- case XFS_IOC_RESVSP64:
- case XFS_IOC_UNRESVSP:
- case XFS_IOC_UNRESVSP64:
- if (bf->l_len <= 0) {
- error = XFS_ERROR(EINVAL);
- goto out_unlock;
- }
- break;
- default:
- bf->l_len = 0;
- break;
- }
-
- if (bf->l_start < 0 ||
- bf->l_start > mp->m_super->s_maxbytes ||
- bf->l_start + bf->l_len < 0 ||
- bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) {
- error = XFS_ERROR(EINVAL);
- goto out_unlock;
- }
-
- switch (cmd) {
- case XFS_IOC_ZERO_RANGE:
- error = xfs_zero_file_space(ip, bf->l_start, bf->l_len);
- if (!error)
- setprealloc = true;
- break;
- case XFS_IOC_RESVSP:
- case XFS_IOC_RESVSP64:
- error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len,
- XFS_BMAPI_PREALLOC);
- if (!error)
- setprealloc = true;
- break;
- case XFS_IOC_UNRESVSP:
- case XFS_IOC_UNRESVSP64:
- error = xfs_free_file_space(ip, bf->l_start, bf->l_len);
- break;
- case XFS_IOC_ALLOCSP:
- case XFS_IOC_ALLOCSP64:
- case XFS_IOC_FREESP:
- case XFS_IOC_FREESP64:
- if (bf->l_start > XFS_ISIZE(ip)) {
- error = xfs_alloc_file_space(ip, XFS_ISIZE(ip),
- bf->l_start - XFS_ISIZE(ip), 0);
- if (error)
- goto out_unlock;
- }
+ if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+ attr_flags |= XFS_ATTR_NONBLOCK;
- iattr.ia_valid = ATTR_SIZE;
- iattr.ia_size = bf->l_start;
+ if (filp->f_flags & O_DSYNC)
+ attr_flags |= XFS_ATTR_SYNC;
- error = xfs_setattr_size(ip, &iattr);
- if (!error)
- clrprealloc = true;
- break;
- default:
- ASSERT(0);
- error = XFS_ERROR(EINVAL);
- }
+ if (ioflags & IO_INVIS)
+ attr_flags |= XFS_ATTR_DMI;
+ error = mnt_want_write_file(filp);
if (error)
- goto out_unlock;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- goto out_unlock;
- }
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-
- if (!(ioflags & IO_INVIS)) {
- ip->i_d.di_mode &= ~S_ISUID;
- if (ip->i_d.di_mode & S_IXGRP)
- ip->i_d.di_mode &= ~S_ISGID;
- xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- }
-
- if (setprealloc)
- ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
- else if (clrprealloc)
- ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
-
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- if (filp->f_flags & O_DSYNC)
- xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
-
-out_unlock:
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ return error;
+ error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
mnt_drop_write_file(filp);
return -error;
}
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index e8fb123..f671f7e 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -22,13 +22,14 @@
#include <asm/uaccess.h>
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
#include "xfs_vnode.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_itable.h"
#include "xfs_error.h"
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 22d1cbe..8d4d49b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -17,28 +17,34 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
+#include "xfs_inode_item.h"
#include "xfs_btree.h"
-#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
+#include "xfs_rtalloc.h"
#include "xfs_error.h"
-#include "xfs_trans.h"
+#include "xfs_itable.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
-#include "xfs_quota.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
-#include "xfs_dinode.h"
#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \
@@ -104,7 +110,7 @@ xfs_alert_fsblock_zero(
xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
"Access to block zero in inode %llu "
"start_block: %llx start_off: %llx "
- "blkcnt: %llx extent-state: %x",
+ "blkcnt: %llx extent-state: %x\n",
(unsigned long long)ip->i_ino,
(unsigned long long)imap->br_startblock,
(unsigned long long)imap->br_startoff,
@@ -649,6 +655,7 @@ int
xfs_iomap_write_allocate(
xfs_inode_t *ip,
xfs_off_t offset,
+ size_t count,
xfs_bmbt_irec_t *imap)
{
xfs_mount_t *mp = ip->i_mount;
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 411fbb8..8061576 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -21,12 +21,12 @@
struct xfs_inode;
struct xfs_bmbt_irec;
-int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
+extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
struct xfs_bmbt_irec *, int);
-int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t,
+extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t,
struct xfs_bmbt_irec *);
-int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t,
+extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
struct xfs_bmbt_irec *);
-int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t);
+extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t);
#endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 27e0e54..2b8952d 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -17,28 +17,32 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_acl.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "xfs_acl.h"
-#include "xfs_quota.h"
+#include "xfs_rtalloc.h"
#include "xfs_error.h"
+#include "xfs_itable.h"
#include "xfs_attr.h"
-#include "xfs_trans.h"
+#include "xfs_buf_item.h"
+#include "xfs_inode_item.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_symlink.h"
#include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2_priv.h"
-#include "xfs_dinode.h"
#include <linux/capability.h>
#include <linux/xattr.h>
@@ -705,7 +709,8 @@ out_dqrele:
int
xfs_setattr_size(
struct xfs_inode *ip,
- struct iattr *iattr)
+ struct iattr *iattr,
+ int flags)
{
struct xfs_mount *mp = ip->i_mount;
struct inode *inode = VFS_I(ip);
@@ -728,11 +733,15 @@ xfs_setattr_size(
if (error)
return XFS_ERROR(error);
- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(S_ISREG(ip->i_d.di_mode));
ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
+ if (!(flags & XFS_ATTR_NOLOCK)) {
+ lock_flags |= XFS_IOLOCK_EXCL;
+ xfs_ilock(ip, lock_flags);
+ }
+
oldsize = inode->i_size;
newsize = iattr->ia_size;
@@ -741,11 +750,12 @@ xfs_setattr_size(
*/
if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) {
if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
- return 0;
+ goto out_unlock;
/*
* Use the regular setattr path to update the timestamps.
*/
+ xfs_iunlock(ip, lock_flags);
iattr->ia_valid &= ~ATTR_SIZE;
return xfs_setattr_nonsize(ip, iattr, 0);
}
@@ -755,7 +765,7 @@ xfs_setattr_size(
*/
error = xfs_qm_dqattach(ip, 0);
if (error)
- return error;
+ goto out_unlock;
/*
* Now we can make the changes. Before we join the inode to the
@@ -773,7 +783,7 @@ xfs_setattr_size(
*/
error = xfs_zero_eof(ip, newsize, oldsize);
if (error)
- return error;
+ goto out_unlock;
}
/*
@@ -792,7 +802,7 @@ xfs_setattr_size(
error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
ip->i_d.di_size, newsize);
if (error)
- return error;
+ goto out_unlock;
}
/*
@@ -802,7 +812,7 @@ xfs_setattr_size(
error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
if (error)
- return error;
+ goto out_unlock;
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
@@ -906,21 +916,12 @@ out_trans_cancel:
STATIC int
xfs_vn_setattr(
- struct dentry *dentry,
- struct iattr *iattr)
+ struct dentry *dentry,
+ struct iattr *iattr)
{
- struct xfs_inode *ip = XFS_I(dentry->d_inode);
- int error;
-
- if (iattr->ia_valid & ATTR_SIZE) {
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
- error = xfs_setattr_size(ip, iattr);
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- } else {
- error = xfs_setattr_nonsize(ip, iattr, 0);
- }
-
- return -error;
+ if (iattr->ia_valid & ATTR_SIZE)
+ return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
+ return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
}
STATIC int
@@ -1168,7 +1169,6 @@ xfs_setup_inode(
struct xfs_inode *ip)
{
struct inode *inode = &ip->i_vnode;
- gfp_t gfp_mask;
inode->i_ino = ip->i_ino;
inode->i_state = I_NEW;
@@ -1204,7 +1204,6 @@ xfs_setup_inode(
inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
xfs_diflags_to_iflags(inode, ip);
- ip->d_ops = ip->i_mount->m_nondir_inode_ops;
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
inode->i_op = &xfs_inode_operations;
@@ -1217,7 +1216,6 @@ xfs_setup_inode(
else
inode->i_op = &xfs_dir_inode_operations;
inode->i_fop = &xfs_dir_file_operations;
- ip->d_ops = ip->i_mount->m_dir_inode_ops;
break;
case S_IFLNK:
inode->i_op = &xfs_symlink_inode_operations;
@@ -1231,14 +1229,6 @@ xfs_setup_inode(
}
/*
- * Ensure all page cache allocations are done from GFP_NOFS context to
- * prevent direct reclaim recursion back into the filesystem and blowing
- * stacks or deadlocking.
- */
- gfp_mask = mapping_gfp_mask(inode->i_mapping);
- mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS)));
-
- /*
* If there is no attribute fork no ACL can exist on this inode,
* and it can't have any file capabilities attached to it either.
*/
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
index d2c5057..d81fb41 100644
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -30,10 +30,14 @@ extern void xfs_setup_inode(struct xfs_inode *);
/*
* Internal setattr interfaces.
*/
-#define XFS_ATTR_NOACL 0x01 /* Don't call xfs_acl_chmod */
+#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
+#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if op would block */
+#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
+#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */
+#define XFS_ATTR_SYNC 0x10 /* synchronous operation required */
extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap,
int flags);
-extern int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap);
+extern int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags);
#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index c237ad1..084b3e1 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -17,23 +17,24 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
#include "xfs_inum.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_btree.h"
#include "xfs_ialloc.h"
-#include "xfs_ialloc_btree.h"
#include "xfs_itable.h"
#include "xfs_error.h"
+#include "xfs_btree.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
-#include "xfs_dinode.h"
STATIC int
xfs_internal_inum(
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 8497a00..a2dea108 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -17,19 +17,21 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_error.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_log.h"
#include "xfs_log_priv.h"
+#include "xfs_buf_item.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
#include "xfs_log_recover.h"
+#include "xfs_trans_priv.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_trace.h"
#include "xfs_fsops.h"
@@ -998,34 +1000,27 @@ xfs_log_space_wake(
}
/*
- * Determine if we have a transaction that has gone to disk that needs to be
- * covered. To begin the transition to the idle state firstly the log needs to
- * be idle. That means the CIL, the AIL and the iclogs needs to be empty before
- * we start attempting to cover the log.
+ * Determine if we have a transaction that has gone to disk
+ * that needs to be covered. To begin the transition to the idle state
+ * firstly the log needs to be idle (no AIL and nothing in the iclogs).
+ * If we are then in a state where covering is needed, the caller is informed
+ * that dummy transactions are required to move the log into the idle state.
*
- * Only if we are then in a state where covering is needed, the caller is
- * informed that dummy transactions are required to move the log into the idle
- * state.
- *
- * If there are any items in the AIl or CIL, then we do not want to attempt to
- * cover the log as we may be in a situation where there isn't log space
- * available to run a dummy transaction and this can lead to deadlocks when the
- * tail of the log is pinned by an item that is modified in the CIL. Hence
- * there's no point in running a dummy transaction at this point because we
- * can't start trying to idle the log until both the CIL and AIL are empty.
+ * Because this is called as part of the sync process, we should also indicate
+ * that dummy transactions should be issued in anything but the covered or
+ * idle states. This ensures that the log tail is accurately reflected in
+ * the log at the end of the sync, hence if a crash occurs avoids replay
+ * of transactions where the metadata is already on disk.
*/
int
xfs_log_need_covered(xfs_mount_t *mp)
{
- struct xlog *log = mp->m_log;
int needed = 0;
+ struct xlog *log = mp->m_log;
if (!xfs_fs_writable(mp))
return 0;
- if (!xlog_cil_empty(log))
- return 0;
-
spin_lock(&log->l_icloglock);
switch (log->l_covered_state) {
case XLOG_STATE_COVER_DONE:
@@ -1034,17 +1029,14 @@ xfs_log_need_covered(xfs_mount_t *mp)
break;
case XLOG_STATE_COVER_NEED:
case XLOG_STATE_COVER_NEED2:
- if (xfs_ail_min_lsn(log->l_ailp))
- break;
- if (!xlog_iclogs_empty(log))
- break;
-
- needed = 1;
- if (log->l_covered_state == XLOG_STATE_COVER_NEED)
- log->l_covered_state = XLOG_STATE_COVER_DONE;
- else
- log->l_covered_state = XLOG_STATE_COVER_DONE2;
- break;
+ if (!xfs_ail_min_lsn(log->l_ailp) &&
+ xlog_iclogs_empty(log)) {
+ if (log->l_covered_state == XLOG_STATE_COVER_NEED)
+ log->l_covered_state = XLOG_STATE_COVER_DONE;
+ else
+ log->l_covered_state = XLOG_STATE_COVER_DONE2;
+ }
+ /* FALLTHRU */
default:
needed = 1;
break;
@@ -1076,7 +1068,6 @@ xlog_assign_tail_lsn_locked(
tail_lsn = lip->li_lsn;
else
tail_lsn = atomic64_read(&log->l_last_sync_lsn);
- trace_xfs_log_assign_tail_lsn(log, tail_lsn);
atomic64_set(&log->l_tail_lsn, tail_lsn);
return tail_lsn;
}
@@ -1988,7 +1979,7 @@ xlog_print_tic_res(
for (i = 0; i < ticket->t_res_num; i++) {
uint r_type = ticket->t_res_arr[i].r_type;
- xfs_warn(mp, "region[%u]: %s - %u bytes", i,
+ xfs_warn(mp, "region[%u]: %s - %u bytes\n", i,
((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?
"bad-rtype" : res_type_str[r_type-1]),
ticket->t_res_arr[i].r_len);
@@ -3711,9 +3702,11 @@ xlog_verify_iclog(
/* check validity of iclog pointers */
spin_lock(&log->l_icloglock);
icptr = log->l_iclog;
- for (i = 0; i < log->l_iclog_bufs; i++, icptr = icptr->ic_next)
- ASSERT(icptr);
-
+ for (i=0; i < log->l_iclog_bufs; i++) {
+ if (icptr == NULL)
+ xfs_emerg(log->l_mp, "%s: invalid ptr", __func__);
+ icptr = icptr->ic_next;
+ }
if (icptr != log->l_iclog)
xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__);
spin_unlock(&log->l_icloglock);
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index e148719..1c45848 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -18,6 +18,8 @@
#ifndef __XFS_LOG_H__
#define __XFS_LOG_H__
+#include "xfs_log_format.h"
+
struct xfs_log_vec {
struct xfs_log_vec *lv_next; /* next lv in build list */
int lv_niovecs; /* number of iovecs in lv */
@@ -80,7 +82,11 @@ struct xlog_ticket;
struct xfs_log_item;
struct xfs_item_ops;
struct xfs_trans;
-struct xfs_log_callback;
+
+void xfs_log_item_init(struct xfs_mount *mp,
+ struct xfs_log_item *item,
+ int type,
+ const struct xfs_item_ops *ops);
xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
struct xlog_ticket *ticket,
@@ -108,7 +114,7 @@ xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp);
void xfs_log_space_wake(struct xfs_mount *mp);
int xfs_log_notify(struct xfs_mount *mp,
struct xlog_in_core *iclog,
- struct xfs_log_callback *callback_entry);
+ xfs_log_callback_t *callback_entry);
int xfs_log_release_iclog(struct xfs_mount *mp,
struct xlog_in_core *iclog);
int xfs_log_reserve(struct xfs_mount *mp,
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 5eb51fc..cfe9797 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -17,9 +17,11 @@
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_log_format.h"
-#include "xfs_shared.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_log_priv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
@@ -27,10 +29,6 @@
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_discard.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_log.h"
-#include "xfs_log_priv.h"
/*
* Allocate a new ticket. Failing to get a new ticket makes it really hard to
@@ -713,20 +711,6 @@ xlog_cil_push_foreground(
xlog_cil_push(log);
}
-bool
-xlog_cil_empty(
- struct xlog *log)
-{
- struct xfs_cil *cil = log->l_cilp;
- bool empty = false;
-
- spin_lock(&cil->xc_push_lock);
- if (list_empty(&cil->xc_cil))
- empty = true;
- spin_unlock(&cil->xc_push_lock);
- return empty;
-}
-
/*
* Commit a transaction with the given vector to the Committed Item List.
*
diff --git a/fs/xfs/xfs_log_format.h b/fs/xfs/xfs_log_format.h
index f0969c7..ca7e28a 100644
--- a/fs/xfs/xfs_log_format.h
+++ b/fs/xfs/xfs_log_format.h
@@ -234,6 +234,178 @@ typedef struct xfs_trans_header {
{ XFS_LI_ICREATE, "XFS_LI_ICREATE" }
/*
+ * Transaction types. Used to distinguish types of buffers.
+ */
+#define XFS_TRANS_SETATTR_NOT_SIZE 1
+#define XFS_TRANS_SETATTR_SIZE 2
+#define XFS_TRANS_INACTIVE 3
+#define XFS_TRANS_CREATE 4
+#define XFS_TRANS_CREATE_TRUNC 5
+#define XFS_TRANS_TRUNCATE_FILE 6
+#define XFS_TRANS_REMOVE 7
+#define XFS_TRANS_LINK 8
+#define XFS_TRANS_RENAME 9
+#define XFS_TRANS_MKDIR 10
+#define XFS_TRANS_RMDIR 11
+#define XFS_TRANS_SYMLINK 12
+#define XFS_TRANS_SET_DMATTRS 13
+#define XFS_TRANS_GROWFS 14
+#define XFS_TRANS_STRAT_WRITE 15
+#define XFS_TRANS_DIOSTRAT 16
+/* 17 was XFS_TRANS_WRITE_SYNC */
+#define XFS_TRANS_WRITEID 18
+#define XFS_TRANS_ADDAFORK 19
+#define XFS_TRANS_ATTRINVAL 20
+#define XFS_TRANS_ATRUNCATE 21
+#define XFS_TRANS_ATTR_SET 22
+#define XFS_TRANS_ATTR_RM 23
+#define XFS_TRANS_ATTR_FLAG 24
+#define XFS_TRANS_CLEAR_AGI_BUCKET 25
+#define XFS_TRANS_QM_SBCHANGE 26
+/*
+ * Dummy entries since we use the transaction type to index into the
+ * trans_type[] in xlog_recover_print_trans_head()
+ */
+#define XFS_TRANS_DUMMY1 27
+#define XFS_TRANS_DUMMY2 28
+#define XFS_TRANS_QM_QUOTAOFF 29
+#define XFS_TRANS_QM_DQALLOC 30
+#define XFS_TRANS_QM_SETQLIM 31
+#define XFS_TRANS_QM_DQCLUSTER 32
+#define XFS_TRANS_QM_QINOCREATE 33
+#define XFS_TRANS_QM_QUOTAOFF_END 34
+#define XFS_TRANS_SB_UNIT 35
+#define XFS_TRANS_FSYNC_TS 36
+#define XFS_TRANS_GROWFSRT_ALLOC 37
+#define XFS_TRANS_GROWFSRT_ZERO 38
+#define XFS_TRANS_GROWFSRT_FREE 39
+#define XFS_TRANS_SWAPEXT 40
+#define XFS_TRANS_SB_COUNT 41
+#define XFS_TRANS_CHECKPOINT 42
+#define XFS_TRANS_ICREATE 43
+#define XFS_TRANS_TYPE_MAX 43
+/* new transaction types need to be reflected in xfs_logprint(8) */
+
+#define XFS_TRANS_TYPES \
+ { XFS_TRANS_SETATTR_NOT_SIZE, "SETATTR_NOT_SIZE" }, \
+ { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \
+ { XFS_TRANS_INACTIVE, "INACTIVE" }, \
+ { XFS_TRANS_CREATE, "CREATE" }, \
+ { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \
+ { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \
+ { XFS_TRANS_REMOVE, "REMOVE" }, \
+ { XFS_TRANS_LINK, "LINK" }, \
+ { XFS_TRANS_RENAME, "RENAME" }, \
+ { XFS_TRANS_MKDIR, "MKDIR" }, \
+ { XFS_TRANS_RMDIR, "RMDIR" }, \
+ { XFS_TRANS_SYMLINK, "SYMLINK" }, \
+ { XFS_TRANS_SET_DMATTRS, "SET_DMATTRS" }, \
+ { XFS_TRANS_GROWFS, "GROWFS" }, \
+ { XFS_TRANS_STRAT_WRITE, "STRAT_WRITE" }, \
+ { XFS_TRANS_DIOSTRAT, "DIOSTRAT" }, \
+ { XFS_TRANS_WRITEID, "WRITEID" }, \
+ { XFS_TRANS_ADDAFORK, "ADDAFORK" }, \
+ { XFS_TRANS_ATTRINVAL, "ATTRINVAL" }, \
+ { XFS_TRANS_ATRUNCATE, "ATRUNCATE" }, \
+ { XFS_TRANS_ATTR_SET, "ATTR_SET" }, \
+ { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \
+ { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \
+ { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \
+ { XFS_TRANS_QM_SBCHANGE, "QM_SBCHANGE" }, \
+ { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \
+ { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \
+ { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \
+ { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \
+ { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \
+ { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \
+ { XFS_TRANS_SB_UNIT, "SB_UNIT" }, \
+ { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \
+ { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \
+ { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \
+ { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \
+ { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \
+ { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \
+ { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \
+ { XFS_TRANS_DUMMY1, "DUMMY1" }, \
+ { XFS_TRANS_DUMMY2, "DUMMY2" }, \
+ { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" }
+
+/*
+ * This structure is used to track log items associated with
+ * a transaction. It points to the log item and keeps some
+ * flags to track the state of the log item. It also tracks
+ * the amount of space needed to log the item it describes
+ * once we get to commit processing (see xfs_trans_commit()).
+ */
+struct xfs_log_item_desc {
+ struct xfs_log_item *lid_item;
+ struct list_head lid_trans;
+ unsigned char lid_flags;
+};
+
+#define XFS_LID_DIRTY 0x1
+
+/*
+ * Values for t_flags.
+ */
+#define XFS_TRANS_DIRTY 0x01 /* something needs to be logged */
+#define XFS_TRANS_SB_DIRTY 0x02 /* superblock is modified */
+#define XFS_TRANS_PERM_LOG_RES 0x04 /* xact took a permanent log res */
+#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */
+#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */
+#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */
+#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer
+ count in superblock */
+
+/*
+ * Values for call flags parameter.
+ */
+#define XFS_TRANS_RELEASE_LOG_RES 0x4
+#define XFS_TRANS_ABORT 0x8
+
+/*
+ * Field values for xfs_trans_mod_sb.
+ */
+#define XFS_TRANS_SB_ICOUNT 0x00000001
+#define XFS_TRANS_SB_IFREE 0x00000002
+#define XFS_TRANS_SB_FDBLOCKS 0x00000004
+#define XFS_TRANS_SB_RES_FDBLOCKS 0x00000008
+#define XFS_TRANS_SB_FREXTENTS 0x00000010
+#define XFS_TRANS_SB_RES_FREXTENTS 0x00000020
+#define XFS_TRANS_SB_DBLOCKS 0x00000040
+#define XFS_TRANS_SB_AGCOUNT 0x00000080
+#define XFS_TRANS_SB_IMAXPCT 0x00000100
+#define XFS_TRANS_SB_REXTSIZE 0x00000200
+#define XFS_TRANS_SB_RBMBLOCKS 0x00000400
+#define XFS_TRANS_SB_RBLOCKS 0x00000800
+#define XFS_TRANS_SB_REXTENTS 0x00001000
+#define XFS_TRANS_SB_REXTSLOG 0x00002000
+
+/*
+ * Here we centralize the specification of XFS meta-data buffer
+ * reference count values. This determine how hard the buffer
+ * cache tries to hold onto the buffer.
+ */
+#define XFS_AGF_REF 4
+#define XFS_AGI_REF 4
+#define XFS_AGFL_REF 3
+#define XFS_INO_BTREE_REF 3
+#define XFS_ALLOC_BTREE_REF 2
+#define XFS_BMAP_BTREE_REF 2
+#define XFS_DIR_BTREE_REF 2
+#define XFS_INO_REF 2
+#define XFS_ATTR_BTREE_REF 1
+#define XFS_DQUOT_REF 1
+
+/*
+ * Flags for xfs_trans_ichgtime().
+ */
+#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
+#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
+#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */
+
+
+/*
* Inode Log Item Format definitions.
*
* This is the structure used to lay out an inode log item in the
@@ -625,6 +797,7 @@ typedef struct xfs_qoff_logformat {
char qf_pad[12]; /* padding for future */
} xfs_qoff_logformat_t;
+
/*
* Disk quotas status in m_qflags, and also sb_qflags. 16 bits.
*/
@@ -676,4 +849,8 @@ struct xfs_icreate_log {
__be32 icl_gen; /* inode generation number to use */
};
+int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes);
+int xfs_log_calc_minimum_size(struct xfs_mount *);
+
+
#endif /* __XFS_LOG_FORMAT_H__ */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 9bc403a..136654b 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -22,7 +22,6 @@ struct xfs_buf;
struct xlog;
struct xlog_ticket;
struct xfs_mount;
-struct xfs_log_callback;
/*
* Flags for log structure
@@ -228,8 +227,8 @@ typedef struct xlog_in_core {
/* Callback structures need their own cacheline */
spinlock_t ic_callback_lock ____cacheline_aligned_in_smp;
- struct xfs_log_callback *ic_callback;
- struct xfs_log_callback **ic_callback_tail;
+ xfs_log_callback_t *ic_callback;
+ xfs_log_callback_t **ic_callback_tail;
/* reference counts need their own cacheline */
atomic_t ic_refcnt ____cacheline_aligned_in_smp;
@@ -255,7 +254,7 @@ struct xfs_cil_ctx {
int space_used; /* aggregate size of regions */
struct list_head busy_extents; /* busy extents in chkpt */
struct xfs_log_vec *lv_chain; /* logvecs being pushed */
- struct xfs_log_callback log_cb; /* completion callback hook. */
+ xfs_log_callback_t log_cb; /* completion callback hook. */
struct list_head committing; /* ctx committing list */
};
@@ -515,10 +514,12 @@ xlog_assign_grant_head(atomic64_t *head, int cycle, int space)
/*
* Committed Item List interfaces
*/
-int xlog_cil_init(struct xlog *log);
-void xlog_cil_init_post_recovery(struct xlog *log);
-void xlog_cil_destroy(struct xlog *log);
-bool xlog_cil_empty(struct xlog *log);
+int
+xlog_cil_init(struct xlog *log);
+void
+xlog_cil_init_post_recovery(struct xlog *log);
+void
+xlog_cil_destroy(struct xlog *log);
/*
* CIL force routines
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b6b669d..3979749 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -17,34 +17,42 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
#include "xfs_inum.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
+#include "xfs_error.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
-#include "xfs_log.h"
+#include "xfs_inode_item.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
#include "xfs_log_priv.h"
+#include "xfs_buf_item.h"
#include "xfs_log_recover.h"
-#include "xfs_inode_item.h"
#include "xfs_extfree_item.h"
#include "xfs_trans_priv.h"
-#include "xfs_alloc.h"
-#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_error.h"
+#include "xfs_icreate_item.h"
+
+/* Need all the magic numbers and buffer ops structures from these headers */
+#include "xfs_symlink.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_attr_remote.h"
#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1)
@@ -297,9 +305,9 @@ xlog_header_check_dump(
xfs_mount_t *mp,
xlog_rec_header_t *head)
{
- xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d",
+ xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d\n",
__func__, &mp->m_sb.sb_uuid, XLOG_FMT);
- xfs_debug(mp, " log : uuid = %pU, fmt = %d",
+ xfs_debug(mp, " log : uuid = %pU, fmt = %d\n",
&head->h_fs_uuid, be32_to_cpu(head->h_fmt));
}
#else
@@ -2354,7 +2362,7 @@ xlog_recover_do_reg_buffer(
item->ri_buf[i].i_len, __func__);
goto next;
}
- error = xfs_dqcheck(mp, item->ri_buf[i].i_addr,
+ error = xfs_qm_dqcheck(mp, item->ri_buf[i].i_addr,
-1, 0, XFS_QMOPT_DOWARN,
"dquot_buf_recover");
if (error)
@@ -2386,6 +2394,133 @@ xlog_recover_do_reg_buffer(
}
/*
+ * Do some primitive error checking on ondisk dquot data structures.
+ */
+int
+xfs_qm_dqcheck(
+ struct xfs_mount *mp, /* passed to the alert/notice calls; m_sb is read on repair */
+ xfs_disk_dquot_t *ddq, /* on-disk dquot to check, rewritten in place on repair */
+ xfs_dqid_t id, /* expected dquot ID, or -1 to skip the ID comparison */
+ uint type, /* stored in d_flags on repair; used only with XFS_QMOPT_DQREPAIR */
+ uint flags, /* tested for XFS_QMOPT_DOWARN and XFS_QMOPT_DQREPAIR */
+ char *str) /* caller-supplied tag printed at the start of each alert */
+{
+ xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; /* same pointer, typed as a dquot block for the repair path */
+ int errs = 0; /* number of failed checks; this is the return value */
+
+ /*
+ * We can encounter an uninitialized dquot buffer for 2 reasons:
+ * 1. If we crash while deleting the quotainode(s), and those blks got
+ * used for user data. This is because we take the path of regular
+ * file deletion; however, the size field of quotainodes is never
+ * updated, so all the tricks that we play in itruncate_finish
+ * don't quite matter.
+ *
+ * 2. We don't replay the quota buffers when there's a quotaoff logitem.
+ * But the allocation will be replayed so we'll end up with an
+ * uninitialized quota block.
+ *
+ * This is all fine; things are still consistent, and we haven't lost
+ * any quota information. Just don't complain about bad dquot blks.
+ */
+ if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) {
+ if (flags & XFS_QMOPT_DOWARN)
+ xfs_alert(mp,
+ "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
+ str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC);
+ errs++;
+ }
+ if (ddq->d_version != XFS_DQUOT_VERSION) {
+ if (flags & XFS_QMOPT_DOWARN)
+ xfs_alert(mp,
+ "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
+ str, id, ddq->d_version, XFS_DQUOT_VERSION);
+ errs++;
+ }
+
+ if (ddq->d_flags != XFS_DQ_USER && /* d_flags must equal exactly one of the three types */
+ ddq->d_flags != XFS_DQ_PROJ &&
+ ddq->d_flags != XFS_DQ_GROUP) {
+ if (flags & XFS_QMOPT_DOWARN)
+ xfs_alert(mp,
+ "%s : XFS dquot ID 0x%x, unknown flags 0x%x",
+ str, id, ddq->d_flags);
+ errs++;
+ }
+
+ if (id != -1 && id != be32_to_cpu(ddq->d_id)) { /* id == -1: caller has no expected ID */
+ if (flags & XFS_QMOPT_DOWARN)
+ xfs_alert(mp,
+ "%s : ondisk-dquot 0x%p, ID mismatch: "
+ "0x%x expected, found id 0x%x",
+ str, ddq, id, be32_to_cpu(ddq->d_id));
+ errs++;
+ }
+
+ if (!errs && ddq->d_id) { /* timer checks run only if nothing failed so far and d_id is non-zero */
+ if (ddq->d_blk_softlimit &&
+ be64_to_cpu(ddq->d_bcount) >
+ be64_to_cpu(ddq->d_blk_softlimit)) {
+ if (!ddq->d_btimer) { /* count exceeds the soft limit but the timer field is zero */
+ if (flags & XFS_QMOPT_DOWARN)
+ xfs_alert(mp,
+ "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED",
+ str, (int)be32_to_cpu(ddq->d_id), ddq);
+ errs++;
+ }
+ }
+ if (ddq->d_ino_softlimit &&
+ be64_to_cpu(ddq->d_icount) >
+ be64_to_cpu(ddq->d_ino_softlimit)) {
+ if (!ddq->d_itimer) { /* same check for the inode count and its timer */
+ if (flags & XFS_QMOPT_DOWARN)
+ xfs_alert(mp,
+ "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED",
+ str, (int)be32_to_cpu(ddq->d_id), ddq);
+ errs++;
+ }
+ }
+ if (ddq->d_rtb_softlimit &&
+ be64_to_cpu(ddq->d_rtbcount) >
+ be64_to_cpu(ddq->d_rtb_softlimit)) {
+ if (!ddq->d_rtbtimer) { /* same check for d_rtbcount and its timer */
+ if (flags & XFS_QMOPT_DOWARN)
+ xfs_alert(mp,
+ "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED",
+ str, (int)be32_to_cpu(ddq->d_id), ddq);
+ errs++;
+ }
+ }
+ }
+
+ if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) /* no failures, or repair not requested */
+ return errs;
+
+ if (flags & XFS_QMOPT_DOWARN)
+ xfs_notice(mp, "Re-initializing dquot ID 0x%x", id);
+
+ /*
+ * Typically, a repair is only requested by quotacheck.
+ */
+ ASSERT(id != -1);
+ ASSERT(flags & XFS_QMOPT_DQREPAIR);
+ memset(d, 0, sizeof(xfs_dqblk_t)); /* zero the whole block; only the four fields below are set again */
+
+ d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
+ d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
+ d->dd_diskdq.d_flags = type;
+ d->dd_diskdq.d_id = cpu_to_be32(id);
+
+ if (xfs_sb_version_hascrc(&mp->m_sb)) { /* uuid and checksum are written only when this test passes */
+ uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
+ xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
+ XFS_DQUOT_CRC_OFF);
+ }
+
+ return errs; /* errs is not reset by the repair, so this is non-zero */
+}
+
+/*
* Perform a dquot buffer recovery.
* Simple algorithm: if we have found a QUOTAOFF log item of the same type
* (ie. USR or GRP), then just toss this buffer away; don't recover it.
@@ -2990,7 +3125,7 @@ xlog_recover_dquot_pass2(
*/
dq_f = item->ri_buf[0].i_addr;
ASSERT(dq_f);
- error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
+ error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
"xlog_recover_dquot_pass2 (log copy)");
if (error)
return XFS_ERROR(EIO);
@@ -3010,7 +3145,7 @@ xlog_recover_dquot_pass2(
* was among a chunk of dquots created earlier, and we did some
* minimal initialization then.
*/
- error = xfs_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
+ error = xfs_qm_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
"xlog_recover_dquot_pass2");
if (error) {
xfs_buf_relse(bp);
@@ -3942,7 +4077,7 @@ xlog_unpack_data_crc(
if (crc != rhead->h_crc) {
if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
xfs_alert(log->l_mp,
- "log record CRC mismatch: found 0x%x, expected 0x%x.",
+ "log record CRC mismatch: found 0x%x, expected 0x%x.\n",
le32_to_cpu(rhead->h_crc),
le32_to_cpu(crc));
xfs_hex_dump(dp, 32);
diff --git a/fs/xfs/xfs_log_rlimit.c b/fs/xfs/xfs_log_rlimit.c
index 2af1a0a..bbcec0b 100644
--- a/fs/xfs/xfs_log_rlimit.c
+++ b/fs/xfs/xfs_log_rlimit.c
@@ -17,19 +17,16 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_ag.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_trans_space.h"
+#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_da_btree.h"
#include "xfs_attr_leaf.h"
-#include "xfs_bmap_btree.h"
/*
* Calculate the maximum length in bytes that would be required for a local
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index 63ca2f0..9163dc1 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -17,8 +17,9 @@
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 02df7b4..5dcc680 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -17,31 +17,35 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_inode.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_log.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_fsops.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
-#include "xfs_dinode.h"
+#include "xfs_cksum.h"
+#include "xfs_buf_item.h"
#ifdef HAVE_PERCPU_SB
@@ -719,22 +723,8 @@ xfs_mountfs(
* Set the inode cluster size.
* This may still be overridden by the file system
* block size if it is larger than the chosen cluster size.
- *
- * For v5 filesystems, scale the cluster size with the inode size to
- * keep a constant ratio of inode per cluster buffer, but only if mkfs
- * has set the inode alignment value appropriately for larger cluster
- * sizes.
*/
mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
- int new_size = mp->m_inode_cluster_size;
-
- new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
- if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
- mp->m_inode_cluster_size = new_size;
- xfs_info(mp, "Using inode cluster size of %d bytes",
- mp->m_inode_cluster_size);
- }
/*
* Set inode alignment fields
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a466c5e..1fa0584 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -26,7 +26,6 @@ struct xfs_mru_cache;
struct xfs_nameops;
struct xfs_ail;
struct xfs_quotainfo;
-struct xfs_dir_ops;
#ifdef HAVE_PERCPU_SB
@@ -112,7 +111,7 @@ typedef struct xfs_mount {
__uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
__uint8_t m_agno_log; /* log #ag's */
__uint8_t m_agino_log; /* #bits for agino in inum */
- uint m_inode_cluster_size;/* min inode buf size */
+ __uint16_t m_inode_cluster_size;/* min inode buf size */
uint m_blockmask; /* sb_blocksize-1 */
uint m_blockwsize; /* sb_blocksize in words */
uint m_blockwmask; /* blockwsize-1 */
@@ -149,8 +148,6 @@ typedef struct xfs_mount {
int m_dir_magicpct; /* 37% of the dir blocksize */
__uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
- const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */
- const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
int m_dirblksize; /* directory block sz--bytes */
int m_dirblkfsbs; /* directory block sz--fsbs */
xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 14a4996..3e6c2e6 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -17,28 +17,31 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_itable.h"
-#include "xfs_quota.h"
+#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_trans.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
#include "xfs_qm.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_cksum.h"
-#include "xfs_dinode.h"
/*
* The global quota manager. There is only one of these for the entire
@@ -661,6 +664,20 @@ xfs_qm_dqdetach(
}
}
+int
+xfs_qm_calc_dquots_per_chunk(
+ struct xfs_mount *mp,
+ unsigned int nbblks) /* basic block units */
+{
+ unsigned int ndquots;
+
+ ASSERT(nbblks > 0);
+ ndquots = BBTOB(nbblks);
+ do_div(ndquots, sizeof(xfs_dqblk_t));
+
+ return ndquots;
+}
+
struct xfs_qm_isolate {
struct list_head buffers;
struct list_head dispose;
@@ -841,7 +858,7 @@ xfs_qm_init_quotainfo(
/* Precalc some constants */
qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
- qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(mp,
+ qinf->qi_dqperchunk = xfs_qm_calc_dquots_per_chunk(mp,
qinf->qi_dqchunklen);
mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
@@ -1075,10 +1092,10 @@ xfs_qm_reset_dqcounts(
/*
* Do a sanity check, and if needed, repair the dqblk. Don't
* output any warnings because it's perfectly possible to
- * find uninitialised dquot blks. See comment in xfs_dqcheck.
+ * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
*/
- xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
- "xfs_quotacheck");
+ (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
+ "xfs_quotacheck");
ddq->d_bcount = 0;
ddq->d_icount = 0;
ddq->d_rtbcount = 0;
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index a788b66..2b602df 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -103,6 +103,8 @@ xfs_dq_to_quota_inode(struct xfs_dquot *dqp)
return NULL;
}
+extern int xfs_qm_calc_dquots_per_chunk(struct xfs_mount *mp,
+ unsigned int nbblks);
extern void xfs_trans_mod_dquot(struct xfs_trans *,
struct xfs_dquot *, uint, long);
extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *,
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index e9be63a..3af50cc 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -18,15 +18,21 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
+#include "xfs_alloc.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
#include "xfs_error.h"
-#include "xfs_trans.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
#include "xfs_qm.h"
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 437c919..8174aad 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -20,18 +20,24 @@
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
#include "xfs_error.h"
-#include "xfs_quota.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
#include "xfs_qm.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
@@ -281,7 +287,7 @@ xfs_qm_scall_trunc_qfiles(
int error = 0, error2 = 0;
if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
- xfs_debug(mp, "%s: flags=%x m_qflags=%x",
+ xfs_debug(mp, "%s: flags=%x m_qflags=%x\n",
__func__, flags, mp->m_qflags);
return XFS_ERROR(EINVAL);
}
@@ -319,7 +325,7 @@ xfs_qm_scall_quotaon(
sbflags = 0;
if (flags == 0) {
- xfs_debug(mp, "%s: zero flags, m_qflags=%x",
+ xfs_debug(mp, "%s: zero flags, m_qflags=%x\n",
__func__, mp->m_qflags);
return XFS_ERROR(EINVAL);
}
@@ -342,7 +348,7 @@ xfs_qm_scall_quotaon(
(mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&
(flags & XFS_PQUOTA_ENFD))) {
xfs_debug(mp,
- "%s: Can't enforce without acct, flags=%x sbflags=%x",
+ "%s: Can't enforce without acct, flags=%x sbflags=%x\n",
__func__, flags, mp->m_sb.sb_qflags);
return XFS_ERROR(EINVAL);
}
@@ -642,7 +648,7 @@ xfs_qm_scall_setqlim(
q->qi_bsoftlimit = soft;
}
} else {
- xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft);
+ xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft);
}
hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
@@ -658,7 +664,7 @@ xfs_qm_scall_setqlim(
q->qi_rtbsoftlimit = soft;
}
} else {
- xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft);
+ xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
}
hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
@@ -675,7 +681,7 @@ xfs_qm_scall_setqlim(
q->qi_isoftlimit = soft;
}
} else {
- xfs_debug(mp, "ihard %Ld < isoft %Ld", hard, soft);
+ xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft);
}
/*
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 5376dd4..e7d84d2 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -150,6 +150,10 @@ static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, pd, nb, ni, \
f | XFS_QMOPT_RES_REGBLKS)
+extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *,
+ xfs_dqid_t, uint, uint, char *);
extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
+extern const struct xfs_buf_ops xfs_dquot_buf_ops;
+
#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/xfs_quota_defs.h b/fs/xfs/xfs_quota_defs.h
index b3b2b10..e6b0d6e 100644
--- a/fs/xfs/xfs_quota_defs.h
+++ b/fs/xfs/xfs_quota_defs.h
@@ -154,8 +154,4 @@ typedef __uint16_t xfs_qwarncnt_t;
(XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA)
#define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
-extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq,
- xfs_dqid_t id, uint type, uint flags, char *str);
-extern int xfs_calc_dquots_per_chunk(struct xfs_mount *mp, unsigned int nbblks);
-
#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index af33caf..1326d81 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -17,14 +17,15 @@
*/
#include "xfs.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
+#include "xfs_log.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_inode.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
#include "xfs_qm.h"
#include <linux/quota.h>
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a6a76b2..6f9e63c 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -17,260 +17,172 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
+#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
+#include "xfs_rtalloc.h"
+#include "xfs_fsops.h"
#include "xfs_error.h"
-#include "xfs_trans.h"
+#include "xfs_inode_item.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_buf.h"
#include "xfs_icache.h"
-#include "xfs_dinode.h"
-#include "xfs_rtalloc.h"
/*
- * Read and return the summary information for a given extent size,
- * bitmap block combination.
- * Keeps track of a current summary block, so we don't keep reading
- * it from the buffer cache.
+ * Prototypes for internal functions.
*/
-STATIC int /* error */
-xfs_rtget_summary(
- xfs_mount_t *mp, /* file system mount structure */
- xfs_trans_t *tp, /* transaction pointer */
- int log, /* log2 of extent size */
- xfs_rtblock_t bbno, /* bitmap block number */
- xfs_buf_t **rbpp, /* in/out: summary block buffer */
- xfs_fsblock_t *rsb, /* in/out: summary block number */
- xfs_suminfo_t *sum) /* out: summary info for this block */
-{
- xfs_buf_t *bp; /* buffer for summary block */
- int error; /* error value */
- xfs_fsblock_t sb; /* summary fsblock */
- int so; /* index into the summary file */
- xfs_suminfo_t *sp; /* pointer to returned data */
- /*
- * Compute entry number in the summary file.
- */
- so = XFS_SUMOFFS(mp, log, bbno);
- /*
- * Compute the block number in the summary file.
- */
- sb = XFS_SUMOFFSTOBLOCK(mp, so);
- /*
- * If we have an old buffer, and the block number matches, use that.
- */
- if (rbpp && *rbpp && *rsb == sb)
- bp = *rbpp;
- /*
- * Otherwise we have to get the buffer.
- */
- else {
- /*
- * If there was an old one, get rid of it first.
- */
- if (rbpp && *rbpp)
- xfs_trans_brelse(tp, *rbpp);
- error = xfs_rtbuf_get(mp, tp, sb, 1, &bp);
- if (error) {
- return error;
- }
- /*
- * Remember this buffer and block for the next call.
- */
- if (rbpp) {
- *rbpp = bp;
- *rsb = sb;
- }
- }
- /*
- * Point to the summary information & copy it out.
- */
- sp = XFS_SUMPTR(mp, bp, so);
- *sum = *sp;
- /*
- * Drop the buffer if we're not asked to remember it.
- */
- if (!rbpp)
- xfs_trans_brelse(tp, bp);
- return 0;
-}
+STATIC int xfs_rtallocate_range(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t,
+ xfs_extlen_t, xfs_buf_t **, xfs_fsblock_t *);
+STATIC int xfs_rtany_summary(xfs_mount_t *, xfs_trans_t *, int, int,
+ xfs_rtblock_t, xfs_buf_t **, xfs_fsblock_t *, int *);
+STATIC int xfs_rtcheck_range(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t,
+ xfs_extlen_t, int, xfs_rtblock_t *, int *);
+STATIC int xfs_rtfind_back(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t,
+ xfs_rtblock_t, xfs_rtblock_t *);
+STATIC int xfs_rtfind_forw(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t,
+ xfs_rtblock_t, xfs_rtblock_t *);
+STATIC int xfs_rtget_summary( xfs_mount_t *, xfs_trans_t *, int,
+ xfs_rtblock_t, xfs_buf_t **, xfs_fsblock_t *, xfs_suminfo_t *);
+STATIC int xfs_rtmodify_range(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t,
+ xfs_extlen_t, int);
+STATIC int xfs_rtmodify_summary(xfs_mount_t *, xfs_trans_t *, int,
+ xfs_rtblock_t, int, xfs_buf_t **, xfs_fsblock_t *);
/*
- * Return whether there are any free extents in the size range given
- * by low and high, for the bitmap block bbno.
+ * Internal functions.
+ */
+
+/*
+ * Allocate space to the bitmap or summary file, and zero it, for growfs.
*/
STATIC int /* error */
-xfs_rtany_summary(
- xfs_mount_t *mp, /* file system mount structure */
- xfs_trans_t *tp, /* transaction pointer */
- int low, /* low log2 extent size */
- int high, /* high log2 extent size */
- xfs_rtblock_t bbno, /* bitmap block number */
- xfs_buf_t **rbpp, /* in/out: summary block buffer */
- xfs_fsblock_t *rsb, /* in/out: summary block number */
- int *stat) /* out: any good extents here? */
+xfs_growfs_rt_alloc(
+ xfs_mount_t *mp, /* file system mount point */
+ xfs_extlen_t oblocks, /* old count of blocks */
+ xfs_extlen_t nblocks, /* new count of blocks */
+ xfs_inode_t *ip) /* inode (bitmap/summary) */
{
- int error; /* error value */
- int log; /* loop counter, log2 of ext. size */
- xfs_suminfo_t sum; /* summary data */
+ xfs_fileoff_t bno; /* block number in file */
+ xfs_buf_t *bp; /* temporary buffer for zeroing */
+ int committed; /* transaction committed flag */
+ xfs_daddr_t d; /* disk block address */
+ int error; /* error return value */
+ xfs_fsblock_t firstblock; /* first block allocated in xaction */
+ xfs_bmap_free_t flist; /* list of freed blocks */
+ xfs_fsblock_t fsbno; /* filesystem block for bno */
+ xfs_bmbt_irec_t map; /* block map output */
+ int nmap; /* number of block maps */
+ int resblks; /* space reservation */
/*
- * Loop over logs of extent sizes. Order is irrelevant.
+ * Allocate space to the file, as necessary.
*/
- for (log = low; log <= high; log++) {
+ while (oblocks < nblocks) {
+ int cancelflags = 0;
+ xfs_trans_t *tp;
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC);
+ resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks);
/*
- * Get one summary datum.
+ * Reserve space & log for one extent added to the file.
*/
- error = xfs_rtget_summary(mp, tp, log, bbno, rbpp, rsb, &sum);
- if (error) {
- return error;
- }
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata,
+ resblks, 0);
+ if (error)
+ goto error_cancel;
+ cancelflags = XFS_TRANS_RELEASE_LOG_RES;
/*
- * If there are any, return success.
+ * Lock the inode.
*/
- if (sum) {
- *stat = 1;
- return 0;
- }
- }
- /*
- * Found nothing, return failure.
- */
- *stat = 0;
- return 0;
-}
-
-
-/*
- * Copy and transform the summary file, given the old and new
- * parameters in the mount structures.
- */
-STATIC int /* error */
-xfs_rtcopy_summary(
- xfs_mount_t *omp, /* old file system mount point */
- xfs_mount_t *nmp, /* new file system mount point */
- xfs_trans_t *tp) /* transaction pointer */
-{
- xfs_rtblock_t bbno; /* bitmap block number */
- xfs_buf_t *bp; /* summary buffer */
- int error; /* error return value */
- int log; /* summary level number (log length) */
- xfs_suminfo_t sum; /* summary data */
- xfs_fsblock_t sumbno; /* summary block number */
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- bp = NULL;
- for (log = omp->m_rsumlevels - 1; log >= 0; log--) {
- for (bbno = omp->m_sb.sb_rbmblocks - 1;
- (xfs_srtblock_t)bbno >= 0;
- bbno--) {
- error = xfs_rtget_summary(omp, tp, log, bbno, &bp,
- &sumbno, &sum);
- if (error)
- return error;
- if (sum == 0)
- continue;
- error = xfs_rtmodify_summary(omp, tp, log, bbno, -sum,
- &bp, &sumbno);
+ xfs_bmap_init(&flist, &firstblock);
+ /*
+ * Allocate blocks to the bitmap file.
+ */
+ nmap = 1;
+ cancelflags |= XFS_TRANS_ABORT;
+ error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
+ XFS_BMAPI_METADATA, &firstblock,
+ resblks, &map, &nmap, &flist);
+ if (!error && nmap < 1)
+ error = XFS_ERROR(ENOSPC);
+ if (error)
+ goto error_cancel;
+ /*
+ * Free any blocks freed up in the transaction, then commit.
+ */
+ error = xfs_bmap_finish(&tp, &flist, &committed);
+ if (error)
+ goto error_cancel;
+ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ if (error)
+ goto error;
+ /*
+ * Now we need to clear the allocated blocks.
+ * Do this one block per transaction, to keep it simple.
+ */
+ cancelflags = 0;
+ for (bno = map.br_startoff, fsbno = map.br_startblock;
+ bno < map.br_startoff + map.br_blockcount;
+ bno++, fsbno++) {
+ tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ZERO);
+ /*
+ * Reserve log for one block zeroing.
+ */
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtzero,
+ 0, 0);
if (error)
- return error;
- error = xfs_rtmodify_summary(nmp, tp, log, bbno, sum,
- &bp, &sumbno);
+ goto error_cancel;
+ /*
+ * Lock the bitmap inode.
+ */
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ /*
+ * Get a buffer for the block.
+ */
+ d = XFS_FSB_TO_DADDR(mp, fsbno);
+ bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
+ mp->m_bsize, 0);
+ if (bp == NULL) {
+ error = XFS_ERROR(EIO);
+error_cancel:
+ xfs_trans_cancel(tp, cancelflags);
+ goto error;
+ }
+ memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
+ xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
+ /*
+ * Commit the transaction.
+ */
+ error = xfs_trans_commit(tp, 0);
if (error)
- return error;
- ASSERT(sum > 0);
+ goto error;
}
+ /*
+ * Go on to the next extent, if any.
+ */
+ oblocks = map.br_startoff + map.br_blockcount;
}
return 0;
-}
-/*
- * Mark an extent specified by start and len allocated.
- * Updates all the summary information as well as the bitmap.
- */
-STATIC int /* error */
-xfs_rtallocate_range(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t start, /* start block to allocate */
- xfs_extlen_t len, /* length to allocate */
- xfs_buf_t **rbpp, /* in/out: summary block buffer */
- xfs_fsblock_t *rsb) /* in/out: summary block number */
-{
- xfs_rtblock_t end; /* end of the allocated extent */
- int error; /* error value */
- xfs_rtblock_t postblock = 0; /* first block allocated > end */
- xfs_rtblock_t preblock = 0; /* first block allocated < start */
- end = start + len - 1;
- /*
- * Assume we're allocating out of the middle of a free extent.
- * We need to find the beginning and end of the extent so we can
- * properly update the summary.
- */
- error = xfs_rtfind_back(mp, tp, start, 0, &preblock);
- if (error) {
- return error;
- }
- /*
- * Find the next allocated block (end of free extent).
- */
- error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
- &postblock);
- if (error) {
- return error;
- }
- /*
- * Decrement the summary information corresponding to the entire
- * (old) free extent.
- */
- error = xfs_rtmodify_summary(mp, tp,
- XFS_RTBLOCKLOG(postblock + 1 - preblock),
- XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb);
- if (error) {
- return error;
- }
- /*
- * If there are blocks not being allocated at the front of the
- * old extent, add summary data for them to be free.
- */
- if (preblock < start) {
- error = xfs_rtmodify_summary(mp, tp,
- XFS_RTBLOCKLOG(start - preblock),
- XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb);
- if (error) {
- return error;
- }
- }
- /*
- * If there are blocks not being allocated at the end of the
- * old extent, add summary data for them to be free.
- */
- if (postblock > end) {
- error = xfs_rtmodify_summary(mp, tp,
- XFS_RTBLOCKLOG(postblock - end),
- XFS_BITTOBLOCK(mp, end + 1), 1, rbpp, rsb);
- if (error) {
- return error;
- }
- }
- /*
- * Modify the bitmap to mark this extent allocated.
- */
- error = xfs_rtmodify_range(mp, tp, start, len, 0);
+error:
return error;
}
@@ -809,126 +721,1112 @@ xfs_rtallocate_extent_size(
}
/*
- * Allocate space to the bitmap or summary file, and zero it, for growfs.
+ * Mark an extent specified by start and len allocated.
+ * Updates all the summary information as well as the bitmap.
*/
STATIC int /* error */
-xfs_growfs_rt_alloc(
+xfs_rtallocate_range(
xfs_mount_t *mp, /* file system mount point */
- xfs_extlen_t oblocks, /* old count of blocks */
- xfs_extlen_t nblocks, /* new count of blocks */
- xfs_inode_t *ip) /* inode (bitmap/summary) */
+ xfs_trans_t *tp, /* transaction pointer */
+ xfs_rtblock_t start, /* start block to allocate */
+ xfs_extlen_t len, /* length to allocate */
+ xfs_buf_t **rbpp, /* in/out: summary block buffer */
+ xfs_fsblock_t *rsb) /* in/out: summary block number */
{
- xfs_fileoff_t bno; /* block number in file */
- xfs_buf_t *bp; /* temporary buffer for zeroing */
- int committed; /* transaction committed flag */
- xfs_daddr_t d; /* disk block address */
- int error; /* error return value */
- xfs_fsblock_t firstblock; /* first block allocated in xaction */
- xfs_bmap_free_t flist; /* list of freed blocks */
- xfs_fsblock_t fsbno; /* filesystem block for bno */
- xfs_bmbt_irec_t map; /* block map output */
- int nmap; /* number of block maps */
- int resblks; /* space reservation */
+ xfs_rtblock_t end; /* end of the allocated extent */
+ int error; /* error value */
+ xfs_rtblock_t postblock = 0; /* first block allocated > end */
+ xfs_rtblock_t preblock = 0; /* first block allocated < start */
+ end = start + len - 1;
/*
- * Allocate space to the file, as necessary.
+ * Assume we're allocating out of the middle of a free extent.
+ * We need to find the beginning and end of the extent so we can
+ * properly update the summary.
*/
- while (oblocks < nblocks) {
- int cancelflags = 0;
- xfs_trans_t *tp;
+ error = xfs_rtfind_back(mp, tp, start, 0, &preblock);
+ if (error) {
+ return error;
+ }
+ /*
+ * Find the next allocated block (end of free extent).
+ */
+ error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
+ &postblock);
+ if (error) {
+ return error;
+ }
+ /*
+ * Decrement the summary information corresponding to the entire
+ * (old) free extent.
+ */
+ error = xfs_rtmodify_summary(mp, tp,
+ XFS_RTBLOCKLOG(postblock + 1 - preblock),
+ XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb);
+ if (error) {
+ return error;
+ }
+ /*
+ * If there are blocks not being allocated at the front of the
+ * old extent, add summary data for them to be free.
+ */
+ if (preblock < start) {
+ error = xfs_rtmodify_summary(mp, tp,
+ XFS_RTBLOCKLOG(start - preblock),
+ XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb);
+ if (error) {
+ return error;
+ }
+ }
+ /*
+ * If there are blocks not being allocated at the end of the
+ * old extent, add summary data for them to be free.
+ */
+ if (postblock > end) {
+ error = xfs_rtmodify_summary(mp, tp,
+ XFS_RTBLOCKLOG(postblock - end),
+ XFS_BITTOBLOCK(mp, end + 1), 1, rbpp, rsb);
+ if (error) {
+ return error;
+ }
+ }
+ /*
+ * Modify the bitmap to mark this extent allocated.
+ */
+ error = xfs_rtmodify_range(mp, tp, start, len, 0);
+ return error;
+}
- tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC);
- resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks);
+/*
+ * Return whether there are any free extents in the size range given
+ * by low and high, for the bitmap block bbno.
+ */
+STATIC int /* error */
+xfs_rtany_summary(
+ xfs_mount_t *mp, /* file system mount structure */
+ xfs_trans_t *tp, /* transaction pointer */
+ int low, /* low log2 extent size */
+ int high, /* high log2 extent size */
+ xfs_rtblock_t bbno, /* bitmap block number */
+ xfs_buf_t **rbpp, /* in/out: summary block buffer */
+ xfs_fsblock_t *rsb, /* in/out: summary block number */
+ int *stat) /* out: any good extents here? */
+{
+ int error; /* error value */
+ int log; /* loop counter, log2 of ext. size */
+ xfs_suminfo_t sum; /* summary data */
+
+ /*
+ * Loop over logs of extent sizes. Order is irrelevant.
+ */
+ for (log = low; log <= high; log++) {
/*
- * Reserve space & log for one extent added to the file.
+ * Get one summary datum.
*/
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata,
- resblks, 0);
- if (error)
- goto error_cancel;
- cancelflags = XFS_TRANS_RELEASE_LOG_RES;
+ error = xfs_rtget_summary(mp, tp, log, bbno, rbpp, rsb, &sum);
+ if (error) {
+ return error;
+ }
/*
- * Lock the inode.
+ * If there are any, return success.
*/
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ if (sum) {
+ *stat = 1;
+ return 0;
+ }
+ }
+ /*
+ * Found nothing, return failure.
+ */
+ *stat = 0;
+ return 0;
+}
- xfs_bmap_init(&flist, &firstblock);
+/*
+ * Get a buffer for the bitmap or summary file block specified.
+ * The buffer is returned read and locked.
+ */
+STATIC int /* error */
+xfs_rtbuf_get(
+ xfs_mount_t *mp, /* file system mount structure */
+ xfs_trans_t *tp, /* transaction pointer */
+ xfs_rtblock_t block, /* block number in bitmap or summary */
+ int issum, /* is summary not bitmap */
+ xfs_buf_t **bpp) /* output: buffer for the block */
+{
+ xfs_buf_t *bp; /* block buffer, result */
+ xfs_inode_t *ip; /* bitmap or summary inode */
+ xfs_bmbt_irec_t map;
+ int nmap = 1;
+ int error; /* error value */
+
+ ip = issum ? mp->m_rsumip : mp->m_rbmip;
+
+ error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK);
+ if (error)
+ return error;
+
+ ASSERT(map.br_startblock != NULLFSBLOCK);
+ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, map.br_startblock),
+ mp->m_bsize, 0, &bp, NULL);
+ if (error)
+ return error;
+ ASSERT(!xfs_buf_geterror(bp));
+ *bpp = bp;
+ return 0;
+}
+
+#ifdef DEBUG
+/*
+ * Check that the given extent (block range) is allocated already.
+ */
+STATIC int /* error */
+xfs_rtcheck_alloc_range(
+ xfs_mount_t *mp, /* file system mount point */
+ xfs_trans_t *tp, /* transaction pointer */
+ xfs_rtblock_t bno, /* starting block number of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat) /* out: 1 for allocated, 0 for not */
+{
+ xfs_rtblock_t new; /* dummy for xfs_rtcheck_range */
+
+ return xfs_rtcheck_range(mp, tp, bno, len, 0, &new, stat);
+}
+#endif
+
+/*
+ * Check that the given range is either all allocated (val = 0) or
+ * all free (val = 1).
+ */
+STATIC int /* error */
+xfs_rtcheck_range(
+ xfs_mount_t *mp, /* file system mount point */
+ xfs_trans_t *tp, /* transaction pointer */
+ xfs_rtblock_t start, /* starting block number of extent */
+ xfs_extlen_t len, /* length of extent */
+ int val, /* 1 for free, 0 for allocated */
+ xfs_rtblock_t *new, /* out: first block not matching */
+ int *stat) /* out: 1 for matches, 0 for not */
+{
+ xfs_rtword_t *b; /* current word in buffer */
+ int bit; /* bit number in the word */
+ xfs_rtblock_t block; /* bitmap block number */
+ xfs_buf_t *bp; /* buf for the block */
+ xfs_rtword_t *bufp; /* starting word in buffer */
+ int error; /* error value */
+ xfs_rtblock_t i; /* current bit number rel. to start */
+ xfs_rtblock_t lastbit; /* last useful bit in word */
+ xfs_rtword_t mask; /* mask of relevant bits for value */
+ xfs_rtword_t wdiff; /* difference from wanted value */
+ int word; /* word number in the buffer */
+
+ /*
+ * Compute starting bitmap block number
+ */
+ block = XFS_BITTOBLOCK(mp, start);
+ /*
+ * Read the bitmap block.
+ */
+ error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
+ if (error) {
+ return error;
+ }
+ bufp = bp->b_addr;
+ /*
+ * Compute the starting word's address, and starting bit.
+ */
+ word = XFS_BITTOWORD(mp, start);
+ b = &bufp[word];
+ bit = (int)(start & (XFS_NBWORD - 1));
+ /*
+ * 0 (allocated) => all zero's; 1 (free) => all one's.
+ */
+ val = -val;
+ /*
+ * If not starting on a word boundary, deal with the first
+ * (partial) word.
+ */
+ if (bit) {
/*
- * Allocate blocks to the bitmap file.
+ * Compute first bit not examined.
*/
- nmap = 1;
- cancelflags |= XFS_TRANS_ABORT;
- error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
- XFS_BMAPI_METADATA, &firstblock,
- resblks, &map, &nmap, &flist);
- if (!error && nmap < 1)
- error = XFS_ERROR(ENOSPC);
- if (error)
- goto error_cancel;
+ lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
/*
- * Free any blocks freed up in the transaction, then commit.
+ * Mask of relevant bits.
*/
- error = xfs_bmap_finish(&tp, &flist, &committed);
- if (error)
- goto error_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
- if (error)
- goto error;
+ mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
/*
- * Now we need to clear the allocated blocks.
- * Do this one block per transaction, to keep it simple.
+ * Compute difference between actual and desired value.
*/
- cancelflags = 0;
- for (bno = map.br_startoff, fsbno = map.br_startblock;
- bno < map.br_startoff + map.br_blockcount;
- bno++, fsbno++) {
- tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ZERO);
+ if ((wdiff = (*b ^ val) & mask)) {
/*
- * Reserve log for one block zeroing.
+ * Different, compute first wrong bit and return.
*/
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtzero,
- 0, 0);
- if (error)
- goto error_cancel;
+ xfs_trans_brelse(tp, bp);
+ i = XFS_RTLOBIT(wdiff) - bit;
+ *new = start + i;
+ *stat = 0;
+ return 0;
+ }
+ i = lastbit - bit;
+ /*
+ * Go on to next block if that's where the next word is
+ * and we need the next word.
+ */
+ if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
/*
- * Lock the bitmap inode.
+ * If done with this block, get the next one.
*/
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_brelse(tp, bp);
+ error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+ if (error) {
+ return error;
+ }
+ b = bufp = bp->b_addr;
+ word = 0;
+ } else {
/*
- * Get a buffer for the block.
+ * Go on to the next word in the buffer.
*/
- d = XFS_FSB_TO_DADDR(mp, fsbno);
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
- mp->m_bsize, 0);
- if (bp == NULL) {
- error = XFS_ERROR(EIO);
-error_cancel:
- xfs_trans_cancel(tp, cancelflags);
- goto error;
+ b++;
+ }
+ } else {
+ /*
+ * Starting on a word boundary, no partial word.
+ */
+ i = 0;
+ }
+ /*
+ * Loop over whole words in buffers. When we use up one buffer
+ * we move on to the next one.
+ */
+ while (len - i >= XFS_NBWORD) {
+ /*
+ * Compute difference between actual and desired value.
+ */
+ if ((wdiff = *b ^ val)) {
+ /*
+ * Different, compute first wrong bit and return.
+ */
+ xfs_trans_brelse(tp, bp);
+ i += XFS_RTLOBIT(wdiff);
+ *new = start + i;
+ *stat = 0;
+ return 0;
+ }
+ i += XFS_NBWORD;
+ /*
+ * Go on to next block if that's where the next word is
+ * and we need the next word.
+ */
+ if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+ /*
+ * If done with this block, get the next one.
+ */
+ xfs_trans_brelse(tp, bp);
+ error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+ if (error) {
+ return error;
}
- memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
- xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
+ b = bufp = bp->b_addr;
+ word = 0;
+ } else {
/*
- * Commit the transaction.
+ * Go on to the next word in the buffer.
*/
- error = xfs_trans_commit(tp, 0);
+ b++;
+ }
+ }
+ /*
+ * If not ending on a word boundary, deal with the last
+ * (partial) word.
+ */
+ if ((lastbit = len - i)) {
+ /*
+ * Mask of relevant bits.
+ */
+ mask = ((xfs_rtword_t)1 << lastbit) - 1;
+ /*
+ * Compute difference between actual and desired value.
+ */
+ if ((wdiff = (*b ^ val) & mask)) {
+ /*
+ * Different, compute first wrong bit and return.
+ */
+ xfs_trans_brelse(tp, bp);
+ i += XFS_RTLOBIT(wdiff);
+ *new = start + i;
+ *stat = 0;
+ return 0;
+ } else
+ i = len;
+ }
+ /*
+ * Successful, return.
+ */
+ xfs_trans_brelse(tp, bp);
+ *new = start + i;
+ *stat = 1;
+ return 0;
+}
+
+/*
+ * Copy and transform the summary file, given the old and new
+ * parameters in the mount structures.
+ */
+STATIC int /* error */
+xfs_rtcopy_summary(
+ xfs_mount_t *omp, /* old file system mount point */
+ xfs_mount_t *nmp, /* new file system mount point */
+ xfs_trans_t *tp) /* transaction pointer */
+{
+ xfs_rtblock_t bbno; /* bitmap block number */
+ xfs_buf_t *bp; /* summary buffer */
+ int error; /* error return value */
+ int log; /* summary level number (log length) */
+ xfs_suminfo_t sum; /* summary data */
+ xfs_fsblock_t sumbno; /* summary block number */
+
+ bp = NULL;
+ for (log = omp->m_rsumlevels - 1; log >= 0; log--) {
+ for (bbno = omp->m_sb.sb_rbmblocks - 1;
+ (xfs_srtblock_t)bbno >= 0;
+ bbno--) {
+ error = xfs_rtget_summary(omp, tp, log, bbno, &bp,
+ &sumbno, &sum);
if (error)
- goto error;
+ return error;
+ if (sum == 0)
+ continue;
+ error = xfs_rtmodify_summary(omp, tp, log, bbno, -sum,
+ &bp, &sumbno);
+ if (error)
+ return error;
+ error = xfs_rtmodify_summary(nmp, tp, log, bbno, sum,
+ &bp, &sumbno);
+ if (error)
+ return error;
+ ASSERT(sum > 0);
+ }
+ }
+ return 0;
+}
+
+/*
+ * Searching backward from start to limit, find the first block whose
+ * allocated/free state is different from start's.
+ */
+STATIC int /* error */
+xfs_rtfind_back(
+ xfs_mount_t *mp, /* file system mount point */
+ xfs_trans_t *tp, /* transaction pointer */
+ xfs_rtblock_t start, /* starting block to look at */
+ xfs_rtblock_t limit, /* last block to look at */
+ xfs_rtblock_t *rtblock) /* out: start block found */
+{
+ xfs_rtword_t *b; /* current word in buffer */
+ int bit; /* bit number in the word */
+ xfs_rtblock_t block; /* bitmap block number */
+ xfs_buf_t *bp; /* buf for the block */
+ xfs_rtword_t *bufp; /* starting word in buffer */
+ int error; /* error value */
+ xfs_rtblock_t firstbit; /* first useful bit in the word */
+ xfs_rtblock_t i; /* current bit number rel. to start */
+ xfs_rtblock_t len; /* length of inspected area */
+ xfs_rtword_t mask; /* mask of relevant bits for value */
+ xfs_rtword_t want; /* mask for "good" values */
+ xfs_rtword_t wdiff; /* difference from wanted value */
+ int word; /* word number in the buffer */
+
+ /*
+ * Compute and read in starting bitmap block for starting block.
+ */
+ block = XFS_BITTOBLOCK(mp, start);
+ error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
+ if (error) {
+ return error;
+ }
+ bufp = bp->b_addr;
+ /*
+ * Get the first word's index & point to it.
+ */
+ word = XFS_BITTOWORD(mp, start);
+ b = &bufp[word];
+ bit = (int)(start & (XFS_NBWORD - 1));
+ len = start - limit + 1;
+ /*
+ * Compute match value, based on the bit at start: if 1 (free)
+ * then all-ones, else all-zeroes.
+ */
+ want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
+ /*
+ * If the starting position is not word-aligned, deal with the
+ * partial word.
+ */
+ if (bit < XFS_NBWORD - 1) {
+ /*
+ * Calculate first (leftmost) bit number to look at,
+ * and mask for all the relevant bits in this word.
+ */
+ firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0);
+ mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) <<
+ firstbit;
+ /*
+ * Calculate the difference between the value there
+ * and what we're looking for.
+ */
+ if ((wdiff = (*b ^ want) & mask)) {
+ /*
+ * Different. Mark where we are and return.
+ */
+ xfs_trans_brelse(tp, bp);
+ i = bit - XFS_RTHIBIT(wdiff);
+ *rtblock = start - i + 1;
+ return 0;
}
+ i = bit - firstbit + 1;
/*
- * Go on to the next extent, if any.
+ * Go on to previous block if that's where the previous word is
+ * and we need the previous word.
*/
- oblocks = map.br_startoff + map.br_blockcount;
+ if (--word == -1 && i < len) {
+ /*
+ * If done with this block, get the previous one.
+ */
+ xfs_trans_brelse(tp, bp);
+ error = xfs_rtbuf_get(mp, tp, --block, 0, &bp);
+ if (error) {
+ return error;
+ }
+ bufp = bp->b_addr;
+ word = XFS_BLOCKWMASK(mp);
+ b = &bufp[word];
+ } else {
+ /*
+ * Go on to the previous word in the buffer.
+ */
+ b--;
+ }
+ } else {
+ /*
+ * Starting on a word boundary, no partial word.
+ */
+ i = 0;
+ }
+ /*
+ * Loop over whole words in buffers. When we use up one buffer
+ * we move on to the previous one.
+ */
+ while (len - i >= XFS_NBWORD) {
+ /*
+ * Compute difference between actual and desired value.
+ */
+ if ((wdiff = *b ^ want)) {
+ /*
+ * Different, mark where we are and return.
+ */
+ xfs_trans_brelse(tp, bp);
+ i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
+ *rtblock = start - i + 1;
+ return 0;
+ }
+ i += XFS_NBWORD;
+ /*
+ * Go on to previous block if that's where the previous word is
+ * and we need the previous word.
+ */
+ if (--word == -1 && i < len) {
+ /*
+ * If done with this block, get the previous one.
+ */
+ xfs_trans_brelse(tp, bp);
+ error = xfs_rtbuf_get(mp, tp, --block, 0, &bp);
+ if (error) {
+ return error;
+ }
+ bufp = bp->b_addr;
+ word = XFS_BLOCKWMASK(mp);
+ b = &bufp[word];
+ } else {
+ /*
+ * Go on to the previous word in the buffer.
+ */
+ b--;
+ }
}
+ /*
+ * If not ending on a word boundary, deal with the last
+ * (partial) word.
+ */
+ if (len - i) {
+ /*
+ * Calculate first (leftmost) bit number to look at,
+ * and mask for all the relevant bits in this word.
+ */
+ firstbit = XFS_NBWORD - (len - i);
+ mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit;
+ /*
+ * Compute difference between actual and desired value.
+ */
+ if ((wdiff = (*b ^ want) & mask)) {
+ /*
+ * Different, mark where we are and return.
+ */
+ xfs_trans_brelse(tp, bp);
+ i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
+ *rtblock = start - i + 1;
+ return 0;
+ } else
+ i = len;
+ }
+ /*
+ * No match, return that we scanned the whole area.
+ */
+ xfs_trans_brelse(tp, bp);
+ *rtblock = start - i + 1;
return 0;
+}
-error:
+/*
+ * Searching forward from start to limit, find the first block whose
+ * allocated/free state is different from start's.
+ *
+ * On a return of 0, *rtblock is set to start + i - 1, where i is the
+ * offset from start of the first bit that differs; i.e. the last block
+ * that is still in the same state as start. If no bit in the range
+ * differs, i ends up equal to len and *rtblock is limit.
+ * A non-zero return is the error from xfs_rtbuf_get. The bitmap buffer
+ * held at that point is released before every return of 0.
+ */
+STATIC int /* error */
+xfs_rtfind_forw(
+ xfs_mount_t *mp, /* file system mount point */
+ xfs_trans_t *tp, /* transaction pointer */
+ xfs_rtblock_t start, /* starting block to look at */
+ xfs_rtblock_t limit, /* last block to look at */
+ xfs_rtblock_t *rtblock) /* out: start block found */
+{
+ xfs_rtword_t *b; /* current word in buffer */
+ int bit; /* bit number in the word */
+ xfs_rtblock_t block; /* bitmap block number */
+ xfs_buf_t *bp; /* buf for the block */
+ xfs_rtword_t *bufp; /* starting word in buffer */
+ int error; /* error value */
+ xfs_rtblock_t i; /* current bit number rel. to start */
+ xfs_rtblock_t lastbit; /* last useful bit in the word */
+ xfs_rtblock_t len; /* length of inspected area */
+ xfs_rtword_t mask; /* mask of relevant bits for value */
+ xfs_rtword_t want; /* mask for "good" values */
+ xfs_rtword_t wdiff; /* difference from wanted value */
+ int word; /* word number in the buffer */
+
+ /*
+ * Compute and read in starting bitmap block for starting block.
+ */
+ block = XFS_BITTOBLOCK(mp, start);
+ error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
+ if (error) {
+ return error;
+ }
+ bufp = bp->b_addr;
+ /*
+ * Get the first word's index & point to it.
+ */
+ word = XFS_BITTOWORD(mp, start);
+ b = &bufp[word];
+ bit = (int)(start & (XFS_NBWORD - 1));
+ len = limit - start + 1;
+ /*
+ * Compute match value, based on the bit at start: if 1 (free)
+ * then all-ones, else all-zeroes.
+ */
+ want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
+ /*
+ * If the starting position is not word-aligned, deal with the
+ * partial word.
+ */
+ if (bit) {
+ /*
+ * Calculate last (rightmost) bit number to look at,
+ * and mask for all the relevant bits in this word.
+ */
+ lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
+ mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
+ /*
+ * Calculate the difference between the value there
+ * and what we're looking for.
+ */
+ if ((wdiff = (*b ^ want) & mask)) {
+ /*
+ * Different. Mark where we are and return.
+ */
+ xfs_trans_brelse(tp, bp);
+ i = XFS_RTLOBIT(wdiff) - bit;
+ *rtblock = start + i - 1;
+ return 0;
+ }
+ i = lastbit - bit;
+ /*
+ * Go on to next block if that's where the next word is
+ * and we need the next word.
+ */
+ if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+ /*
+ * If done with this block, get the next one.
+ */
+ xfs_trans_brelse(tp, bp);
+ error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+ if (error) {
+ return error;
+ }
+ b = bufp = bp->b_addr;
+ word = 0;
+ } else {
+ /*
+ * Go on to the next word in the buffer.
+ */
+ b++;
+ }
+ } else {
+ /*
+ * Starting on a word boundary, no partial word.
+ */
+ i = 0;
+ }
+ /*
+ * Loop over whole words in buffers. When we use up one buffer
+ * we move on to the next one.
+ */
+ while (len - i >= XFS_NBWORD) {
+ /*
+ * Compute difference between actual and desired value.
+ */
+ if ((wdiff = *b ^ want)) {
+ /*
+ * Different, mark where we are and return.
+ */
+ xfs_trans_brelse(tp, bp);
+ i += XFS_RTLOBIT(wdiff);
+ *rtblock = start + i - 1;
+ return 0;
+ }
+ i += XFS_NBWORD;
+ /*
+ * Go on to next block if that's where the next word is
+ * and we need the next word.
+ */
+ if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+ /*
+ * If done with this block, get the next one.
+ */
+ xfs_trans_brelse(tp, bp);
+ error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+ if (error) {
+ return error;
+ }
+ b = bufp = bp->b_addr;
+ word = 0;
+ } else {
+ /*
+ * Go on to the next word in the buffer.
+ */
+ b++;
+ }
+ }
+ /*
+ * If not ending on a word boundary, deal with the last
+ * (partial) word.
+ */
+ if ((lastbit = len - i)) {
+ /*
+ * Calculate mask for all the relevant bits in this word.
+ */
+ mask = ((xfs_rtword_t)1 << lastbit) - 1;
+ /*
+ * Compute difference between actual and desired value.
+ */
+ if ((wdiff = (*b ^ want) & mask)) {
+ /*
+ * Different, mark where we are and return.
+ */
+ xfs_trans_brelse(tp, bp);
+ i += XFS_RTLOBIT(wdiff);
+ *rtblock = start + i - 1;
+ return 0;
+ } else
+ i = len;
+ }
+ /*
+ * No match, return that we scanned the whole area.
+ */
+ xfs_trans_brelse(tp, bp);
+ *rtblock = start + i - 1;
+ return 0;
+}
+
+/*
+ * Mark an extent specified by start and len freed.
+ * Updates all the summary information as well as the bitmap.
+ *
+ * The bitmap is changed first, then the surrounding run of same-state
+ * blocks is located with xfs_rtfind_back/xfs_rtfind_forw and the
+ * summary is adjusted through xfs_rtmodify_summary.
+ * Returns 0 or the first error reported by one of those helpers.
+ */
+STATIC int /* error */
+xfs_rtfree_range(
+ xfs_mount_t *mp, /* file system mount point */
+ xfs_trans_t *tp, /* transaction pointer */
+ xfs_rtblock_t start, /* starting block to free */
+ xfs_extlen_t len, /* length to free */
+ xfs_buf_t **rbpp, /* in/out: summary block buffer */
+ xfs_fsblock_t *rsb) /* in/out: summary block number */
+{
+ xfs_rtblock_t end; /* end of the freed extent */
+ int error; /* error value */
+ xfs_rtblock_t postblock; /* first block freed > end */
+ xfs_rtblock_t preblock; /* first block freed < start */
+
+ end = start + len - 1;
+ /*
+ * Modify the bitmap to mark this extent freed.
+ */
+ error = xfs_rtmodify_range(mp, tp, start, len, 1);
+ if (error) {
+ return error;
+ }
+ /*
+ * Assume we're freeing out of the middle of an allocated extent.
+ * We need to find the beginning and end of the extent so we can
+ * properly update the summary.
+ * The search runs after the bitmap update above, so start is
+ * already marked free; preblock is where the run of blocks in
+ * the same state as start begins, searching back as far as 0.
+ */
+ error = xfs_rtfind_back(mp, tp, start, 0, &preblock);
+ if (error) {
+ return error;
+ }
+ /*
+ * Find the last block after end that is in the same state as end,
+ * searching forward as far as the last realtime extent.
+ */
+ error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
+ &postblock);
+ if (error)
+ return error;
+ /*
+ * If the run extends in front of the range just freed
+ * (preblock < start), apply a delta of -1 to the summary entry
+ * for the size class of that leading piece (start - preblock),
+ * at the bitmap block containing preblock.
+ */
+ if (preblock < start) {
+ error = xfs_rtmodify_summary(mp, tp,
+ XFS_RTBLOCKLOG(start - preblock),
+ XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb);
+ if (error) {
+ return error;
+ }
+ }
+ /*
+ * If the run extends past the range just freed (postblock > end),
+ * apply a delta of -1 to the summary entry for the size class of
+ * that trailing piece (postblock - end), at the bitmap block
+ * containing end + 1.
+ */
+ if (postblock > end) {
+ error = xfs_rtmodify_summary(mp, tp,
+ XFS_RTBLOCKLOG(postblock - end),
+ XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb);
+ if (error) {
+ return error;
+ }
+ }
+ /*
+ * Increment the summary information corresponding to the entire
+ * (new) free extent, which spans preblock through postblock.
+ */
+ error = xfs_rtmodify_summary(mp, tp,
+ XFS_RTBLOCKLOG(postblock + 1 - preblock),
+ XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb);
return error;
}
/*
+ * Read and return the summary information for a given extent size,
+ * bitmap block combination.
+ * Keeps track of a current summary block, so we don't keep reading
+ * it from the buffer cache.
+ *
+ * rbpp may be NULL: the buffer is then fetched for this call only and
+ * released before returning. Otherwise *rbpp / *rsb are reused when
+ * they already describe the needed summary block, and are updated when
+ * a different block had to be read.
+ * Returns 0 with *sum filled in, or the error from xfs_rtbuf_get.
+ */
+STATIC int /* error */
+xfs_rtget_summary(
+ xfs_mount_t *mp, /* file system mount structure */
+ xfs_trans_t *tp, /* transaction pointer */
+ int log, /* log2 of extent size */
+ xfs_rtblock_t bbno, /* bitmap block number */
+ xfs_buf_t **rbpp, /* in/out: summary block buffer */
+ xfs_fsblock_t *rsb, /* in/out: summary block number */
+ xfs_suminfo_t *sum) /* out: summary info for this block */
+{
+ xfs_buf_t *bp; /* buffer for summary block */
+ int error; /* error value */
+ xfs_fsblock_t sb; /* summary fsblock */
+ int so; /* index into the summary file */
+ xfs_suminfo_t *sp; /* pointer to returned data */
+
+ /*
+ * Compute entry number in the summary file.
+ */
+ so = XFS_SUMOFFS(mp, log, bbno);
+ /*
+ * Compute the block number in the summary file.
+ */
+ sb = XFS_SUMOFFSTOBLOCK(mp, so);
+ /*
+ * If we have an old buffer, and the block number matches, use that.
+ */
+ if (rbpp && *rbpp && *rsb == sb)
+ bp = *rbpp;
+ /*
+ * Otherwise we have to get the buffer.
+ */
+ else {
+ /*
+ * If there was an old one, get rid of it first.
+ * NOTE(review): if xfs_rtbuf_get below fails, *rbpp still
+ * refers to the buffer released here -- confirm callers do
+ * not reuse it after an error.
+ */
+ if (rbpp && *rbpp)
+ xfs_trans_brelse(tp, *rbpp);
+ error = xfs_rtbuf_get(mp, tp, sb, 1, &bp);
+ if (error) {
+ return error;
+ }
+ /*
+ * Remember this buffer and block for the next call.
+ */
+ if (rbpp) {
+ *rbpp = bp;
+ *rsb = sb;
+ }
+ }
+ /*
+ * Point to the summary information & copy it out.
+ */
+ sp = XFS_SUMPTR(mp, bp, so);
+ *sum = *sp;
+ /*
+ * Drop the buffer if we're not asked to remember it.
+ */
+ if (!rbpp)
+ xfs_trans_brelse(tp, bp);
+ return 0;
+}
+
+/*
+ * Set the given range of bitmap bits to the given value.
+ * Do whatever I/O and logging is required.
+ *
+ * The range is handled in three parts: a leading partial word when
+ * start is not word-aligned, a run of whole words, and a trailing
+ * partial word. Each time the walk moves on to the next bitmap block
+ * the modified part of the current buffer is logged first.
+ * Returns 0, or the error from xfs_rtbuf_get.
+ */
+STATIC int /* error */
+xfs_rtmodify_range(
+ xfs_mount_t *mp, /* file system mount point */
+ xfs_trans_t *tp, /* transaction pointer */
+ xfs_rtblock_t start, /* starting block to modify */
+ xfs_extlen_t len, /* length of extent to modify */
+ int val) /* 1 for free, 0 for allocated */
+{
+ xfs_rtword_t *b; /* current word in buffer */
+ int bit; /* bit number in the word */
+ xfs_rtblock_t block; /* bitmap block number */
+ xfs_buf_t *bp; /* buf for the block */
+ xfs_rtword_t *bufp; /* starting word in buffer */
+ int error; /* error value */
+ xfs_rtword_t *first; /* first used word in the buffer */
+ int i; /* current bit number rel. to start */
+ int lastbit; /* last useful bit in word */
+ xfs_rtword_t mask; /* mask of relevant bits for value */
+ int word; /* word number in the buffer */
+
+ /*
+ * Compute starting bitmap block number.
+ */
+ block = XFS_BITTOBLOCK(mp, start);
+ /*
+ * Read the bitmap block, and point to its data.
+ */
+ error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
+ if (error) {
+ return error;
+ }
+ bufp = bp->b_addr;
+ /*
+ * Compute the starting word's address, and starting bit.
+ */
+ word = XFS_BITTOWORD(mp, start);
+ first = b = &bufp[word];
+ bit = (int)(start & (XFS_NBWORD - 1));
+ /*
+ * 0 (allocated) => all zeroes; 1 (free) => all ones.
+ */
+ val = -val;
+ /*
+ * If not starting on a word boundary, deal with the first
+ * (partial) word.
+ */
+ if (bit) {
+ /*
+ * Compute first bit not changed and mask of relevant bits.
+ */
+ lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
+ mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
+ /*
+ * Set/clear the active bits.
+ */
+ if (val)
+ *b |= mask;
+ else
+ *b &= ~mask;
+ i = lastbit - bit;
+ /*
+ * Go on to the next block if that's where the next word is
+ * and we need the next word.
+ */
+ if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+ /*
+ * Log the changed part of this block.
+ * Get the next one.
+ * b still points at the last word modified, so the
+ * last-byte offset passed is that word's first byte.
+ */
+ xfs_trans_log_buf(tp, bp,
+ (uint)((char *)first - (char *)bufp),
+ (uint)((char *)b - (char *)bufp));
+ error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+ if (error) {
+ return error;
+ }
+ first = b = bufp = bp->b_addr;
+ word = 0;
+ } else {
+ /*
+ * Go on to the next word in the buffer
+ */
+ b++;
+ }
+ } else {
+ /*
+ * Starting on a word boundary, no partial word.
+ */
+ i = 0;
+ }
+ /*
+ * Loop over whole words in buffers. When we use up one buffer
+ * we move on to the next one.
+ */
+ while (len - i >= XFS_NBWORD) {
+ /*
+ * Set the word value correctly.
+ */
+ *b = val;
+ i += XFS_NBWORD;
+ /*
+ * Go on to the next block if that's where the next word is
+ * and we need the next word.
+ */
+ if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+ /*
+ * Log the changed part of this block.
+ * Get the next one.
+ */
+ xfs_trans_log_buf(tp, bp,
+ (uint)((char *)first - (char *)bufp),
+ (uint)((char *)b - (char *)bufp));
+ error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+ if (error) {
+ return error;
+ }
+ first = b = bufp = bp->b_addr;
+ word = 0;
+ } else {
+ /*
+ * Go on to the next word in the buffer
+ */
+ b++;
+ }
+ }
+ /*
+ * If not ending on a word boundary, deal with the last
+ * (partial) word.
+ */
+ if ((lastbit = len - i)) {
+ /*
+ * Compute a mask of relevant bits.
+ * (bit is reset to 0 here but is not read again below.)
+ */
+ bit = 0;
+ mask = ((xfs_rtword_t)1 << lastbit) - 1;
+ /*
+ * Set/clear the active bits.
+ */
+ if (val)
+ *b |= mask;
+ else
+ *b &= ~mask;
+ b++;
+ }
+ /*
+ * Log any remaining changed bytes.
+ * Here b points one word past the last word modified, hence the
+ * - 1 to get the offset of the last changed byte.
+ */
+ if (b > first)
+ xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp),
+ (uint)((char *)b - (char *)bufp - 1));
+ return 0;
+}
+
+/*
+ * Read and modify the summary information for a given extent size,
+ * bitmap block combination.
+ * Keeps track of a current summary block, so we don't keep reading
+ * it from the buffer cache.
+ *
+ * delta is added to the summary entry in place and the bytes of that
+ * one entry are logged in the transaction.
+ * Returns 0, or the error from xfs_rtbuf_get.
+ */
+STATIC int /* error */
+xfs_rtmodify_summary(
+ xfs_mount_t *mp, /* file system mount point */
+ xfs_trans_t *tp, /* transaction pointer */
+ int log, /* log2 of extent size */
+ xfs_rtblock_t bbno, /* bitmap block number */
+ int delta, /* change to make to summary info */
+ xfs_buf_t **rbpp, /* in/out: summary block buffer */
+ xfs_fsblock_t *rsb) /* in/out: summary block number */
+{
+ xfs_buf_t *bp; /* buffer for the summary block */
+ int error; /* error value */
+ xfs_fsblock_t sb; /* summary fsblock */
+ int so; /* index into the summary file */
+ xfs_suminfo_t *sp; /* pointer to returned data */
+
+ /*
+ * Compute entry number in the summary file.
+ */
+ so = XFS_SUMOFFS(mp, log, bbno);
+ /*
+ * Compute the block number in the summary file.
+ */
+ sb = XFS_SUMOFFSTOBLOCK(mp, so);
+ /*
+ * If we have an old buffer, and the block number matches, use that.
+ */
+ if (rbpp && *rbpp && *rsb == sb)
+ bp = *rbpp;
+ /*
+ * Otherwise we have to get the buffer.
+ */
+ else {
+ /*
+ * If there was an old one, get rid of it first.
+ */
+ if (rbpp && *rbpp)
+ xfs_trans_brelse(tp, *rbpp);
+ error = xfs_rtbuf_get(mp, tp, sb, 1, &bp);
+ if (error) {
+ return error;
+ }
+ /*
+ * Remember this buffer and block for the next call.
+ */
+ if (rbpp) {
+ *rbpp = bp;
+ *rsb = sb;
+ }
+ }
+ /*
+ * Point to the summary information, modify and log it.
+ * The logged range runs from the first to the last byte of *sp.
+ */
+ sp = XFS_SUMPTR(mp, bp, so);
+ *sp += delta;
+ xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr),
+ (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1));
+ return 0;
+}
+
+/*
* Visible (exported) functions.
*/
@@ -1231,6 +2129,66 @@ xfs_rtallocate_extent(
}
/*
+ * Free an extent in the realtime subvolume. Length is expressed in
+ * realtime extents, as is the block number.
+ *
+ * The bitmap inode (mp->m_rbmip) is asserted to be locked with
+ * XFS_ILOCK_EXCL and to have a log item attached.
+ * Returns 0, or the error from the range check (DEBUG builds only) or
+ * from xfs_rtfree_range.
+ */
+int /* error */
+xfs_rtfree_extent(
+ xfs_trans_t *tp, /* transaction pointer */
+ xfs_rtblock_t bno, /* starting block number to free */
+ xfs_extlen_t len) /* length of extent freed */
+{
+ int error; /* error value */
+ xfs_mount_t *mp; /* file system mount structure */
+ xfs_fsblock_t sb; /* summary file block number */
+ xfs_buf_t *sumbp; /* summary file block buffer */
+
+ mp = tp->t_mountp;
+
+ ASSERT(mp->m_rbmip->i_itemp != NULL);
+ ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
+
+#ifdef DEBUG
+ /*
+ * Check to see that this whole range is currently allocated.
+ */
+ {
+ int stat; /* result from checking range */
+
+ error = xfs_rtcheck_alloc_range(mp, tp, bno, len, &stat);
+ if (error) {
+ return error;
+ }
+ ASSERT(stat);
+ }
+#endif
+ sumbp = NULL;
+ /*
+ * Free the range of realtime blocks.
+ */
+ error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb);
+ if (error) {
+ return error;
+ }
+ /*
+ * Mark more blocks free in the superblock.
+ */
+ xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len);
+ /*
+ * If we've now freed all the blocks, reset the file sequence
+ * number to 0.
+ * "All freed" means this transaction's free-extent delta plus the
+ * superblock free-extent count equals the total extent count.
+ * The reset sets XFS_DIFLAG_NEWRTBM on the bitmap inode if it is
+ * not already set, stores 0 through a 64-bit pointer at di_atime
+ * (presumably where the sequence number is kept -- confirm), and
+ * logs the inode core.
+ */
+ if (tp->t_frextents_delta + mp->m_sb.sb_frextents ==
+ mp->m_sb.sb_rextents) {
+ if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
+ mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
+ *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0;
+ xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
+ }
+ return 0;
+}
+
+/*
* Initialize realtime fields in the mount structure.
*/
int /* error */
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 752b63d..b2a1a24 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -95,30 +95,6 @@ xfs_growfs_rt(
struct xfs_mount *mp, /* file system mount structure */
xfs_growfs_rt_t *in); /* user supplied growfs struct */
-/*
- * From xfs_rtbitmap.c
- */
-int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t block, int issum, struct xfs_buf **bpp);
-int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_extlen_t len, int val,
- xfs_rtblock_t *new, int *stat);
-int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_rtblock_t limit,
- xfs_rtblock_t *rtblock);
-int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_rtblock_t limit,
- xfs_rtblock_t *rtblock);
-int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_extlen_t len, int val);
-int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
- xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp,
- xfs_fsblock_t *rsb);
-int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_extlen_t len,
- struct xfs_buf **rbpp, xfs_fsblock_t *rsb);
-
-
#else
# define xfs_rtallocate_extent(t,b,min,max,l,a,f,p,rb) (ENOSYS)
# define xfs_rtfree_extent(t,b,l) (ENOSYS)
diff --git a/fs/xfs/xfs_rtbitmap.c b/fs/xfs/xfs_rtbitmap.c
deleted file mode 100644
index b1f2fe8..0000000
--- a/fs/xfs/xfs_rtbitmap.c
+++ /dev/null
@@ -1,974 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
-#include "xfs_trans.h"
-#include "xfs_trans_space.h"
-#include "xfs_trace.h"
-#include "xfs_buf.h"
-#include "xfs_icache.h"
-#include "xfs_dinode.h"
-#include "xfs_rtalloc.h"
-
-
-/*
- * Realtime allocator bitmap functions shared with userspace.
- */
-
-/*
- * Get a buffer for the bitmap or summary file block specified.
- * The buffer is returned read and locked.
- */
-int
-xfs_rtbuf_get(
- xfs_mount_t *mp, /* file system mount structure */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t block, /* block number in bitmap or summary */
- int issum, /* is summary not bitmap */
- xfs_buf_t **bpp) /* output: buffer for the block */
-{
- xfs_buf_t *bp; /* block buffer, result */
- xfs_inode_t *ip; /* bitmap or summary inode */
- xfs_bmbt_irec_t map;
- int nmap = 1;
- int error; /* error value */
-
- ip = issum ? mp->m_rsumip : mp->m_rbmip;
-
- error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK);
- if (error)
- return error;
-
- ASSERT(map.br_startblock != NULLFSBLOCK);
- error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
- XFS_FSB_TO_DADDR(mp, map.br_startblock),
- mp->m_bsize, 0, &bp, NULL);
- if (error)
- return error;
- ASSERT(!xfs_buf_geterror(bp));
- *bpp = bp;
- return 0;
-}
-
-/*
- * Searching backward from start to limit, find the first block whose
- * allocated/free state is different from start's.
- */
-int
-xfs_rtfind_back(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t start, /* starting block to look at */
- xfs_rtblock_t limit, /* last block to look at */
- xfs_rtblock_t *rtblock) /* out: start block found */
-{
- xfs_rtword_t *b; /* current word in buffer */
- int bit; /* bit number in the word */
- xfs_rtblock_t block; /* bitmap block number */
- xfs_buf_t *bp; /* buf for the block */
- xfs_rtword_t *bufp; /* starting word in buffer */
- int error; /* error value */
- xfs_rtblock_t firstbit; /* first useful bit in the word */
- xfs_rtblock_t i; /* current bit number rel. to start */
- xfs_rtblock_t len; /* length of inspected area */
- xfs_rtword_t mask; /* mask of relevant bits for value */
- xfs_rtword_t want; /* mask for "good" values */
- xfs_rtword_t wdiff; /* difference from wanted value */
- int word; /* word number in the buffer */
-
- /*
- * Compute and read in starting bitmap block for starting block.
- */
- block = XFS_BITTOBLOCK(mp, start);
- error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
- if (error) {
- return error;
- }
- bufp = bp->b_addr;
- /*
- * Get the first word's index & point to it.
- */
- word = XFS_BITTOWORD(mp, start);
- b = &bufp[word];
- bit = (int)(start & (XFS_NBWORD - 1));
- len = start - limit + 1;
- /*
- * Compute match value, based on the bit at start: if 1 (free)
- * then all-ones, else all-zeroes.
- */
- want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
- /*
- * If the starting position is not word-aligned, deal with the
- * partial word.
- */
- if (bit < XFS_NBWORD - 1) {
- /*
- * Calculate first (leftmost) bit number to look at,
- * and mask for all the relevant bits in this word.
- */
- firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0);
- mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) <<
- firstbit;
- /*
- * Calculate the difference between the value there
- * and what we're looking for.
- */
- if ((wdiff = (*b ^ want) & mask)) {
- /*
- * Different. Mark where we are and return.
- */
- xfs_trans_brelse(tp, bp);
- i = bit - XFS_RTHIBIT(wdiff);
- *rtblock = start - i + 1;
- return 0;
- }
- i = bit - firstbit + 1;
- /*
- * Go on to previous block if that's where the previous word is
- * and we need the previous word.
- */
- if (--word == -1 && i < len) {
- /*
- * If done with this block, get the previous one.
- */
- xfs_trans_brelse(tp, bp);
- error = xfs_rtbuf_get(mp, tp, --block, 0, &bp);
- if (error) {
- return error;
- }
- bufp = bp->b_addr;
- word = XFS_BLOCKWMASK(mp);
- b = &bufp[word];
- } else {
- /*
- * Go on to the previous word in the buffer.
- */
- b--;
- }
- } else {
- /*
- * Starting on a word boundary, no partial word.
- */
- i = 0;
- }
- /*
- * Loop over whole words in buffers. When we use up one buffer
- * we move on to the previous one.
- */
- while (len - i >= XFS_NBWORD) {
- /*
- * Compute difference between actual and desired value.
- */
- if ((wdiff = *b ^ want)) {
- /*
- * Different, mark where we are and return.
- */
- xfs_trans_brelse(tp, bp);
- i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
- *rtblock = start - i + 1;
- return 0;
- }
- i += XFS_NBWORD;
- /*
- * Go on to previous block if that's where the previous word is
- * and we need the previous word.
- */
- if (--word == -1 && i < len) {
- /*
- * If done with this block, get the previous one.
- */
- xfs_trans_brelse(tp, bp);
- error = xfs_rtbuf_get(mp, tp, --block, 0, &bp);
- if (error) {
- return error;
- }
- bufp = bp->b_addr;
- word = XFS_BLOCKWMASK(mp);
- b = &bufp[word];
- } else {
- /*
- * Go on to the previous word in the buffer.
- */
- b--;
- }
- }
- /*
- * If not ending on a word boundary, deal with the last
- * (partial) word.
- */
- if (len - i) {
- /*
- * Calculate first (leftmost) bit number to look at,
- * and mask for all the relevant bits in this word.
- */
- firstbit = XFS_NBWORD - (len - i);
- mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit;
- /*
- * Compute difference between actual and desired value.
- */
- if ((wdiff = (*b ^ want) & mask)) {
- /*
- * Different, mark where we are and return.
- */
- xfs_trans_brelse(tp, bp);
- i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
- *rtblock = start - i + 1;
- return 0;
- } else
- i = len;
- }
- /*
- * No match, return that we scanned the whole area.
- */
- xfs_trans_brelse(tp, bp);
- *rtblock = start - i + 1;
- return 0;
-}
-
-/*
- * Searching forward from start to limit, find the first block whose
- * allocated/free state is different from start's.
- */
-int
-xfs_rtfind_forw(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t start, /* starting block to look at */
- xfs_rtblock_t limit, /* last block to look at */
- xfs_rtblock_t *rtblock) /* out: start block found */
-{
- xfs_rtword_t *b; /* current word in buffer */
- int bit; /* bit number in the word */
- xfs_rtblock_t block; /* bitmap block number */
- xfs_buf_t *bp; /* buf for the block */
- xfs_rtword_t *bufp; /* starting word in buffer */
- int error; /* error value */
- xfs_rtblock_t i; /* current bit number rel. to start */
- xfs_rtblock_t lastbit; /* last useful bit in the word */
- xfs_rtblock_t len; /* length of inspected area */
- xfs_rtword_t mask; /* mask of relevant bits for value */
- xfs_rtword_t want; /* mask for "good" values */
- xfs_rtword_t wdiff; /* difference from wanted value */
- int word; /* word number in the buffer */
-
- /*
- * Compute and read in starting bitmap block for starting block.
- */
- block = XFS_BITTOBLOCK(mp, start);
- error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
- if (error) {
- return error;
- }
- bufp = bp->b_addr;
- /*
- * Get the first word's index & point to it.
- */
- word = XFS_BITTOWORD(mp, start);
- b = &bufp[word];
- bit = (int)(start & (XFS_NBWORD - 1));
- len = limit - start + 1;
- /*
- * Compute match value, based on the bit at start: if 1 (free)
- * then all-ones, else all-zeroes.
- */
- want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
- /*
- * If the starting position is not word-aligned, deal with the
- * partial word.
- */
- if (bit) {
- /*
- * Calculate last (rightmost) bit number to look at,
- * and mask for all the relevant bits in this word.
- */
- lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
- mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
- /*
- * Calculate the difference between the value there
- * and what we're looking for.
- */
- if ((wdiff = (*b ^ want) & mask)) {
- /*
- * Different. Mark where we are and return.
- */
- xfs_trans_brelse(tp, bp);
- i = XFS_RTLOBIT(wdiff) - bit;
- *rtblock = start + i - 1;
- return 0;
- }
- i = lastbit - bit;
- /*
- * Go on to next block if that's where the next word is
- * and we need the next word.
- */
- if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
- /*
- * If done with this block, get the previous one.
- */
- xfs_trans_brelse(tp, bp);
- error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
- if (error) {
- return error;
- }
- b = bufp = bp->b_addr;
- word = 0;
- } else {
- /*
- * Go on to the previous word in the buffer.
- */
- b++;
- }
- } else {
- /*
- * Starting on a word boundary, no partial word.
- */
- i = 0;
- }
- /*
- * Loop over whole words in buffers. When we use up one buffer
- * we move on to the next one.
- */
- while (len - i >= XFS_NBWORD) {
- /*
- * Compute difference between actual and desired value.
- */
- if ((wdiff = *b ^ want)) {
- /*
- * Different, mark where we are and return.
- */
- xfs_trans_brelse(tp, bp);
- i += XFS_RTLOBIT(wdiff);
- *rtblock = start + i - 1;
- return 0;
- }
- i += XFS_NBWORD;
- /*
- * Go on to next block if that's where the next word is
- * and we need the next word.
- */
- if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
- /*
- * If done with this block, get the next one.
- */
- xfs_trans_brelse(tp, bp);
- error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
- if (error) {
- return error;
- }
- b = bufp = bp->b_addr;
- word = 0;
- } else {
- /*
- * Go on to the next word in the buffer.
- */
- b++;
- }
- }
- /*
- * If not ending on a word boundary, deal with the last
- * (partial) word.
- */
- if ((lastbit = len - i)) {
- /*
- * Calculate mask for all the relevant bits in this word.
- */
- mask = ((xfs_rtword_t)1 << lastbit) - 1;
- /*
- * Compute difference between actual and desired value.
- */
- if ((wdiff = (*b ^ want) & mask)) {
- /*
- * Different, mark where we are and return.
- */
- xfs_trans_brelse(tp, bp);
- i += XFS_RTLOBIT(wdiff);
- *rtblock = start + i - 1;
- return 0;
- } else
- i = len;
- }
- /*
- * No match, return that we scanned the whole area.
- */
- xfs_trans_brelse(tp, bp);
- *rtblock = start + i - 1;
- return 0;
-}
-
-/*
- * Read and modify the summary information for a given extent size,
- * bitmap block combination.
- * Keeps track of a current summary block, so we don't keep reading
- * it from the buffer cache.
- */
-int
-xfs_rtmodify_summary(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- int log, /* log2 of extent size */
- xfs_rtblock_t bbno, /* bitmap block number */
- int delta, /* change to make to summary info */
- xfs_buf_t **rbpp, /* in/out: summary block buffer */
- xfs_fsblock_t *rsb) /* in/out: summary block number */
-{
- xfs_buf_t *bp; /* buffer for the summary block */
- int error; /* error value */
- xfs_fsblock_t sb; /* summary fsblock */
- int so; /* index into the summary file */
- xfs_suminfo_t *sp; /* pointer to returned data */
-
- /*
- * Compute entry number in the summary file.
- */
- so = XFS_SUMOFFS(mp, log, bbno);
- /*
- * Compute the block number in the summary file.
- */
- sb = XFS_SUMOFFSTOBLOCK(mp, so);
- /*
- * If we have an old buffer, and the block number matches, use that.
- */
- if (rbpp && *rbpp && *rsb == sb)
- bp = *rbpp;
- /*
- * Otherwise we have to get the buffer.
- */
- else {
- /*
- * If there was an old one, get rid of it first.
- */
- if (rbpp && *rbpp)
- xfs_trans_brelse(tp, *rbpp);
- error = xfs_rtbuf_get(mp, tp, sb, 1, &bp);
- if (error) {
- return error;
- }
- /*
- * Remember this buffer and block for the next call.
- */
- if (rbpp) {
- *rbpp = bp;
- *rsb = sb;
- }
- }
- /*
- * Point to the summary information, modify and log it.
- */
- sp = XFS_SUMPTR(mp, bp, so);
- *sp += delta;
- xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr),
- (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1));
- return 0;
-}
-
-/*
- * Set the given range of bitmap bits to the given value.
- * Do whatever I/O and logging is required.
- */
-int
-xfs_rtmodify_range(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t start, /* starting block to modify */
- xfs_extlen_t len, /* length of extent to modify */
- int val) /* 1 for free, 0 for allocated */
-{
- xfs_rtword_t *b; /* current word in buffer */
- int bit; /* bit number in the word */
- xfs_rtblock_t block; /* bitmap block number */
- xfs_buf_t *bp; /* buf for the block */
- xfs_rtword_t *bufp; /* starting word in buffer */
- int error; /* error value */
- xfs_rtword_t *first; /* first used word in the buffer */
- int i; /* current bit number rel. to start */
- int lastbit; /* last useful bit in word */
- xfs_rtword_t mask; /* mask o frelevant bits for value */
- int word; /* word number in the buffer */
-
- /*
- * Compute starting bitmap block number.
- */
- block = XFS_BITTOBLOCK(mp, start);
- /*
- * Read the bitmap block, and point to its data.
- */
- error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
- if (error) {
- return error;
- }
- bufp = bp->b_addr;
- /*
- * Compute the starting word's address, and starting bit.
- */
- word = XFS_BITTOWORD(mp, start);
- first = b = &bufp[word];
- bit = (int)(start & (XFS_NBWORD - 1));
- /*
- * 0 (allocated) => all zeroes; 1 (free) => all ones.
- */
- val = -val;
- /*
- * If not starting on a word boundary, deal with the first
- * (partial) word.
- */
- if (bit) {
- /*
- * Compute first bit not changed and mask of relevant bits.
- */
- lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
- mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
- /*
- * Set/clear the active bits.
- */
- if (val)
- *b |= mask;
- else
- *b &= ~mask;
- i = lastbit - bit;
- /*
- * Go on to the next block if that's where the next word is
- * and we need the next word.
- */
- if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
- /*
- * Log the changed part of this block.
- * Get the next one.
- */
- xfs_trans_log_buf(tp, bp,
- (uint)((char *)first - (char *)bufp),
- (uint)((char *)b - (char *)bufp));
- error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
- if (error) {
- return error;
- }
- first = b = bufp = bp->b_addr;
- word = 0;
- } else {
- /*
- * Go on to the next word in the buffer
- */
- b++;
- }
- } else {
- /*
- * Starting on a word boundary, no partial word.
- */
- i = 0;
- }
- /*
- * Loop over whole words in buffers. When we use up one buffer
- * we move on to the next one.
- */
- while (len - i >= XFS_NBWORD) {
- /*
- * Set the word value correctly.
- */
- *b = val;
- i += XFS_NBWORD;
- /*
- * Go on to the next block if that's where the next word is
- * and we need the next word.
- */
- if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
- /*
- * Log the changed part of this block.
- * Get the next one.
- */
- xfs_trans_log_buf(tp, bp,
- (uint)((char *)first - (char *)bufp),
- (uint)((char *)b - (char *)bufp));
- error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
- if (error) {
- return error;
- }
- first = b = bufp = bp->b_addr;
- word = 0;
- } else {
- /*
- * Go on to the next word in the buffer
- */
- b++;
- }
- }
- /*
- * If not ending on a word boundary, deal with the last
- * (partial) word.
- */
- if ((lastbit = len - i)) {
- /*
- * Compute a mask of relevant bits.
- */
- bit = 0;
- mask = ((xfs_rtword_t)1 << lastbit) - 1;
- /*
- * Set/clear the active bits.
- */
- if (val)
- *b |= mask;
- else
- *b &= ~mask;
- b++;
- }
- /*
- * Log any remaining changed bytes.
- */
- if (b > first)
- xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp),
- (uint)((char *)b - (char *)bufp - 1));
- return 0;
-}
-
-/*
- * Mark an extent specified by start and len freed.
- * Updates all the summary information as well as the bitmap.
- */
-int
-xfs_rtfree_range(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t start, /* starting block to free */
- xfs_extlen_t len, /* length to free */
- xfs_buf_t **rbpp, /* in/out: summary block buffer */
- xfs_fsblock_t *rsb) /* in/out: summary block number */
-{
- xfs_rtblock_t end; /* end of the freed extent */
- int error; /* error value */
- xfs_rtblock_t postblock; /* first block freed > end */
- xfs_rtblock_t preblock; /* first block freed < start */
-
- end = start + len - 1;
- /*
- * Modify the bitmap to mark this extent freed.
- */
- error = xfs_rtmodify_range(mp, tp, start, len, 1);
- if (error) {
- return error;
- }
- /*
- * Assume we're freeing out of the middle of an allocated extent.
- * We need to find the beginning and end of the extent so we can
- * properly update the summary.
- */
- error = xfs_rtfind_back(mp, tp, start, 0, &preblock);
- if (error) {
- return error;
- }
- /*
- * Find the next allocated block (end of allocated extent).
- */
- error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
- &postblock);
- if (error)
- return error;
- /*
- * If there are blocks not being freed at the front of the
- * old extent, add summary data for them to be allocated.
- */
- if (preblock < start) {
- error = xfs_rtmodify_summary(mp, tp,
- XFS_RTBLOCKLOG(start - preblock),
- XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb);
- if (error) {
- return error;
- }
- }
- /*
- * If there are blocks not being freed at the end of the
- * old extent, add summary data for them to be allocated.
- */
- if (postblock > end) {
- error = xfs_rtmodify_summary(mp, tp,
- XFS_RTBLOCKLOG(postblock - end),
- XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb);
- if (error) {
- return error;
- }
- }
- /*
- * Increment the summary information corresponding to the entire
- * (new) free extent.
- */
- error = xfs_rtmodify_summary(mp, tp,
- XFS_RTBLOCKLOG(postblock + 1 - preblock),
- XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb);
- return error;
-}
-
-/*
- * Check that the given range is either all allocated (val = 0) or
- * all free (val = 1).
- */
-int
-xfs_rtcheck_range(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t start, /* starting block number of extent */
- xfs_extlen_t len, /* length of extent */
- int val, /* 1 for free, 0 for allocated */
- xfs_rtblock_t *new, /* out: first block not matching */
- int *stat) /* out: 1 for matches, 0 for not */
-{
- xfs_rtword_t *b; /* current word in buffer */
- int bit; /* bit number in the word */
- xfs_rtblock_t block; /* bitmap block number */
- xfs_buf_t *bp; /* buf for the block */
- xfs_rtword_t *bufp; /* starting word in buffer */
- int error; /* error value */
- xfs_rtblock_t i; /* current bit number rel. to start */
- xfs_rtblock_t lastbit; /* last useful bit in word */
- xfs_rtword_t mask; /* mask of relevant bits for value */
- xfs_rtword_t wdiff; /* difference from wanted value */
- int word; /* word number in the buffer */
-
- /*
- * Compute starting bitmap block number
- */
- block = XFS_BITTOBLOCK(mp, start);
- /*
- * Read the bitmap block.
- */
- error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
- if (error) {
- return error;
- }
- bufp = bp->b_addr;
- /*
- * Compute the starting word's address, and starting bit.
- */
- word = XFS_BITTOWORD(mp, start);
- b = &bufp[word];
- bit = (int)(start & (XFS_NBWORD - 1));
- /*
- * 0 (allocated) => all zero's; 1 (free) => all one's.
- */
- val = -val;
- /*
- * If not starting on a word boundary, deal with the first
- * (partial) word.
- */
- if (bit) {
- /*
- * Compute first bit not examined.
- */
- lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
- /*
- * Mask of relevant bits.
- */
- mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
- /*
- * Compute difference between actual and desired value.
- */
- if ((wdiff = (*b ^ val) & mask)) {
- /*
- * Different, compute first wrong bit and return.
- */
- xfs_trans_brelse(tp, bp);
- i = XFS_RTLOBIT(wdiff) - bit;
- *new = start + i;
- *stat = 0;
- return 0;
- }
- i = lastbit - bit;
- /*
- * Go on to next block if that's where the next word is
- * and we need the next word.
- */
- if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
- /*
- * If done with this block, get the next one.
- */
- xfs_trans_brelse(tp, bp);
- error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
- if (error) {
- return error;
- }
- b = bufp = bp->b_addr;
- word = 0;
- } else {
- /*
- * Go on to the next word in the buffer.
- */
- b++;
- }
- } else {
- /*
- * Starting on a word boundary, no partial word.
- */
- i = 0;
- }
- /*
- * Loop over whole words in buffers. When we use up one buffer
- * we move on to the next one.
- */
- while (len - i >= XFS_NBWORD) {
- /*
- * Compute difference between actual and desired value.
- */
- if ((wdiff = *b ^ val)) {
- /*
- * Different, compute first wrong bit and return.
- */
- xfs_trans_brelse(tp, bp);
- i += XFS_RTLOBIT(wdiff);
- *new = start + i;
- *stat = 0;
- return 0;
- }
- i += XFS_NBWORD;
- /*
- * Go on to next block if that's where the next word is
- * and we need the next word.
- */
- if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
- /*
- * If done with this block, get the next one.
- */
- xfs_trans_brelse(tp, bp);
- error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
- if (error) {
- return error;
- }
- b = bufp = bp->b_addr;
- word = 0;
- } else {
- /*
- * Go on to the next word in the buffer.
- */
- b++;
- }
- }
- /*
- * If not ending on a word boundary, deal with the last
- * (partial) word.
- */
- if ((lastbit = len - i)) {
- /*
- * Mask of relevant bits.
- */
- mask = ((xfs_rtword_t)1 << lastbit) - 1;
- /*
- * Compute difference between actual and desired value.
- */
- if ((wdiff = (*b ^ val) & mask)) {
- /*
- * Different, compute first wrong bit and return.
- */
- xfs_trans_brelse(tp, bp);
- i += XFS_RTLOBIT(wdiff);
- *new = start + i;
- *stat = 0;
- return 0;
- } else
- i = len;
- }
- /*
- * Successful, return.
- */
- xfs_trans_brelse(tp, bp);
- *new = start + i;
- *stat = 1;
- return 0;
-}
-
-#ifdef DEBUG
-/*
- * Check that the given extent (block range) is allocated already.
- */
-STATIC int /* error */
-xfs_rtcheck_alloc_range(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t bno, /* starting block number of extent */
- xfs_extlen_t len) /* length of extent */
-{
- xfs_rtblock_t new; /* dummy for xfs_rtcheck_range */
- int stat;
- int error;
-
- error = xfs_rtcheck_range(mp, tp, bno, len, 0, &new, &stat);
- if (error)
- return error;
- ASSERT(stat);
- return 0;
-}
-#else
-#define xfs_rtcheck_alloc_range(m,t,b,l) (0)
-#endif
-/*
- * Free an extent in the realtime subvolume. Length is expressed in
- * realtime extents, as is the block number.
- */
-int /* error */
-xfs_rtfree_extent(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t bno, /* starting block number to free */
- xfs_extlen_t len) /* length of extent freed */
-{
- int error; /* error value */
- xfs_mount_t *mp; /* file system mount structure */
- xfs_fsblock_t sb; /* summary file block number */
- xfs_buf_t *sumbp = NULL; /* summary file block buffer */
-
- mp = tp->t_mountp;
-
- ASSERT(mp->m_rbmip->i_itemp != NULL);
- ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
-
- error = xfs_rtcheck_alloc_range(mp, tp, bno, len);
- if (error)
- return error;
-
- /*
- * Free the range of realtime blocks.
- */
- error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb);
- if (error) {
- return error;
- }
- /*
- * Mark more blocks free in the superblock.
- */
- xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len);
- /*
- * If we've now freed all the blocks, reset the file sequence
- * number to 0.
- */
- if (tp->t_frextents_delta + mp->m_sb.sb_frextents ==
- mp->m_sb.sb_rextents) {
- if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
- mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
- *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0;
- xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
- }
- return 0;
-}
-
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
index b7c9aea..a5b59d9 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c
@@ -17,26 +17,34 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
+#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
+#include "xfs_rtalloc.h"
+#include "xfs_bmap.h"
#include "xfs_error.h"
+#include "xfs_quota.h"
+#include "xfs_fsops.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
-#include "xfs_trans.h"
#include "xfs_buf_item.h"
-#include "xfs_dinode.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
/*
* Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -241,13 +249,13 @@ xfs_mount_validate_sb(
if (xfs_sb_version_has_pquotino(sbp)) {
if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) {
xfs_notice(mp,
- "Version 5 of Super block has XFS_OQUOTA bits.");
+ "Version 5 of Super block has XFS_OQUOTA bits.\n");
return XFS_ERROR(EFSCORRUPTED);
}
} else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {
xfs_notice(mp,
-"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits.");
+"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits.\n");
return XFS_ERROR(EFSCORRUPTED);
}
@@ -588,11 +596,6 @@ xfs_sb_verify(
* single bit error could clear the feature bit and unused parts of the
* superblock are supposed to be zero. Hence a non-null crc field indicates that
* we've potentially lost a feature bit and we should check it anyway.
- *
- * However, past bugs (i.e. in growfs) left non-zeroed regions beyond the
- * last field in V4 secondary superblocks. So for secondary superblocks,
- * we are more forgiving, and ignore CRC failures if the primary doesn't
- * indicate that the fs version is V5.
*/
static void
xfs_sb_read_verify(
@@ -613,21 +616,16 @@ xfs_sb_read_verify(
if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize),
offsetof(struct xfs_sb, sb_crc))) {
- /* Only fail bad secondaries on a known V5 filesystem */
- if (bp->b_bn != XFS_SB_DADDR &&
- xfs_sb_version_hascrc(&mp->m_sb)) {
- error = EFSCORRUPTED;
- goto out_error;
- }
+ error = EFSCORRUPTED;
+ goto out_error;
}
}
error = xfs_sb_verify(bp, true);
out_error:
if (error) {
- if (error != EWRONGFS)
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- mp, bp->b_addr);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
+ mp, bp->b_addr);
xfs_buf_ioerror(bp, error);
}
}
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 35061d4..6835b44 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -699,4 +699,7 @@ extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp);
+extern const struct xfs_buf_ops xfs_sb_buf_ops;
+extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
+
#endif /* __XFS_SB_H__ */
diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/xfs_shared.h
deleted file mode 100644
index 8c5035a1..0000000
--- a/fs/xfs/xfs_shared.h
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * Copyright (c) 2013 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SHARED_H__
-#define __XFS_SHARED_H__
-
-/*
- * Definitions shared between kernel and userspace that don't fit into any other
- * header file that is shared with userspace.
- */
-struct xfs_ifork;
-struct xfs_buf;
-struct xfs_buf_ops;
-struct xfs_mount;
-struct xfs_trans;
-struct xfs_inode;
-
-/*
- * Buffer verifier operations are widely used, including userspace tools
- */
-extern const struct xfs_buf_ops xfs_agf_buf_ops;
-extern const struct xfs_buf_ops xfs_agi_buf_ops;
-extern const struct xfs_buf_ops xfs_agf_buf_ops;
-extern const struct xfs_buf_ops xfs_agfl_buf_ops;
-extern const struct xfs_buf_ops xfs_allocbt_buf_ops;
-extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
-extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
-extern const struct xfs_buf_ops xfs_bmbt_buf_ops;
-extern const struct xfs_buf_ops xfs_da3_node_buf_ops;
-extern const struct xfs_buf_ops xfs_dquot_buf_ops;
-extern const struct xfs_buf_ops xfs_symlink_buf_ops;
-extern const struct xfs_buf_ops xfs_agi_buf_ops;
-extern const struct xfs_buf_ops xfs_inobt_buf_ops;
-extern const struct xfs_buf_ops xfs_inode_buf_ops;
-extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
-extern const struct xfs_buf_ops xfs_dquot_buf_ops;
-extern const struct xfs_buf_ops xfs_sb_buf_ops;
-extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
-extern const struct xfs_buf_ops xfs_symlink_buf_ops;
-
-/*
- * Transaction types. Used to distinguish types of buffers. These never reach
- * the log.
- */
-#define XFS_TRANS_SETATTR_NOT_SIZE 1
-#define XFS_TRANS_SETATTR_SIZE 2
-#define XFS_TRANS_INACTIVE 3
-#define XFS_TRANS_CREATE 4
-#define XFS_TRANS_CREATE_TRUNC 5
-#define XFS_TRANS_TRUNCATE_FILE 6
-#define XFS_TRANS_REMOVE 7
-#define XFS_TRANS_LINK 8
-#define XFS_TRANS_RENAME 9
-#define XFS_TRANS_MKDIR 10
-#define XFS_TRANS_RMDIR 11
-#define XFS_TRANS_SYMLINK 12
-#define XFS_TRANS_SET_DMATTRS 13
-#define XFS_TRANS_GROWFS 14
-#define XFS_TRANS_STRAT_WRITE 15
-#define XFS_TRANS_DIOSTRAT 16
-/* 17 was XFS_TRANS_WRITE_SYNC */
-#define XFS_TRANS_WRITEID 18
-#define XFS_TRANS_ADDAFORK 19
-#define XFS_TRANS_ATTRINVAL 20
-#define XFS_TRANS_ATRUNCATE 21
-#define XFS_TRANS_ATTR_SET 22
-#define XFS_TRANS_ATTR_RM 23
-#define XFS_TRANS_ATTR_FLAG 24
-#define XFS_TRANS_CLEAR_AGI_BUCKET 25
-#define XFS_TRANS_QM_SBCHANGE 26
-/*
- * Dummy entries since we use the transaction type to index into the
- * trans_type[] in xlog_recover_print_trans_head()
- */
-#define XFS_TRANS_DUMMY1 27
-#define XFS_TRANS_DUMMY2 28
-#define XFS_TRANS_QM_QUOTAOFF 29
-#define XFS_TRANS_QM_DQALLOC 30
-#define XFS_TRANS_QM_SETQLIM 31
-#define XFS_TRANS_QM_DQCLUSTER 32
-#define XFS_TRANS_QM_QINOCREATE 33
-#define XFS_TRANS_QM_QUOTAOFF_END 34
-#define XFS_TRANS_SB_UNIT 35
-#define XFS_TRANS_FSYNC_TS 36
-#define XFS_TRANS_GROWFSRT_ALLOC 37
-#define XFS_TRANS_GROWFSRT_ZERO 38
-#define XFS_TRANS_GROWFSRT_FREE 39
-#define XFS_TRANS_SWAPEXT 40
-#define XFS_TRANS_SB_COUNT 41
-#define XFS_TRANS_CHECKPOINT 42
-#define XFS_TRANS_ICREATE 43
-#define XFS_TRANS_TYPE_MAX 43
-/* new transaction types need to be reflected in xfs_logprint(8) */
-
-#define XFS_TRANS_TYPES \
- { XFS_TRANS_SETATTR_NOT_SIZE, "SETATTR_NOT_SIZE" }, \
- { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \
- { XFS_TRANS_INACTIVE, "INACTIVE" }, \
- { XFS_TRANS_CREATE, "CREATE" }, \
- { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \
- { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \
- { XFS_TRANS_REMOVE, "REMOVE" }, \
- { XFS_TRANS_LINK, "LINK" }, \
- { XFS_TRANS_RENAME, "RENAME" }, \
- { XFS_TRANS_MKDIR, "MKDIR" }, \
- { XFS_TRANS_RMDIR, "RMDIR" }, \
- { XFS_TRANS_SYMLINK, "SYMLINK" }, \
- { XFS_TRANS_SET_DMATTRS, "SET_DMATTRS" }, \
- { XFS_TRANS_GROWFS, "GROWFS" }, \
- { XFS_TRANS_STRAT_WRITE, "STRAT_WRITE" }, \
- { XFS_TRANS_DIOSTRAT, "DIOSTRAT" }, \
- { XFS_TRANS_WRITEID, "WRITEID" }, \
- { XFS_TRANS_ADDAFORK, "ADDAFORK" }, \
- { XFS_TRANS_ATTRINVAL, "ATTRINVAL" }, \
- { XFS_TRANS_ATRUNCATE, "ATRUNCATE" }, \
- { XFS_TRANS_ATTR_SET, "ATTR_SET" }, \
- { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \
- { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \
- { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \
- { XFS_TRANS_QM_SBCHANGE, "QM_SBCHANGE" }, \
- { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \
- { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \
- { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \
- { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \
- { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \
- { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \
- { XFS_TRANS_SB_UNIT, "SB_UNIT" }, \
- { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \
- { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \
- { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \
- { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \
- { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \
- { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \
- { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \
- { XFS_TRANS_DUMMY1, "DUMMY1" }, \
- { XFS_TRANS_DUMMY2, "DUMMY2" }, \
- { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" }
-
-/*
- * This structure is used to track log items associated with
- * a transaction. It points to the log item and keeps some
- * flags to track the state of the log item. It also tracks
- * the amount of space needed to log the item it describes
- * once we get to commit processing (see xfs_trans_commit()).
- */
-struct xfs_log_item_desc {
- struct xfs_log_item *lid_item;
- struct list_head lid_trans;
- unsigned char lid_flags;
-};
-
-#define XFS_LID_DIRTY 0x1
-
-/* log size calculation functions */
-int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes);
-int xfs_log_calc_minimum_size(struct xfs_mount *);
-
-
-/*
- * Values for t_flags.
- */
-#define XFS_TRANS_DIRTY 0x01 /* something needs to be logged */
-#define XFS_TRANS_SB_DIRTY 0x02 /* superblock is modified */
-#define XFS_TRANS_PERM_LOG_RES 0x04 /* xact took a permanent log res */
-#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */
-#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */
-#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */
-#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer
- count in superblock */
-/*
- * Values for call flags parameter.
- */
-#define XFS_TRANS_RELEASE_LOG_RES 0x4
-#define XFS_TRANS_ABORT 0x8
-
-/*
- * Field values for xfs_trans_mod_sb.
- */
-#define XFS_TRANS_SB_ICOUNT 0x00000001
-#define XFS_TRANS_SB_IFREE 0x00000002
-#define XFS_TRANS_SB_FDBLOCKS 0x00000004
-#define XFS_TRANS_SB_RES_FDBLOCKS 0x00000008
-#define XFS_TRANS_SB_FREXTENTS 0x00000010
-#define XFS_TRANS_SB_RES_FREXTENTS 0x00000020
-#define XFS_TRANS_SB_DBLOCKS 0x00000040
-#define XFS_TRANS_SB_AGCOUNT 0x00000080
-#define XFS_TRANS_SB_IMAXPCT 0x00000100
-#define XFS_TRANS_SB_REXTSIZE 0x00000200
-#define XFS_TRANS_SB_RBMBLOCKS 0x00000400
-#define XFS_TRANS_SB_RBLOCKS 0x00000800
-#define XFS_TRANS_SB_REXTENTS 0x00001000
-#define XFS_TRANS_SB_REXTSLOG 0x00002000
-
-/*
- * Here we centralize the specification of XFS meta-data buffer reference count
- * values. This determines how hard the buffer cache tries to hold onto the
- * buffer.
- */
-#define XFS_AGF_REF 4
-#define XFS_AGI_REF 4
-#define XFS_AGFL_REF 3
-#define XFS_INO_BTREE_REF 3
-#define XFS_ALLOC_BTREE_REF 2
-#define XFS_BMAP_BTREE_REF 2
-#define XFS_DIR_BTREE_REF 2
-#define XFS_INO_REF 2
-#define XFS_ATTR_BTREE_REF 1
-#define XFS_DQUOT_REF 1
-
-/*
- * Flags for xfs_trans_ichgtime().
- */
-#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
-#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
-#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */
-
-
-/*
- * Symlink decoding/encoding functions
- */
-int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen);
-int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
- uint32_t size, struct xfs_buf *bp);
-bool xfs_symlink_hdr_ok(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
- uint32_t size, struct xfs_buf *bp);
-void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
- struct xfs_inode *ip, struct xfs_ifork *ifp);
-
-#endif /* __XFS_SHARED_H__ */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index f317488..15188cc 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -17,26 +17,34 @@
*/
#include "xfs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
#include "xfs_inum.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
+#include "xfs_ialloc.h"
#include "xfs_bmap.h"
-#include "xfs_alloc.h"
+#include "xfs_rtalloc.h"
#include "xfs_error.h"
+#include "xfs_itable.h"
#include "xfs_fsops.h"
-#include "xfs_trans.h"
+#include "xfs_attr.h"
#include "xfs_buf_item.h"
-#include "xfs_log.h"
#include "xfs_log_priv.h"
+#include "xfs_trans_priv.h"
+#include "xfs_filestream.h"
#include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
@@ -44,9 +52,6 @@
#include "xfs_icache.h"
#include "xfs_trace.h"
#include "xfs_icreate_item.h"
-#include "xfs_dinode.h"
-#include "xfs_filestream.h"
-#include "xfs_quota.h"
#include <linux/namei.h>
#include <linux/init.h>
@@ -913,7 +918,7 @@ xfs_flush_inodes(
struct super_block *sb = mp->m_super;
if (down_read_trylock(&sb->s_umount)) {
- sync_inodes_sb(sb, jiffies);
+ sync_inodes_sb(sb);
up_read(&sb->s_umount);
}
}
@@ -941,6 +946,10 @@ xfs_fs_destroy_inode(
XFS_STATS_INC(vn_reclaim);
+ /* bad inode, get out here ASAP */
+ if (is_bad_inode(inode))
+ goto out_reclaim;
+
ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
/*
@@ -956,6 +965,7 @@ xfs_fs_destroy_inode(
* this more efficiently than we can here, so simply let background
* reclaim tear down all inodes.
*/
+out_reclaim:
xfs_inode_set_reclaim_tag(ip);
}
@@ -1155,7 +1165,7 @@ xfs_restore_resvblks(struct xfs_mount *mp)
* Note: xfs_log_quiesce() stops background log work - the callers must ensure
* it is started again when appropriate.
*/
-static void
+void
xfs_quiesce_attr(
struct xfs_mount *mp)
{
@@ -1236,7 +1246,7 @@ xfs_fs_remount(
*/
#if 0
xfs_info(mp,
- "mount option \"%s\" not supported for remount", p);
+ "mount option \"%s\" not supported for remount\n", p);
return -EINVAL;
#else
break;
@@ -1481,6 +1491,10 @@ xfs_fs_fill_super(
error = ENOENT;
goto out_unmount;
}
+ if (is_bad_inode(root)) {
+ error = EINVAL;
+ goto out_unmount;
+ }
sb->s_root = d_make_root(root);
if (!sb->s_root) {
error = ENOMEM;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 14e58f2..f622a97 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -17,31 +17,31 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
-#include "xfs_shared.h"
#include "xfs_fs.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
#include "xfs_bmap_util.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_symlink.h"
-#include "xfs_trans.h"
-#include "xfs_log.h"
-#include "xfs_dinode.h"
+#include "xfs_buf_item.h"
/* ----- Kernel only functions below ----- */
STATIC int
@@ -424,7 +424,8 @@ xfs_symlink(
*/
STATIC int
xfs_inactive_symlink_rmt(
- struct xfs_inode *ip)
+ xfs_inode_t *ip,
+ xfs_trans_t **tpp)
{
xfs_buf_t *bp;
int committed;
@@ -436,9 +437,11 @@ xfs_inactive_symlink_rmt(
xfs_mount_t *mp;
xfs_bmbt_irec_t mval[XFS_SYMLINK_MAPS];
int nmaps;
+ xfs_trans_t *ntp;
int size;
xfs_trans_t *tp;
+ tp = *tpp;
mp = ip->i_mount;
ASSERT(ip->i_df.if_flags & XFS_IFEXTENTS);
/*
@@ -450,16 +453,6 @@ xfs_inactive_symlink_rmt(
*/
ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
- tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- return error;
- }
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, 0);
-
/*
* Lock the inode, fix the size, and join it to the transaction.
* Hold it so in the normal path, we still have it locked for
@@ -478,7 +471,7 @@ xfs_inactive_symlink_rmt(
error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size),
mval, &nmaps, 0);
if (error)
- goto error_trans_cancel;
+ goto error0;
/*
* Invalidate the block(s). No validation is done.
*/
@@ -488,24 +481,22 @@ xfs_inactive_symlink_rmt(
XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
if (!bp) {
error = ENOMEM;
- goto error_bmap_cancel;
+ goto error1;
}
xfs_trans_binval(tp, bp);
}
/*
* Unmap the dead block(s) to the free_list.
*/
- error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
- &first_block, &free_list, &done);
- if (error)
- goto error_bmap_cancel;
+ if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
+ &first_block, &free_list, &done)))
+ goto error1;
ASSERT(done);
/*
* Commit the first transaction. This logs the EFI and the inode.
*/
- error = xfs_bmap_finish(&tp, &free_list, &committed);
- if (error)
- goto error_bmap_cancel;
+ if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
+ goto error1;
/*
* The transaction must have been committed, since there were
* actually extents freed by xfs_bunmapi. See xfs_bmap_finish.
@@ -520,13 +511,26 @@ xfs_inactive_symlink_rmt(
xfs_trans_ijoin(tp, ip, 0);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
/*
+ * Get a new, empty transaction to return to our caller.
+ */
+ ntp = xfs_trans_dup(tp);
+ /*
* Commit the transaction containing extent freeing and EFDs.
+ * If we get an error on the commit here or on the reserve below,
+ * we need to unlock the inode since the new transaction doesn't
+ * have the inode attached.
*/
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp, 0);
+ tp = ntp;
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
- goto error_unlock;
+ goto error0;
}
+ /*
+ * transaction commit worked ok so we can drop the extra ticket
+ * reference that we gained in xfs_trans_dup()
+ */
+ xfs_log_ticket_put(tp->t_ticket);
/*
* Remove the memory for extent descriptions (just bookkeeping).
@@ -534,16 +538,23 @@ xfs_inactive_symlink_rmt(
if (ip->i_df.if_bytes)
xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
ASSERT(ip->i_df.if_bytes == 0);
+ /*
+ * Put an itruncate log reservation in the new transaction
+ * for our caller.
+ */
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
+ if (error) {
+ ASSERT(XFS_FORCED_SHUTDOWN(mp));
+ goto error0;
+ }
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, 0);
+ *tpp = tp;
return 0;
-error_bmap_cancel:
+ error1:
xfs_bmap_cancel(&free_list);
-error_trans_cancel:
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
-error_unlock:
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ error0:
return error;
}
@@ -552,46 +563,41 @@ error_unlock:
*/
int
xfs_inactive_symlink(
- struct xfs_inode *ip)
+ struct xfs_inode *ip,
+ struct xfs_trans **tp)
{
struct xfs_mount *mp = ip->i_mount;
int pathlen;
trace_xfs_inactive_symlink(ip);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
- xfs_ilock(ip, XFS_ILOCK_EXCL);
-
/*
* Zero length symlinks _can_ exist.
*/
pathlen = (int)ip->i_d.di_size;
- if (!pathlen) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ if (!pathlen)
return 0;
- }
if (pathlen < 0 || pathlen > MAXPATHLEN) {
xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)",
__func__, (unsigned long long)ip->i_ino, pathlen);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
ASSERT(0);
return XFS_ERROR(EFSCORRUPTED);
}
if (ip->i_df.if_flags & XFS_IFINLINE) {
- if (ip->i_df.if_bytes > 0)
+ if (ip->i_df.if_bytes > 0)
xfs_idata_realloc(ip, -(ip->i_df.if_bytes),
XFS_DATA_FORK);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
ASSERT(ip->i_df.if_bytes == 0);
return 0;
}
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
/* remove the remote symlink */
- return xfs_inactive_symlink_rmt(ip);
+ return xfs_inactive_symlink_rmt(ip, tp);
}
diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h
index e75245d..99338ba 100644
--- a/fs/xfs/xfs_symlink.h
+++ b/fs/xfs/xfs_symlink.h
@@ -22,6 +22,6 @@
int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
const char *target_path, umode_t mode, struct xfs_inode **ipp);
int xfs_readlink(struct xfs_inode *ip, char *link);
-int xfs_inactive_symlink(struct xfs_inode *ip);
+int xfs_inactive_symlink(struct xfs_inode *ip, struct xfs_trans **tpp);
#endif /* __XFS_SYMLINK_H */
diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c
index bf59a2b..01c85e3 100644
--- a/fs/xfs/xfs_symlink_remote.c
+++ b/fs/xfs/xfs_symlink_remote.c
@@ -19,9 +19,8 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_shared.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_ag.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
@@ -31,7 +30,6 @@
#include "xfs_trace.h"
#include "xfs_symlink.h"
#include "xfs_cksum.h"
-#include "xfs_trans.h"
#include "xfs_buf_item.h"
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index dee3279..5d7b3e4 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -17,16 +17,19 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
+#include "xfs_types.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
+#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_ialloc.h"
#include "xfs_itable.h"
@@ -34,8 +37,6 @@
#include "xfs_bmap.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
-#include "xfs_trans.h"
-#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_buf_item.h"
#include "xfs_quota.h"
@@ -45,7 +46,6 @@
#include "xfs_dquot.h"
#include "xfs_log_recover.h"
#include "xfs_inode_item.h"
-#include "xfs_bmap_btree.h"
/*
* We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 425dfa4..47910e6 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -31,8 +31,8 @@ struct xfs_da_args;
struct xfs_da_node_entry;
struct xfs_dquot;
struct xfs_log_item;
-struct xlog;
struct xlog_ticket;
+struct xlog;
struct xlog_recover;
struct xlog_recover_item;
struct xfs_buf_log_format;
@@ -135,31 +135,6 @@ DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
DEFINE_PERAG_REF_EVENT(xfs_perag_set_eofblocks);
DEFINE_PERAG_REF_EVENT(xfs_perag_clear_eofblocks);
-DECLARE_EVENT_CLASS(xfs_ag_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno),
- TP_ARGS(mp, agno),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_agnumber_t, agno)
- ),
- TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
- ),
- TP_printk("dev %d:%d agno %u",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->agno)
-);
-#define DEFINE_AG_EVENT(name) \
-DEFINE_EVENT(xfs_ag_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno), \
- TP_ARGS(mp, agno))
-
-DEFINE_AG_EVENT(xfs_read_agf);
-DEFINE_AG_EVENT(xfs_alloc_read_agf);
-DEFINE_AG_EVENT(xfs_read_agi);
-DEFINE_AG_EVENT(xfs_ialloc_read_agi);
-
TRACE_EVENT(xfs_attr_list_node_descend,
TP_PROTO(struct xfs_attr_list_context *ctx,
struct xfs_da_node_entry *btree),
@@ -963,63 +938,6 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned);
DEFINE_LOG_ITEM_EVENT(xfs_ail_locked);
DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing);
-DECLARE_EVENT_CLASS(xfs_ail_class,
- TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn),
- TP_ARGS(lip, old_lsn, new_lsn),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(void *, lip)
- __field(uint, type)
- __field(uint, flags)
- __field(xfs_lsn_t, old_lsn)
- __field(xfs_lsn_t, new_lsn)
- ),
- TP_fast_assign(
- __entry->dev = lip->li_mountp->m_super->s_dev;
- __entry->lip = lip;
- __entry->type = lip->li_type;
- __entry->flags = lip->li_flags;
- __entry->old_lsn = old_lsn;
- __entry->new_lsn = new_lsn;
- ),
- TP_printk("dev %d:%d lip 0x%p old lsn %d/%d new lsn %d/%d type %s flags %s",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->lip,
- CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn),
- CYCLE_LSN(__entry->new_lsn), BLOCK_LSN(__entry->new_lsn),
- __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
- __print_flags(__entry->flags, "|", XFS_LI_FLAGS))
-)
-
-#define DEFINE_AIL_EVENT(name) \
-DEFINE_EVENT(xfs_ail_class, name, \
- TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), \
- TP_ARGS(lip, old_lsn, new_lsn))
-DEFINE_AIL_EVENT(xfs_ail_insert);
-DEFINE_AIL_EVENT(xfs_ail_move);
-DEFINE_AIL_EVENT(xfs_ail_delete);
-
-TRACE_EVENT(xfs_log_assign_tail_lsn,
- TP_PROTO(struct xlog *log, xfs_lsn_t new_lsn),
- TP_ARGS(log, new_lsn),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_lsn_t, new_lsn)
- __field(xfs_lsn_t, old_lsn)
- __field(xfs_lsn_t, last_sync_lsn)
- ),
- TP_fast_assign(
- __entry->dev = log->l_mp->m_super->s_dev;
- __entry->new_lsn = new_lsn;
- __entry->old_lsn = atomic64_read(&log->l_tail_lsn);
- __entry->last_sync_lsn = atomic64_read(&log->l_last_sync_lsn);
- ),
- TP_printk("dev %d:%d new tail lsn %d/%d, old lsn %d/%d, last sync %d/%d",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- CYCLE_LSN(__entry->new_lsn), BLOCK_LSN(__entry->new_lsn),
- CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn),
- CYCLE_LSN(__entry->last_sync_lsn), BLOCK_LSN(__entry->last_sync_lsn))
-)
DECLARE_EVENT_CLASS(xfs_file_class,
TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c812c5c..5411e01 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -18,21 +18,32 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_error.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
+#include "xfs_bmap.h"
#include "xfs_quota.h"
-#include "xfs_trans.h"
+#include "xfs_qm.h"
#include "xfs_trans_priv.h"
-#include "xfs_log.h"
+#include "xfs_trans_space.h"
+#include "xfs_inode_item.h"
+#include "xfs_log_priv.h"
+#include "xfs_buf_item.h"
#include "xfs_trace.h"
-#include "xfs_error.h"
kmem_zone_t *xfs_trans_zone;
kmem_zone_t *xfs_log_item_desc_zone;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 9b96d35..09cf40b 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -18,6 +18,10 @@
#ifndef __XFS_TRANS_H__
#define __XFS_TRANS_H__
+struct xfs_log_item;
+
+#include "xfs_trans_resv.h"
+
/* kernel only transaction subsystem defines */
struct xfs_buf;
@@ -73,9 +77,6 @@ struct xfs_item_ops {
void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
};
-void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
- int type, const struct xfs_item_ops *ops);
-
/*
* Return values for the iop_push() routines.
*/
@@ -84,12 +85,18 @@ void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
#define XFS_ITEM_LOCKED 2
#define XFS_ITEM_FLUSHING 3
+/*
+ * This is the type of function which can be given to xfs_trans_callback()
+ * to be called upon the transaction's commit to disk.
+ */
+typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *);
/*
* This is the structure maintained for every active transaction.
*/
typedef struct xfs_trans {
unsigned int t_magic; /* magic number */
+ xfs_log_callback_t t_logcb; /* log callback struct */
unsigned int t_type; /* transaction type */
unsigned int t_log_res; /* amt of log space resvd */
unsigned int t_log_count; /* count for perm log res */
@@ -125,6 +132,7 @@ typedef struct xfs_trans {
int64_t t_rextents_delta;/* superblocks rextents chg */
int64_t t_rextslog_delta;/* superblocks rextslog chg */
struct list_head t_items; /* log item descriptors */
+ xfs_trans_header_t t_header; /* header for in-log trans */
struct list_head t_busy; /* list of busy extents */
unsigned long t_pflags; /* saved process flags state */
} xfs_trans_t;
@@ -229,16 +237,10 @@ void xfs_trans_log_efd_extent(xfs_trans_t *,
xfs_fsblock_t,
xfs_extlen_t);
int xfs_trans_commit(xfs_trans_t *, uint flags);
-int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
void xfs_trans_cancel(xfs_trans_t *, int);
int xfs_trans_ail_init(struct xfs_mount *);
void xfs_trans_ail_destroy(struct xfs_mount *);
-void xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *,
- enum xfs_blft);
-void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp,
- struct xfs_buf *src_bp);
-
extern kmem_zone_t *xfs_trans_zone;
extern kmem_zone_t *xfs_log_item_desc_zone;
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index a728735..21c6d7d 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -18,16 +18,15 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_trace.h"
#include "xfs_error.h"
-#include "xfs_log.h"
#ifdef DEBUG
/*
@@ -659,13 +658,11 @@ xfs_trans_ail_update_bulk(
if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0)
continue;
- trace_xfs_ail_move(lip, lip->li_lsn, lsn);
xfs_ail_delete(ailp, lip);
if (mlip == lip)
mlip_changed = 1;
} else {
lip->li_flags |= XFS_LI_IN_AIL;
- trace_xfs_ail_insert(lip, 0, lsn);
}
lip->li_lsn = lsn;
list_add(&lip->li_ail, &tmp);
@@ -734,7 +731,6 @@ xfs_trans_ail_delete_bulk(
return;
}
- trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
xfs_ail_delete(ailp, lip);
lip->li_flags &= ~XFS_LI_IN_AIL;
lip->li_lsn = 0;
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index c035d11..8c75b8f 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -17,15 +17,17 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
#include "xfs_error.h"
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index cd2a10e..54ee3c5 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -17,18 +17,23 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
#include "xfs_error.h"
-#include "xfs_trans.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
-#include "xfs_quota.h"
#include "xfs_qm.h"
STATIC void xfs_trans_alloc_dqinfo(xfs_trans_t *);
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index 47978ba..8d71b16 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -17,13 +17,12 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_extfree_item.h"
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 50c3f56..53dfe46 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -17,15 +17,18 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_trans.h"
+#include "xfs_btree.h"
#include "xfs_trans_priv.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
@@ -111,14 +114,12 @@ xfs_trans_log_inode(
/*
* First time we log the inode in a transaction, bump the inode change
- * counter if it is configured for this to occur. We don't use
- * inode_inc_version() because there is no need for extra locking around
- * i_version as we already hold the inode locked exclusively for
- * metadata modification.
+ * counter if it is configured for this to occur.
*/
if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) &&
IS_I_VERSION(VFS_I(ip))) {
- ip->i_d.di_changecount = ++VFS_I(ip)->i_version;
+ inode_inc_iversion(VFS_I(ip));
+ ip->i_d.di_changecount = VFS_I(ip)->i_version;
flags |= XFS_ILOG_CORE;
}
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 12e86af..c52def0 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -27,6 +27,7 @@ struct xfs_log_vec;
void xfs_trans_init(struct xfs_mount *);
+int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
void xfs_trans_del_item(struct xfs_log_item *);
void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c
index 2fd59c0..a65a3cc4 100644
--- a/fs/xfs/xfs_trans_resv.c
+++ b/fs/xfs/xfs_trans_resv.c
@@ -18,19 +18,27 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
+#include "xfs_log.h"
#include "xfs_trans_resv.h"
+#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_inode.h"
+#include "xfs_error.h"
+#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
#include "xfs_ialloc.h"
+#include "xfs_alloc.h"
+#include "xfs_extent_busy.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
#include "xfs_quota.h"
-#include "xfs_trans.h"
#include "xfs_qm.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
@@ -385,7 +393,8 @@ xfs_calc_ifree_reservation(
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
- max_t(uint, XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) +
+ MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
+ XFS_INODE_CLUSTER_SIZE(mp)) +
xfs_calc_buf_res(1, 0) +
xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
mp->m_in_maxlevels, 0) +
diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h
index 3e8e797..db14d0c 100644
--- a/fs/xfs/xfs_vnode.h
+++ b/fs/xfs/xfs_vnode.h
@@ -25,6 +25,14 @@ struct xfs_inode;
struct attrlist_cursor_kern;
/*
+ * Return values for xfs_inactive. A return value of
+ * VN_INACTIVE_NOCACHE implies that the file system behavior
+ * has disassociated its state and bhv_desc_t from the vnode.
+ */
+#define VN_INACTIVE_CACHE 0
+#define VN_INACTIVE_NOCACHE 1
+
+/*
* Flags for read/write calls - same values as IRIX
*/
#define IO_ISDIRECT 0x00004 /* bypass page cache */
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 9d47907..e01f35e 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -17,13 +17,9 @@
*/
#include "xfs.h"
-#include "xfs_format.h"
#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"