diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-23 02:02:39 (GMT) |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-23 02:02:39 (GMT) |
commit | bbd9d6f7fbb0305c9a592bf05a32e87eb364a4ff (patch) | |
tree | 12b2bb4202b05f6ae6a43c6ce830a0472043dbe5 /fs/ext4 | |
parent | 8e204874db000928e37199c2db82b7eb8966cc3c (diff) | |
parent | 5a9a43646cf709312d71eca71cef90ad802f28f9 (diff) | |
download | linux-fsl-qoriq-bbd9d6f7fbb0305c9a592bf05a32e87eb364a4ff.tar.xz |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6: (107 commits)
vfs: use ERR_CAST for err-ptr tossing in lookup_instantiate_filp
isofs: Remove global fs lock
jffs2: fix IN_DELETE_SELF on overwriting rename() killing a directory
fix IN_DELETE_SELF on overwriting rename() on ramfs et.al.
mm/truncate.c: fix build for CONFIG_BLOCK not enabled
fs:update the NOTE of the file_operations structure
Remove dead code in dget_parent()
AFS: Fix silly characters in a comment
switch d_add_ci() to d_splice_alias() in "found negative" case as well
simplify gfs2_lookup()
jfs_lookup(): don't bother with . or ..
get rid of useless dget_parent() in btrfs rename() and link()
get rid of useless dget_parent() in fs/btrfs/ioctl.c
fs: push i_mutex and filemap_write_and_wait down into ->fsync() handlers
drivers: fix up various ->llseek() implementations
fs: handle SEEK_HOLE/SEEK_DATA properly in all fs's that define their own llseek
Ext4: handle SEEK_HOLE/SEEK_DATA generically
Btrfs: implement our own ->llseek
fs: add SEEK_HOLE and SEEK_DATA flags
reiserfs: make reiserfs default to barrier=flush
...
Fix up trivial conflicts in fs/xfs/linux-2.6/xfs_super.c due to the new
shrinker callout for the inode cache, that clashed with the xfs code to
start the periodic workers later.
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/acl.c | 4 | ||||
-rw-r--r-- | fs/ext4/acl.h | 2 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 2 | ||||
-rw-r--r-- | fs/ext4/file.c | 21 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 38 | ||||
-rw-r--r-- | fs/ext4/inode.c | 125 | ||||
-rw-r--r-- | fs/ext4/namei.c | 14 |
7 files changed, 133 insertions, 73 deletions
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 21eacd7..60d900f 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -238,11 +238,11 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, } int -ext4_check_acl(struct inode *inode, int mask, unsigned int flags) +ext4_check_acl(struct inode *inode, int mask) { struct posix_acl *acl; - if (flags & IPERM_FLAG_RCU) { + if (mask & MAY_NOT_BLOCK) { if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) return -ECHILD; return -EAGAIN; diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index dec8211..9d843d5 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -54,7 +54,7 @@ static inline int ext4_acl_count(size_t size) #ifdef CONFIG_EXT4_FS_POSIX_ACL /* acl.c */ -extern int ext4_check_acl(struct inode *, int, unsigned int); +extern int ext4_check_acl(struct inode *, int); extern int ext4_acl_chmod(struct inode *); extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1921392..fa44df8 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1758,7 +1758,7 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, extern void ext4_htree_free_dir_info(struct dir_private_info *p); /* fsync.c */ -extern int ext4_sync_file(struct file *, int); +extern int ext4_sync_file(struct file *, loff_t, loff_t, int); extern int ext4_flush_completed_IO(struct inode *); /* hash.c */ diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 2c09723..ce766f9 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -236,6 +236,27 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin) } offset += file->f_pos; break; + case SEEK_DATA: + /* + * In the generic case the entire file is data, so as long as + * offset isn't at the end of the file then the offset is data. + */ + if (offset >= inode->i_size) { + mutex_unlock(&inode->i_mutex); + return -ENXIO; + } + break; + case SEEK_HOLE: + /* + * There is a virtual hole at the end of the file, so as long as + * offset isn't i_size or larger, return i_size. + */ + if (offset >= inode->i_size) { + mutex_unlock(&inode->i_mutex); + return -ENXIO; + } + offset = inode->i_size; + break; } if (offset < 0 || offset > maxbytes) { diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index ce66d2f..da3bed3 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -151,6 +151,32 @@ static int ext4_sync_parent(struct inode *inode) return ret; } +/** + * __sync_file - generic_file_fsync without the locking and filemap_write + * @inode: inode to sync + * @datasync: only sync essential metadata if true + * + * This is just generic_file_fsync without the locking. This is needed for + * nojournal mode to make sure this inodes data/metadata makes it to disk + * properly. The i_mutex should be held already. + */ +static int __sync_inode(struct inode *inode, int datasync) +{ + int err; + int ret; + + ret = sync_mapping_buffers(inode->i_mapping); + if (!(inode->i_state & I_DIRTY)) + return ret; + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + return ret; + + err = sync_inode_metadata(inode, 1); + if (ret == 0) + ret = err; + return ret; +} + /* * akpm: A new design for ext4_sync_file(). * @@ -165,7 +191,7 @@ static int ext4_sync_parent(struct inode *inode) * i_mutex lock is held when entering and exiting this function */ -int ext4_sync_file(struct file *file, int datasync) +int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct ext4_inode_info *ei = EXT4_I(inode); @@ -178,15 +204,20 @@ int ext4_sync_file(struct file *file, int datasync) trace_ext4_sync_file_enter(file, datasync); + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret) + return ret; + mutex_lock(&inode->i_mutex); + if (inode->i_sb->s_flags & MS_RDONLY) - return 0; + goto out; ret = ext4_flush_completed_IO(inode); if (ret < 0) goto out; if (!journal) { - ret = generic_file_fsync(file, datasync); + ret = __sync_inode(inode, datasync); if (!ret && !list_empty(&inode->i_dentry)) ret = ext4_sync_parent(inode); goto out; @@ -220,6 +251,7 @@ int ext4_sync_file(struct file *file, int datasync) if (needs_barrier) blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); out: + mutex_unlock(&inode->i_mutex); trace_ext4_sync_file_exit(inode, ret); return ret; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e3126c0..678cde8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3501,10 +3501,8 @@ retry: offset, nr_segs, ext4_get_block, NULL, NULL, 0); else { - ret = blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, iov, - offset, nr_segs, - ext4_get_block, NULL); + ret = blockdev_direct_IO(rw, iocb, inode, iov, + offset, nr_segs, ext4_get_block); if (unlikely((rw & WRITE) && ret < 0)) { loff_t isize = i_size_read(inode); @@ -3575,6 +3573,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, ssize_t size, void *private, int ret, bool is_async) { + struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; ext4_io_end_t *io_end = iocb->private; struct workqueue_struct *wq; unsigned long flags; @@ -3596,6 +3595,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, out: if (is_async) aio_complete(iocb, ret, 0); + inode_dio_done(inode); return; } @@ -3616,6 +3616,9 @@ out: /* queue the work to convert unwritten extents to written */ queue_work(wq, &io_end->work); iocb->private = NULL; + + /* XXX: probably should move into the real I/O completion handler */ + inode_dio_done(inode); } static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) @@ -3748,11 +3751,13 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, EXT4_I(inode)->cur_aio_dio = iocb->private; } - ret = blockdev_direct_IO(rw, iocb, inode, + ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, ext4_get_block_write, - ext4_end_io_dio); + ext4_end_io_dio, + NULL, + DIO_LOCKING | DIO_SKIP_HOLES); if (iocb->private) EXT4_I(inode)->cur_aio_dio = NULL; /* @@ -5351,6 +5356,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) } if (attr->ia_valid & ATTR_SIZE) { + inode_dio_wait(inode); + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -5843,80 +5850,84 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct page *page = vmf->page; loff_t size; unsigned long len; - int ret = -EINVAL; - void *fsdata; + int ret; struct file *file = vma->vm_file; struct inode *inode = file->f_path.dentry->d_inode; struct address_space *mapping = inode->i_mapping; + handle_t *handle; + get_block_t *get_block; + int retries = 0; /* - * Get i_alloc_sem to stop truncates messing with the inode. We cannot - * get i_mutex because we are already holding mmap_sem. + * This check is racy but catches the common case. We rely on + * __block_page_mkwrite() to do a reliable check. */ - down_read(&inode->i_alloc_sem); - size = i_size_read(inode); - if (page->mapping != mapping || size <= page_offset(page) - || !PageUptodate(page)) { - /* page got truncated from under us? */ - goto out_unlock; + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + /* Delalloc case is easy... */ + if (test_opt(inode->i_sb, DELALLOC) && + !ext4_should_journal_data(inode) && + !ext4_nonda_switch(inode->i_sb)) { + do { + ret = __block_page_mkwrite(vma, vmf, + ext4_da_get_block_prep); + } while (ret == -ENOSPC && + ext4_should_retry_alloc(inode->i_sb, &retries)); + goto out_ret; } - ret = 0; lock_page(page); - wait_on_page_writeback(page); - if (PageMappedToDisk(page)) { - up_read(&inode->i_alloc_sem); - return VM_FAULT_LOCKED; + size = i_size_read(inode); + /* Page got truncated from under us? */ + if (page->mapping != mapping || page_offset(page) > size) { + unlock_page(page); + ret = VM_FAULT_NOPAGE; + goto out; } if (page->index == size >> PAGE_CACHE_SHIFT) len = size & ~PAGE_CACHE_MASK; else len = PAGE_CACHE_SIZE; - /* - * return if we have all the buffers mapped. This avoid - * the need to call write_begin/write_end which does a - * journal_start/journal_stop which can block and take - * long time + * Return if we have all the buffers mapped. This avoids the need to do + * journal_start/journal_stop which can block and take a long time */ if (page_has_buffers(page)) { if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, ext4_bh_unmapped)) { - up_read(&inode->i_alloc_sem); - return VM_FAULT_LOCKED; + /* Wait so that we don't change page under IO */ + wait_on_page_writeback(page); + ret = VM_FAULT_LOCKED; + goto out; } } unlock_page(page); - /* - * OK, we need to fill the hole... Do write_begin write_end - * to do block allocation/reservation.We are not holding - * inode.i__mutex here. That allow * parallel write_begin, - * write_end call. lock_page prevent this from happening - * on the same page though - */ - ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), - len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); - if (ret < 0) - goto out_unlock; - ret = mapping->a_ops->write_end(file, mapping, page_offset(page), - len, len, page, fsdata); - if (ret < 0) - goto out_unlock; - ret = 0; - - /* - * write_begin/end might have created a dirty page and someone - * could wander in and start the IO. Make sure that hasn't - * happened. - */ - lock_page(page); - wait_on_page_writeback(page); - up_read(&inode->i_alloc_sem); - return VM_FAULT_LOCKED; -out_unlock: - if (ret) + /* OK, we need to fill the hole... */ + if (ext4_should_dioread_nolock(inode)) + get_block = ext4_get_block_write; + else + get_block = ext4_get_block; +retry_alloc: + handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); + if (IS_ERR(handle)) { ret = VM_FAULT_SIGBUS; - up_read(&inode->i_alloc_sem); + goto out; + } + ret = __block_page_mkwrite(vma, vmf, get_block); + if (!ret && ext4_should_journal_data(inode)) { + if (walk_page_buffers(handle, page_buffers(page), 0, + PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { + unlock_page(page); + ret = VM_FAULT_SIGBUS; + goto out; + } + ext4_set_inode_state(inode, EXT4_STATE_JDATA); + } + ext4_journal_stop(handle); + if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry_alloc; +out_ret: + ret = block_page_mkwrite_return(ret); +out: return ret; } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index b754b77..707d605 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1037,15 +1037,11 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru return ERR_PTR(-EIO); } inode = ext4_iget(dir->i_sb, ino); - if (IS_ERR(inode)) { - if (PTR_ERR(inode) == -ESTALE) { - EXT4_ERROR_INODE(dir, - "deleted inode referenced: %u", - ino); - return ERR_PTR(-EIO); - } else { - return ERR_CAST(inode); - } + if (inode == ERR_PTR(-ESTALE)) { + EXT4_ERROR_INODE(dir, + "deleted inode referenced: %u", + ino); + return ERR_PTR(-EIO); } } return d_splice_alias(inode, dentry); |