summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/backref.c48
-rw-r--r--fs/btrfs/ctree.c1
-rw-r--r--fs/btrfs/extent-tree.c27
-rw-r--r--fs/btrfs/extent_io.c9
-rw-r--r--fs/btrfs/file.c62
-rw-r--r--fs/btrfs/inode.c52
-rw-r--r--fs/btrfs/scrub.c2
-rw-r--r--fs/btrfs/transaction.c8
-rw-r--r--fs/btrfs/transaction.h2
-rw-r--r--fs/btrfs/tree-log.c5
-rw-r--r--fs/debugfs/inode.c69
-rw-r--r--fs/dlm/user.c1
-rw-r--r--fs/ext3/namei.c2
-rw-r--r--fs/ext4/extents.c23
-rw-r--r--fs/ext4/extents_status.c51
-rw-r--r--fs/ext4/ialloc.c10
-rw-r--r--fs/ext4/inode.c46
-rw-r--r--fs/ext4/namei.c2
-rw-r--r--fs/ext4/super.c1
-rw-r--r--fs/fcntl.c4
-rw-r--r--fs/fuse/dir.c51
-rw-r--r--fs/lockd/clntlock.c13
-rw-r--r--fs/lockd/clntproc.c5
-rw-r--r--fs/namei.c10
-rw-r--r--fs/nfs/inode.c11
-rw-r--r--fs/nfs/nfs4proc.c8
-rw-r--r--fs/nfs/nfs4xdr.c21
-rw-r--r--fs/nfs/super.c4
-rw-r--r--fs/nfsd/nfs4proc.c2
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nfsd/nfs4xdr.c5
-rw-r--r--fs/nfsd/vfs.c5
-rw-r--r--fs/ocfs2/refcounttree.c5
-rw-r--r--fs/open.c4
-rw-r--r--fs/proc/vmcore.c2
-rw-r--r--fs/reiserfs/procfs.c99
-rw-r--r--fs/reiserfs/super.c3
-rw-r--r--fs/super.c25
-rw-r--r--fs/xfs/xfs_dinode.h3
-rw-r--r--fs/xfs/xfs_inode.c31
-rw-r--r--fs/xfs/xfs_log_recover.c13
41 files changed, 414 insertions, 333 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index eaf1333..8bc5e8c 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -36,16 +36,23 @@ static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
u64 extent_item_pos,
struct extent_inode_elem **eie)
{
- u64 data_offset;
- u64 data_len;
+ u64 offset = 0;
struct extent_inode_elem *e;
- data_offset = btrfs_file_extent_offset(eb, fi);
- data_len = btrfs_file_extent_num_bytes(eb, fi);
+ if (!btrfs_file_extent_compression(eb, fi) &&
+ !btrfs_file_extent_encryption(eb, fi) &&
+ !btrfs_file_extent_other_encoding(eb, fi)) {
+ u64 data_offset;
+ u64 data_len;
- if (extent_item_pos < data_offset ||
- extent_item_pos >= data_offset + data_len)
- return 1;
+ data_offset = btrfs_file_extent_offset(eb, fi);
+ data_len = btrfs_file_extent_num_bytes(eb, fi);
+
+ if (extent_item_pos < data_offset ||
+ extent_item_pos >= data_offset + data_len)
+ return 1;
+ offset = extent_item_pos - data_offset;
+ }
e = kmalloc(sizeof(*e), GFP_NOFS);
if (!e)
@@ -53,7 +60,7 @@ static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
e->next = *eie;
e->inum = key->objectid;
- e->offset = key->offset + (extent_item_pos - data_offset);
+ e->offset = key->offset + offset;
*eie = e;
return 0;
@@ -189,7 +196,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
struct extent_buffer *eb;
struct btrfs_key key;
struct btrfs_file_extent_item *fi;
- struct extent_inode_elem *eie = NULL;
+ struct extent_inode_elem *eie = NULL, *old = NULL;
u64 disk_byte;
if (level != 0) {
@@ -223,6 +230,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
if (disk_byte == wanted_disk_byte) {
eie = NULL;
+ old = NULL;
if (extent_item_pos) {
ret = check_extent_in_eb(&key, eb, fi,
*extent_item_pos,
@@ -230,18 +238,20 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
if (ret < 0)
break;
}
- if (!ret) {
- ret = ulist_add(parents, eb->start,
- (uintptr_t)eie, GFP_NOFS);
- if (ret < 0)
- break;
- if (!extent_item_pos) {
- ret = btrfs_next_old_leaf(root, path,
- time_seq);
- continue;
- }
+ if (ret > 0)
+ goto next;
+ ret = ulist_add_merge(parents, eb->start,
+ (uintptr_t)eie,
+ (u64 *)&old, GFP_NOFS);
+ if (ret < 0)
+ break;
+ if (!ret && extent_item_pos) {
+ while (old->next)
+ old = old->next;
+ old->next = eie;
}
}
+next:
ret = btrfs_next_old_item(root, path, time_seq);
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 5bf4c39..ed50460 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1271,7 +1271,6 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
BUG_ON(!eb_rewin);
}
- extent_buffer_get(eb_rewin);
btrfs_tree_read_unlock(eb);
free_extent_buffer(eb);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0236de7..1204c8e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7466,6 +7466,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
int err = 0;
int ret;
int level;
+ bool root_dropped = false;
path = btrfs_alloc_path();
if (!path) {
@@ -7523,6 +7524,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
while (1) {
btrfs_tree_lock(path->nodes[level]);
btrfs_set_lock_blocking(path->nodes[level]);
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
ret = btrfs_lookup_extent_info(trans, root,
path->nodes[level]->start,
@@ -7538,6 +7540,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
break;
btrfs_tree_unlock(path->nodes[level]);
+ path->locks[level] = 0;
WARN_ON(wc->refs[level] != 1);
level--;
}
@@ -7552,11 +7555,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
while (1) {
- if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
- pr_debug("btrfs: drop snapshot early exit\n");
- err = -EAGAIN;
- goto out_end_trans;
- }
ret = walk_down_tree(trans, root, path, wc);
if (ret < 0) {
@@ -7584,7 +7582,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
}
BUG_ON(wc->level == 0);
- if (btrfs_should_end_transaction(trans, tree_root)) {
+ if (btrfs_should_end_transaction(trans, tree_root) ||
+ (!for_reloc && btrfs_need_cleaner_sleep(root))) {
ret = btrfs_update_root(trans, tree_root,
&root->root_key,
root_item);
@@ -7595,6 +7594,12 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
}
btrfs_end_transaction_throttle(trans, tree_root);
+ if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
+ pr_debug("btrfs: drop snapshot early exit\n");
+ err = -EAGAIN;
+ goto out_free;
+ }
+
trans = btrfs_start_transaction(tree_root, 0);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
@@ -7639,12 +7644,22 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
free_extent_buffer(root->commit_root);
btrfs_put_fs_root(root);
}
+ root_dropped = true;
out_end_trans:
btrfs_end_transaction_throttle(trans, tree_root);
out_free:
kfree(wc);
btrfs_free_path(path);
out:
+ /*
+ * So if we need to stop dropping the snapshot for whatever reason we
+ * need to make sure to add it back to the dead root list so that we
+ * keep trying to do the work later. This also cleans up roots if we
+ * don't have it in the radix (like when we recover after a power fail
+ * or unmount) so we don't leak memory.
+ */
+ if (root_dropped == false)
+ btrfs_add_dead_root(root);
if (err)
btrfs_std_error(root->fs_info, err);
return err;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 583d98b..fe443fe 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4048,7 +4048,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
while (!end) {
- u64 offset_in_extent;
+ u64 offset_in_extent = 0;
/* break if the extent we found is outside the range */
if (em->start >= max || extent_map_end(em) < off)
@@ -4064,9 +4064,12 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
/*
* record the offset from the start of the extent
- * for adjusting the disk offset below
+ * for adjusting the disk offset below. Only do this if the
+ * extent isn't compressed since our in ram offset may be past
+ * what we have actually allocated on disk.
*/
- offset_in_extent = em_start - em->start;
+ if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+ offset_in_extent = em_start - em->start;
em_end = extent_map_end(em);
em_len = em_end - em_start;
emflags = em->flags;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a005fe2..8e686a4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -596,20 +596,29 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
if (no_splits)
goto next;
- if (em->block_start < EXTENT_MAP_LAST_BYTE &&
- em->start < start) {
+ if (em->start < start) {
split->start = em->start;
split->len = start - em->start;
- split->orig_start = em->orig_start;
- split->block_start = em->block_start;
- if (compressed)
- split->block_len = em->block_len;
- else
- split->block_len = split->len;
- split->ram_bytes = em->ram_bytes;
- split->orig_block_len = max(split->block_len,
- em->orig_block_len);
+ if (em->block_start < EXTENT_MAP_LAST_BYTE) {
+ split->orig_start = em->orig_start;
+ split->block_start = em->block_start;
+
+ if (compressed)
+ split->block_len = em->block_len;
+ else
+ split->block_len = split->len;
+ split->orig_block_len = max(split->block_len,
+ em->orig_block_len);
+ split->ram_bytes = em->ram_bytes;
+ } else {
+ split->orig_start = split->start;
+ split->block_len = 0;
+ split->block_start = em->block_start;
+ split->orig_block_len = 0;
+ split->ram_bytes = split->len;
+ }
+
split->generation = gen;
split->bdev = em->bdev;
split->flags = flags;
@@ -620,8 +629,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split = split2;
split2 = NULL;
}
- if (em->block_start < EXTENT_MAP_LAST_BYTE &&
- testend && em->start + em->len > start + len) {
+ if (testend && em->start + em->len > start + len) {
u64 diff = start + len - em->start;
split->start = start + len;
@@ -630,18 +638,28 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split->flags = flags;
split->compress_type = em->compress_type;
split->generation = gen;
- split->orig_block_len = max(em->block_len,
+
+ if (em->block_start < EXTENT_MAP_LAST_BYTE) {
+ split->orig_block_len = max(em->block_len,
em->orig_block_len);
- split->ram_bytes = em->ram_bytes;
- if (compressed) {
- split->block_len = em->block_len;
- split->block_start = em->block_start;
- split->orig_start = em->orig_start;
+ split->ram_bytes = em->ram_bytes;
+ if (compressed) {
+ split->block_len = em->block_len;
+ split->block_start = em->block_start;
+ split->orig_start = em->orig_start;
+ } else {
+ split->block_len = split->len;
+ split->block_start = em->block_start
+ + diff;
+ split->orig_start = em->orig_start;
+ }
} else {
- split->block_len = split->len;
- split->block_start = em->block_start + diff;
- split->orig_start = em->orig_start;
+ split->ram_bytes = split->len;
+ split->orig_start = split->start;
+ split->block_len = 0;
+ split->block_start = em->block_start;
+ split->orig_block_len = 0;
}
ret = add_extent_mapping(em_tree, split, modified);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6d1b93c..021694c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2166,16 +2166,23 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
continue;
- extent_offset = btrfs_file_extent_offset(leaf, extent);
- if (key.offset - extent_offset != offset)
+ /*
+ * 'offset' refers to the exact key.offset,
+ * NOT the 'offset' field in btrfs_extent_data_ref, ie.
+ * (key.offset - extent_offset).
+ */
+ if (key.offset != offset)
continue;
+ extent_offset = btrfs_file_extent_offset(leaf, extent);
num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
+
if (extent_offset >= old->extent_offset + old->offset +
old->len || extent_offset + num_bytes <=
old->extent_offset + old->offset)
continue;
+ ret = 0;
break;
}
@@ -2187,7 +2194,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
backref->root_id = root_id;
backref->inum = inum;
- backref->file_pos = offset + extent_offset;
+ backref->file_pos = offset;
backref->num_bytes = num_bytes;
backref->extent_offset = extent_offset;
backref->generation = btrfs_file_extent_generation(leaf, extent);
@@ -2210,7 +2217,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
new->path = path;
list_for_each_entry_safe(old, tmp, &new->head, list) {
- ret = iterate_inodes_from_logical(old->bytenr, fs_info,
+ ret = iterate_inodes_from_logical(old->bytenr +
+ old->extent_offset, fs_info,
path, record_one_backref,
old);
BUG_ON(ret < 0 && ret != -ENOENT);
@@ -4391,9 +4399,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
int mask = attr->ia_valid;
int ret;
- if (newsize == oldsize)
- return 0;
-
/*
* The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
* special case where we need to update the times despite not having
@@ -5165,14 +5170,31 @@ next:
}
/* Reached end of directory/root. Bump pos past the last item. */
- if (key_type == BTRFS_DIR_INDEX_KEY)
- /*
- * 32-bit glibc will use getdents64, but then strtol -
- * so the last number we can serve is this.
- */
- ctx->pos = 0x7fffffff;
- else
- ctx->pos++;
+ ctx->pos++;
+
+ /*
+ * Stop new entries from being returned after we return the last
+ * entry.
+ *
+ * New directory entries are assigned a strictly increasing
+ * offset. This means that new entries created during readdir
+ * are *guaranteed* to be seen in the future by that readdir.
+ * This has broken buggy programs which operate on names as
+ * they're returned by readdir. Until we re-use freed offsets
+ * we have this hack to stop new entries from being returned
+ * under the assumption that they'll never reach this huge
+ * offset.
+ *
+ * This is being careful not to overflow 32bit loff_t unless the
+ * last entry requires it because doing so has broken 32bit apps
+ * in the past.
+ */
+ if (key_type == BTRFS_DIR_INDEX_KEY) {
+ if (ctx->pos >= INT_MAX)
+ ctx->pos = LLONG_MAX;
+ else
+ ctx->pos = INT_MAX;
+ }
nopos:
ret = 0;
err:
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 4ba2a69..64a157b 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2495,7 +2495,7 @@ again:
ret = scrub_extent(sctx, extent_logical, extent_len,
extent_physical, extent_dev, flags,
generation, extent_mirror_num,
- extent_physical);
+ extent_logical - logical + physical);
if (ret)
goto out;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index d58cce7..af1931a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -983,12 +983,12 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
* a dirty root struct and adds it into the list of dead roots that need to
* be deleted
*/
-int btrfs_add_dead_root(struct btrfs_root *root)
+void btrfs_add_dead_root(struct btrfs_root *root)
{
spin_lock(&root->fs_info->trans_lock);
- list_add_tail(&root->root_list, &root->fs_info->dead_roots);
+ if (list_empty(&root->root_list))
+ list_add_tail(&root->root_list, &root->fs_info->dead_roots);
spin_unlock(&root->fs_info->trans_lock);
- return 0;
}
/*
@@ -1925,7 +1925,7 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
}
root = list_first_entry(&fs_info->dead_roots,
struct btrfs_root, root_list);
- list_del(&root->root_list);
+ list_del_init(&root->root_list);
spin_unlock(&fs_info->trans_lock);
pr_debug("btrfs: cleaner removing %llu\n",
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 005b037..defbc42 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -143,7 +143,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
-int btrfs_add_dead_root(struct btrfs_root *root);
+void btrfs_add_dead_root(struct btrfs_root *root);
int btrfs_defrag_root(struct btrfs_root *root);
int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2c67914..ff60d89 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3746,8 +3746,9 @@ next_slot:
}
log_extents:
+ btrfs_release_path(path);
+ btrfs_release_path(dst_path);
if (fast_search) {
- btrfs_release_path(dst_path);
ret = btrfs_log_changed_extents(trans, root, inode, dst_path);
if (ret) {
err = ret;
@@ -3764,8 +3765,6 @@ log_extents:
}
if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
- btrfs_release_path(path);
- btrfs_release_path(dst_path);
ret = log_directory_changes(trans, root, inode, path, dst_path);
if (ret) {
err = ret;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 4888cb3..c7c83ff 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -533,8 +533,7 @@ EXPORT_SYMBOL_GPL(debugfs_remove);
*/
void debugfs_remove_recursive(struct dentry *dentry)
{
- struct dentry *child;
- struct dentry *parent;
+ struct dentry *child, *next, *parent;
if (IS_ERR_OR_NULL(dentry))
return;
@@ -544,61 +543,37 @@ void debugfs_remove_recursive(struct dentry *dentry)
return;
parent = dentry;
+ down:
mutex_lock(&parent->d_inode->i_mutex);
+ list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) {
+ if (!debugfs_positive(child))
+ continue;
- while (1) {
- /*
- * When all dentries under "parent" has been removed,
- * walk up the tree until we reach our starting point.
- */
- if (list_empty(&parent->d_subdirs)) {
- mutex_unlock(&parent->d_inode->i_mutex);
- if (parent == dentry)
- break;
- parent = parent->d_parent;
- mutex_lock(&parent->d_inode->i_mutex);
- }
- child = list_entry(parent->d_subdirs.next, struct dentry,
- d_u.d_child);
- next_sibling:
-
- /*
- * If "child" isn't empty, walk down the tree and
- * remove all its descendants first.
- */
+ /* perhaps simple_empty(child) makes more sense */
if (!list_empty(&child->d_subdirs)) {
mutex_unlock(&parent->d_inode->i_mutex);
parent = child;
- mutex_lock(&parent->d_inode->i_mutex);
- continue;
+ goto down;
}
- __debugfs_remove(child, parent);
- if (parent->d_subdirs.next == &child->d_u.d_child) {
- /*
- * Try the next sibling.
- */
- if (child->d_u.d_child.next != &parent->d_subdirs) {
- child = list_entry(child->d_u.d_child.next,
- struct dentry,
- d_u.d_child);
- goto next_sibling;
- }
-
- /*
- * Avoid infinite loop if we fail to remove
- * one dentry.
- */
- mutex_unlock(&parent->d_inode->i_mutex);
- break;
- }
- simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+ up:
+ if (!__debugfs_remove(child, parent))
+ simple_release_fs(&debugfs_mount, &debugfs_mount_count);
}
- parent = dentry->d_parent;
+ mutex_unlock(&parent->d_inode->i_mutex);
+ child = parent;
+ parent = parent->d_parent;
mutex_lock(&parent->d_inode->i_mutex);
- __debugfs_remove(dentry, parent);
+
+ if (child != dentry) {
+ next = list_entry(child->d_u.d_child.next, struct dentry,
+ d_u.d_child);
+ goto up;
+ }
+
+ if (!__debugfs_remove(child, parent))
+ simple_release_fs(&debugfs_mount, &debugfs_mount_count);
mutex_unlock(&parent->d_inode->i_mutex);
- simple_release_fs(&debugfs_mount, &debugfs_mount_count);
}
EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 911649a..81214911 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -686,7 +686,6 @@ static int device_close(struct inode *inode, struct file *file)
device_remove_lockspace() */
sigprocmask(SIG_SETMASK, &tmpsig, NULL);
- recalc_sigpending();
return 0;
}
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 998ea11..1194b1f 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1780,11 +1780,11 @@ retry:
inode->i_op = &ext3_file_inode_operations;
inode->i_fop = &ext3_file_operations;
ext3_set_aops(inode);
+ d_tmpfile(dentry, inode);
err = ext3_orphan_add(handle, inode);
if (err)
goto err_drop_inode;
mark_inode_dirty(inode);
- d_tmpfile(dentry, inode);
unlock_new_inode(inode);
}
ext3_journal_stop(handle);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7097b0f..72ba470 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2835,6 +2835,9 @@ again:
err = -EIO;
break;
}
+ /* Yield here to deal with large extent trees.
+ * Should be a no-op if we did IO above. */
+ cond_resched();
if (WARN_ON(i + 1 > depth)) {
err = -EIO;
break;
@@ -4261,8 +4264,8 @@ got_allocated_blocks:
/* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free() */
ext4_discard_preallocations(inode);
- ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
- ext4_ext_get_actual_len(&newex), fb_flags);
+ ext4_free_blocks(handle, inode, NULL, newblock,
+ EXT4_C2B(sbi, allocated_clusters), fb_flags);
goto out2;
}
@@ -4382,8 +4385,9 @@ out2:
}
out3:
- trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated);
-
+ trace_ext4_ext_map_blocks_exit(inode, flags, map,
+ err ? err : allocated);
+ ext4_es_lru_add(inode);
return err ? err : allocated;
}
@@ -4405,9 +4409,20 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode)
last_block = (inode->i_size + sb->s_blocksize - 1)
>> EXT4_BLOCK_SIZE_BITS(sb);
+retry:
err = ext4_es_remove_extent(inode, last_block,
EXT_MAX_BLOCKS - last_block);
+ if (err == -ENOMEM) {
+ cond_resched();
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto retry;
+ }
+ if (err) {
+ ext4_std_error(inode->i_sb, err);
+ return;
+ }
err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
+ ext4_std_error(inode->i_sb, err);
}
static void ext4_falloc_update_inode(struct inode *inode,
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 4b8df7f..91cb110 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -148,6 +148,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t end);
static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
int nr_to_scan);
+static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+ struct ext4_inode_info *locked_ei);
int __init ext4_init_es(void)
{
@@ -665,7 +667,13 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
err = __es_remove_extent(inode, lblk, end);
if (err != 0)
goto error;
+retry:
err = __es_insert_extent(inode, &newes);
+ if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
+ EXT4_I(inode)))
+ goto retry;
+ if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
+ err = 0;
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
@@ -744,8 +752,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status orig_es;
ext4_lblk_t len1, len2;
ext4_fsblk_t block;
- int err = 0;
+ int err;
+retry:
+ err = 0;
es = __es_tree_search(&tree->root, lblk);
if (!es)
goto out;
@@ -780,6 +790,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
if (err) {
es->es_lblk = orig_es.es_lblk;
es->es_len = orig_es.es_len;
+ if ((err == -ENOMEM) &&
+ __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
+ EXT4_I(inode)))
+ goto retry;
goto out;
}
} else {
@@ -889,22 +903,14 @@ static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
return -1;
}
-static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
+static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+ struct ext4_inode_info *locked_ei)
{
- struct ext4_sb_info *sbi = container_of(shrink,
- struct ext4_sb_info, s_es_shrinker);
struct ext4_inode_info *ei;
struct list_head *cur, *tmp;
LIST_HEAD(skiped);
- int nr_to_scan = sc->nr_to_scan;
int ret, nr_shrunk = 0;
- ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
- trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
-
- if (!nr_to_scan)
- return ret;
-
spin_lock(&sbi->s_es_lru_lock);
/*
@@ -933,7 +939,7 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
continue;
}
- if (ei->i_es_lru_nr == 0)
+ if (ei->i_es_lru_nr == 0 || ei == locked_ei)
continue;
write_lock(&ei->i_es_lock);
@@ -952,6 +958,27 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
list_splice_tail(&skiped, &sbi->s_es_lru);
spin_unlock(&sbi->s_es_lru_lock);
+ if (locked_ei && nr_shrunk == 0)
+ nr_shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
+
+ return nr_shrunk;
+}
+
+static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
+{
+ struct ext4_sb_info *sbi = container_of(shrink,
+ struct ext4_sb_info, s_es_shrinker);
+ int nr_to_scan = sc->nr_to_scan;
+ int ret, nr_shrunk;
+
+ ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
+ trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
+
+ if (!nr_to_scan)
+ return ret;
+
+ nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
+
ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
return ret;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f03598c..8bf5999 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -734,11 +734,8 @@ repeat_in_this_group:
ino = ext4_find_next_zero_bit((unsigned long *)
inode_bitmap_bh->b_data,
EXT4_INODES_PER_GROUP(sb), ino);
- if (ino >= EXT4_INODES_PER_GROUP(sb)) {
- if (++group == ngroups)
- group = 0;
- continue;
- }
+ if (ino >= EXT4_INODES_PER_GROUP(sb))
+ goto next_group;
if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
ext4_error(sb, "reserved inode found cleared - "
"inode=%lu", ino + 1);
@@ -769,6 +766,9 @@ repeat_in_this_group:
goto got; /* we grabbed the inode! */
if (ino < EXT4_INODES_PER_GROUP(sb))
goto repeat_in_this_group;
+next_group:
+ if (++group == ngroups)
+ group = 0;
}
err = -ENOSPC;
goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 98b9bff..dd32a2e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -514,10 +514,9 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
"logical block %lu\n", inode->i_ino, flags, map->m_len,
(unsigned long) map->m_lblk);
- ext4_es_lru_add(inode);
-
/* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+ ext4_es_lru_add(inode);
if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
map->m_pblk = ext4_es_pblock(&es) +
map->m_lblk - es.es_lblk;
@@ -556,14 +555,13 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
int ret;
unsigned long long status;
-#ifdef ES_AGGRESSIVE_TEST
- if (retval != map->m_len) {
- printk("ES len assertion failed for inode: %lu "
- "retval %d != map->m_len %d "
- "in %s (lookup)\n", inode->i_ino, retval,
- map->m_len, __func__);
+ if (unlikely(retval != map->m_len)) {
+ ext4_warning(inode->i_sb,
+ "ES len assertion failed for inode "
+ "%lu: retval %d != map->m_len %d",
+ inode->i_ino, retval, map->m_len);
+ WARN_ON(1);
}
-#endif
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@ -657,14 +655,13 @@ found:
int ret;
unsigned long long status;
-#ifdef ES_AGGRESSIVE_TEST
- if (retval != map->m_len) {
- printk("ES len assertion failed for inode: %lu "
- "retval %d != map->m_len %d "
- "in %s (allocation)\n", inode->i_ino, retval,
- map->m_len, __func__);
+ if (unlikely(retval != map->m_len)) {
+ ext4_warning(inode->i_sb,
+ "ES len assertion failed for inode "
+ "%lu: retval %d != map->m_len %d",
+ inode->i_ino, retval, map->m_len);
+ WARN_ON(1);
}
-#endif
/*
* If the extent has been zeroed out, we don't need to update
@@ -1529,11 +1526,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
"logical block %lu\n", inode->i_ino, map->m_len,
(unsigned long) map->m_lblk);
- ext4_es_lru_add(inode);
-
/* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, iblock, &es)) {
-
+ ext4_es_lru_add(inode);
if (ext4_es_is_hole(&es)) {
retval = 0;
down_read((&EXT4_I(inode)->i_data_sem));
@@ -1640,14 +1635,13 @@ add_delayed:
int ret;
unsigned long long status;
-#ifdef ES_AGGRESSIVE_TEST
- if (retval != map->m_len) {
- printk("ES len assertion failed for inode: %lu "
- "retval %d != map->m_len %d "
- "in %s (lookup)\n", inode->i_ino, retval,
- map->m_len, __func__);
+ if (unlikely(retval != map->m_len)) {
+ ext4_warning(inode->i_sb,
+ "ES len assertion failed for inode "
+ "%lu: retval %d != map->m_len %d",
+ inode->i_ino, retval, map->m_len);
+ WARN_ON(1);
}
-#endif
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 234b834..35f55a0 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2316,11 +2316,11 @@ retry:
inode->i_op = &ext4_file_inode_operations;
inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
+ d_tmpfile(dentry, inode);
err = ext4_orphan_add(handle, inode);
if (err)
goto err_drop_inode;
mark_inode_dirty(inode);
- d_tmpfile(dentry, inode);
unlock_new_inode(inode);
}
if (handle)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index bca26f3..36b141e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5481,6 +5481,7 @@ static void __exit ext4_exit_fs(void)
kset_unregister(ext4_kset);
ext4_exit_system_zone();
ext4_exit_pageio();
+ ext4_exit_es();
}
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6599222..65343c3 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -730,14 +730,14 @@ static int __init fcntl_init(void)
* Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
* is defined as O_NONBLOCK on some platforms and not on others.
*/
- BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
+ BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
O_RDONLY | O_WRONLY | O_RDWR |
O_CREAT | O_EXCL | O_NOCTTY |
O_TRUNC | O_APPEND | /* O_NONBLOCK | */
__O_SYNC | O_DSYNC | FASYNC |
O_DIRECT | O_LARGEFILE | O_DIRECTORY |
O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
- __FMODE_EXEC | O_PATH
+ __FMODE_EXEC | O_PATH | __O_TMPFILE
));
fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 0eda527..72a5d5b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1223,30 +1223,46 @@ static int fuse_direntplus_link(struct file *file,
if (name.name[1] == '.' && name.len == 2)
return 0;
}
+
+ if (invalid_nodeid(o->nodeid))
+ return -EIO;
+ if (!fuse_valid_type(o->attr.mode))
+ return -EIO;
+
fc = get_fuse_conn(dir);
name.hash = full_name_hash(name.name, name.len);
dentry = d_lookup(parent, &name);
- if (dentry && dentry->d_inode) {
+ if (dentry) {
inode = dentry->d_inode;
- if (get_node_id(inode) == o->nodeid) {
+ if (!inode) {
+ d_drop(dentry);
+ } else if (get_node_id(inode) != o->nodeid ||
+ ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
+ err = d_invalidate(dentry);
+ if (err)
+ goto out;
+ } else if (is_bad_inode(inode)) {
+ err = -EIO;
+ goto out;
+ } else {
struct fuse_inode *fi;
fi = get_fuse_inode(inode);
spin_lock(&fc->lock);
fi->nlookup++;
spin_unlock(&fc->lock);
+ fuse_change_attributes(inode, &o->attr,
+ entry_attr_timeout(o),
+ attr_version);
+
/*
* The other branch to 'found' comes via fuse_iget()
* which bumps nlookup inside
*/
goto found;
}
- err = d_invalidate(dentry);
- if (err)
- goto out;
dput(dentry);
- dentry = NULL;
}
dentry = d_alloc(parent, &name);
@@ -1259,25 +1275,30 @@ static int fuse_direntplus_link(struct file *file,
if (!inode)
goto out;
- alias = d_materialise_unique(dentry, inode);
- err = PTR_ERR(alias);
- if (IS_ERR(alias))
- goto out;
+ if (S_ISDIR(inode->i_mode)) {
+ mutex_lock(&fc->inst_mutex);
+ alias = fuse_d_add_directory(dentry, inode);
+ mutex_unlock(&fc->inst_mutex);
+ err = PTR_ERR(alias);
+ if (IS_ERR(alias)) {
+ iput(inode);
+ goto out;
+ }
+ } else {
+ alias = d_splice_alias(inode, dentry);
+ }
+
if (alias) {
dput(dentry);
dentry = alias;
}
found:
- fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o),
- attr_version);
-
fuse_change_entry_timeout(dentry, o);
err = 0;
out:
- if (dentry)
- dput(dentry);
+ dput(dentry);
return err;
}
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 01bfe766..41e491b 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -64,12 +64,17 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
nlm_init->protocol, nlm_version,
nlm_init->hostname, nlm_init->noresvport,
nlm_init->net);
- if (host == NULL) {
- lockd_down(nlm_init->net);
- return ERR_PTR(-ENOLCK);
- }
+ if (host == NULL)
+ goto out_nohost;
+ if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL)
+ goto out_nobind;
return host;
+out_nobind:
+ nlmclnt_release_host(host);
+out_nohost:
+ lockd_down(nlm_init->net);
+ return ERR_PTR(-ENOLCK);
}
EXPORT_SYMBOL_GPL(nlmclnt_init);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 9760ecb..acd3947 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -125,14 +125,15 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
{
struct nlm_args *argp = &req->a_args;
struct nlm_lock *lock = &argp->lock;
+ char *nodename = req->a_host->h_rpcclnt->cl_nodename;
nlmclnt_next_cookie(&argp->cookie);
memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh));
- lock->caller = utsname()->nodename;
+ lock->caller = nodename;
lock->oh.data = req->a_owner;
lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
(unsigned int)fl->fl_u.nfs_fl.owner->pid,
- utsname()->nodename);
+ nodename);
lock->svid = fl->fl_u.nfs_fl.owner->pid;
lock->fl.fl_start = fl->fl_start;
lock->fl.fl_end = fl->fl_end;
diff --git a/fs/namei.c b/fs/namei.c
index 8b61d10..89a612e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3671,15 +3671,11 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
return -EINVAL;
/*
- * To use null names we require CAP_DAC_READ_SEARCH
- * This ensures that not everyone will be able to create
- * handlink using the passed filedescriptor.
+ * Using empty names is equivalent to using AT_SYMLINK_FOLLOW
+ * on /proc/self/fd/<fd>.
*/
- if (flags & AT_EMPTY_PATH) {
- if (!capable(CAP_DAC_READ_SEARCH))
- return -ENOENT;
+ if (flags & AT_EMPTY_PATH)
how = LOOKUP_EMPTY;
- }
if (flags & AT_SYMLINK_FOLLOW)
how |= LOOKUP_FOLLOW;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index af6e806..941246f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -463,7 +463,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
unlock_new_inode(inode);
} else
nfs_refresh_inode(inode, fattr);
- nfs_setsecurity(inode, fattr, label);
dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n",
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
@@ -963,9 +962,15 @@ EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
{
struct nfs_inode *nfsi = NFS_I(inode);
-
+ int ret;
+
if (mapping->nrpages != 0) {
- int ret = invalidate_inode_pages2(mapping);
+ if (S_ISREG(inode->i_mode)) {
+ ret = nfs_sync_mapping(mapping);
+ if (ret < 0)
+ return ret;
+ }
+ ret = invalidate_inode_pages2(mapping);
if (ret < 0)
return ret;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cf11799..108a774 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3071,15 +3071,13 @@ struct rpc_clnt *
nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
+ struct rpc_clnt *client = NFS_CLIENT(dir);
int status;
- struct rpc_clnt *client = rpc_clone_client(NFS_CLIENT(dir));
status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL);
- if (status < 0) {
- rpc_shutdown_client(client);
+ if (status < 0)
return ERR_PTR(status);
- }
- return client;
+ return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
}
static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0abfb846..3850b01 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -999,6 +999,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
__be32 *p;
__be32 *q;
int len;
+ uint32_t bmval_len = 2;
uint32_t bmval0 = 0;
uint32_t bmval1 = 0;
uint32_t bmval2 = 0;
@@ -1010,7 +1011,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
* = 40 bytes, plus any contribution from variable-length fields
* such as owner/group.
*/
- len = 20;
+ len = 8;
/* Sigh */
if (iap->ia_valid & ATTR_SIZE)
@@ -1040,8 +1041,6 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
}
len += 4 + (XDR_QUADLEN(owner_grouplen) << 2);
}
- if (label)
- len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2);
if (iap->ia_valid & ATTR_ATIME_SET)
len += 16;
else if (iap->ia_valid & ATTR_ATIME)
@@ -1050,15 +1049,22 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
len += 16;
else if (iap->ia_valid & ATTR_MTIME)
len += 4;
+ if (label) {
+ len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2);
+ bmval_len = 3;
+ }
+
+ len += bmval_len << 2;
p = reserve_space(xdr, len);
/*
* We write the bitmap length now, but leave the bitmap and the attribute
* buffer length to be backfilled at the end of this routine.
*/
- *p++ = cpu_to_be32(3);
+ *p++ = cpu_to_be32(bmval_len);
q = p;
- p += 4;
+ /* Skip bitmap entries + attrlen */
+ p += bmval_len + 1;
if (iap->ia_valid & ATTR_SIZE) {
bmval0 |= FATTR4_WORD0_SIZE;
@@ -1112,10 +1118,11 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
len, ((char *)p - (char *)q) + 4);
BUG();
}
- len = (char *)p - (char *)q - 16;
*q++ = htonl(bmval0);
*q++ = htonl(bmval1);
- *q++ = htonl(bmval2);
+ if (bmval_len == 3)
+ *q++ = htonl(bmval2);
+ len = (char *)p - (char *)(q + 1);
*q = htonl(len);
/* out: */
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 71fdc0d..f6db66d 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2478,6 +2478,10 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
if (server->flags & NFS_MOUNT_NOAC)
sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+ if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL)
+ if (mount_info->cloned->sb->s_flags & MS_SYNCHRONOUS)
+ sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+
/* Get a superblock - note that we may end up sharing one that already exists */
s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata);
if (IS_ERR(s)) {
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 0d4c410..419572f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1524,7 +1524,7 @@ static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\
- 1 + 1 + 0 + /* eir_flags, spr_how, SP4_NONE (for now) */\
+ 1 + 1 + 2 + /* eir_flags, spr_how, spo_must_enforce & _allow */\
2 + /*eir_server_owner.so_minor_id */\
/* eir_server_owner.so_major_id<> */\
XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 280acef..43f4229 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1264,6 +1264,8 @@ static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp)
struct svc_cred *cr = &rqstp->rq_cred;
u32 service;
+ if (!cr->cr_gss_mech)
+ return false;
service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor);
return service == RPC_GSS_SVC_INTEGRITY ||
service == RPC_GSS_SVC_PRIVACY;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0c0f3ea9..c2a4701 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3360,7 +3360,8 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
8 /* eir_clientid */ +
4 /* eir_sequenceid */ +
4 /* eir_flags */ +
- 4 /* spr_how (SP4_NONE) */ +
+ 4 /* spr_how */ +
+ 8 /* spo_must_enforce, spo_must_allow */ +
8 /* so_minor_id */ +
4 /* so_major_id.len */ +
(XDR_QUADLEN(major_id_sz) * 4) +
@@ -3372,8 +3373,6 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
WRITE32(exid->seqid);
WRITE32(exid->flags);
- /* state_protect4_r. Currently only support SP4_NONE */
- BUG_ON(exid->spa_how != SP4_NONE);
WRITE32(exid->spa_how);
switch (exid->spa_how) {
case SP4_NONE:
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8ff6a00..c827acb 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -830,9 +830,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
flags = O_WRONLY|O_LARGEFILE;
}
*filp = dentry_open(&path, flags, current_cred());
- if (IS_ERR(*filp))
+ if (IS_ERR(*filp)) {
host_err = PTR_ERR(*filp);
- else {
+ *filp = NULL;
+ } else {
host_err = ima_file_check(*filp, may_flags);
if (may_flags & NFSD_MAY_64BIT_COOKIE)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 998b17e..9f6b96a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2965,6 +2965,11 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
to = map_end & (PAGE_CACHE_SIZE - 1);
page = find_or_create_page(mapping, page_index, GFP_NOFS);
+ if (!page) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ break;
+ }
/*
* In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
diff --git a/fs/open.c b/fs/open.c
index 9156cb0..7931f76 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -823,7 +823,7 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
int lookup_flags = 0;
int acc_mode;
- if (flags & O_CREAT)
+ if (flags & (O_CREAT | __O_TMPFILE))
op->mode = (mode & S_IALLUGO) | S_IFREG;
else
op->mode = 0;
@@ -844,6 +844,8 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
if ((flags & O_TMPFILE_MASK) != O_TMPFILE)
return -EINVAL;
acc_mode = MAY_OPEN | ACC_MODE(flags);
+ if (!(acc_mode & MAY_WRITE))
+ return -EINVAL;
} else if (flags & O_PATH) {
/*
* If we have O_PATH in the open flag. Then we
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 2850317..a1a16eb 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -223,7 +223,7 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz)
* regions in the 1st kernel pointed to by PT_LOAD entries) into
* virtually contiguous user-space in ELF layout.
*/
-#ifdef CONFIG_MMU
+#if defined(CONFIG_MMU) && !defined(CONFIG_S390)
static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
{
size_t size = vma->vm_end - vma->vm_start;
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 33532f7..a958444 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -19,12 +19,13 @@
/*
* LOCKING:
*
- * We rely on new Alexander Viro's super-block locking.
+ * These guys are evicted from procfs as the very first step in ->kill_sb().
*
*/
-static int show_version(struct seq_file *m, struct super_block *sb)
+static int show_version(struct seq_file *m, void *unused)
{
+ struct super_block *sb = m->private;
char *format;
if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) {
@@ -66,8 +67,9 @@ static int show_version(struct seq_file *m, struct super_block *sb)
#define DJP( x ) le32_to_cpu( jp -> x )
#define JF( x ) ( r -> s_journal -> x )
-static int show_super(struct seq_file *m, struct super_block *sb)
+static int show_super(struct seq_file *m, void *unused)
{
+ struct super_block *sb = m->private;
struct reiserfs_sb_info *r = REISERFS_SB(sb);
seq_printf(m, "state: \t%s\n"
@@ -128,8 +130,9 @@ static int show_super(struct seq_file *m, struct super_block *sb)
return 0;
}
-static int show_per_level(struct seq_file *m, struct super_block *sb)
+static int show_per_level(struct seq_file *m, void *unused)
{
+ struct super_block *sb = m->private;
struct reiserfs_sb_info *r = REISERFS_SB(sb);
int level;
@@ -186,8 +189,9 @@ static int show_per_level(struct seq_file *m, struct super_block *sb)
return 0;
}
-static int show_bitmap(struct seq_file *m, struct super_block *sb)
+static int show_bitmap(struct seq_file *m, void *unused)
{
+ struct super_block *sb = m->private;
struct reiserfs_sb_info *r = REISERFS_SB(sb);
seq_printf(m, "free_block: %lu\n"
@@ -218,8 +222,9 @@ static int show_bitmap(struct seq_file *m, struct super_block *sb)
return 0;
}
-static int show_on_disk_super(struct seq_file *m, struct super_block *sb)
+static int show_on_disk_super(struct seq_file *m, void *unused)
{
+ struct super_block *sb = m->private;
struct reiserfs_sb_info *sb_info = REISERFS_SB(sb);
struct reiserfs_super_block *rs = sb_info->s_rs;
int hash_code = DFL(s_hash_function_code);
@@ -261,8 +266,9 @@ static int show_on_disk_super(struct seq_file *m, struct super_block *sb)
return 0;
}
-static int show_oidmap(struct seq_file *m, struct super_block *sb)
+static int show_oidmap(struct seq_file *m, void *unused)
{
+ struct super_block *sb = m->private;
struct reiserfs_sb_info *sb_info = REISERFS_SB(sb);
struct reiserfs_super_block *rs = sb_info->s_rs;
unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize);
@@ -291,8 +297,9 @@ static int show_oidmap(struct seq_file *m, struct super_block *sb)
return 0;
}
-static int show_journal(struct seq_file *m, struct super_block *sb)
+static int show_journal(struct seq_file *m, void *unused)
{
+ struct super_block *sb = m->private;
struct reiserfs_sb_info *r = REISERFS_SB(sb);
struct reiserfs_super_block *rs = r->s_rs;
struct journal_params *jp = &rs->s_v1.s_journal;
@@ -383,92 +390,24 @@ static int show_journal(struct seq_file *m, struct super_block *sb)
return 0;
}
-/* iterator */
-static int test_sb(struct super_block *sb, void *data)
-{
- return data == sb;
-}
-
-static int set_sb(struct super_block *sb, void *data)
-{
- return -ENOENT;
-}
-
-struct reiserfs_seq_private {
- struct super_block *sb;
- int (*show) (struct seq_file *, struct super_block *);
-};
-
-static void *r_start(struct seq_file *m, loff_t * pos)
-{
- struct reiserfs_seq_private *priv = m->private;
- loff_t l = *pos;
-
- if (l)
- return NULL;
-
- if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, priv->sb)))
- return NULL;
-
- up_write(&priv->sb->s_umount);
- return priv->sb;
-}
-
-static void *r_next(struct seq_file *m, void *v, loff_t * pos)
-{
- ++*pos;
- if (v)
- deactivate_super(v);
- return NULL;
-}
-
-static void r_stop(struct seq_file *m, void *v)
-{
- if (v)
- deactivate_super(v);
-}
-
-static int r_show(struct seq_file *m, void *v)
-{
- struct reiserfs_seq_private *priv = m->private;
- return priv->show(m, v);
-}
-
-static const struct seq_operations r_ops = {
- .start = r_start,
- .next = r_next,
- .stop = r_stop,
- .show = r_show,
-};
-
static int r_open(struct inode *inode, struct file *file)
{
- struct reiserfs_seq_private *priv;
- int ret = seq_open_private(file, &r_ops,
- sizeof(struct reiserfs_seq_private));
-
- if (!ret) {
- struct seq_file *m = file->private_data;
- priv = m->private;
- priv->sb = proc_get_parent_data(inode);
- priv->show = PDE_DATA(inode);
- }
- return ret;
+ return single_open(file, PDE_DATA(inode),
+ proc_get_parent_data(inode));
}
static const struct file_operations r_file_operations = {
.open = r_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
- .owner = THIS_MODULE,
+ .release = single_release,
};
static struct proc_dir_entry *proc_info_root = NULL;
static const char proc_info_root_name[] = "fs/reiserfs";
static void add_file(struct super_block *sb, char *name,
- int (*func) (struct seq_file *, struct super_block *))
+ int (*func) (struct seq_file *, void *))
{
proc_create_data(name, 0, REISERFS_SB(sb)->procdir,
&r_file_operations, func);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index f8a23c3..e2e202a 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -499,6 +499,7 @@ int remove_save_link(struct inode *inode, int truncate)
static void reiserfs_kill_sb(struct super_block *s)
{
if (REISERFS_SB(s)) {
+ reiserfs_proc_info_done(s);
/*
* Force any pending inode evictions to occur now. Any
* inodes to be removed that have extended attributes
@@ -554,8 +555,6 @@ static void reiserfs_put_super(struct super_block *s)
REISERFS_SB(s)->reserved_blocks);
}
- reiserfs_proc_info_done(s);
-
reiserfs_write_unlock(s);
mutex_destroy(&REISERFS_SB(s)->lock);
kfree(s->s_fs_info);
diff --git a/fs/super.c b/fs/super.c
index 7465d43..68307c0 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -336,19 +336,19 @@ EXPORT_SYMBOL(deactivate_super);
* and want to turn it into a full-blown active reference. grab_super()
* is called with sb_lock held and drops it. Returns 1 in case of
* success, 0 if we had failed (superblock contents was already dead or
- * dying when grab_super() had been called).
+ * dying when grab_super() had been called). Note that this is only
+ * called for superblocks not in rundown mode (== ones still on ->fs_supers
+ * of their type), so increment of ->s_count is OK here.
*/
static int grab_super(struct super_block *s) __releases(sb_lock)
{
- if (atomic_inc_not_zero(&s->s_active)) {
- spin_unlock(&sb_lock);
- return 1;
- }
- /* it's going away */
s->s_count++;
spin_unlock(&sb_lock);
- /* wait for it to die */
down_write(&s->s_umount);
+ if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) {
+ put_super(s);
+ return 1;
+ }
up_write(&s->s_umount);
put_super(s);
return 0;
@@ -463,11 +463,6 @@ retry:
destroy_super(s);
s = NULL;
}
- down_write(&old->s_umount);
- if (unlikely(!(old->s_flags & MS_BORN))) {
- deactivate_locked_super(old);
- goto retry;
- }
return old;
}
}
@@ -660,10 +655,10 @@ restart:
if (hlist_unhashed(&sb->s_instances))
continue;
if (sb->s_bdev == bdev) {
- if (grab_super(sb)) /* drops sb_lock */
- return sb;
- else
+ if (!grab_super(sb))
goto restart;
+ up_write(&sb->s_umount);
+ return sb;
}
}
spin_unlock(&sb_lock);
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index 07d735a..e5869b5 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -39,6 +39,9 @@ typedef struct xfs_timestamp {
* There is a very similar struct icdinode in xfs_inode which matches the
* layout of the first 96 bytes of this structure, but is kept in native
* format instead of big endian.
+ *
+ * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed
+ * padding field for v3 inodes.
*/
typedef struct xfs_dinode {
__be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b78481f..bb262c2 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -896,7 +896,6 @@ xfs_dinode_to_disk(
to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
- to->di_flushiter = cpu_to_be16(from->di_flushiter);
to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
@@ -924,6 +923,9 @@ xfs_dinode_to_disk(
to->di_lsn = cpu_to_be64(from->di_lsn);
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
uuid_copy(&to->di_uuid, &from->di_uuid);
+ to->di_flushiter = 0;
+ } else {
+ to->di_flushiter = cpu_to_be16(from->di_flushiter);
}
}
@@ -1029,10 +1031,14 @@ xfs_dinode_calc_crc(
/*
* Read the disk inode attributes into the in-core inode structure.
*
- * If we are initialising a new inode and we are not utilising the
- * XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new inode core
- * with a random generation number. If we are keeping inodes around, we need to
- * read the inode cluster to get the existing generation number off disk.
+ * For version 5 superblocks, if we are initialising a new inode and we are not
+ * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
+ * inode core with a random generation number. If we are keeping inodes around,
+ * we need to read the inode cluster to get the existing generation number off
+ * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
+ * format) then log recovery is dependent on the di_flushiter field being
+ * initialised from the current on-disk value and hence we must also read the
+ * inode off disk.
*/
int
xfs_iread(
@@ -1054,6 +1060,7 @@ xfs_iread(
/* shortcut IO on inode allocation if possible */
if ((iget_flags & XFS_IGET_CREATE) &&
+ xfs_sb_version_hascrc(&mp->m_sb) &&
!(mp->m_flags & XFS_MOUNT_IKEEP)) {
/* initialise the on-disk inode core */
memset(&ip->i_d, 0, sizeof(ip->i_d));
@@ -2882,12 +2889,18 @@ xfs_iflush_int(
__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
goto corrupt_out;
}
+
/*
- * bump the flush iteration count, used to detect flushes which
- * postdate a log record during recovery. This is redundant as we now
- * log every change and hence this can't happen. Still, it doesn't hurt.
+ * Inode item log recovery for v1/v2 inodes are dependent on the
+ * di_flushiter count for correct sequencing. We bump the flush
+ * iteration count so we can detect flushes which postdate a log record
+ * during recovery. This is redundant as we now log every change and
+ * hence this can't happen but we need to still do it to ensure
+ * backwards compatibility with old kernels that predate logging all
+ * inode changes.
*/
- ip->i_d.di_flushiter++;
+ if (ip->i_d.di_version < 3)
+ ip->i_d.di_flushiter++;
/*
* Copy the dirty parts of the inode into the on-disk
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 6fcc910a..7681b19 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2592,8 +2592,16 @@ xlog_recover_inode_pass2(
goto error;
}
- /* Skip replay when the on disk inode is newer than the log one */
- if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+ /*
+ * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
+ * are transactional and if ordering is necessary we can determine that
+ * more accurately by the LSN field in the V3 inode core. Don't trust
+ * the inode versions we might be changing them here - use the
+ * superblock flag to determine whether we need to look at di_flushiter
+ * to skip replay when the on disk inode is newer than the log one
+ */
+ if (!xfs_sb_version_hascrc(&mp->m_sb) &&
+ dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
/*
* Deal with the wrap case, DI_MAX_FLUSH is less
* than smaller numbers
@@ -2608,6 +2616,7 @@ xlog_recover_inode_pass2(
goto error;
}
}
+
/* Take the opportunity to reset the flush iteration count */
dicp->di_flushiter = 0;