summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c306
-rw-r--r--fs/anon_inodes.c50
-rw-r--r--fs/attr.c5
-rw-r--r--fs/bio-integrity.c14
-rw-r--r--fs/btrfs/acl.c2
-rw-r--r--fs/btrfs/backref.c3
-rw-r--r--fs/btrfs/compression.c2
-rw-r--r--fs/btrfs/ctree.c395
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/btrfs/inode.c57
-rw-r--r--fs/btrfs/ioctl.c9
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/relocation.c18
-rw-r--r--fs/btrfs/send.c179
-rw-r--r--fs/btrfs/transaction.c6
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/buffer.c6
-rw-r--r--fs/ceph/addr.c10
-rw-r--r--fs/ceph/cache.c3
-rw-r--r--fs/ceph/mds_client.c8
-rw-r--r--fs/cifs/cifsacl.c28
-rw-r--r--fs/cifs/cifsglob.h12
-rw-r--r--fs/cifs/cifsproto.h7
-rw-r--r--fs/cifs/cifssmb.c8
-rw-r--r--fs/cifs/dir.c11
-rw-r--r--fs/cifs/file.c37
-rw-r--r--fs/cifs/inode.c42
-rw-r--r--fs/cifs/link.c26
-rw-r--r--fs/cifs/readdir.c40
-rw-r--r--fs/cifs/smb1ops.c29
-rw-r--r--fs/cifs/smb2glob.h3
-rw-r--r--fs/cifs/smb2inode.c16
-rw-r--r--fs/cifs/smb2ops.c14
-rw-r--r--fs/cifs/smb2pdu.c4
-rw-r--r--fs/cifs/smb2proto.h2
-rw-r--r--fs/cifs/xattr.c64
-rw-r--r--fs/configfs/dir.c16
-rw-r--r--fs/dcache.c21
-rw-r--r--fs/dcookies.c2
-rw-r--r--fs/devpts/inode.c1
-rw-r--r--fs/dlm/lowcomms.c8
-rw-r--r--fs/eventpoll.c141
-rw-r--r--fs/exec.c6
-rw-r--r--fs/exofs/ore.c37
-rw-r--r--fs/exportfs/expfs.c18
-rw-r--r--fs/ext2/super.c1
-rw-r--r--fs/ext4/ext4.h12
-rw-r--r--fs/ext4/ext4_jbd2.c9
-rw-r--r--fs/ext4/extents.c46
-rw-r--r--fs/ext4/inline.c8
-rw-r--r--fs/ext4/inode.c31
-rw-r--r--fs/ext4/ioctl.c3
-rw-r--r--fs/ext4/mballoc.c21
-rw-r--r--fs/ext4/resize.c34
-rw-r--r--fs/ext4/super.c41
-rw-r--r--fs/ext4/xattr.c1
-rw-r--r--fs/file.c2
-rw-r--r--fs/fs-writeback.c15
-rw-r--r--fs/fscache/object-list.c5
-rw-r--r--fs/fscache/object.c3
-rw-r--r--fs/fuse/dev.c22
-rw-r--r--fs/gfs2/aops.c30
-rw-r--r--fs/gfs2/inode.c51
-rw-r--r--fs/gfs2/log.c4
-rw-r--r--fs/gfs2/meta_io.c5
-rw-r--r--fs/gfs2/ops_fstype.c12
-rw-r--r--fs/hpfs/alloc.c66
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hpfs/super.c29
-rw-r--r--fs/jbd2/transaction.c12
-rw-r--r--fs/libfs.c43
-rw-r--r--fs/lockd/svclock.c8
-rw-r--r--fs/mount.h2
-rw-r--r--fs/namei.c2
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nfs/blocklayout/extents.c2
-rw-r--r--fs/nfs/delegation.c11
-rw-r--r--fs/nfs/dir.c26
-rw-r--r--fs/nfs/direct.c38
-rw-r--r--fs/nfs/inode.c65
-rw-r--r--fs/nfs/internal.h12
-rw-r--r--fs/nfs/nfs4_fs.h1
-rw-r--r--fs/nfs/nfs4client.c20
-rw-r--r--fs/nfs/nfs4filelayout.c18
-rw-r--r--fs/nfs/nfs4filelayoutdev.c2
-rw-r--r--fs/nfs/nfs4proc.c124
-rw-r--r--fs/nfs/nfs4session.c25
-rw-r--r--fs/nfs/nfs4session.h2
-rw-r--r--fs/nfs/nfs4state.c14
-rw-r--r--fs/nfs/nfs4xdr.c47
-rw-r--r--fs/nfs/nfstrace.h1
-rw-r--r--fs/nfs/write.c18
-rw-r--r--fs/nfsd/export.c15
-rw-r--r--fs/nfsd/nfs4xdr.c4
-rw-r--r--fs/nfsd/nfscache.c9
-rw-r--r--fs/nfsd/vfs.c173
-rw-r--r--fs/nilfs2/segment.c10
-rw-r--r--fs/notify/fanotify/fanotify_user.c4
-rw-r--r--fs/ocfs2/file.c8
-rw-r--r--fs/ocfs2/quota_global.c27
-rw-r--r--fs/ocfs2/quota_local.c4
-rw-r--r--fs/pipe.c39
-rw-r--r--fs/proc/base.c1
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/proc/proc_devtree.c1
-rw-r--r--fs/quota/dquot.c14
-rw-r--r--fs/read_write.c16
-rw-r--r--fs/splice.c18
-rw-r--r--fs/stat.c31
-rw-r--r--fs/xfs/xfs_fsops.c6
-rw-r--r--fs/xfs/xfs_ioctl.c9
-rw-r--r--fs/xfs/xfs_ioctl32.c3
-rw-r--r--fs/xfs/xfs_qm.c71
-rw-r--r--fs/xfs/xfs_sb.c13
115 files changed, 2065 insertions, 1035 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 067e3d3..062a5f6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -36,10 +36,10 @@
#include <linux/eventfd.h>
#include <linux/blkdev.h>
#include <linux/compat.h>
-#include <linux/anon_inodes.h>
#include <linux/migrate.h>
#include <linux/ramfs.h>
#include <linux/percpu-refcount.h>
+#include <linux/mount.h>
#include <asm/kmap_types.h>
#include <asm/uaccess.h>
@@ -80,6 +80,8 @@ struct kioctx {
struct percpu_ref users;
atomic_t dead;
+ struct percpu_ref reqs;
+
unsigned long user_id;
struct __percpu kioctx_cpu *cpu;
@@ -107,7 +109,6 @@ struct kioctx {
struct page **ring_pages;
long nr_pages;
- struct rcu_head rcu_head;
struct work_struct free_work;
struct {
@@ -152,12 +153,67 @@ unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio request
static struct kmem_cache *kiocb_cachep;
static struct kmem_cache *kioctx_cachep;
+static struct vfsmount *aio_mnt;
+
+static const struct file_operations aio_ring_fops;
+static const struct address_space_operations aio_ctx_aops;
+
+static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
+{
+ struct qstr this = QSTR_INIT("[aio]", 5);
+ struct file *file;
+ struct path path;
+ struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+
+ inode->i_mapping->a_ops = &aio_ctx_aops;
+ inode->i_mapping->private_data = ctx;
+ inode->i_size = PAGE_SIZE * nr_pages;
+
+ path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
+ if (!path.dentry) {
+ iput(inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ path.mnt = mntget(aio_mnt);
+
+ d_instantiate(path.dentry, inode);
+ file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops);
+ if (IS_ERR(file)) {
+ path_put(&path);
+ return file;
+ }
+
+ file->f_flags = O_RDWR;
+ file->private_data = ctx;
+ return file;
+}
+
+static struct dentry *aio_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{
+ static const struct dentry_operations ops = {
+ .d_dname = simple_dname,
+ };
+ return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1);
+}
+
/* aio_setup
* Creates the slab caches used by the aio routines, panic on
* failure as this is done early during the boot sequence.
*/
static int __init aio_setup(void)
{
+ static struct file_system_type aio_fs = {
+ .name = "aio",
+ .mount = aio_mount,
+ .kill_sb = kill_anon_super,
+ };
+ aio_mnt = kern_mount(&aio_fs);
+ if (IS_ERR(aio_mnt))
+ panic("Failed to create aio fs mount.");
+
kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
@@ -188,15 +244,22 @@ static void aio_free_ring(struct kioctx *ctx)
int i;
for (i = 0; i < ctx->nr_pages; i++) {
+ struct page *page;
pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
page_count(ctx->ring_pages[i]));
- put_page(ctx->ring_pages[i]);
+ page = ctx->ring_pages[i];
+ if (!page)
+ continue;
+ ctx->ring_pages[i] = NULL;
+ put_page(page);
}
put_aio_ring_file(ctx);
- if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
+ if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
kfree(ctx->ring_pages);
+ ctx->ring_pages = NULL;
+ }
}
static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
@@ -222,18 +285,38 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
unsigned long flags;
int rc;
+ rc = 0;
+
+ /* Make sure the old page hasn't already been changed */
+ spin_lock(&mapping->private_lock);
+ ctx = mapping->private_data;
+ if (ctx) {
+ pgoff_t idx;
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+ idx = old->index;
+ if (idx < (pgoff_t)ctx->nr_pages) {
+ if (ctx->ring_pages[idx] != old)
+ rc = -EAGAIN;
+ } else
+ rc = -EINVAL;
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ } else
+ rc = -EINVAL;
+ spin_unlock(&mapping->private_lock);
+
+ if (rc != 0)
+ return rc;
+
/* Writeback must be complete */
BUG_ON(PageWriteback(old));
- put_page(old);
+ get_page(new);
- rc = migrate_page_move_mapping(mapping, new, old, NULL, mode);
+ rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
if (rc != MIGRATEPAGE_SUCCESS) {
- get_page(old);
+ put_page(new);
return rc;
}
- get_page(new);
-
/* We can potentially race against kioctx teardown here. Use the
* address_space's private data lock to protect the mapping's
* private_data.
@@ -245,13 +328,24 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
spin_lock_irqsave(&ctx->completion_lock, flags);
migrate_page_copy(new, old);
idx = old->index;
- if (idx < (pgoff_t)ctx->nr_pages)
- ctx->ring_pages[idx] = new;
+ if (idx < (pgoff_t)ctx->nr_pages) {
+ /* And only do the move if things haven't changed */
+ if (ctx->ring_pages[idx] == old)
+ ctx->ring_pages[idx] = new;
+ else
+ rc = -EAGAIN;
+ } else
+ rc = -EINVAL;
spin_unlock_irqrestore(&ctx->completion_lock, flags);
} else
rc = -EBUSY;
spin_unlock(&mapping->private_lock);
+ if (rc == MIGRATEPAGE_SUCCESS)
+ put_page(old);
+ else
+ put_page(new);
+
return rc;
}
#endif
@@ -268,7 +362,7 @@ static int aio_setup_ring(struct kioctx *ctx)
struct aio_ring *ring;
unsigned nr_events = ctx->max_reqs;
struct mm_struct *mm = current->mm;
- unsigned long size, populate;
+ unsigned long size, unused;
int nr_pages;
int i;
struct file *file;
@@ -283,15 +377,25 @@ static int aio_setup_ring(struct kioctx *ctx)
if (nr_pages < 0)
return -EINVAL;
- file = anon_inode_getfile_private("[aio]", &aio_ring_fops, ctx, O_RDWR);
+ file = aio_private_file(ctx, nr_pages);
if (IS_ERR(file)) {
ctx->aio_ring_file = NULL;
return -EAGAIN;
}
- file->f_inode->i_mapping->a_ops = &aio_ctx_aops;
- file->f_inode->i_mapping->private_data = ctx;
- file->f_inode->i_size = PAGE_SIZE * (loff_t)nr_pages;
+ ctx->aio_ring_file = file;
+ nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
+ / sizeof(struct io_event);
+
+ ctx->ring_pages = ctx->internal_pages;
+ if (nr_pages > AIO_RING_PAGES) {
+ ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
+ GFP_KERNEL);
+ if (!ctx->ring_pages) {
+ put_aio_ring_file(ctx);
+ return -ENOMEM;
+ }
+ }
for (i = 0; i < nr_pages; i++) {
struct page *page;
@@ -304,17 +408,14 @@ static int aio_setup_ring(struct kioctx *ctx)
SetPageUptodate(page);
SetPageDirty(page);
unlock_page(page);
+
+ ctx->ring_pages[i] = page;
}
- ctx->aio_ring_file = file;
- nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
- / sizeof(struct io_event);
+ ctx->nr_pages = i;
- ctx->ring_pages = ctx->internal_pages;
- if (nr_pages > AIO_RING_PAGES) {
- ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
- GFP_KERNEL);
- if (!ctx->ring_pages)
- return -ENOMEM;
+ if (unlikely(i != nr_pages)) {
+ aio_free_ring(ctx);
+ return -EAGAIN;
}
ctx->mmap_size = nr_pages * PAGE_SIZE;
@@ -323,9 +424,9 @@ static int aio_setup_ring(struct kioctx *ctx)
down_write(&mm->mmap_sem);
ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, 0, &populate);
+ MAP_SHARED, 0, &unused);
+ up_write(&mm->mmap_sem);
if (IS_ERR((void *)ctx->mmap_base)) {
- up_write(&mm->mmap_sem);
ctx->mmap_size = 0;
aio_free_ring(ctx);
return -EAGAIN;
@@ -333,27 +434,6 @@ static int aio_setup_ring(struct kioctx *ctx)
pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
- /* We must do this while still holding mmap_sem for write, as we
- * need to be protected against userspace attempting to mremap()
- * or munmap() the ring buffer.
- */
- ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
- 1, 0, ctx->ring_pages, NULL);
-
- /* Dropping the reference here is safe as the page cache will hold
- * onto the pages for us. It is also required so that page migration
- * can unmap the pages and get the right reference count.
- */
- for (i = 0; i < ctx->nr_pages; i++)
- put_page(ctx->ring_pages[i]);
-
- up_write(&mm->mmap_sem);
-
- if (unlikely(ctx->nr_pages != nr_pages)) {
- aio_free_ring(ctx);
- return -EAGAIN;
- }
-
ctx->user_id = ctx->mmap_base;
ctx->nr_events = nr_events; /* trusted copy */
@@ -412,26 +492,34 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
return cancel(kiocb);
}
-static void free_ioctx_rcu(struct rcu_head *head)
+static void free_ioctx(struct work_struct *work)
{
- struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
+ struct kioctx *ctx = container_of(work, struct kioctx, free_work);
+
+ pr_debug("freeing %p\n", ctx);
+ aio_free_ring(ctx);
free_percpu(ctx->cpu);
kmem_cache_free(kioctx_cachep, ctx);
}
+static void free_ioctx_reqs(struct percpu_ref *ref)
+{
+ struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
+
+ INIT_WORK(&ctx->free_work, free_ioctx);
+ schedule_work(&ctx->free_work);
+}
+
/*
* When this function runs, the kioctx has been removed from the "hash table"
* and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
* now it's safe to cancel any that need to be.
*/
-static void free_ioctx(struct work_struct *work)
+static void free_ioctx_users(struct percpu_ref *ref)
{
- struct kioctx *ctx = container_of(work, struct kioctx, free_work);
- struct aio_ring *ring;
+ struct kioctx *ctx = container_of(ref, struct kioctx, users);
struct kiocb *req;
- unsigned cpu, avail;
- DEFINE_WAIT(wait);
spin_lock_irq(&ctx->ctx_lock);
@@ -445,54 +533,8 @@ static void free_ioctx(struct work_struct *work)
spin_unlock_irq(&ctx->ctx_lock);
- for_each_possible_cpu(cpu) {
- struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu);
-
- atomic_add(kcpu->reqs_available, &ctx->reqs_available);
- kcpu->reqs_available = 0;
- }
-
- while (1) {
- prepare_to_wait(&ctx->wait, &wait, TASK_UNINTERRUPTIBLE);
-
- ring = kmap_atomic(ctx->ring_pages[0]);
- avail = (ring->head <= ring->tail)
- ? ring->tail - ring->head
- : ctx->nr_events - ring->head + ring->tail;
-
- atomic_add(avail, &ctx->reqs_available);
- ring->head = ring->tail;
- kunmap_atomic(ring);
-
- if (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1)
- break;
-
- schedule();
- }
- finish_wait(&ctx->wait, &wait);
-
- WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr_events - 1);
-
- aio_free_ring(ctx);
-
- pr_debug("freeing %p\n", ctx);
-
- /*
- * Here the call_rcu() is between the wait_event() for reqs_active to
- * hit 0, and freeing the ioctx.
- *
- * aio_complete() decrements reqs_active, but it has to touch the ioctx
- * after to issue a wakeup so we use rcu.
- */
- call_rcu(&ctx->rcu_head, free_ioctx_rcu);
-}
-
-static void free_ioctx_ref(struct percpu_ref *ref)
-{
- struct kioctx *ctx = container_of(ref, struct kioctx, users);
-
- INIT_WORK(&ctx->free_work, free_ioctx);
- schedule_work(&ctx->free_work);
+ percpu_ref_kill(&ctx->reqs);
+ percpu_ref_put(&ctx->reqs);
}
static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
@@ -551,6 +593,16 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
}
}
+static void aio_nr_sub(unsigned nr)
+{
+ spin_lock(&aio_nr_lock);
+ if (WARN_ON(aio_nr - nr > aio_nr))
+ aio_nr = 0;
+ else
+ aio_nr -= nr;
+ spin_unlock(&aio_nr_lock);
+}
+
/* ioctx_alloc
* Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
*/
@@ -588,8 +640,11 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
ctx->max_reqs = nr_events;
- if (percpu_ref_init(&ctx->users, free_ioctx_ref))
- goto out_freectx;
+ if (percpu_ref_init(&ctx->users, free_ioctx_users))
+ goto err;
+
+ if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
+ goto err;
spin_lock_init(&ctx->ctx_lock);
spin_lock_init(&ctx->completion_lock);
@@ -600,10 +655,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
ctx->cpu = alloc_percpu(struct kioctx_cpu);
if (!ctx->cpu)
- goto out_freeref;
+ goto err;
if (aio_setup_ring(ctx) < 0)
- goto out_freepcpu;
+ goto err;
atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
@@ -615,32 +670,31 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
if (aio_nr + nr_events > (aio_max_nr * 2UL) ||
aio_nr + nr_events < aio_nr) {
spin_unlock(&aio_nr_lock);
- goto out_cleanup;
+ err = -EAGAIN;
+ goto err_ctx;
}
aio_nr += ctx->max_reqs;
spin_unlock(&aio_nr_lock);
- percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */
+ percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */
+ percpu_ref_get(&ctx->reqs); /* free_ioctx_users() will drop this */
err = ioctx_add_table(ctx, mm);
if (err)
- goto out_cleanup_put;
+ goto err_cleanup;
pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
ctx, ctx->user_id, mm, ctx->nr_events);
return ctx;
-out_cleanup_put:
- percpu_ref_put(&ctx->users);
-out_cleanup:
- err = -EAGAIN;
+err_cleanup:
+ aio_nr_sub(ctx->max_reqs);
+err_ctx:
aio_free_ring(ctx);
-out_freepcpu:
+err:
free_percpu(ctx->cpu);
-out_freeref:
+ free_percpu(ctx->reqs.pcpu_count);
free_percpu(ctx->users.pcpu_count);
-out_freectx:
- put_aio_ring_file(ctx);
kmem_cache_free(kioctx_cachep, ctx);
pr_debug("error allocating ioctx %d\n", err);
return ERR_PTR(err);
@@ -675,10 +729,7 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
* -EAGAIN with no ioctxs actually in use (as far as userspace
* could tell).
*/
- spin_lock(&aio_nr_lock);
- BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
- aio_nr -= ctx->max_reqs;
- spin_unlock(&aio_nr_lock);
+ aio_nr_sub(ctx->max_reqs);
if (ctx->mmap_size)
vm_munmap(ctx->mmap_base, ctx->mmap_size);
@@ -810,6 +861,8 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
if (unlikely(!req))
goto out_put;
+ percpu_ref_get(&ctx->reqs);
+
req->ki_ctx = ctx;
return req;
out_put:
@@ -879,12 +932,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
return;
}
- /*
- * Take rcu_read_lock() in case the kioctx is being destroyed, as we
- * need to issue a wakeup after incrementing reqs_available.
- */
- rcu_read_lock();
-
if (iocb->ki_list.next) {
unsigned long flags;
@@ -959,7 +1006,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
if (waitqueue_active(&ctx->wait))
wake_up(&ctx->wait);
- rcu_read_unlock();
+ percpu_ref_put(&ctx->reqs);
}
EXPORT_SYMBOL(aio_complete);
@@ -1370,6 +1417,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
return 0;
out_put_req:
put_reqs_available(ctx, 1);
+ percpu_ref_put(&ctx->reqs);
kiocb_free(req);
return ret;
}
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 85c9618..22f9698 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -24,7 +24,6 @@
static struct vfsmount *anon_inode_mnt __read_mostly;
static struct inode *anon_inode_inode;
-static const struct file_operations anon_inode_fops;
/*
* anon_inodefs_dname() is called from d_path().
@@ -39,51 +38,6 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
.d_dname = anon_inodefs_dname,
};
-/*
- * nop .set_page_dirty method so that people can use .page_mkwrite on
- * anon inodes.
- */
-static int anon_set_page_dirty(struct page *page)
-{
- return 0;
-};
-
-static const struct address_space_operations anon_aops = {
- .set_page_dirty = anon_set_page_dirty,
-};
-
-/*
- * A single inode exists for all anon_inode files. Contrary to pipes,
- * anon_inode inodes have no associated per-instance data, so we need
- * only allocate one of them.
- */
-static struct inode *anon_inode_mkinode(struct super_block *s)
-{
- struct inode *inode = new_inode_pseudo(s);
-
- if (!inode)
- return ERR_PTR(-ENOMEM);
-
- inode->i_ino = get_next_ino();
- inode->i_fop = &anon_inode_fops;
-
- inode->i_mapping->a_ops = &anon_aops;
-
- /*
- * Mark the inode dirty from the very beginning,
- * that way it will never be moved to the dirty
- * list because mark_inode_dirty() will think
- * that it already _is_ on the dirty list.
- */
- inode->i_state = I_DIRTY;
- inode->i_mode = S_IRUSR | S_IWUSR;
- inode->i_uid = current_fsuid();
- inode->i_gid = current_fsgid();
- inode->i_flags |= S_PRIVATE;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- return inode;
-}
-
static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
@@ -92,7 +46,7 @@ static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
&anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
if (!IS_ERR(root)) {
struct super_block *s = root->d_sb;
- anon_inode_inode = anon_inode_mkinode(s);
+ anon_inode_inode = alloc_anon_inode(s);
if (IS_ERR(anon_inode_inode)) {
dput(root);
deactivate_locked_super(s);
@@ -134,7 +88,7 @@ struct file *anon_inode_getfile_private(const char *name,
if (fops->owner && !try_module_get(fops->owner))
return ERR_PTR(-ENOENT);
- inode = anon_inode_mkinode(anon_inode_mnt->mnt_sb);
+ inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
if (IS_ERR(inode)) {
file = ERR_PTR(-ENOMEM);
goto err_module;
diff --git a/fs/attr.c b/fs/attr.c
index 1449adb..8dd5825 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -182,11 +182,6 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
return -EPERM;
}
- if ((ia_valid & ATTR_SIZE) && IS_I_VERSION(inode)) {
- if (attr->ia_size != inode->i_size)
- inode_inc_iversion(inode);
- }
-
if ((ia_valid & ATTR_MODE)) {
umode_t amode = attr->ia_mode;
/* Flag setting protected by i_mutex */
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index fc60b31..b5ee393 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -114,6 +114,14 @@ void bio_integrity_free(struct bio *bio)
}
EXPORT_SYMBOL(bio_integrity_free);
+static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip)
+{
+ if (bip->bip_slab == BIO_POOL_NONE)
+ return BIP_INLINE_VECS;
+
+ return bvec_nr_vecs(bip->bip_slab);
+}
+
/**
* bio_integrity_add_page - Attach integrity metadata
* @bio: bio to update
@@ -129,7 +137,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
struct bio_integrity_payload *bip = bio->bi_integrity;
struct bio_vec *iv;
- if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) {
+ if (bip->bip_vcnt >= bip_integrity_vecs(bip)) {
printk(KERN_ERR "%s: bip_vec full\n", __func__);
return 0;
}
@@ -308,7 +316,7 @@ static void bio_integrity_generate(struct bio *bio)
bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
bix.sector_size = bi->sector_size;
- bio_for_each_segment(bv, bio, i) {
+ bio_for_each_segment_all(bv, bio, i) {
void *kaddr = kmap_atomic(bv->bv_page);
bix.data_buf = kaddr + bv->bv_offset;
bix.data_size = bv->bv_len;
@@ -450,7 +458,7 @@ static int bio_integrity_verify(struct bio *bio)
bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
bix.sector_size = bi->sector_size;
- bio_for_each_segment(bv, bio, i) {
+ bio_for_each_segment_all(bv, bio, i) {
void *kaddr = kmap_atomic(bv->bv_page);
bix.data_buf = kaddr + bv->bv_offset;
bix.data_size = bv->bv_len;
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index e15d2b0..0890c83 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -229,7 +229,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
if (ret > 0) {
/* we need an acl */
ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
- } else {
+ } else if (ret < 0) {
cache_no_acl(inode);
}
} else {
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 0552a59..5eb50b5 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -185,6 +185,9 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
{
struct __prelim_ref *ref;
+ if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ return 0;
+
ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
if (!ref)
return -ENOMEM;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 6aad98c..6e9ff8f 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1012,6 +1012,8 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
bytes = min(bytes, working_bytes);
kaddr = kmap_atomic(page_out);
memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
+ if (*pg_index == (vcnt - 1) && *pg_offset == 0)
+ memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
kunmap_atomic(kaddr);
flush_dcache_page(page_out);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 61b5bcd..c1123ec 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -39,7 +39,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
struct extent_buffer *src_buf);
static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
int level, int slot);
-static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+static int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb);
static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
@@ -475,6 +475,8 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
* the index is the shifted logical of the *new* root node for root replace
* operations, or the shifted logical of the affected block for all other
* operations.
+ *
+ * Note: must be called with write lock (tree_mod_log_write_lock).
*/
static noinline int
__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
@@ -483,24 +485,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
struct rb_node **new;
struct rb_node *parent = NULL;
struct tree_mod_elem *cur;
- int ret = 0;
BUG_ON(!tm);
- tree_mod_log_write_lock(fs_info);
- if (list_empty(&fs_info->tree_mod_seq_list)) {
- tree_mod_log_write_unlock(fs_info);
- /*
- * Ok we no longer care about logging modifications, free up tm
- * and return 0. Any callers shouldn't be using tm after
- * calling tree_mod_log_insert, but if they do we can just
- * change this to return a special error code to let the callers
- * do their own thing.
- */
- kfree(tm);
- return 0;
- }
-
spin_lock(&fs_info->tree_mod_seq_lock);
tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock);
@@ -518,18 +505,13 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
new = &((*new)->rb_left);
else if (cur->seq > tm->seq)
new = &((*new)->rb_right);
- else {
- ret = -EEXIST;
- kfree(tm);
- goto out;
- }
+ else
+ return -EEXIST;
}
rb_link_node(&tm->node, parent, new);
rb_insert_color(&tm->node, tm_root);
-out:
- tree_mod_log_write_unlock(fs_info);
- return ret;
+ return 0;
}
/*
@@ -545,19 +527,38 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
return 1;
if (eb && btrfs_header_level(eb) == 0)
return 1;
+
+ tree_mod_log_write_lock(fs_info);
+ if (list_empty(&(fs_info)->tree_mod_seq_list)) {
+ tree_mod_log_write_unlock(fs_info);
+ return 1;
+ }
+
return 0;
}
-static inline int
-__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb, int slot,
- enum mod_log_op op, gfp_t flags)
+/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
+static inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb)
+{
+ smp_mb();
+ if (list_empty(&(fs_info)->tree_mod_seq_list))
+ return 0;
+ if (eb && btrfs_header_level(eb) == 0)
+ return 0;
+
+ return 1;
+}
+
+static struct tree_mod_elem *
+alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
+ enum mod_log_op op, gfp_t flags)
{
struct tree_mod_elem *tm;
tm = kzalloc(sizeof(*tm), flags);
if (!tm)
- return -ENOMEM;
+ return NULL;
tm->index = eb->start >> PAGE_CACHE_SHIFT;
if (op != MOD_LOG_KEY_ADD) {
@@ -567,8 +568,9 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
tm->op = op;
tm->slot = slot;
tm->generation = btrfs_node_ptr_generation(eb, slot);
+ RB_CLEAR_NODE(&tm->node);
- return __tree_mod_log_insert(fs_info, tm);
+ return tm;
}
static noinline int
@@ -576,10 +578,27 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int slot,
enum mod_log_op op, gfp_t flags)
{
- if (tree_mod_dont_log(fs_info, eb))
+ struct tree_mod_elem *tm;
+ int ret;
+
+ if (!tree_mod_need_log(fs_info, eb))
return 0;
- return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
+ tm = alloc_tree_mod_elem(eb, slot, op, flags);
+ if (!tm)
+ return -ENOMEM;
+
+ if (tree_mod_dont_log(fs_info, eb)) {
+ kfree(tm);
+ return 0;
+ }
+
+ ret = __tree_mod_log_insert(fs_info, tm);
+ tree_mod_log_write_unlock(fs_info);
+ if (ret)
+ kfree(tm);
+
+ return ret;
}
static noinline int
@@ -587,53 +606,95 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int dst_slot, int src_slot,
int nr_items, gfp_t flags)
{
- struct tree_mod_elem *tm;
- int ret;
+ struct tree_mod_elem *tm = NULL;
+ struct tree_mod_elem **tm_list = NULL;
+ int ret = 0;
int i;
+ int locked = 0;
- if (tree_mod_dont_log(fs_info, eb))
+ if (!tree_mod_need_log(fs_info, eb))
return 0;
+ tm_list = kzalloc(nr_items * sizeof(struct tree_mod_elem *), flags);
+ if (!tm_list)
+ return -ENOMEM;
+
+ tm = kzalloc(sizeof(*tm), flags);
+ if (!tm) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+
+ tm->index = eb->start >> PAGE_CACHE_SHIFT;
+ tm->slot = src_slot;
+ tm->move.dst_slot = dst_slot;
+ tm->move.nr_items = nr_items;
+ tm->op = MOD_LOG_MOVE_KEYS;
+
+ for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
+ tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
+ MOD_LOG_KEY_REMOVE_WHILE_MOVING, flags);
+ if (!tm_list[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+
+ if (tree_mod_dont_log(fs_info, eb))
+ goto free_tms;
+ locked = 1;
+
/*
* When we override something during the move, we log these removals.
* This can only happen when we move towards the beginning of the
* buffer, i.e. dst_slot < src_slot.
*/
for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
- ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
- MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
- BUG_ON(ret < 0);
+ ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+ if (ret)
+ goto free_tms;
}
- tm = kzalloc(sizeof(*tm), flags);
- if (!tm)
- return -ENOMEM;
+ ret = __tree_mod_log_insert(fs_info, tm);
+ if (ret)
+ goto free_tms;
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
- tm->index = eb->start >> PAGE_CACHE_SHIFT;
- tm->slot = src_slot;
- tm->move.dst_slot = dst_slot;
- tm->move.nr_items = nr_items;
- tm->op = MOD_LOG_MOVE_KEYS;
+ return 0;
+free_tms:
+ for (i = 0; i < nr_items; i++) {
+ if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+ rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+ kfree(tm_list[i]);
+ }
+ if (locked)
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
+ kfree(tm);
- return __tree_mod_log_insert(fs_info, tm);
+ return ret;
}
-static inline void
-__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
+static inline int
+__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+ struct tree_mod_elem **tm_list,
+ int nritems)
{
- int i;
- u32 nritems;
+ int i, j;
int ret;
- if (btrfs_header_level(eb) == 0)
- return;
-
- nritems = btrfs_header_nritems(eb);
for (i = nritems - 1; i >= 0; i--) {
- ret = __tree_mod_log_insert_key(fs_info, eb, i,
- MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
- BUG_ON(ret < 0);
+ ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+ if (ret) {
+ for (j = nritems - 1; j > i; j--)
+ rb_erase(&tm_list[j]->node,
+ &fs_info->tree_mod_log);
+ return ret;
+ }
}
+
+ return 0;
}
static noinline int
@@ -642,17 +703,38 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
struct extent_buffer *new_root, gfp_t flags,
int log_removal)
{
- struct tree_mod_elem *tm;
+ struct tree_mod_elem *tm = NULL;
+ struct tree_mod_elem **tm_list = NULL;
+ int nritems = 0;
+ int ret = 0;
+ int i;
- if (tree_mod_dont_log(fs_info, NULL))
+ if (!tree_mod_need_log(fs_info, NULL))
return 0;
- if (log_removal)
- __tree_mod_log_free_eb(fs_info, old_root);
+ if (log_removal && btrfs_header_level(old_root) > 0) {
+ nritems = btrfs_header_nritems(old_root);
+ tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+ flags);
+ if (!tm_list) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ for (i = 0; i < nritems; i++) {
+ tm_list[i] = alloc_tree_mod_elem(old_root, i,
+ MOD_LOG_KEY_REMOVE_WHILE_FREEING, flags);
+ if (!tm_list[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+ }
tm = kzalloc(sizeof(*tm), flags);
- if (!tm)
- return -ENOMEM;
+ if (!tm) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
tm->index = new_root->start >> PAGE_CACHE_SHIFT;
tm->old_root.logical = old_root->start;
@@ -660,7 +742,30 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
tm->generation = btrfs_header_generation(old_root);
tm->op = MOD_LOG_ROOT_REPLACE;
- return __tree_mod_log_insert(fs_info, tm);
+ if (tree_mod_dont_log(fs_info, NULL))
+ goto free_tms;
+
+ if (tm_list)
+ ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
+ if (!ret)
+ ret = __tree_mod_log_insert(fs_info, tm);
+
+ tree_mod_log_write_unlock(fs_info);
+ if (ret)
+ goto free_tms;
+ kfree(tm_list);
+
+ return ret;
+
+free_tms:
+ if (tm_list) {
+ for (i = 0; i < nritems; i++)
+ kfree(tm_list[i]);
+ kfree(tm_list);
+ }
+ kfree(tm);
+
+ return ret;
}
static struct tree_mod_elem *
@@ -729,31 +834,75 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
return __tree_mod_log_search(fs_info, start, min_seq, 0);
}
-static noinline void
+static noinline int
tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
struct extent_buffer *src, unsigned long dst_offset,
unsigned long src_offset, int nr_items)
{
- int ret;
+ int ret = 0;
+ struct tree_mod_elem **tm_list = NULL;
+ struct tree_mod_elem **tm_list_add, **tm_list_rem;
int i;
+ int locked = 0;
- if (tree_mod_dont_log(fs_info, NULL))
- return;
+ if (!tree_mod_need_log(fs_info, NULL))
+ return 0;
if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
- return;
+ return 0;
+ tm_list = kzalloc(nr_items * 2 * sizeof(struct tree_mod_elem *),
+ GFP_NOFS);
+ if (!tm_list)
+ return -ENOMEM;
+
+ tm_list_add = tm_list;
+ tm_list_rem = tm_list + nr_items;
for (i = 0; i < nr_items; i++) {
- ret = __tree_mod_log_insert_key(fs_info, src,
- i + src_offset,
- MOD_LOG_KEY_REMOVE, GFP_NOFS);
- BUG_ON(ret < 0);
- ret = __tree_mod_log_insert_key(fs_info, dst,
- i + dst_offset,
- MOD_LOG_KEY_ADD,
- GFP_NOFS);
- BUG_ON(ret < 0);
+ tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset,
+ MOD_LOG_KEY_REMOVE, GFP_NOFS);
+ if (!tm_list_rem[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+
+ tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
+ MOD_LOG_KEY_ADD, GFP_NOFS);
+ if (!tm_list_add[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+
+ if (tree_mod_dont_log(fs_info, NULL))
+ goto free_tms;
+ locked = 1;
+
+ for (i = 0; i < nr_items; i++) {
+ ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]);
+ if (ret)
+ goto free_tms;
+ ret = __tree_mod_log_insert(fs_info, tm_list_add[i]);
+ if (ret)
+ goto free_tms;
+ }
+
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
+
+ return 0;
+
+free_tms:
+ for (i = 0; i < nr_items * 2; i++) {
+ if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+ rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+ kfree(tm_list[i]);
}
+ if (locked)
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
+
+ return ret;
}
static inline void
@@ -772,18 +921,58 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
{
int ret;
- ret = __tree_mod_log_insert_key(fs_info, eb, slot,
+ ret = tree_mod_log_insert_key(fs_info, eb, slot,
MOD_LOG_KEY_REPLACE,
atomic ? GFP_ATOMIC : GFP_NOFS);
BUG_ON(ret < 0);
}
-static noinline void
+static noinline int
tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
{
+ struct tree_mod_elem **tm_list = NULL;
+ int nritems = 0;
+ int i;
+ int ret = 0;
+
+ if (btrfs_header_level(eb) == 0)
+ return 0;
+
+ if (!tree_mod_need_log(fs_info, NULL))
+ return 0;
+
+ nritems = btrfs_header_nritems(eb);
+ tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+ GFP_NOFS);
+ if (!tm_list)
+ return -ENOMEM;
+
+ for (i = 0; i < nritems; i++) {
+ tm_list[i] = alloc_tree_mod_elem(eb, i,
+ MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
+ if (!tm_list[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+
if (tree_mod_dont_log(fs_info, eb))
- return;
- __tree_mod_log_free_eb(fs_info, eb);
+ goto free_tms;
+
+ ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
+ tree_mod_log_write_unlock(fs_info);
+ if (ret)
+ goto free_tms;
+ kfree(tm_list);
+
+ return 0;
+
+free_tms:
+ for (i = 0; i < nritems; i++)
+ kfree(tm_list[i]);
+ kfree(tm_list);
+
+ return ret;
}
static noinline void
@@ -1041,8 +1230,13 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
btrfs_mark_buffer_dirty(parent);
- if (last_ref)
- tree_mod_log_free_eb(root->fs_info, buf);
+ if (last_ref) {
+ ret = tree_mod_log_free_eb(root->fs_info, buf);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
+ }
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
}
@@ -2758,7 +2952,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
int level;
int lowest_unlock = 1;
u8 lowest_level = 0;
- int prev_cmp;
+ int prev_cmp = -1;
lowest_level = p->lowest_level;
WARN_ON(p->nodes[0] != NULL);
@@ -2769,7 +2963,6 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
}
again:
- prev_cmp = -1;
b = get_old_root(root, time_seq);
level = btrfs_header_level(b);
p->locks[level] = BTRFS_READ_LOCK;
@@ -2787,6 +2980,11 @@ again:
*/
btrfs_unlock_up_safe(p, level + 1);
+ /*
+ * Since we can unwind eb's we want to do a real search every
+ * time.
+ */
+ prev_cmp = -1;
ret = key_search(b, key, level, &prev_cmp, &slot);
if (level != 0) {
@@ -3019,8 +3217,12 @@ static int push_node_left(struct btrfs_trans_handle *trans,
} else
push_items = min(src_nritems - 8, push_items);
- tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
- push_items);
+ ret = tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
+ push_items);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
copy_extent_buffer(dst, src,
btrfs_node_key_ptr_offset(dst_nritems),
btrfs_node_key_ptr_offset(0),
@@ -3090,8 +3292,12 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
(dst_nritems) *
sizeof(struct btrfs_key_ptr));
- tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
- src_nritems - push_items, push_items);
+ ret = tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
+ src_nritems - push_items, push_items);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
copy_extent_buffer(dst, src,
btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(src_nritems - push_items),
@@ -3292,7 +3498,12 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_header_chunk_tree_uuid(split),
BTRFS_UUID_SIZE);
- tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
+ ret = tree_mod_log_eb_copy(root->fs_info, split, c, 0,
+ mid, c_nritems - mid);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
copy_extent_buffer(split, c,
btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(mid),
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d58bef1..b256ddc 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7718,7 +7718,7 @@ out:
*/
if (!for_reloc && root_dropped == false)
btrfs_add_dead_root(root);
- if (err)
+ if (err && err != -EAGAIN)
btrfs_std_error(root->fs_info, err);
return err;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 51e3afa..3d03d2e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2367,10 +2367,23 @@ out_unlock:
return ret;
}
+static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
+{
+ struct old_sa_defrag_extent *old, *tmp;
+
+ if (!new)
+ return;
+
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+ kfree(new);
+}
+
static void relink_file_extents(struct new_sa_defrag_extent *new)
{
struct btrfs_path *path;
- struct old_sa_defrag_extent *old, *tmp;
struct sa_defrag_extent_backref *backref;
struct sa_defrag_extent_backref *prev = NULL;
struct inode *inode;
@@ -2413,16 +2426,11 @@ static void relink_file_extents(struct new_sa_defrag_extent *new)
kfree(prev);
btrfs_free_path(path);
-
- list_for_each_entry_safe(old, tmp, &new->head, list) {
- list_del(&old->list);
- kfree(old);
- }
out:
+ free_sa_defrag_extent(new);
+
atomic_dec(&root->fs_info->defrag_running);
wake_up(&root->fs_info->transaction_wait);
-
- kfree(new);
}
static struct new_sa_defrag_extent *
@@ -2432,7 +2440,7 @@ record_old_file_extents(struct inode *inode,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
struct btrfs_key key;
- struct old_sa_defrag_extent *old, *tmp;
+ struct old_sa_defrag_extent *old;
struct new_sa_defrag_extent *new;
int ret;
@@ -2480,7 +2488,7 @@ record_old_file_extents(struct inode *inode,
if (slot >= btrfs_header_nritems(l)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
- goto out_free_list;
+ goto out_free_path;
else if (ret > 0)
break;
continue;
@@ -2509,7 +2517,7 @@ record_old_file_extents(struct inode *inode,
old = kmalloc(sizeof(*old), GFP_NOFS);
if (!old)
- goto out_free_list;
+ goto out_free_path;
offset = max(new->file_pos, key.offset);
end = min(new->file_pos + new->len, key.offset + num_bytes);
@@ -2531,15 +2539,10 @@ next:
return new;
-out_free_list:
- list_for_each_entry_safe(old, tmp, &new->head, list) {
- list_del(&old->list);
- kfree(old);
- }
out_free_path:
btrfs_free_path(path);
out_kfree:
- kfree(new);
+ free_sa_defrag_extent(new);
return NULL;
}
@@ -2610,7 +2613,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
EXTENT_DEFRAG, 1, cached_state);
if (ret) {
u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
- if (last_snapshot >= BTRFS_I(inode)->generation)
+ if (0 && last_snapshot >= BTRFS_I(inode)->generation)
/* the inode is shared */
new = record_old_file_extents(inode, ordered_extent);
@@ -2710,8 +2713,14 @@ out:
btrfs_remove_ordered_extent(inode, ordered_extent);
/* for snapshot-aware defrag */
- if (new)
- relink_file_extents(new);
+ if (new) {
+ if (ret) {
+ free_sa_defrag_extent(new);
+ atomic_dec(&root->fs_info->defrag_running);
+ } else {
+ relink_file_extents(new);
+ }
+ }
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
@@ -4345,8 +4354,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
* these flags set. For all other operations the VFS set these flags
* explicitly if it wants a timestamp update.
*/
- if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
- inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
+ if (newsize != oldsize) {
+ inode_inc_iversion(inode);
+ if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
+ inode->i_ctime = inode->i_mtime =
+ current_fs_time(inode->i_sb);
+ }
if (newsize > oldsize) {
truncate_pagecache(inode, newsize);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9d46f60..669eb53 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1548,6 +1548,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
printk(KERN_INFO "btrfs: Snapshot src from "
"another FS\n");
ret = -EINVAL;
+ } else if (!inode_owner_or_capable(src_inode)) {
+ /*
+ * Subvolume creation is not restricted, but snapshots
+ * are limited to own subvolumes only
+ */
+ ret = -EPERM;
} else {
ret = btrfs_mksubvol(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
@@ -2130,7 +2136,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
if (err == -EINTR)
- goto out;
+ goto out_drop_write;
dentry = lookup_one_len(vol_args->name, parent, namelen);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
@@ -2293,6 +2299,7 @@ out_dput:
dput(dentry);
out_unlock_dir:
mutex_unlock(&dir->i_mutex);
+out_drop_write:
mnt_drop_write_file(file);
out:
kfree(vol_args);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index c702cb6..bda1cd8 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -537,7 +537,9 @@ void btrfs_remove_ordered_extent(struct inode *inode,
*/
if (RB_EMPTY_ROOT(&tree->tree) &&
!mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
+ spin_lock(&root->fs_info->ordered_root_lock);
list_del_init(&BTRFS_I(inode)->ordered_operations);
+ spin_unlock(&root->fs_info->ordered_root_lock);
}
if (!root->nr_ordered_extents) {
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 4a35572..26450d8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4481,6 +4481,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
u64 disk_bytenr;
+ u64 new_bytenr;
LIST_HEAD(list);
ordered = btrfs_lookup_ordered_extent(inode, file_pos);
@@ -4492,13 +4493,24 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
if (ret)
goto out;
- disk_bytenr = ordered->start;
while (!list_empty(&list)) {
sums = list_entry(list.next, struct btrfs_ordered_sum, list);
list_del_init(&sums->list);
- sums->bytenr = disk_bytenr;
- disk_bytenr += sums->len;
+ /*
+ * We need to offset the new_bytenr based on where the csum is.
+ * We need to do this because we will read in entire prealloc
+ * extents but we may have written to say the middle of the
+ * prealloc extent, so we need to make sure the csum goes with
+ * the right disk offset.
+ *
+ * We can do this because the data reloc inode refers strictly
+ * to the on disk bytes, so we don't have to worry about
+ * disk_len vs real len like with real inodes since it's all
+ * disk length.
+ */
+ new_bytenr = ordered->start + (sums->bytenr - disk_bytenr);
+ sums->bytenr = new_bytenr;
btrfs_add_ordered_sum(inode, ordered, sums);
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index e46e0ed..741c839 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -121,7 +121,6 @@ struct send_ctx {
struct list_head name_cache_list;
int name_cache_size;
- struct file *cur_inode_filp;
char *read_buf;
};
@@ -2120,77 +2119,6 @@ out:
}
/*
- * Called for regular files when sending extents data. Opens a struct file
- * to read from the file.
- */
-static int open_cur_inode_file(struct send_ctx *sctx)
-{
- int ret = 0;
- struct btrfs_key key;
- struct path path;
- struct inode *inode;
- struct dentry *dentry;
- struct file *filp;
- int new = 0;
-
- if (sctx->cur_inode_filp)
- goto out;
-
- key.objectid = sctx->cur_ino;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
-
- inode = btrfs_iget(sctx->send_root->fs_info->sb, &key, sctx->send_root,
- &new);
- if (IS_ERR(inode)) {
- ret = PTR_ERR(inode);
- goto out;
- }
-
- dentry = d_obtain_alias(inode);
- inode = NULL;
- if (IS_ERR(dentry)) {
- ret = PTR_ERR(dentry);
- goto out;
- }
-
- path.mnt = sctx->mnt;
- path.dentry = dentry;
- filp = dentry_open(&path, O_RDONLY | O_LARGEFILE, current_cred());
- dput(dentry);
- dentry = NULL;
- if (IS_ERR(filp)) {
- ret = PTR_ERR(filp);
- goto out;
- }
- sctx->cur_inode_filp = filp;
-
-out:
- /*
- * no xxxput required here as every vfs op
- * does it by itself on failure
- */
- return ret;
-}
-
-/*
- * Closes the struct file that was created in open_cur_inode_file
- */
-static int close_cur_inode_file(struct send_ctx *sctx)
-{
- int ret = 0;
-
- if (!sctx->cur_inode_filp)
- goto out;
-
- ret = filp_close(sctx->cur_inode_filp, NULL);
- sctx->cur_inode_filp = NULL;
-
-out:
- return ret;
-}
-
-/*
* Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
*/
static int send_subvol_begin(struct send_ctx *sctx)
@@ -3622,6 +3550,72 @@ out:
return ret;
}
+static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
+{
+ struct btrfs_root *root = sctx->send_root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct inode *inode;
+ struct page *page;
+ char *addr;
+ struct btrfs_key key;
+ pgoff_t index = offset >> PAGE_CACHE_SHIFT;
+ pgoff_t last_index;
+ unsigned pg_offset = offset & ~PAGE_CACHE_MASK;
+ ssize_t ret = 0;
+
+ key.objectid = sctx->cur_ino;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ if (offset + len > i_size_read(inode)) {
+ if (offset > i_size_read(inode))
+ len = 0;
+ else
+ len = offset - i_size_read(inode);
+ }
+ if (len == 0)
+ goto out;
+
+ last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+ while (index <= last_index) {
+ unsigned cur_len = min_t(unsigned, len,
+ PAGE_CACHE_SIZE - pg_offset);
+ page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+ if (!page) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ if (!PageUptodate(page)) {
+ btrfs_readpage(NULL, page);
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ page_cache_release(page);
+ ret = -EIO;
+ break;
+ }
+ }
+
+ addr = kmap(page);
+ memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ index++;
+ pg_offset = 0;
+ len -= cur_len;
+ ret += cur_len;
+ }
+out:
+ iput(inode);
+ return ret;
+}
+
/*
* Read some bytes from the current inode/file and send a write command to
* user space.
@@ -3630,35 +3624,20 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
{
int ret = 0;
struct fs_path *p;
- loff_t pos = offset;
- int num_read = 0;
- mm_segment_t old_fs;
+ ssize_t num_read = 0;
p = fs_path_alloc();
if (!p)
return -ENOMEM;
- /*
- * vfs normally only accepts user space buffers for security reasons.
- * we only read from the file and also only provide the read_buf buffer
- * to vfs. As this buffer does not come from a user space call, it's
- * ok to temporary allow kernel space buffers.
- */
- old_fs = get_fs();
- set_fs(KERNEL_DS);
-
verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
- ret = open_cur_inode_file(sctx);
- if (ret < 0)
- goto out;
-
- ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos);
- if (ret < 0)
- goto out;
- num_read = ret;
- if (!num_read)
+ num_read = fill_read_buf(sctx, offset, len);
+ if (num_read <= 0) {
+ if (num_read < 0)
+ ret = num_read;
goto out;
+ }
ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
if (ret < 0)
@@ -3677,7 +3656,6 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
tlv_put_failure:
out:
fs_path_free(p);
- set_fs(old_fs);
if (ret < 0)
return ret;
return num_read;
@@ -4222,10 +4200,6 @@ static int changed_inode(struct send_ctx *sctx,
u64 left_gen = 0;
u64 right_gen = 0;
- ret = close_cur_inode_file(sctx);
- if (ret < 0)
- goto out;
-
sctx->cur_ino = key->objectid;
sctx->cur_inode_new_gen = 0;
@@ -4686,11 +4660,6 @@ static int send_subvol(struct send_ctx *sctx)
}
out:
- if (!ret)
- ret = close_cur_inode_file(sctx);
- else
- close_cur_inode_file(sctx);
-
free_recorded_refs(sctx);
return ret;
}
@@ -4756,8 +4725,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
}
if (!access_ok(VERIFY_READ, arg->clone_sources,
- sizeof(*arg->clone_sources *
- arg->clone_sources_count))) {
+ sizeof(*arg->clone_sources) *
+ arg->clone_sources_count)) {
ret = -EFAULT;
goto out;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8c81bdc..b791cfb 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1453,7 +1453,7 @@ static void do_async_commit(struct work_struct *work)
* We've got freeze protection passed with the transaction.
* Tell lockdep about it.
*/
- if (ac->newtrans->type < TRANS_JOIN_NOLOCK)
+ if (ac->newtrans->type & __TRANS_FREEZABLE)
rwsem_acquire_read(
&ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
0, 1, _THIS_IP_);
@@ -1494,7 +1494,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
* Tell lockdep we've released the freeze rwsem, since the
* async commit thread will be the one to unlock it.
*/
- if (trans->type < TRANS_JOIN_NOLOCK)
+ if (ac->newtrans->type & __TRANS_FREEZABLE)
rwsem_release(
&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
1, _THIS_IP_);
@@ -1552,6 +1552,8 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
root->fs_info->running_transaction = NULL;
spin_unlock(&root->fs_info->trans_lock);
+ if (trans->type & __TRANS_FREEZABLE)
+ sb_end_intwrite(root->fs_info->sb);
put_transaction(cur_trans);
put_transaction(cur_trans);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 79f057c..e14e1f7 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3375,7 +3375,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
btrfs_set_token_file_extent_type(leaf, fi,
BTRFS_FILE_EXTENT_REG,
&token);
- if (em->block_start == 0)
+ if (em->block_start == EXTENT_MAP_HOLE)
skip_csum = true;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 043b215..b691f37 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4488,6 +4488,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got "
"%Lu-%Lu\n", logical, logical+len, em->start,
em->start + em->len);
+ free_extent_map(em);
return 1;
}
@@ -4668,6 +4669,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, "
"found %Lu-%Lu\n", logical, em->start,
em->start + em->len);
+ free_extent_map(em);
return -EINVAL;
}
diff --git a/fs/buffer.c b/fs/buffer.c
index 6024877..aeeea65 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -654,14 +654,16 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
static void __set_page_dirty(struct page *page,
struct address_space *mapping, int warn)
{
- spin_lock_irq(&mapping->tree_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mapping->tree_lock, flags);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
account_page_dirtied(page, mapping);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
- spin_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 6df8bd4..ec3ba43 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -210,13 +210,17 @@ static int readpage_nounlock(struct file *filp, struct page *page)
if (err < 0) {
SetPageError(page);
goto out;
- } else if (err < PAGE_CACHE_SIZE) {
+ } else {
+ if (err < PAGE_CACHE_SIZE) {
/* zero fill remainder of page */
- zero_user_segment(page, err, PAGE_CACHE_SIZE);
+ zero_user_segment(page, err, PAGE_CACHE_SIZE);
+ } else {
+ flush_dcache_page(page);
+ }
}
SetPageUptodate(page);
- if (err == 0)
+ if (err >= 0)
ceph_readpage_to_fscache(inode, page);
out:
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 6bfe65e..360b622 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -324,6 +324,9 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
{
struct ceph_inode_info *ci = ceph_inode(inode);
+ if (!PageFsCache(page))
+ return;
+
fscache_wait_on_page_write(ci->fscache, page);
fscache_uncache_page(ci->fscache, page);
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b7bda5d..7889015 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -642,6 +642,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
req->r_unsafe_dir = NULL;
}
+ complete_all(&req->r_safe_completion);
+
ceph_mdsc_put_request(req);
}
@@ -1875,8 +1877,11 @@ static int __do_request(struct ceph_mds_client *mdsc,
int mds = -1;
int err = -EAGAIN;
- if (req->r_err || req->r_got_result)
+ if (req->r_err || req->r_got_result) {
+ if (req->r_aborted)
+ __unregister_request(mdsc, req);
goto out;
+ }
if (req->r_timeout &&
time_after_eq(jiffies, req->r_started + req->r_timeout)) {
@@ -2186,7 +2191,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (head->safe) {
req->r_got_safe = true;
__unregister_request(mdsc, req);
- complete_all(&req->r_safe_completion);
if (req->r_got_unsafe) {
/*
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 51f5e0e..494b683 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1027,15 +1027,30 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
__u32 secdesclen = 0;
struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */
struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
+ struct cifs_tcon *tcon;
+
+ if (IS_ERR(tlink))
+ return PTR_ERR(tlink);
+ tcon = tlink_tcon(tlink);
cifs_dbg(NOISY, "set ACL from mode for %s\n", path);
/* Get the security descriptor */
- pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen);
+
+ if (tcon->ses->server->ops->get_acl == NULL) {
+ cifs_put_tlink(tlink);
+ return -EOPNOTSUPP;
+ }
+
+ pntsd = tcon->ses->server->ops->get_acl(cifs_sb, inode, path,
+ &secdesclen);
if (IS_ERR(pntsd)) {
rc = PTR_ERR(pntsd);
cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc);
- goto out;
+ cifs_put_tlink(tlink);
+ return rc;
}
/*
@@ -1048,6 +1063,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
pnntsd = kmalloc(secdesclen, GFP_KERNEL);
if (!pnntsd) {
kfree(pntsd);
+ cifs_put_tlink(tlink);
return -ENOMEM;
}
@@ -1056,14 +1072,18 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc);
+ if (tcon->ses->server->ops->set_acl == NULL)
+ rc = -EOPNOTSUPP;
+
if (!rc) {
/* Set the security descriptor */
- rc = set_cifs_acl(pnntsd, secdesclen, inode, path, aclflag);
+ rc = tcon->ses->server->ops->set_acl(pnntsd, secdesclen, inode,
+ path, aclflag);
cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc);
}
+ cifs_put_tlink(tlink);
kfree(pnntsd);
kfree(pntsd);
-out:
return rc;
}
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 52b6f6c..db95dca 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -261,7 +261,7 @@ struct smb_version_operations {
/* query path data from the server */
int (*query_path_info)(const unsigned int, struct cifs_tcon *,
struct cifs_sb_info *, const char *,
- FILE_ALL_INFO *, bool *);
+ FILE_ALL_INFO *, bool *, bool *);
/* query file data from the server */
int (*query_file_info)(const unsigned int, struct cifs_tcon *,
struct cifs_fid *, FILE_ALL_INFO *);
@@ -379,6 +379,16 @@ struct smb_version_operations {
char * (*create_lease_buf)(u8 *, u8);
/* parse lease context buffer and return oplock/epoch info */
__u8 (*parse_lease_buf)(void *, unsigned int *);
+ ssize_t (*query_all_EAs)(const unsigned int, struct cifs_tcon *,
+ const unsigned char *, const unsigned char *, char *,
+ size_t, const struct nls_table *, int);
+ int (*set_EA)(const unsigned int, struct cifs_tcon *, const char *,
+ const char *, const void *, const __u16,
+ const struct nls_table *, int);
+ struct cifs_ntsd * (*get_acl)(struct cifs_sb_info *, struct inode *,
+ const char *, u32 *);
+ int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *,
+ int);
};
struct smb_version_values {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index b5ec2a2..45ccfbd 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -475,9 +475,10 @@ extern int CIFSGetExtAttr(const unsigned int xid, struct cifs_tcon *tcon,
const int netfid, __u64 *pExtAttrBits, __u64 *pMask);
extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb);
extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr);
-extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
- const unsigned char *path,
- struct cifs_sb_info *cifs_sb, unsigned int xid);
+extern int CIFSCheckMFSymlink(unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb,
+ struct cifs_fattr *fattr,
+ const unsigned char *path);
extern int mdfour(unsigned char *, unsigned char *, int);
extern int E_md4hash(const unsigned char *passwd, unsigned char *p16,
const struct nls_table *codepage);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index ccd31ab..5f1f328 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -3315,11 +3315,13 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
return 0;
}
cifs_acl->version = cpu_to_le16(1);
- if (acl_type == ACL_TYPE_ACCESS)
+ if (acl_type == ACL_TYPE_ACCESS) {
cifs_acl->access_entry_count = cpu_to_le16(count);
- else if (acl_type == ACL_TYPE_DEFAULT)
+ cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF);
+ } else if (acl_type == ACL_TYPE_DEFAULT) {
cifs_acl->default_entry_count = cpu_to_le16(count);
- else {
+ cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF);
+ } else {
cifs_dbg(FYI, "unknown ACL type %d\n", acl_type);
return 0;
}
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 5384c2a..f039c23 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -193,7 +193,7 @@ check_name(struct dentry *direntry)
static int
cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
struct tcon_link *tlink, unsigned oflags, umode_t mode,
- __u32 *oplock, struct cifs_fid *fid, int *created)
+ __u32 *oplock, struct cifs_fid *fid)
{
int rc = -ENOENT;
int create_options = CREATE_NOT_DIR;
@@ -349,7 +349,6 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
.device = 0,
};
- *created |= FILE_CREATED;
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
args.uid = current_fsuid();
if (inode->i_mode & S_ISGID)
@@ -480,13 +479,16 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
cifs_add_pending_open(&fid, tlink, &open);
rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
- &oplock, &fid, opened);
+ &oplock, &fid);
if (rc) {
cifs_del_pending_open(&open);
goto out;
}
+ if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+ *opened |= FILE_CREATED;
+
rc = finish_open(file, direntry, generic_file_open, opened);
if (rc) {
if (server->ops->close)
@@ -529,7 +531,6 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
struct TCP_Server_Info *server;
struct cifs_fid fid;
__u32 oplock;
- int created = FILE_CREATED;
cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p\n",
inode, direntry->d_name.name, direntry);
@@ -546,7 +547,7 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
server->ops->new_lease_key(&fid);
rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
- &oplock, &fid, &created);
+ &oplock, &fid);
if (!rc && server->ops->close)
server->ops->close(xid, tcon, &fid);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 7ddddf2..81476e1 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2381,7 +2381,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
unsigned long nr_segs, loff_t *poffset)
{
unsigned long nr_pages, i;
- size_t copied, len, cur_len;
+ size_t bytes, copied, len, cur_len;
ssize_t total_written = 0;
loff_t offset;
struct iov_iter it;
@@ -2436,14 +2436,45 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
save_len = cur_len;
for (i = 0; i < nr_pages; i++) {
- copied = min_t(const size_t, cur_len, PAGE_SIZE);
+ bytes = min_t(const size_t, cur_len, PAGE_SIZE);
copied = iov_iter_copy_from_user(wdata->pages[i], &it,
- 0, copied);
+ 0, bytes);
cur_len -= copied;
iov_iter_advance(&it, copied);
+ /*
+ * If we didn't copy as much as we expected, then that
+ * may mean we trod into an unmapped area. Stop copying
+ * at that point. On the next pass through the big
+ * loop, we'll likely end up getting a zero-length
+ * write and bailing out of it.
+ */
+ if (copied < bytes)
+ break;
}
cur_len = save_len - cur_len;
+ /*
+ * If we have no data to send, then that probably means that
+ * the copy above failed altogether. That's most likely because
+ * the address in the iovec was bogus. Set the rc to -EFAULT,
+ * free anything we allocated and bail out.
+ */
+ if (!cur_len) {
+ for (i = 0; i < nr_pages; i++)
+ put_page(wdata->pages[i]);
+ kfree(wdata);
+ rc = -EFAULT;
+ break;
+ }
+
+ /*
+ * i + 1 now represents the number of pages we actually used in
+ * the copy phase above. Bring nr_pages down to that, and free
+ * any pages that we didn't use.
+ */
+ for ( ; nr_pages > i + 1; nr_pages--)
+ put_page(wdata->pages[nr_pages - 1]);
+
wdata->sync_mode = WB_SYNC_ALL;
wdata->nr_pages = nr_pages;
wdata->offset = (__u64)offset;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 867b7cd..5f8bdff 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -383,7 +383,8 @@ int cifs_get_inode_info_unix(struct inode **pinode,
/* check for Minshall+French symlinks */
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) {
- int tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid);
+ int tmprc = CIFSCheckMFSymlink(xid, tcon, cifs_sb, &fattr,
+ full_path);
if (tmprc)
cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc);
}
@@ -517,10 +518,15 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
return PTR_ERR(tlink);
tcon = tlink_tcon(tlink);
- rc = CIFSSMBQAllEAs(xid, tcon, path, "SETFILEBITS",
- ea_value, 4 /* size of buf */, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (tcon->ses->server->ops->query_all_EAs == NULL) {
+ cifs_put_tlink(tlink);
+ return -EOPNOTSUPP;
+ }
+
+ rc = tcon->ses->server->ops->query_all_EAs(xid, tcon, path,
+ "SETFILEBITS", ea_value, 4 /* size of buf */,
+ cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
cifs_put_tlink(tlink);
if (rc < 0)
return (int)rc;
@@ -542,7 +548,8 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
/* Fill a cifs_fattr struct with info from FILE_ALL_INFO */
static void
cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
- struct cifs_sb_info *cifs_sb, bool adjust_tz)
+ struct cifs_sb_info *cifs_sb, bool adjust_tz,
+ bool symlink)
{
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
@@ -569,7 +576,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
fattr->cf_createtime = le64_to_cpu(info->CreationTime);
fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
- if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
+
+ if (symlink) {
+ fattr->cf_mode = S_IFLNK;
+ fattr->cf_dtype = DT_LNK;
+ } else if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
fattr->cf_dtype = DT_DIR;
/*
@@ -578,10 +589,6 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
*/
if (!tcon->unix_ext)
fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
- } else if (fattr->cf_cifsattrs & ATTR_REPARSE) {
- fattr->cf_mode = S_IFLNK;
- fattr->cf_dtype = DT_LNK;
- fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
} else {
fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
fattr->cf_dtype = DT_REG;
@@ -626,7 +633,8 @@ cifs_get_file_info(struct file *filp)
rc = server->ops->query_file_info(xid, tcon, &cfile->fid, &find_data);
switch (rc) {
case 0:
- cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false);
+ cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false,
+ false);
break;
case -EREMOTE:
cifs_create_dfs_fattr(&fattr, inode->i_sb);
@@ -673,6 +681,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
bool adjust_tz = false;
struct cifs_fattr fattr;
struct cifs_search_info *srchinf = NULL;
+ bool symlink = false;
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
@@ -702,12 +711,12 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
}
data = (FILE_ALL_INFO *)buf;
rc = server->ops->query_path_info(xid, tcon, cifs_sb, full_path,
- data, &adjust_tz);
+ data, &adjust_tz, &symlink);
}
if (!rc) {
- cifs_all_info_to_fattr(&fattr, (FILE_ALL_INFO *)data, cifs_sb,
- adjust_tz);
+ cifs_all_info_to_fattr(&fattr, data, cifs_sb, adjust_tz,
+ symlink);
} else if (rc == -EREMOTE) {
cifs_create_dfs_fattr(&fattr, sb);
rc = 0;
@@ -796,7 +805,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
/* check for Minshall+French symlinks */
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) {
- tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid);
+ tmprc = CIFSCheckMFSymlink(xid, tcon, cifs_sb, &fattr,
+ full_path);
if (tmprc)
cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc);
}
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 7e36ceb..477e53b 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -354,34 +354,30 @@ open_query_close_cifs_symlink(const unsigned char *path, char *pbuf,
int
-CIFSCheckMFSymlink(struct cifs_fattr *fattr,
- const unsigned char *path,
- struct cifs_sb_info *cifs_sb, unsigned int xid)
+CIFSCheckMFSymlink(unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
+ const unsigned char *path)
{
- int rc = 0;
+ int rc;
u8 *buf = NULL;
unsigned int link_len = 0;
unsigned int bytes_read = 0;
- struct cifs_tcon *ptcon;
if (!CIFSCouldBeMFSymlink(fattr))
/* it's not a symlink */
return 0;
buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL);
- if (!buf) {
- rc = -ENOMEM;
- goto out;
- }
+ if (!buf)
+ return -ENOMEM;
- ptcon = tlink_tcon(cifs_sb_tlink(cifs_sb));
- if ((ptcon->ses) && (ptcon->ses->server->ops->query_mf_symlink))
- rc = ptcon->ses->server->ops->query_mf_symlink(path, buf,
- &bytes_read, cifs_sb, xid);
+ if (tcon->ses->server->ops->query_mf_symlink)
+ rc = tcon->ses->server->ops->query_mf_symlink(path, buf,
+ &bytes_read, cifs_sb, xid);
else
- goto out;
+ rc = -ENOSYS;
- if (rc != 0)
+ if (rc)
goto out;
if (bytes_read == 0) /* not a symlink */
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 53a75f3..5940eca 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -134,22 +134,6 @@ out:
dput(dentry);
}
-/*
- * Is it possible that this directory might turn out to be a DFS referral
- * once we go to try and use it?
- */
-static bool
-cifs_dfs_is_possible(struct cifs_sb_info *cifs_sb)
-{
-#ifdef CONFIG_CIFS_DFS_UPCALL
- struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
-
- if (tcon->Flags & SMB_SHARE_IS_IN_DFS)
- return true;
-#endif
- return false;
-}
-
static void
cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
{
@@ -159,27 +143,19 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
fattr->cf_dtype = DT_DIR;
- /*
- * Windows CIFS servers generally make DFS referrals look
- * like directories in FIND_* responses with the reparse
- * attribute flag also set (since DFS junctions are
- * reparse points). We must revalidate at least these
- * directory inodes before trying to use them (if
- * they are DFS we will get PATH_NOT_COVERED back
- * when queried directly and can then try to connect
- * to the DFS target)
- */
- if (cifs_dfs_is_possible(cifs_sb) &&
- (fattr->cf_cifsattrs & ATTR_REPARSE))
- fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
- } else if (fattr->cf_cifsattrs & ATTR_REPARSE) {
- fattr->cf_mode = S_IFLNK;
- fattr->cf_dtype = DT_LNK;
} else {
fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
fattr->cf_dtype = DT_REG;
}
+ /*
+ * We need to revalidate it further to make a decision about whether it
+ * is a symbolic link, DFS referral or a reparse point with a direct
+ * access like junctions, deduplicated files, NFS symlinks.
+ */
+ if (fattr->cf_cifsattrs & ATTR_REPARSE)
+ fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
+
/* non-unix readdir doesn't provide nlink */
fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 8233b17..e6ed0dc 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -534,10 +534,12 @@ cifs_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
static int
cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb, const char *full_path,
- FILE_ALL_INFO *data, bool *adjustTZ)
+ FILE_ALL_INFO *data, bool *adjustTZ, bool *symlink)
{
int rc;
+ *symlink = false;
+
/* could do find first instead but this returns more info */
rc = CIFSSMBQPathInfo(xid, tcon, full_path, data, 0 /* not legacy */,
cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
@@ -554,6 +556,23 @@ cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
CIFS_MOUNT_MAP_SPECIAL_CHR);
*adjustTZ = true;
}
+
+ if (!rc && (le32_to_cpu(data->Attributes) & ATTR_REPARSE)) {
+ int tmprc;
+ int oplock = 0;
+ __u16 netfid;
+
+ /* Need to check if this is a symbolic link or not */
+ tmprc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN,
+ FILE_READ_ATTRIBUTES, 0, &netfid, &oplock,
+ NULL, cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (tmprc == -EOPNOTSUPP)
+ *symlink = true;
+ else
+ CIFSSMBClose(xid, tcon, netfid);
+ }
+
return rc;
}
@@ -984,6 +1003,14 @@ struct smb_version_operations smb1_operations = {
.push_mand_locks = cifs_push_mandatory_locks,
.query_mf_symlink = open_query_close_cifs_symlink,
.is_read_op = cifs_is_read_op,
+#ifdef CONFIG_CIFS_XATTR
+ .query_all_EAs = CIFSSMBQAllEAs,
+ .set_EA = CIFSSMBSetEA,
+#endif /* CIFS_XATTR */
+#ifdef CONFIG_CIFS_ACL
+ .get_acl = get_cifs_acl,
+ .set_acl = set_cifs_acl,
+#endif /* CIFS_ACL */
};
struct smb_version_values smb1_values = {
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index c383508..bc0bb9c 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -57,4 +57,7 @@
#define SMB2_CMACAES_SIZE (16)
#define SMB3_SIGNKEY_SIZE (16)
+/* Maximum buffer size value we can send with 1 credit */
+#define SMB2_MAX_BUFFER_SIZE 65536
+
#endif /* _SMB2_GLOB_H */
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 78ff88c..84c012a 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -123,12 +123,13 @@ move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src)
int
smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb, const char *full_path,
- FILE_ALL_INFO *data, bool *adjust_tz)
+ FILE_ALL_INFO *data, bool *adjust_tz, bool *symlink)
{
int rc;
struct smb2_file_all_info *smb2_data;
*adjust_tz = false;
+ *symlink = false;
smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
GFP_KERNEL);
@@ -136,9 +137,16 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
return -ENOMEM;
rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path,
- FILE_READ_ATTRIBUTES, FILE_OPEN,
- OPEN_REPARSE_POINT, smb2_data,
- SMB2_OP_QUERY_INFO);
+ FILE_READ_ATTRIBUTES, FILE_OPEN, 0,
+ smb2_data, SMB2_OP_QUERY_INFO);
+ if (rc == -EOPNOTSUPP) {
+ *symlink = true;
+ /* Failed on a symbolic link - query a reparse point info */
+ rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path,
+ FILE_READ_ATTRIBUTES, FILE_OPEN,
+ OPEN_REPARSE_POINT, smb2_data,
+ SMB2_OP_QUERY_INFO);
+ }
if (rc)
goto out;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 861b332..027a0c6 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -182,11 +182,8 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
/* start with specified wsize, or default */
wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE;
wsize = min_t(unsigned int, wsize, server->max_write);
- /*
- * limit write size to 2 ** 16, because we don't support multicredit
- * requests now.
- */
- wsize = min_t(unsigned int, wsize, 2 << 15);
+ /* set it to the maximum buffer size value we can send with 1 credit */
+ wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
return wsize;
}
@@ -200,11 +197,8 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
/* start with specified rsize, or default */
rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE;
rsize = min_t(unsigned int, rsize, server->max_read);
- /*
- * limit write size to 2 ** 16, because we don't support multicredit
- * requests now.
- */
- rsize = min_t(unsigned int, rsize, 2 << 15);
+ /* set it to the maximum buffer size value we can send with 1 credit */
+ rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE);
return rsize;
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index edccb52..06d29e3 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -413,7 +413,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
/* SMB2 only has an extended negflavor */
server->negflavor = CIFS_NEGFLAVOR_EXTENDED;
- server->maxBuf = le32_to_cpu(rsp->MaxTransactSize);
+ /* set it to the maximum buffer size value we can send with 1 credit */
+ server->maxBuf = min_t(unsigned int, le32_to_cpu(rsp->MaxTransactSize),
+ SMB2_MAX_BUFFER_SIZE);
server->max_read = le32_to_cpu(rsp->MaxReadSize);
server->max_write = le32_to_cpu(rsp->MaxWriteSize);
/* BB Do we need to validate the SecurityMode? */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index e3fb480..7db5db0 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -61,7 +61,7 @@ extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst,
extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb,
const char *full_path, FILE_ALL_INFO *data,
- bool *adjust_tz);
+ bool *adjust_tz, bool *symlink);
extern int smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
const char *full_path, __u64 size,
struct cifs_sb_info *cifs_sb, bool set_alloc);
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 09afda4..5ac836a 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -82,9 +82,11 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name)
goto remove_ea_exit;
ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */
- rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, NULL,
- (__u16)0, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->set_EA)
+ rc = pTcon->ses->server->ops->set_EA(xid, pTcon,
+ full_path, ea_name, NULL, (__u16)0,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
}
remove_ea_exit:
kfree(full_path);
@@ -149,18 +151,22 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
cifs_dbg(FYI, "attempt to set cifs inode metadata\n");
ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */
- rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
- (__u16)value_size, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->set_EA)
+ rc = pTcon->ses->server->ops->set_EA(xid, pTcon,
+ full_path, ea_name, ea_value, (__u16)value_size,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
} else if (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN)
== 0) {
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
goto set_ea_exit;
ea_name += XATTR_OS2_PREFIX_LEN; /* skip past os2. prefix */
- rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
- (__u16)value_size, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->set_EA)
+ rc = pTcon->ses->server->ops->set_EA(xid, pTcon,
+ full_path, ea_name, ea_value, (__u16)value_size,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
} else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
#ifdef CONFIG_CIFS_ACL
@@ -170,8 +176,12 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
rc = -ENOMEM;
} else {
memcpy(pacl, ea_value, value_size);
- rc = set_cifs_acl(pacl, value_size,
- direntry->d_inode, full_path, CIFS_ACL_DACL);
+ if (pTcon->ses->server->ops->set_acl)
+ rc = pTcon->ses->server->ops->set_acl(pacl,
+ value_size, direntry->d_inode,
+ full_path, CIFS_ACL_DACL);
+ else
+ rc = -EOPNOTSUPP;
if (rc == 0) /* force revalidate of the inode */
CIFS_I(direntry->d_inode)->time = 0;
kfree(pacl);
@@ -272,17 +282,21 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
/* revalidate/getattr then populate from inode */
} /* BB add else when above is implemented */
ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */
- rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value,
- buf_size, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->query_all_EAs)
+ rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
+ full_path, ea_name, ea_value, buf_size,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
} else if (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
goto get_ea_exit;
ea_name += XATTR_OS2_PREFIX_LEN; /* skip past os2. prefix */
- rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value,
- buf_size, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->query_all_EAs)
+ rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
+ full_path, ea_name, ea_value, buf_size,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
} else if (strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
strlen(POSIX_ACL_XATTR_ACCESS)) == 0) {
#ifdef CONFIG_CIFS_POSIX
@@ -313,8 +327,11 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
u32 acllen;
struct cifs_ntsd *pacl;
- pacl = get_cifs_acl(cifs_sb, direntry->d_inode,
- full_path, &acllen);
+ if (pTcon->ses->server->ops->get_acl == NULL)
+ goto get_ea_exit; /* rc already EOPNOTSUPP */
+
+ pacl = pTcon->ses->server->ops->get_acl(cifs_sb,
+ direntry->d_inode, full_path, &acllen);
if (IS_ERR(pacl)) {
rc = PTR_ERR(pacl);
cifs_dbg(VFS, "%s: error %zd getting sec desc\n",
@@ -400,11 +417,12 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
/* if proc/fs/cifs/streamstoxattr is set then
search server for EAs or streams to
returns as xattrs */
- rc = CIFSSMBQAllEAs(xid, pTcon, full_path, NULL, data,
- buf_size, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->query_all_EAs)
+ rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
+ full_path, NULL, data, buf_size,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
list_ea_exit:
kfree(full_path);
free_xid(xid);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 277bd1b..511d415 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -56,10 +56,19 @@ static void configfs_d_iput(struct dentry * dentry,
struct configfs_dirent *sd = dentry->d_fsdata;
if (sd) {
- BUG_ON(sd->s_dentry != dentry);
/* Coordinate with configfs_readdir */
spin_lock(&configfs_dirent_lock);
- sd->s_dentry = NULL;
+ /* Coordinate with configfs_attach_attr where will increase
+ * sd->s_count and update sd->s_dentry to new allocated one.
+ * Only set sd->dentry to null when this dentry is the only
+ * sd owner.
+ * If not do so, configfs_d_iput may run just after
+ * configfs_attach_attr and set sd->s_dentry to null
+ * even it's still in use.
+ */
+ if (atomic_read(&sd->s_count) <= 2)
+ sd->s_dentry = NULL;
+
spin_unlock(&configfs_dirent_lock);
configfs_put(sd);
}
@@ -426,8 +435,11 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
struct configfs_attribute * attr = sd->s_element;
int error;
+ spin_lock(&configfs_dirent_lock);
dentry->d_fsdata = configfs_get(sd);
sd->s_dentry = dentry;
+ spin_unlock(&configfs_dirent_lock);
+
error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG,
configfs_init_file);
if (error) {
diff --git a/fs/dcache.c b/fs/dcache.c
index ae6ebb8..30b38e2 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2846,9 +2846,9 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
u32 dlen = ACCESS_ONCE(name->len);
char *p;
- if (*buflen < dlen + 1)
- return -ENAMETOOLONG;
*buflen -= dlen + 1;
+ if (*buflen < 0)
+ return -ENAMETOOLONG;
p = *buffer -= dlen + 1;
*p++ = '/';
while (dlen--) {
@@ -2881,9 +2881,9 @@ static int prepend_path(const struct path *path,
const struct path *root,
char **buffer, int *buflen)
{
- struct dentry *dentry = path->dentry;
- struct vfsmount *vfsmnt = path->mnt;
- struct mount *mnt = real_mount(vfsmnt);
+ struct dentry *dentry;
+ struct vfsmount *vfsmnt;
+ struct mount *mnt;
int error = 0;
unsigned seq = 0;
char *bptr;
@@ -2893,6 +2893,9 @@ static int prepend_path(const struct path *path,
restart:
bptr = *buffer;
blen = *buflen;
+ dentry = path->dentry;
+ vfsmnt = path->mnt;
+ mnt = real_mount(vfsmnt);
read_seqbegin_or_lock(&rename_lock, &seq);
while (dentry != root->dentry || vfsmnt != root->mnt) {
struct dentry * parent;
@@ -3061,8 +3064,13 @@ char *d_path(const struct path *path, char *buf, int buflen)
* thus don't need to be hashed. They also don't need a name until a
* user wants to identify the object in /proc/pid/fd/. The little hack
* below allows us to generate a name for these objects on demand:
+ *
+ * Some pseudo inodes are mountable. When they are mounted
+ * path->dentry == path->mnt->mnt_root. In that case don't call d_dname
+ * and instead have d_path return the mounted path.
*/
- if (path->dentry->d_op && path->dentry->d_op->d_dname)
+ if (path->dentry->d_op && path->dentry->d_op->d_dname &&
+ (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
rcu_read_lock();
@@ -3132,7 +3140,6 @@ restart:
read_seqbegin_or_lock(&rename_lock, &seq);
while (!IS_ROOT(dentry)) {
struct dentry *parent = dentry->d_parent;
- int error;
prefetch(parent);
error = prepend_name(&end, &len, &dentry->d_name);
diff --git a/fs/dcookies.c b/fs/dcookies.c
index ab5954b..ac44a69 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -204,7 +204,7 @@ out:
}
#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE4(lookup_dcookie, u32, w0, u32, w1, char __user *, buf, size_t, len)
+COMPAT_SYSCALL_DEFINE4(lookup_dcookie, u32, w0, u32, w1, char __user *, buf, compat_size_t, len)
{
#ifdef __BIG_ENDIAN
return sys_lookup_dcookie(((u64)w0 << 32) | w1, buf, len);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 073d30b..a726b9f 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -498,6 +498,7 @@ static void devpts_kill_sb(struct super_block *sb)
{
struct pts_fs_info *fsi = DEVPTS_SB(sb);
+ ida_destroy(&fsi->allocated_ptys);
kfree(fsi);
kill_litter_super(sb);
}
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index d90909e..a5e34dd 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -649,6 +649,7 @@ static void process_sctp_notification(struct connection *con,
struct msghdr *msg, char *buf)
{
union sctp_notification *sn = (union sctp_notification *)buf;
+ struct linger linger;
switch (sn->sn_header.sn_type) {
case SCTP_SEND_FAILED:
@@ -727,6 +728,13 @@ static void process_sctp_notification(struct connection *con,
}
add_sock(new_con->sock, new_con);
+ linger.l_onoff = 1;
+ linger.l_linger = 0;
+ ret = kernel_setsockopt(new_con->sock, SOL_SOCKET, SO_LINGER,
+ (char *)&linger, sizeof(linger));
+ if (ret < 0)
+ log_print("set socket option SO_LINGER failed");
+
log_print("connecting to %d sctp association %d",
nodeid, (int)sn->sn_assoc_change.sac_assoc_id);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 810c28f..d76c974 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -41,6 +41,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/compat.h>
+#include <linux/rculist.h>
/*
* LOCKING:
@@ -133,8 +134,12 @@ struct nested_calls {
* of these on a server and we do not want this to take another cache line.
*/
struct epitem {
- /* RB tree node used to link this structure to the eventpoll RB tree */
- struct rb_node rbn;
+ union {
+ /* RB tree node links this structure to the eventpoll RB tree */
+ struct rb_node rbn;
+ /* Used to free the struct epitem */
+ struct rcu_head rcu;
+ };
/* List header used to link this structure to the eventpoll ready list */
struct list_head rdllink;
@@ -580,14 +585,14 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
* @sproc: Pointer to the scan callback.
* @priv: Private opaque data passed to the @sproc callback.
* @depth: The current depth of recursive f_op->poll calls.
+ * @ep_locked: caller already holds ep->mtx
*
* Returns: The same integer error code returned by the @sproc callback.
*/
static int ep_scan_ready_list(struct eventpoll *ep,
int (*sproc)(struct eventpoll *,
struct list_head *, void *),
- void *priv,
- int depth)
+ void *priv, int depth, bool ep_locked)
{
int error, pwake = 0;
unsigned long flags;
@@ -598,7 +603,9 @@ static int ep_scan_ready_list(struct eventpoll *ep,
* We need to lock this because we could be hit by
* eventpoll_release_file() and epoll_ctl().
*/
- mutex_lock_nested(&ep->mtx, depth);
+
+ if (!ep_locked)
+ mutex_lock_nested(&ep->mtx, depth);
/*
* Steal the ready list, and re-init the original one to the
@@ -662,7 +669,8 @@ static int ep_scan_ready_list(struct eventpoll *ep,
}
spin_unlock_irqrestore(&ep->lock, flags);
- mutex_unlock(&ep->mtx);
+ if (!ep_locked)
+ mutex_unlock(&ep->mtx);
/* We have to call this outside the lock */
if (pwake)
@@ -671,6 +679,12 @@ static int ep_scan_ready_list(struct eventpoll *ep,
return error;
}
+static void epi_rcu_free(struct rcu_head *head)
+{
+ struct epitem *epi = container_of(head, struct epitem, rcu);
+ kmem_cache_free(epi_cache, epi);
+}
+
/*
* Removes a "struct epitem" from the eventpoll RB tree and deallocates
* all the associated resources. Must be called with "mtx" held.
@@ -692,8 +706,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
/* Remove the current item from the list of epoll hooks */
spin_lock(&file->f_lock);
- if (ep_is_linked(&epi->fllink))
- list_del_init(&epi->fllink);
+ list_del_rcu(&epi->fllink);
spin_unlock(&file->f_lock);
rb_erase(&epi->rbn, &ep->rbr);
@@ -704,9 +717,14 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
spin_unlock_irqrestore(&ep->lock, flags);
wakeup_source_unregister(ep_wakeup_source(epi));
-
- /* At this point it is safe to free the eventpoll item */
- kmem_cache_free(epi_cache, epi);
+ /*
+ * At this point it is safe to free the eventpoll item. Use the union
+ * field epi->rcu, since we are trying to minimize the size of
+ * 'struct epitem'. The 'rbn' field is no longer in use. Protected by
+ * ep->mtx. The rcu read side, reverse_path_check_proc(), does not make
+ * use of the rbn field.
+ */
+ call_rcu(&epi->rcu, epi_rcu_free);
atomic_long_dec(&ep->user->epoll_watches);
@@ -807,15 +825,34 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
return 0;
}
+static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
+ poll_table *pt);
+
+struct readyevents_arg {
+ struct eventpoll *ep;
+ bool locked;
+};
+
static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
{
- return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
+ struct readyevents_arg *arg = priv;
+
+ return ep_scan_ready_list(arg->ep, ep_read_events_proc, NULL,
+ call_nests + 1, arg->locked);
}
static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
{
int pollflags;
struct eventpoll *ep = file->private_data;
+ struct readyevents_arg arg;
+
+ /*
+ * During ep_insert() we already hold the ep->mtx for the tfile.
+ * Prevent re-aquisition.
+ */
+ arg.locked = wait && (wait->_qproc == ep_ptable_queue_proc);
+ arg.ep = ep;
/* Insert inside our poll wait queue */
poll_wait(file, &ep->poll_wait, wait);
@@ -827,7 +864,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
* could re-enter here.
*/
pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
- ep_poll_readyevents_proc, ep, ep, current);
+ ep_poll_readyevents_proc, &arg, ep, current);
return pollflags != -1 ? pollflags : 0;
}
@@ -872,7 +909,6 @@ static const struct file_operations eventpoll_fops = {
*/
void eventpoll_release_file(struct file *file)
{
- struct list_head *lsthead = &file->f_ep_links;
struct eventpoll *ep;
struct epitem *epi;
@@ -890,17 +926,12 @@ void eventpoll_release_file(struct file *file)
* Besides, ep_remove() acquires the lock, so we can't hold it here.
*/
mutex_lock(&epmutex);
-
- while (!list_empty(lsthead)) {
- epi = list_first_entry(lsthead, struct epitem, fllink);
-
+ list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) {
ep = epi->ep;
- list_del_init(&epi->fllink);
mutex_lock_nested(&ep->mtx, 0);
ep_remove(ep, epi);
mutex_unlock(&ep->mtx);
}
-
mutex_unlock(&epmutex);
}
@@ -1138,7 +1169,9 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
struct file *child_file;
struct epitem *epi;
- list_for_each_entry(epi, &file->f_ep_links, fllink) {
+ /* CTL_DEL can remove links here, but that can't increase our count */
+ rcu_read_lock();
+ list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) {
child_file = epi->ep->file;
if (is_file_epoll(child_file)) {
if (list_empty(&child_file->f_ep_links)) {
@@ -1160,6 +1193,7 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
"file is not an ep!\n");
}
}
+ rcu_read_unlock();
return error;
}
@@ -1231,7 +1265,7 @@ static noinline void ep_destroy_wakeup_source(struct epitem *epi)
* Must be called with "mtx" held.
*/
static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
- struct file *tfile, int fd)
+ struct file *tfile, int fd, int full_check)
{
int error, revents, pwake = 0;
unsigned long flags;
@@ -1286,7 +1320,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
/* Add the current item to the list of active epoll hook for this file */
spin_lock(&tfile->f_lock);
- list_add_tail(&epi->fllink, &tfile->f_ep_links);
+ list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
spin_unlock(&tfile->f_lock);
/*
@@ -1297,7 +1331,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
/* now check if we've created too many backpaths */
error = -EINVAL;
- if (reverse_path_check())
+ if (full_check && reverse_path_check())
goto error_remove_epi;
/* We have to drop the new item inside our item list to keep track of it */
@@ -1327,8 +1361,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
error_remove_epi:
spin_lock(&tfile->f_lock);
- if (ep_is_linked(&epi->fllink))
- list_del_init(&epi->fllink);
+ list_del_rcu(&epi->fllink);
spin_unlock(&tfile->f_lock);
rb_erase(&epi->rbn, &ep->rbr);
@@ -1521,7 +1554,7 @@ static int ep_send_events(struct eventpoll *ep,
esed.maxevents = maxevents;
esed.events = events;
- return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
+ return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0, false);
}
static inline struct timespec ep_set_mstimeout(long ms)
@@ -1791,11 +1824,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
struct epoll_event __user *, event)
{
int error;
- int did_lock_epmutex = 0;
+ int full_check = 0;
struct fd f, tf;
struct eventpoll *ep;
struct epitem *epi;
struct epoll_event epds;
+ struct eventpoll *tep = NULL;
error = -EFAULT;
if (ep_op_has_event(op) &&
@@ -1844,27 +1878,37 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
* and hang them on the tfile_check_list, so we can check that we
* haven't created too many possible wakeup paths.
*
- * We need to hold the epmutex across both ep_insert and ep_remove
- * b/c we want to make sure we are looking at a coherent view of
- * epoll network.
+ * We do not need to take the global 'epumutex' on EPOLL_CTL_ADD when
+ * the epoll file descriptor is attaching directly to a wakeup source,
+ * unless the epoll file descriptor is nested. The purpose of taking the
+ * 'epmutex' on add is to prevent complex toplogies such as loops and
+ * deep wakeup paths from forming in parallel through multiple
+ * EPOLL_CTL_ADD operations.
*/
- if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
- mutex_lock(&epmutex);
- did_lock_epmutex = 1;
- }
+ mutex_lock_nested(&ep->mtx, 0);
if (op == EPOLL_CTL_ADD) {
- if (is_file_epoll(tf.file)) {
- error = -ELOOP;
- if (ep_loop_check(ep, tf.file) != 0) {
- clear_tfile_check_list();
- goto error_tgt_fput;
+ if (!list_empty(&f.file->f_ep_links) ||
+ is_file_epoll(tf.file)) {
+ full_check = 1;
+ mutex_unlock(&ep->mtx);
+ mutex_lock(&epmutex);
+ if (is_file_epoll(tf.file)) {
+ error = -ELOOP;
+ if (ep_loop_check(ep, tf.file) != 0) {
+ clear_tfile_check_list();
+ goto error_tgt_fput;
+ }
+ } else
+ list_add(&tf.file->f_tfile_llink,
+ &tfile_check_list);
+ mutex_lock_nested(&ep->mtx, 0);
+ if (is_file_epoll(tf.file)) {
+ tep = tf.file->private_data;
+ mutex_lock_nested(&tep->mtx, 1);
}
- } else
- list_add(&tf.file->f_tfile_llink, &tfile_check_list);
+ }
}
- mutex_lock_nested(&ep->mtx, 0);
-
/*
* Try to lookup the file inside our RB tree, Since we grabbed "mtx"
* above, we can be sure to be able to use the item looked up by
@@ -1877,10 +1921,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
case EPOLL_CTL_ADD:
if (!epi) {
epds.events |= POLLERR | POLLHUP;
- error = ep_insert(ep, &epds, tf.file, fd);
+ error = ep_insert(ep, &epds, tf.file, fd, full_check);
} else
error = -EEXIST;
- clear_tfile_check_list();
+ if (full_check)
+ clear_tfile_check_list();
break;
case EPOLL_CTL_DEL:
if (epi)
@@ -1896,10 +1941,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
error = -ENOENT;
break;
}
+ if (tep != NULL)
+ mutex_unlock(&tep->mtx);
mutex_unlock(&ep->mtx);
error_tgt_fput:
- if (did_lock_epmutex)
+ if (full_check)
mutex_unlock(&epmutex);
fdput(tf);
diff --git a/fs/exec.c b/fs/exec.c
index 8875dd1..bb8afc1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1668,6 +1668,12 @@ int __get_dumpable(unsigned long mm_flags)
return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret;
}
+/*
+ * This returns the actual value of the suid_dumpable flag. For things
+ * that are using this for checking for privilege transitions, it must
+ * test against SUID_DUMP_USER rather than treating it as a boolean
+ * value.
+ */
int get_dumpable(struct mm_struct *mm)
{
return __get_dumpable(mm->flags);
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index b744228..85cde3e 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -103,7 +103,7 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)
layout->max_io_length =
(BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - layout->stripe_unit) *
- layout->group_width;
+ (layout->group_width - layout->parity);
if (layout->parity) {
unsigned stripe_length =
(layout->group_width - layout->parity) *
@@ -286,7 +286,8 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
if (length) {
ore_calc_stripe_info(layout, offset, length, &ios->si);
ios->length = ios->si.length;
- ios->nr_pages = (ios->length + PAGE_SIZE - 1) / PAGE_SIZE;
+ ios->nr_pages = ((ios->offset & (PAGE_SIZE - 1)) +
+ ios->length + PAGE_SIZE - 1) / PAGE_SIZE;
if (layout->parity)
_ore_post_alloc_raid_stuff(ios);
}
@@ -536,6 +537,7 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
u64 H = LmodS - G * T;
u32 N = div_u64(H, U);
+ u32 Nlast;
/* "H - (N * U)" is just "H % U" so it's bound to u32 */
u32 C = (u32)(H - (N * U)) / stripe_unit + G * group_width;
@@ -568,6 +570,10 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
si->length = T - H;
if (si->length > length)
si->length = length;
+
+ Nlast = div_u64(H + si->length + U - 1, U);
+ si->maxdevUnits = Nlast - N;
+
si->M = M;
}
EXPORT_SYMBOL(ore_calc_stripe_info);
@@ -583,13 +589,16 @@ int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
int ret;
if (per_dev->bio == NULL) {
- unsigned pages_in_stripe = ios->layout->group_width *
- (ios->layout->stripe_unit / PAGE_SIZE);
- unsigned nr_pages = ios->nr_pages * ios->layout->group_width /
- (ios->layout->group_width -
- ios->layout->parity);
- unsigned bio_size = (nr_pages + pages_in_stripe) /
- ios->layout->group_width;
+ unsigned bio_size;
+
+ if (!ios->reading) {
+ bio_size = ios->si.maxdevUnits;
+ } else {
+ bio_size = (ios->si.maxdevUnits + 1) *
+ (ios->layout->group_width - ios->layout->parity) /
+ ios->layout->group_width;
+ }
+ bio_size *= (ios->layout->stripe_unit / PAGE_SIZE);
per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
if (unlikely(!per_dev->bio)) {
@@ -609,8 +618,12 @@ int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
added_len = bio_add_pc_page(q, per_dev->bio, pages[pg],
pglen, pgbase);
if (unlikely(pglen != added_len)) {
- ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=%u\n",
- per_dev->bio->bi_vcnt);
+ /* If bi_vcnt == bi_max then this is a SW BUG */
+ ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=0x%x "
+ "bi_max=0x%x BIO_MAX=0x%x cur_len=0x%x\n",
+ per_dev->bio->bi_vcnt,
+ per_dev->bio->bi_max_vecs,
+ BIO_MAX_PAGES_KMALLOC, cur_len);
ret = -ENOMEM;
goto out;
}
@@ -1098,7 +1111,7 @@ int ore_truncate(struct ore_layout *layout, struct ore_components *oc,
size_attr->attr = g_attr_logical_length;
size_attr->attr.val_ptr = &size_attr->newsize;
- ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
+ ORE_DBGMSG2("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
_LLU(oc->comps->obj.id), _LLU(obj_size), i);
ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
&size_attr->attr);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index a235f00..c43fe9b 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -215,7 +215,7 @@ struct getdents_callback {
struct dir_context ctx;
char *name; /* name that was found. It already points to a
buffer NAME_MAX+1 is size */
- unsigned long ino; /* the inum we are looking for */
+ u64 ino; /* the inum we are looking for */
int found; /* inode matched? */
int sequence; /* sequence counter */
};
@@ -255,10 +255,14 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
struct inode *dir = path->dentry->d_inode;
int error;
struct file *file;
+ struct kstat stat;
+ struct path child_path = {
+ .mnt = path->mnt,
+ .dentry = child,
+ };
struct getdents_callback buffer = {
.ctx.actor = filldir_one,
.name = name,
- .ino = child->d_inode->i_ino
};
error = -ENOTDIR;
@@ -268,6 +272,16 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
if (!dir->i_fop)
goto out;
/*
+ * inode->i_ino is unsigned long, kstat->ino is u64, so the
+ * former would be insufficient on 32-bit hosts when the
+ * filesystem supports 64-bit inode numbers. So we need to
+ * actually call ->getattr, not just read i_ino:
+ */
+ error = vfs_getattr_nosec(&child_path, &stat);
+ if (error)
+ return error;
+ buffer.ino = stat.ino;
+ /*
* Open the directory ...
*/
file = dentry_open(path, O_RDONLY, cred);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 2885349..20d6697 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1493,6 +1493,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
sb->s_blocksize - offset : towrite;
tmp_bh.b_state = 0;
+ tmp_bh.b_size = sb->s_blocksize;
err = ext2_get_block(inode, blk, &tmp_bh, 1);
if (err < 0)
goto out;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index af815ea..a9d2bf9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -267,6 +267,16 @@ struct ext4_io_submit {
/* Translate # of blks to # of clusters */
#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
(sbi)->s_cluster_bits)
+/* Mask out the low bits to get the starting block of the cluster */
+#define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \
+ ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
+#define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \
+ ~((ext4_lblk_t) (s)->s_cluster_ratio - 1))
+/* Get the cluster offset */
+#define EXT4_PBLK_COFF(s, pblk) ((pblk) & \
+ ((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
+#define EXT4_LBLK_COFF(s, lblk) ((lblk) & \
+ ((ext4_lblk_t) (s)->s_cluster_ratio - 1))
/*
* Structure of a blocks group descriptor
@@ -760,6 +770,8 @@ do { \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
(einode)->xtime.tv_sec = \
(signed)le32_to_cpu((raw_inode)->xtime); \
+ else \
+ (einode)->xtime.tv_sec = 0; \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
ext4_decode_extra_time(&(einode)->xtime, \
raw_inode->xtime ## _extra); \
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 17ac112..3fe29de 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -259,6 +259,15 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
if (WARN_ON_ONCE(err)) {
ext4_journal_abort_handle(where, line, __func__, bh,
handle, err);
+ ext4_error_inode(inode, where, line,
+ bh->b_blocknr,
+ "journal_dirty_metadata failed: "
+ "handle type %u started at line %u, "
+ "credits %u/%u, errcode %d",
+ handle->h_type,
+ handle->h_line_no,
+ handle->h_requested_credits,
+ handle->h_buffer_credits, err);
}
} else {
if (inode)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 54d52af..e678549 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -360,8 +360,10 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
ext4_fsblk_t block = ext4_ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext);
+ ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
+ ext4_lblk_t last = lblock + len - 1;
- if (len == 0)
+ if (lblock > last)
return 0;
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}
@@ -387,11 +389,26 @@ static int ext4_valid_extent_entries(struct inode *inode,
if (depth == 0) {
/* leaf entries */
struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
+ struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+ ext4_fsblk_t pblock = 0;
+ ext4_lblk_t lblock = 0;
+ ext4_lblk_t prev = 0;
+ int len = 0;
while (entries) {
if (!ext4_valid_extent(inode, ext))
return 0;
+
+ /* Check for overlapping extents */
+ lblock = le32_to_cpu(ext->ee_block);
+ len = ext4_ext_get_actual_len(ext);
+ if ((lblock <= prev) && prev) {
+ pblock = ext4_ext_pblock(ext);
+ es->s_last_error_block = cpu_to_le64(pblock);
+ return 0;
+ }
ext++;
entries--;
+ prev = lblock + len - 1;
}
} else {
struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
@@ -1844,8 +1861,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
depth = ext_depth(inode);
if (!path[depth].p_ext)
goto out;
- b2 = le32_to_cpu(path[depth].p_ext->ee_block);
- b2 &= ~(sbi->s_cluster_ratio - 1);
+ b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));
/*
* get the next allocated block if the extent in the path
@@ -1855,7 +1871,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
b2 = ext4_ext_next_allocated_block(path);
if (b2 == EXT_MAX_BLOCKS)
goto out;
- b2 &= ~(sbi->s_cluster_ratio - 1);
+ b2 = EXT4_LBLK_CMASK(sbi, b2);
}
/* check for wrap through zero on extent logical start block*/
@@ -2535,7 +2551,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* extent, we have to mark the cluster as used (store negative
* cluster number in partial_cluster).
*/
- unaligned = pblk & (sbi->s_cluster_ratio - 1);
+ unaligned = EXT4_PBLK_COFF(sbi, pblk);
if (unaligned && (ee_len == num) &&
(*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
*partial_cluster = EXT4_B2C(sbi, pblk);
@@ -2629,7 +2645,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
* accidentally freeing it later on
*/
pblk = ext4_ext_pblock(ex);
- if (pblk & (sbi->s_cluster_ratio - 1))
+ if (EXT4_PBLK_COFF(sbi, pblk))
*partial_cluster =
-((long long)EXT4_B2C(sbi, pblk));
ex--;
@@ -3784,7 +3800,7 @@ int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_lblk_t lblk_start, lblk_end;
- lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
+ lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
@@ -3843,9 +3859,9 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
/* Check towards left side */
- c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
+ c_offset = EXT4_LBLK_COFF(sbi, lblk_start);
if (c_offset) {
- lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
+ lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start);
lblk_to = lblk_from + c_offset - 1;
if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
@@ -3853,7 +3869,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
}
/* Now check towards right. */
- c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
+ c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks);
if (allocated_clusters && c_offset) {
lblk_from = lblk_start + num_blks;
lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
@@ -3921,6 +3937,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
} else
err = ret;
map->m_flags |= EXT4_MAP_MAPPED;
+ map->m_pblk = newblock;
if (allocated > map->m_len)
allocated = map->m_len;
map->m_len = allocated;
@@ -4061,7 +4078,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
struct ext4_ext_path *path)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+ ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
ext4_lblk_t ex_cluster_start, ex_cluster_end;
ext4_lblk_t rr_cluster_start;
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
@@ -4079,8 +4096,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
(rr_cluster_start == ex_cluster_start)) {
if (rr_cluster_start == ex_cluster_end)
ee_start += ee_len - 1;
- map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
- c_offset;
+ map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
map->m_len = min(map->m_len,
(unsigned) sbi->s_cluster_ratio - c_offset);
/*
@@ -4234,7 +4250,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
*/
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
newex.ee_block = cpu_to_le32(map->m_lblk);
- cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+ cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
/*
* If we are doing bigalloc, check to see if the extent returned
@@ -4302,7 +4318,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* needed so that future calls to get_implied_cluster_alloc()
* work correctly.
*/
- offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
+ offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
ar.goal -= offset;
ar.logical -= offset;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index d9ecbf1..46b3668 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1925,9 +1925,11 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
}
/* Clear the content within i_blocks. */
- if (i_size < EXT4_MIN_INLINE_DATA_SIZE)
- memset(ext4_raw_inode(&is.iloc)->i_block + i_size, 0,
- EXT4_MIN_INLINE_DATA_SIZE - i_size);
+ if (i_size < EXT4_MIN_INLINE_DATA_SIZE) {
+ void *p = (void *) ext4_raw_inode(&is.iloc)->i_block;
+ memset(p + i_size, 0,
+ EXT4_MIN_INLINE_DATA_SIZE - i_size);
+ }
EXT4_I(inode)->i_inline_size = i_size <
EXT4_MIN_INLINE_DATA_SIZE ?
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e274e9c..786bf07 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -38,6 +38,7 @@
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/aio.h>
+#include <linux/bitops.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -1206,7 +1207,6 @@ static int ext4_journalled_write_end(struct file *file,
*/
static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
{
- int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
@@ -1218,7 +1218,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
* in order to allocate nrblocks
* worse case is one extent per block
*/
-repeat:
spin_lock(&ei->i_block_reservation_lock);
/*
* ext4_calc_metadata_amount() has side effects, which we have
@@ -1238,10 +1237,6 @@ repeat:
ei->i_da_metadata_calc_len = save_len;
ei->i_da_metadata_calc_last_lblock = save_last_lblock;
spin_unlock(&ei->i_block_reservation_lock);
- if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
- cond_resched();
- goto repeat;
- }
return -ENOSPC;
}
ei->i_reserved_meta_blocks += md_needed;
@@ -1255,7 +1250,6 @@ repeat:
*/
static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
{
- int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
@@ -1277,7 +1271,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
* in order to allocate nrblocks
* worse case is one extent per block
*/
-repeat:
spin_lock(&ei->i_block_reservation_lock);
/*
* ext4_calc_metadata_amount() has side effects, which we have
@@ -1297,10 +1290,6 @@ repeat:
ei->i_da_metadata_calc_len = save_len;
ei->i_da_metadata_calc_last_lblock = save_last_lblock;
spin_unlock(&ei->i_block_reservation_lock);
- if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
- cond_resched();
- goto repeat;
- }
dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
return -ENOSPC;
}
@@ -3934,18 +3923,20 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
void ext4_set_inode_flags(struct inode *inode)
{
unsigned int flags = EXT4_I(inode)->i_flags;
+ unsigned int new_fl = 0;
- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
if (flags & EXT4_SYNC_FL)
- inode->i_flags |= S_SYNC;
+ new_fl |= S_SYNC;
if (flags & EXT4_APPEND_FL)
- inode->i_flags |= S_APPEND;
+ new_fl |= S_APPEND;
if (flags & EXT4_IMMUTABLE_FL)
- inode->i_flags |= S_IMMUTABLE;
+ new_fl |= S_IMMUTABLE;
if (flags & EXT4_NOATIME_FL)
- inode->i_flags |= S_NOATIME;
+ new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
- inode->i_flags |= S_DIRSYNC;
+ new_fl |= S_DIRSYNC;
+ set_mask_bits(&inode->i_flags,
+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl);
}
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
@@ -4594,6 +4585,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_size > sbi->s_bitmap_maxbytes)
return -EFBIG;
}
+
+ if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
+ inode_inc_iversion(inode);
+
if (S_ISREG(inode->i_mode) &&
(attr->ia_size < inode->i_size)) {
if (ext4_should_order_data(inode)) {
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a569d33..d011b69 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -144,7 +144,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2);
if (IS_ERR(handle)) {
err = -EINVAL;
- goto swap_boot_out;
+ goto journal_err_out;
}
/* Protect extent tree against block allocations via delalloc */
@@ -202,6 +202,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
ext4_double_up_write_data_sem(inode, inode_bl);
+journal_err_out:
ext4_inode_resume_unlocked_dio(inode);
ext4_inode_resume_unlocked_dio(inode_bl);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a41e3ba..04a5c75 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3442,6 +3442,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head)
{
struct ext4_prealloc_space *pa;
pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
+
+ BUG_ON(atomic_read(&pa->pa_count));
+ BUG_ON(pa->pa_deleted == 0);
kmem_cache_free(ext4_pspace_cachep, pa);
}
@@ -3455,11 +3458,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
ext4_group_t grp;
ext4_fsblk_t grp_blk;
- if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
- return;
-
/* in this short window concurrent discard can set pa_deleted */
spin_lock(&pa->pa_lock);
+ if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
+ spin_unlock(&pa->pa_lock);
+ return;
+ }
+
if (pa->pa_deleted == 1) {
spin_unlock(&pa->pa_lock);
return;
@@ -4121,7 +4126,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
ext4_get_group_no_and_offset(sb, goal, &group, &block);
/* set up allocation goals */
- ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
+ ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
ac->ac_status = AC_STATUS_CONTINUE;
ac->ac_sb = sb;
ac->ac_inode = ar->inode;
@@ -4663,7 +4668,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
* blocks at the beginning or the end unless we are explicitly
* requested to avoid doing so.
*/
- overflow = block & (sbi->s_cluster_ratio - 1);
+ overflow = EXT4_PBLK_COFF(sbi, block);
if (overflow) {
if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
overflow = sbi->s_cluster_ratio - overflow;
@@ -4677,7 +4682,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
count += overflow;
}
}
- overflow = count & (sbi->s_cluster_ratio - 1);
+ overflow = EXT4_LBLK_COFF(sbi, count);
if (overflow) {
if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
if (count > overflow)
@@ -4794,8 +4799,8 @@ do_more:
" group:%d block:%d count:%lu failed"
" with %d", block_group, bit, count,
err);
- }
-
+ } else
+ EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index c5adbb3..f3b84cd 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -243,6 +243,7 @@ static int ext4_alloc_group_tables(struct super_block *sb,
ext4_group_t group;
ext4_group_t last_group;
unsigned overhead;
+ __u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0;
BUG_ON(flex_gd->count == 0 || group_data == NULL);
@@ -266,7 +267,7 @@ next_group:
src_group++;
for (; src_group <= last_group; src_group++) {
overhead = ext4_group_overhead_blocks(sb, src_group);
- if (overhead != 0)
+ if (overhead == 0)
last_blk += group_data[src_group - group].blocks_count;
else
break;
@@ -280,8 +281,7 @@ next_group:
group = ext4_get_group_number(sb, start_blk - 1);
group -= group_data[0].group;
group_data[group].free_blocks_count--;
- if (flexbg_size > 1)
- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
+ flex_gd->bg_flags[group] &= uninit_mask;
}
/* Allocate inode bitmaps */
@@ -292,22 +292,30 @@ next_group:
group = ext4_get_group_number(sb, start_blk - 1);
group -= group_data[0].group;
group_data[group].free_blocks_count--;
- if (flexbg_size > 1)
- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
+ flex_gd->bg_flags[group] &= uninit_mask;
}
/* Allocate inode tables */
for (; it_index < flex_gd->count; it_index++) {
- if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk)
+ unsigned int itb = EXT4_SB(sb)->s_itb_per_group;
+ ext4_fsblk_t next_group_start;
+
+ if (start_blk + itb > last_blk)
goto next_group;
group_data[it_index].inode_table = start_blk;
- group = ext4_get_group_number(sb, start_blk - 1);
+ group = ext4_get_group_number(sb, start_blk);
+ next_group_start = ext4_group_first_block_no(sb, group + 1);
group -= group_data[0].group;
- group_data[group].free_blocks_count -=
- EXT4_SB(sb)->s_itb_per_group;
- if (flexbg_size > 1)
- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
+ if (start_blk + itb > next_group_start) {
+ flex_gd->bg_flags[group + 1] &= uninit_mask;
+ overhead = start_blk + itb - next_group_start;
+ group_data[group + 1].free_blocks_count -= overhead;
+ itb -= overhead;
+ }
+
+ group_data[group].free_blocks_count -= itb;
+ flex_gd->bg_flags[group] &= uninit_mask;
start_blk += EXT4_SB(sb)->s_itb_per_group;
}
@@ -401,7 +409,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
start = ext4_group_first_block_no(sb, group);
group -= flex_gd->groups[0].group;
- count2 = sb->s_blocksize * 8 - (block - start);
+ count2 = EXT4_BLOCKS_PER_GROUP(sb) - (block - start);
if (count2 > count)
count2 = count;
@@ -620,7 +628,7 @@ handle_ib:
if (err)
goto out;
count = group_table_count[j];
- start = group_data[i].block_bitmap;
+ start = (&group_data[i].block_bitmap)[j];
block = start;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2c2e6cb..d9711dc 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -773,7 +773,7 @@ static void ext4_put_super(struct super_block *sb)
}
ext4_es_unregister_shrinker(sbi);
- del_timer(&sbi->s_err_report);
+ del_timer_sync(&sbi->s_err_report);
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
@@ -3288,11 +3288,19 @@ int ext4_calculate_overhead(struct super_block *sb)
}
-static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
+static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb)
{
ext4_fsblk_t resv_clusters;
/*
+ * There's no need to reserve anything when we aren't using extents.
+ * The space estimates are exact, there are no unwritten extents,
+ * hole punching doesn't need new metadata... This is needed especially
+ * to keep ext2/3 backward compatibility.
+ */
+ if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
+ return 0;
+ /*
* By default we reserve 2% or 4096 clusters, whichever is smaller.
* This should cover the situations where we can not afford to run
* out of space like for example punch hole, or converting
@@ -3300,7 +3308,8 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
* allocation would require 1, or 2 blocks, higher numbers are
* very rare.
*/
- resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
+ resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >>
+ EXT4_SB(sb)->s_cluster_bits;
do_div(resv_clusters, 50);
resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
@@ -3658,16 +3667,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
for (i = 0; i < 4; i++)
sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
sbi->s_def_hash_version = es->s_def_hash_version;
- i = le32_to_cpu(es->s_flags);
- if (i & EXT2_FLAGS_UNSIGNED_HASH)
- sbi->s_hash_unsigned = 3;
- else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
+ if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) {
+ i = le32_to_cpu(es->s_flags);
+ if (i & EXT2_FLAGS_UNSIGNED_HASH)
+ sbi->s_hash_unsigned = 3;
+ else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
#ifdef __CHAR_UNSIGNED__
- es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
- sbi->s_hash_unsigned = 3;
+ if (!(sb->s_flags & MS_RDONLY))
+ es->s_flags |=
+ cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
+ sbi->s_hash_unsigned = 3;
#else
- es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
+ if (!(sb->s_flags & MS_RDONLY))
+ es->s_flags |=
+ cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
#endif
+ }
}
/* Handle clustersize */
@@ -4043,10 +4058,10 @@ no_journal:
"available");
}
- err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi));
+ err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb));
if (err) {
ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
- "reserved pool", ext4_calculate_resv_clusters(sbi));
+ "reserved pool", ext4_calculate_resv_clusters(sb));
goto failed_mount4a;
}
@@ -4151,7 +4166,7 @@ failed_mount_wq:
}
failed_mount3:
ext4_es_unregister_shrinker(sbi);
- del_timer(&sbi->s_err_report);
+ del_timer_sync(&sbi->s_err_report);
if (sbi->s_flex_groups)
ext4_kvfree(sbi->s_flex_groups);
percpu_counter_destroy(&sbi->s_freeclusters_counter);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 03e9beb..1423c48 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1352,6 +1352,7 @@ retry:
new_extra_isize = s_min_extra_isize;
kfree(is); is = NULL;
kfree(bs); bs = NULL;
+ brelse(bh);
goto retry;
}
error = -1;
diff --git a/fs/file.c b/fs/file.c
index 4a78f98..9de2026 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -34,7 +34,7 @@ static void *alloc_fdmem(size_t size)
* vmalloc() if the allocation size will be considered "large" by the VM.
*/
if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
- void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN);
+ void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY);
if (data != NULL)
return data;
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 9f4935b..3595180 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -510,13 +510,16 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
}
WARN_ON(inode->i_state & I_SYNC);
/*
- * Skip inode if it is clean. We don't want to mess with writeback
- * lists in this function since flusher thread may be doing for example
- * sync in parallel and if we move the inode, it could get skipped. So
- * here we make sure inode is on some writeback list and leave it there
- * unless we have completely cleaned the inode.
+ * Skip inode if it is clean and we have no outstanding writeback in
+ * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this
+ * function since flusher thread may be doing for example sync in
+ * parallel and if we move the inode, it could get skipped. So here we
+ * make sure inode is on some writeback list and leave it there unless
+ * we have completely cleaned the inode.
*/
- if (!(inode->i_state & I_DIRTY))
+ if (!(inode->i_state & I_DIRTY) &&
+ (wbc->sync_mode != WB_SYNC_ALL ||
+ !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
goto out;
inode->i_state |= I_SYNC;
spin_unlock(&inode->i_lock);
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index e1959ef..b5ebc2d 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -50,6 +50,8 @@ void fscache_objlist_add(struct fscache_object *obj)
struct fscache_object *xobj;
struct rb_node **p = &fscache_object_list.rb_node, *parent = NULL;
+ ASSERT(RB_EMPTY_NODE(&obj->objlist_link));
+
write_lock(&fscache_object_list_lock);
while (*p) {
@@ -75,6 +77,9 @@ void fscache_objlist_add(struct fscache_object *obj)
*/
void fscache_objlist_remove(struct fscache_object *obj)
{
+ if (RB_EMPTY_NODE(&obj->objlist_link))
+ return;
+
write_lock(&fscache_object_list_lock);
BUG_ON(RB_EMPTY_ROOT(&fscache_object_list));
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 86d75a6..fec4134 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -314,6 +314,9 @@ void fscache_object_init(struct fscache_object *object,
object->cache = cache;
object->cookie = cookie;
object->parent = NULL;
+#ifdef CONFIG_FSCACHE_OBJECT_LIST
+ RB_CLEAR_NODE(&object->objlist_link);
+#endif
object->oob_event_mask = 0;
for (t = object->oob_table; t->events; t++)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ef74ad5..fa8cb4b 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1296,22 +1296,6 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
}
-static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
-{
- return 1;
-}
-
-static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
- .can_merge = 0,
- .map = generic_pipe_buf_map,
- .unmap = generic_pipe_buf_unmap,
- .confirm = generic_pipe_buf_confirm,
- .release = generic_pipe_buf_release,
- .steal = fuse_dev_pipe_buf_steal,
- .get = generic_pipe_buf_get,
-};
-
static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags)
@@ -1358,7 +1342,11 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
buf->page = bufs[page_nr].page;
buf->offset = bufs[page_nr].offset;
buf->len = bufs[page_nr].len;
- buf->ops = &fuse_dev_pipe_buf_ops;
+ /*
+ * Need to be careful about this. Having buf->ops in module
+ * code can Oops if the buffer persists after module unload.
+ */
+ buf->ops = &nosteal_pipe_buf_ops;
pipe->nrbufs++;
page_nr++;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 1f7d805..1253c20 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -984,6 +984,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = inode->i_mapping;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
int rv;
@@ -1004,6 +1005,35 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
if (rv != 1)
goto out; /* dio not valid, fall back to buffered i/o */
+ /*
+ * Now since we are holding a deferred (CW) lock at this point, you
+ * might be wondering why this is ever needed. There is a case however
+ * where we've granted a deferred local lock against a cached exclusive
+ * glock. That is ok provided all granted local locks are deferred, but
+ * it also means that it is possible to encounter pages which are
+ * cached and possibly also mapped. So here we check for that and sort
+ * them out ahead of the dio. The glock state machine will take care of
+ * everything else.
+ *
+ * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
+ * the first place, mapping->nr_pages will always be zero.
+ */
+ if (mapping->nrpages) {
+ loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
+ loff_t len = iov_length(iov, nr_segs);
+ loff_t end = PAGE_ALIGN(offset + len) - 1;
+
+ rv = 0;
+ if (len == 0)
+ goto out;
+ if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
+ unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
+ rv = filemap_write_and_wait_range(mapping, lstart, end);
+ if (rv)
+ return rv;
+ truncate_inode_pages_range(mapping, lstart, end);
+ }
+
rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, gfs2_get_block_direct,
NULL, NULL, 0);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index ced3257..630db36 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -584,17 +584,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (!IS_ERR(inode)) {
d = d_splice_alias(inode, dentry);
error = 0;
- if (file && !IS_ERR(d)) {
- if (d == NULL)
- d = dentry;
- if (S_ISREG(inode->i_mode))
- error = finish_open(file, d, gfs2_open_common, opened);
- else
+ if (file) {
+ if (S_ISREG(inode->i_mode)) {
+ WARN_ON(d != NULL);
+ error = finish_open(file, dentry, gfs2_open_common, opened);
+ } else {
error = finish_no_open(file, d);
+ }
+ } else {
+ dput(d);
}
gfs2_glock_dq_uninit(ghs);
- if (IS_ERR(d))
- return PTR_ERR(d);
return error;
} else if (error != -ENOENT) {
goto fail_gunlock;
@@ -781,8 +781,10 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry,
error = finish_open(file, dentry, gfs2_open_common, opened);
gfs2_glock_dq_uninit(&gh);
- if (error)
+ if (error) {
+ dput(d);
return ERR_PTR(error);
+ }
return d;
}
@@ -1163,14 +1165,19 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
d = __gfs2_lookup(dir, dentry, file, opened);
if (IS_ERR(d))
return PTR_ERR(d);
- if (d == NULL)
- d = dentry;
- if (d->d_inode) {
- if (!(*opened & FILE_OPENED))
- return finish_no_open(file, d);
+ if (d != NULL)
+ dentry = d;
+ if (dentry->d_inode) {
+ if (!(*opened & FILE_OPENED)) {
+ if (d == NULL)
+ dget(dentry);
+ return finish_no_open(file, dentry);
+ }
+ dput(d);
return 0;
}
+ BUG_ON(d != NULL);
if (!(flags & O_CREAT))
return -ENOENT;
@@ -1603,10 +1610,22 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid))
ogid = ngid = NO_GID_QUOTA_CHANGE;
- error = gfs2_quota_lock(ip, nuid, ngid);
+ error = get_write_access(inode);
if (error)
return error;
+ error = gfs2_rs_alloc(ip);
+ if (error)
+ goto out;
+
+ error = gfs2_rindex_update(sdp);
+ if (error)
+ goto out;
+
+ error = gfs2_quota_lock(ip, nuid, ngid);
+ if (error)
+ goto out;
+
if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
!gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
error = gfs2_quota_check(ip, nuid, ngid);
@@ -1633,6 +1652,8 @@ out_end_trans:
gfs2_trans_end(sdp);
out_gunlock_q:
gfs2_quota_unlock(ip);
+out:
+ put_write_access(inode);
return error;
}
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 610613f..9dcb977 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -551,10 +551,10 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
struct buffer_head *bh = bd->bd_bh;
struct gfs2_glock *gl = bd->bd_gl;
- gfs2_remove_from_ail(bd);
- bd->bd_bh = NULL;
bh->b_private = NULL;
bd->bd_blkno = bh->b_blocknr;
+ gfs2_remove_from_ail(bd); /* drops ref on bh */
+ bd->bd_bh = NULL;
bd->bd_ops = &gfs2_revoke_lops;
sdp->sd_log_num_revoke++;
atomic_inc(&gl->gl_revokes);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 9324150..52f177b 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -258,6 +258,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
struct address_space *mapping = bh->b_page->mapping;
struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
struct gfs2_bufdata *bd = bh->b_private;
+ int was_pinned = 0;
if (test_clear_buffer_pinned(bh)) {
trace_gfs2_pin(bd, 0);
@@ -273,12 +274,16 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
tr->tr_num_databuf_rm++;
}
tr->tr_touched = 1;
+ was_pinned = 1;
brelse(bh);
}
if (bd) {
spin_lock(&sdp->sd_ail_lock);
if (bd->bd_tr) {
gfs2_trans_add_revoke(sdp, bd);
+ } else if (was_pinned) {
+ bh->b_private = NULL;
+ kmem_cache_free(gfs2_bufdata_cachep, bd);
}
spin_unlock(&sdp->sd_ail_lock);
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 19ff5e8..21518b7 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1366,8 +1366,18 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
if (IS_ERR(s))
goto error_bdev;
- if (s->s_root)
+ if (s->s_root) {
+ /*
+ * s_umount nests inside bd_mutex during
+ * __invalidate_device(). blkdev_put() acquires
+ * bd_mutex and can't be called under s_umount. Drop
+ * s_umount temporarily. This is safe as we're
+ * holding an active reference.
+ */
+ up_write(&s->s_umount);
blkdev_put(bdev, mode);
+ down_write(&s->s_umount);
+ }
memset(&args, 0, sizeof(args));
args.ar_quota = GFS2_QUOTA_DEFAULT;
diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c
index cdb84a8..58b5106 100644
--- a/fs/hpfs/alloc.c
+++ b/fs/hpfs/alloc.c
@@ -8,6 +8,58 @@
#include "hpfs_fn.h"
+static void hpfs_claim_alloc(struct super_block *s, secno sec)
+{
+ struct hpfs_sb_info *sbi = hpfs_sb(s);
+ if (sbi->sb_n_free != (unsigned)-1) {
+ if (unlikely(!sbi->sb_n_free)) {
+ hpfs_error(s, "free count underflow, allocating sector %08x", sec);
+ sbi->sb_n_free = -1;
+ return;
+ }
+ sbi->sb_n_free--;
+ }
+}
+
+static void hpfs_claim_free(struct super_block *s, secno sec)
+{
+ struct hpfs_sb_info *sbi = hpfs_sb(s);
+ if (sbi->sb_n_free != (unsigned)-1) {
+ if (unlikely(sbi->sb_n_free >= sbi->sb_fs_size)) {
+ hpfs_error(s, "free count overflow, freeing sector %08x", sec);
+ sbi->sb_n_free = -1;
+ return;
+ }
+ sbi->sb_n_free++;
+ }
+}
+
+static void hpfs_claim_dirband_alloc(struct super_block *s, secno sec)
+{
+ struct hpfs_sb_info *sbi = hpfs_sb(s);
+ if (sbi->sb_n_free_dnodes != (unsigned)-1) {
+ if (unlikely(!sbi->sb_n_free_dnodes)) {
+ hpfs_error(s, "dirband free count underflow, allocating sector %08x", sec);
+ sbi->sb_n_free_dnodes = -1;
+ return;
+ }
+ sbi->sb_n_free_dnodes--;
+ }
+}
+
+static void hpfs_claim_dirband_free(struct super_block *s, secno sec)
+{
+ struct hpfs_sb_info *sbi = hpfs_sb(s);
+ if (sbi->sb_n_free_dnodes != (unsigned)-1) {
+ if (unlikely(sbi->sb_n_free_dnodes >= sbi->sb_dirband_size / 4)) {
+ hpfs_error(s, "dirband free count overflow, freeing sector %08x", sec);
+ sbi->sb_n_free_dnodes = -1;
+ return;
+ }
+ sbi->sb_n_free_dnodes++;
+ }
+}
+
/*
* Check if a sector is allocated in bitmap
* This is really slow. Turned on only if chk==2
@@ -203,9 +255,15 @@ secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forwa
}
sec = 0;
ret:
+ if (sec) {
+ i = 0;
+ do
+ hpfs_claim_alloc(s, sec + i);
+ while (unlikely(++i < n));
+ }
if (sec && f_p) {
for (i = 0; i < forward; i++) {
- if (!hpfs_alloc_if_possible(s, sec + i + 1)) {
+ if (!hpfs_alloc_if_possible(s, sec + n + i)) {
hpfs_error(s, "Prealloc doesn't work! Wanted %d, allocated at %08x, can't allocate %d", forward, sec, i);
sec = 0;
break;
@@ -228,6 +286,7 @@ static secno alloc_in_dirband(struct super_block *s, secno near)
nr >>= 2;
sec = alloc_in_bmp(s, (~0x3fff) | nr, 1, 0);
if (!sec) return 0;
+ hpfs_claim_dirband_alloc(s, sec);
return ((sec & 0x3fff) << 2) + sbi->sb_dirband_start;
}
@@ -242,6 +301,7 @@ int hpfs_alloc_if_possible(struct super_block *s, secno sec)
bmp[(sec & 0x3fff) >> 5] &= cpu_to_le32(~(1 << (sec & 0x1f)));
hpfs_mark_4buffers_dirty(&qbh);
hpfs_brelse4(&qbh);
+ hpfs_claim_alloc(s, sec);
return 1;
}
hpfs_brelse4(&qbh);
@@ -275,6 +335,7 @@ void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n)
return;
}
bmp[(sec & 0x3fff) >> 5] |= cpu_to_le32(1 << (sec & 0x1f));
+ hpfs_claim_free(s, sec);
if (!--n) {
hpfs_mark_4buffers_dirty(&qbh);
hpfs_brelse4(&qbh);
@@ -359,6 +420,7 @@ void hpfs_free_dnode(struct super_block *s, dnode_secno dno)
bmp[ssec >> 5] |= cpu_to_le32(1 << (ssec & 0x1f));
hpfs_mark_4buffers_dirty(&qbh);
hpfs_brelse4(&qbh);
+ hpfs_claim_dirband_free(s, dno);
}
}
@@ -366,7 +428,7 @@ struct dnode *hpfs_alloc_dnode(struct super_block *s, secno near,
dnode_secno *dno, struct quad_buffer_head *qbh)
{
struct dnode *d;
- if (hpfs_count_one_bitmap(s, hpfs_sb(s)->sb_dmap) > FREE_DNODES_ADD) {
+ if (hpfs_get_free_dnodes(s) > FREE_DNODES_ADD) {
if (!(*dno = alloc_in_dirband(s, near)))
if (!(*dno = hpfs_alloc_sector(s, near, 4, 0))) return NULL;
} else {
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 1b39863..c65f0bf 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -311,7 +311,7 @@ static inline struct hpfs_sb_info *hpfs_sb(struct super_block *sb)
__printf(2, 3)
void hpfs_error(struct super_block *, const char *, ...);
int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *);
-unsigned hpfs_count_one_bitmap(struct super_block *, secno);
+unsigned hpfs_get_free_dnodes(struct super_block *);
/*
* local time (HPFS) to GMT (Unix)
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 4334cda..3d6f897 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -115,7 +115,7 @@ static void hpfs_put_super(struct super_block *s)
kfree(sbi);
}
-unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno)
+static unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno)
{
struct quad_buffer_head qbh;
unsigned long *bits;
@@ -123,7 +123,7 @@ unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno)
bits = hpfs_map_4sectors(s, secno, &qbh, 0);
if (!bits)
- return 0;
+ return (unsigned)-1;
count = bitmap_weight(bits, 2048 * BITS_PER_BYTE);
hpfs_brelse4(&qbh);
return count;
@@ -138,30 +138,45 @@ static unsigned count_bitmaps(struct super_block *s)
hpfs_prefetch_bitmap(s, n);
}
for (n = 0; n < n_bands; n++) {
+ unsigned c;
hpfs_prefetch_bitmap(s, n + COUNT_RD_AHEAD);
- count += hpfs_count_one_bitmap(s, le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[n]));
+ c = hpfs_count_one_bitmap(s, le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[n]));
+ if (c != (unsigned)-1)
+ count += c;
}
return count;
}
+unsigned hpfs_get_free_dnodes(struct super_block *s)
+{
+ struct hpfs_sb_info *sbi = hpfs_sb(s);
+ if (sbi->sb_n_free_dnodes == (unsigned)-1) {
+ unsigned c = hpfs_count_one_bitmap(s, sbi->sb_dmap);
+ if (c == (unsigned)-1)
+ return 0;
+ sbi->sb_n_free_dnodes = c;
+ }
+ return sbi->sb_n_free_dnodes;
+}
+
static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *s = dentry->d_sb;
struct hpfs_sb_info *sbi = hpfs_sb(s);
u64 id = huge_encode_dev(s->s_bdev->bd_dev);
+
hpfs_lock(s);
- /*if (sbi->sb_n_free == -1) {*/
+ if (sbi->sb_n_free == (unsigned)-1)
sbi->sb_n_free = count_bitmaps(s);
- sbi->sb_n_free_dnodes = hpfs_count_one_bitmap(s, sbi->sb_dmap);
- /*}*/
+
buf->f_type = s->s_magic;
buf->f_bsize = 512;
buf->f_blocks = sbi->sb_fs_size;
buf->f_bfree = sbi->sb_n_free;
buf->f_bavail = sbi->sb_n_free;
buf->f_files = sbi->sb_dirband_size / 4;
- buf->f_ffree = sbi->sb_n_free_dnodes;
+ buf->f_ffree = hpfs_get_free_dnodes(s);
buf->f_fsid.val[0] = (u32)id;
buf->f_fsid.val[1] = (u32)(id >> 32);
buf->f_namelen = 254;
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 7aa9a32..7272cc6 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -514,11 +514,13 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type,
* similarly constrained call sites
*/
ret = start_this_handle(journal, handle, GFP_NOFS);
- if (ret < 0)
+ if (ret < 0) {
jbd2_journal_free_reserved(handle);
+ return ret;
+ }
handle->h_type = type;
handle->h_line_no = line_no;
- return ret;
+ return 0;
}
EXPORT_SYMBOL(jbd2_journal_start_reserved);
@@ -1290,7 +1292,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
* once a transaction -bzzz
*/
jh->b_modified = 1;
- J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
+ if (handle->h_buffer_credits <= 0) {
+ ret = -ENOSPC;
+ goto out_unlock_bh;
+ }
handle->h_buffer_credits--;
}
@@ -1373,7 +1378,6 @@ out_unlock_bh:
jbd2_journal_put_journal_head(jh);
out:
JBUFFER_TRACE(jh, "exit");
- WARN_ON(ret); /* All errors are bugs, so dump the stack */
return ret;
}
diff --git a/fs/libfs.c b/fs/libfs.c
index 3a3a9b5..193e0c2 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -993,3 +993,46 @@ EXPORT_SYMBOL_GPL(simple_attr_open);
EXPORT_SYMBOL_GPL(simple_attr_release);
EXPORT_SYMBOL_GPL(simple_attr_read);
EXPORT_SYMBOL_GPL(simple_attr_write);
+
+/*
+ * nop .set_page_dirty method so that people can use .page_mkwrite on
+ * anon inodes.
+ */
+static int anon_set_page_dirty(struct page *page)
+{
+ return 0;
+};
+
+/*
+ * A single inode exists for all anon_inode files. Contrary to pipes,
+ * anon_inode inodes have no associated per-instance data, so we need
+ * only allocate one of them.
+ */
+struct inode *alloc_anon_inode(struct super_block *s)
+{
+ static const struct address_space_operations anon_aops = {
+ .set_page_dirty = anon_set_page_dirty,
+ };
+ struct inode *inode = new_inode_pseudo(s);
+
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ inode->i_ino = get_next_ino();
+ inode->i_mapping->a_ops = &anon_aops;
+
+ /*
+ * Mark the inode dirty from the very beginning,
+ * that way it will never be moved to the dirty
+ * list because mark_inode_dirty() will think
+ * that it already _is_ on the dirty list.
+ */
+ inode->i_state = I_DIRTY;
+ inode->i_mode = S_IRUSR | S_IWUSR;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
+ inode->i_flags |= S_PRIVATE;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ return inode;
+}
+EXPORT_SYMBOL(alloc_anon_inode);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index e066a39..ab798a8 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -779,6 +779,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
struct nlm_file *file = block->b_file;
struct nlm_lock *lock = &block->b_call->a_args.lock;
int error;
+ loff_t fl_start, fl_end;
dprintk("lockd: grant blocked lock %p\n", block);
@@ -796,9 +797,16 @@ nlmsvc_grant_blocked(struct nlm_block *block)
}
/* Try the lock operation again */
+ /* vfs_lock_file() can mangle fl_start and fl_end, but we need
+ * them unchanged for the GRANT_MSG
+ */
lock->fl.fl_flags |= FL_SLEEP;
+ fl_start = lock->fl.fl_start;
+ fl_end = lock->fl.fl_end;
error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
lock->fl.fl_flags &= ~FL_SLEEP;
+ lock->fl.fl_start = fl_start;
+ lock->fl.fl_end = fl_end;
switch (error) {
case 0:
diff --git a/fs/mount.h b/fs/mount.h
index 64a8581..68d80bd 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -73,7 +73,7 @@ static inline int mnt_has_parent(struct mount *mnt)
static inline int is_mounted(struct vfsmount *mnt)
{
/* neither detached nor internal? */
- return !IS_ERR_OR_NULL(real_mount(mnt));
+ return !IS_ERR_OR_NULL(real_mount(mnt)->mnt_ns);
}
extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
diff --git a/fs/namei.c b/fs/namei.c
index caa2805..187cacf 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2468,6 +2468,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
*/
static inline int may_create(struct inode *dir, struct dentry *child)
{
+ audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
if (child->d_inode)
return -EEXIST;
if (IS_DEADDIR(dir))
@@ -3923,6 +3924,7 @@ retry:
out_dput:
done_path_create(&new_path, new_dentry);
if (retry_estale(error, how)) {
+ path_put(&old_path);
how |= LOOKUP_REVAL;
goto retry;
}
diff --git a/fs/namespace.c b/fs/namespace.c
index da5c494..84447db 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2888,7 +2888,7 @@ bool fs_fully_visible(struct file_system_type *type)
struct inode *inode = child->mnt_mountpoint->d_inode;
if (!S_ISDIR(inode->i_mode))
goto next;
- if (inode->i_nlink != 2)
+ if (inode->i_nlink > 2)
goto next;
}
visible = true;
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 9c3e117..4d01614 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -44,7 +44,7 @@
static inline sector_t normalize(sector_t s, int base)
{
sector_t tmp = s; /* Since do_div modifies its argument */
- return s - do_div(tmp, base);
+ return s - sector_div(tmp, base);
}
static inline sector_t normalize_up(sector_t s, int base)
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index ef792f2..5d8ccec 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -659,16 +659,19 @@ int nfs_async_inode_return_delegation(struct inode *inode,
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (delegation == NULL)
+ goto out_enoent;
- if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) {
- rcu_read_unlock();
- return -ENOENT;
- }
+ if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid))
+ goto out_enoent;
nfs_mark_return_delegation(server, delegation);
rcu_read_unlock();
nfs_delegation_run_state_manager(clp);
return 0;
+out_enoent:
+ rcu_read_unlock();
+ return -ENOENT;
}
static struct inode *
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 02b0df7..1402806 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -276,6 +276,15 @@ out_eof:
return -EBADCOOKIE;
}
+static bool
+nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi)
+{
+ if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
+ return false;
+ smp_rmb();
+ return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags);
+}
+
static
int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
{
@@ -289,8 +298,8 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
struct nfs_open_dir_context *ctx = desc->file->private_data;
new_pos = desc->current_index + i;
- if (ctx->attr_gencount != nfsi->attr_gencount
- || (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))) {
+ if (ctx->attr_gencount != nfsi->attr_gencount ||
+ !nfs_readdir_inode_mapping_valid(nfsi)) {
ctx->duped = 0;
ctx->attr_gencount = nfsi->attr_gencount;
} else if (new_pos < desc->ctx->pos) {
@@ -1139,7 +1148,13 @@ out_zap_parent:
if (inode && S_ISDIR(inode->i_mode)) {
/* Purge readdir caches. */
nfs_zap_caches(inode);
- if (dentry->d_flags & DCACHE_DISCONNECTED)
+ /*
+ * We can't d_drop the root of a disconnected tree:
+ * its d_hash is on the s_anon list and d_drop() would hide
+ * it from shrink_dcache_for_unmount(), leading to busy
+ * inodes on unmount and further oopses.
+ */
+ if (IS_ROOT(dentry))
goto out_valid;
}
/* If we have submounts, don't unhash ! */
@@ -1852,6 +1867,11 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
GFP_KERNEL)) {
SetPageUptodate(page);
unlock_page(page);
+ /*
+ * add_to_page_cache_lru() grabs an extra page refcount.
+ * Drop it here to avoid leaking this page later.
+ */
+ page_cache_release(page);
} else
__free_page(page);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 91ff089..af5f3ff 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -223,14 +223,31 @@ out:
* Synchronous I/O uses a stack-allocated iocb. Thus we can't trust
* the iocb is still valid here if this is a synchronous request.
*/
-static void nfs_direct_complete(struct nfs_direct_req *dreq)
+static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
{
+ struct inode *inode = dreq->inode;
+
+ if (dreq->iocb && write) {
+ loff_t pos = dreq->iocb->ki_pos + dreq->count;
+
+ spin_lock(&inode->i_lock);
+ if (i_size_read(inode) < pos)
+ i_size_write(inode, pos);
+ spin_unlock(&inode->i_lock);
+ }
+
+ if (write)
+ nfs_zap_mapping(inode, inode->i_mapping);
+
+ inode_dio_done(inode);
+
if (dreq->iocb) {
long res = (long) dreq->error;
if (!res)
res = (long) dreq->count;
aio_complete(dreq->iocb, res, 0);
}
+
complete_all(&dreq->completion);
nfs_direct_req_release(dreq);
@@ -273,7 +290,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
}
out_put:
if (put_dreq(dreq))
- nfs_direct_complete(dreq);
+ nfs_direct_complete(dreq, false);
hdr->release(hdr);
}
@@ -403,6 +420,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
loff_t pos, bool uio)
{
struct nfs_pageio_descriptor desc;
+ struct inode *inode = dreq->inode;
ssize_t result = -EINVAL;
size_t requested_bytes = 0;
unsigned long seg;
@@ -411,6 +429,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
&nfs_direct_read_completion_ops);
get_dreq(dreq);
desc.pg_dreq = dreq;
+ atomic_inc(&inode->i_dio_count);
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *vec = &iov[seg];
@@ -430,12 +449,13 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
* generic layer handle the completion.
*/
if (requested_bytes == 0) {
+ inode_dio_done(inode);
nfs_direct_req_release(dreq);
return result < 0 ? result : -EIO;
}
if (put_dreq(dreq))
- nfs_direct_complete(dreq);
+ nfs_direct_complete(dreq, false);
return 0;
}
@@ -473,12 +493,6 @@ out:
return result;
}
-static void nfs_inode_dio_write_done(struct inode *inode)
-{
- nfs_zap_mapping(inode, inode->i_mapping);
- inode_dio_done(inode);
-}
-
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
{
@@ -594,8 +608,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
nfs_direct_write_reschedule(dreq);
break;
default:
- nfs_inode_dio_write_done(dreq->inode);
- nfs_direct_complete(dreq);
+ nfs_direct_complete(dreq, true);
}
}
@@ -611,8 +624,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
{
- nfs_inode_dio_write_done(inode);
- nfs_direct_complete(dreq);
+ nfs_direct_complete(dreq, true);
}
#endif
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index eda8879..fdeeb28 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -164,17 +164,16 @@ static void nfs_zap_caches_locked(struct inode *inode)
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
nfs_fscache_invalidate(inode);
nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_LABEL
| NFS_INO_INVALID_DATA
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_PAGECACHE;
} else
nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_LABEL
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_PAGECACHE;
+ nfs_zap_label_cache_locked(nfsi);
}
void nfs_zap_caches(struct inode *inode)
@@ -266,6 +265,13 @@ nfs_init_locked(struct inode *inode, void *opaque)
}
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+static void nfs_clear_label_invalid(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_LABEL;
+ spin_unlock(&inode->i_lock);
+}
+
void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
struct nfs4_label *label)
{
@@ -289,6 +295,7 @@ void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
__func__,
(char *)label->label,
label->len, error);
+ nfs_clear_label_invalid(inode);
}
}
@@ -981,11 +988,11 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
if (ret < 0)
return ret;
}
- spin_lock(&inode->i_lock);
- nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
- if (S_ISDIR(inode->i_mode))
+ if (S_ISDIR(inode->i_mode)) {
+ spin_lock(&inode->i_lock);
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode->i_lock);
+ }
nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
nfs_fscache_wait_on_invalidate(inode);
@@ -1011,6 +1018,7 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long *bitlock = &nfsi->flags;
int ret = 0;
/* swapfiles are not supposed to be shared. */
@@ -1022,12 +1030,46 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
if (ret < 0)
goto out;
}
- if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
- trace_nfs_invalidate_mapping_enter(inode);
- ret = nfs_invalidate_mapping(inode, mapping);
- trace_nfs_invalidate_mapping_exit(inode, ret);
+
+ /*
+ * We must clear NFS_INO_INVALID_DATA first to ensure that
+ * invalidations that come in while we're shooting down the mappings
+ * are respected. But, that leaves a race window where one revalidator
+ * can clear the flag, and then another checks it before the mapping
+ * gets invalidated. Fix that by serializing access to this part of
+ * the function.
+ *
+ * At the same time, we need to allow other tasks to see whether we
+ * might be in the middle of invalidating the pages, so we only set
+ * the bit lock here if it looks like we're going to be doing that.
+ */
+ for (;;) {
+ ret = wait_on_bit(bitlock, NFS_INO_INVALIDATING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
+ if (ret)
+ goto out;
+ spin_lock(&inode->i_lock);
+ if (test_bit(NFS_INO_INVALIDATING, bitlock)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+ if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+ break;
+ spin_unlock(&inode->i_lock);
+ goto out;
}
+ set_bit(NFS_INO_INVALIDATING, bitlock);
+ smp_wmb();
+ nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
+ spin_unlock(&inode->i_lock);
+ trace_nfs_invalidate_mapping_enter(inode);
+ ret = nfs_invalidate_mapping(inode, mapping);
+ trace_nfs_invalidate_mapping_exit(inode, ret);
+
+ clear_bit_unlock(NFS_INO_INVALIDATING, bitlock);
+ smp_mb__after_clear_bit();
+ wake_up_bit(bitlock, NFS_INO_INVALIDATING);
out:
return ret;
}
@@ -1599,7 +1641,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_blocks = fattr->du.nfs2.blocks;
/* Update attrtimeo value if we're out of the unstable period */
- if (invalid & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) {
+ if (invalid & NFS_INO_INVALID_ATTR) {
nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = now;
@@ -1612,7 +1654,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
}
}
invalid &= ~NFS_INO_INVALID_ATTR;
- invalid &= ~NFS_INO_INVALID_LABEL;
/* Don't invalidate the data if we were to blame */
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
|| S_ISLNK(inode->i_mode)))
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 38da8c2..a84dbf2 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -266,6 +266,18 @@ extern const u32 nfs41_maxgetdevinfo_overhead;
extern struct rpc_procinfo nfs4_procedures[];
#endif
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi)
+{
+ if (nfs_server_capable(&nfsi->vfs_inode, NFS_CAP_SECURITY_LABEL))
+ nfsi->cache_validity |= NFS_INO_INVALID_LABEL;
+}
+#else
+static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi)
+{
+}
+#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
+
/* proc.c */
void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 28842ab..fdfd591 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -249,6 +249,7 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
extern int nfs41_setup_sequence(struct nfs4_session *session,
struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
struct rpc_task *task);
+extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *);
extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *);
extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *);
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index a860ab5..55ebebe 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -169,7 +169,7 @@ void nfs41_shutdown_client(struct nfs_client *clp)
void nfs40_shutdown_client(struct nfs_client *clp)
{
if (clp->cl_slot_tbl) {
- nfs4_release_slot_table(clp->cl_slot_tbl);
+ nfs4_shutdown_slot_table(clp->cl_slot_tbl);
kfree(clp->cl_slot_tbl);
}
}
@@ -407,13 +407,11 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
error = nfs4_discover_server_trunking(clp, &old);
if (error < 0)
goto error;
- nfs_put_client(clp);
- if (clp != old) {
- clp->cl_preserve_clid = true;
- clp = old;
- }
- return clp;
+ if (clp != old)
+ clp->cl_preserve_clid = true;
+ nfs_put_client(clp);
+ return old;
error:
nfs_mark_client_ready(clp, error);
@@ -491,9 +489,10 @@ int nfs40_walk_client_list(struct nfs_client *new,
prev = pos;
status = nfs_wait_client_init_complete(pos);
- spin_lock(&nn->nfs_client_lock);
if (status < 0)
- continue;
+ goto out;
+ status = -NFS4ERR_STALE_CLIENTID;
+ spin_lock(&nn->nfs_client_lock);
}
if (pos->cl_cons_state != NFS_CS_READY)
continue;
@@ -631,7 +630,8 @@ int nfs41_walk_client_list(struct nfs_client *new,
}
spin_lock(&nn->nfs_client_lock);
if (status < 0)
- continue;
+ break;
+ status = -NFS4ERR_STALE_CLIENTID;
}
if (pos->cl_cons_state != NFS_CS_READY)
continue;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index b86464b..394b0a0 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -335,8 +335,11 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) &&
- task->tk_status == 0)
+ task->tk_status == 0) {
+ if (rdata->res.seq_res.sr_slot != NULL)
+ nfs41_sequence_done(task, &rdata->res.seq_res);
return;
+ }
/* Note this may cause RPC to be resent */
rdata->header->mds_ops->rpc_call_done(task, data);
@@ -442,8 +445,11 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
struct nfs_write_data *wdata = data;
if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
- task->tk_status == 0)
+ task->tk_status == 0) {
+ if (wdata->res.seq_res.sr_slot != NULL)
+ nfs41_sequence_done(task, &wdata->res.seq_res);
return;
+ }
/* Note this may cause RPC to be resent */
wdata->header->mds_ops->rpc_call_done(task, data);
@@ -1216,17 +1222,17 @@ static void filelayout_recover_commit_reqs(struct list_head *dst,
struct pnfs_commit_bucket *b;
int i;
- /* NOTE cinfo->lock is NOT held, relying on fact that this is
- * only called on single thread per dreq.
- * Can't take the lock because need to do pnfs_put_lseg
- */
+ spin_lock(cinfo->lock);
for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
+ spin_unlock(cinfo->lock);
pnfs_put_lseg(b->wlseg);
b->wlseg = NULL;
+ spin_lock(cinfo->lock);
}
}
cinfo->ds->nwritten = 0;
+ spin_unlock(cinfo->lock);
}
static unsigned int
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index c7c295e5..efac602 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -95,7 +95,7 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
b6 = (struct sockaddr_in6 *)addr2;
/* LINKLOCAL addresses must have matching scope_id */
- if (ipv6_addr_scope(&a6->sin6_addr) ==
+ if (ipv6_addr_src_scope(&a6->sin6_addr) ==
IPV6_ADDR_SCOPE_LINKLOCAL &&
a6->sin6_scope_id != b6->sin6_scope_id)
return false;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d53d678..bcd42fb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -532,7 +532,7 @@ static int nfs40_sequence_done(struct rpc_task *task,
struct nfs4_slot *slot = res->sr_slot;
struct nfs4_slot_table *tbl;
- if (!RPC_WAS_SENT(task))
+ if (slot == NULL)
goto out;
tbl = slot->table;
@@ -585,7 +585,7 @@ out_unlock:
nfs41_server_notify_highest_slotid_update(session->clp);
}
-static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
{
struct nfs4_session *session;
struct nfs4_slot *slot;
@@ -685,6 +685,7 @@ out_retry:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return 0;
}
+EXPORT_SYMBOL_GPL(nfs41_sequence_done);
static int nfs4_sequence_done(struct rpc_task *task,
struct nfs4_sequence_res *res)
@@ -1318,21 +1319,14 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
int ret;
if (!data->rpc_done) {
- ret = data->rpc_status;
- goto err;
+ if (data->rpc_status) {
+ ret = data->rpc_status;
+ goto err;
+ }
+ /* cached opens have already been processed */
+ goto update;
}
- ret = -ESTALE;
- if (!(data->f_attr.valid & NFS_ATTR_FATTR_TYPE) ||
- !(data->f_attr.valid & NFS_ATTR_FATTR_FILEID) ||
- !(data->f_attr.valid & NFS_ATTR_FATTR_CHANGE))
- goto err;
-
- ret = -ENOMEM;
- state = nfs4_get_open_state(inode, data->owner);
- if (state == NULL)
- goto err;
-
ret = nfs_refresh_inode(inode, &data->f_attr);
if (ret)
goto err;
@@ -1341,8 +1335,10 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
if (data->o_res.delegation_type != 0)
nfs4_opendata_check_deleg(data, state);
+update:
update_open_stateid(state, &data->o_res.stateid, NULL,
data->o_arg.fmode);
+ atomic_inc(&state->count);
return state;
err:
@@ -1616,15 +1612,15 @@ static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
{
struct nfs4_opendata *data = calldata;
- nfs40_setup_sequence(data->o_arg.server, &data->o_arg.seq_args,
- &data->o_res.seq_res, task);
+ nfs40_setup_sequence(data->o_arg.server, &data->c_arg.seq_args,
+ &data->c_res.seq_res, task);
}
static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
{
struct nfs4_opendata *data = calldata;
- nfs40_sequence_done(task, &data->o_res.seq_res);
+ nfs40_sequence_done(task, &data->c_res.seq_res);
data->rpc_status = task->tk_status;
if (data->rpc_status == 0) {
@@ -1682,7 +1678,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
};
int status;
- nfs4_init_sequence(&data->o_arg.seq_args, &data->o_res.seq_res, 1);
+ nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1);
kref_get(&data->kref);
data->rpc_done = 0;
data->rpc_status = 0;
@@ -3977,8 +3973,9 @@ static bool nfs4_stateid_is_current(nfs4_stateid *stateid,
{
nfs4_stateid current_stateid;
- if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode))
- return false;
+ /* If the current stateid represents a lost lock, then exit */
+ if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode) == -EIO)
+ return true;
return nfs4_stateid_match(stateid, &current_stateid);
}
@@ -4575,7 +4572,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
struct nfs4_label label = {0, 0, buflen, buf};
u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL };
- struct nfs4_getattr_arg args = {
+ struct nfs4_getattr_arg arg = {
.fh = NFS_FH(inode),
.bitmask = bitmask,
};
@@ -4586,14 +4583,14 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
};
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR],
- .rpc_argp = &args,
+ .rpc_argp = &arg,
.rpc_resp = &res,
};
int ret;
nfs_fattr_init(&fattr);
- ret = rpc_call_sync(server->client, &msg, 0);
+ ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 0);
if (ret)
return ret;
if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL))
@@ -4630,7 +4627,7 @@ static int _nfs4_do_set_security_label(struct inode *inode,
struct iattr sattr = {0};
struct nfs_server *server = NFS_SERVER(inode);
const u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL };
- struct nfs_setattrargs args = {
+ struct nfs_setattrargs arg = {
.fh = NFS_FH(inode),
.iap = &sattr,
.server = server,
@@ -4644,14 +4641,14 @@ static int _nfs4_do_set_security_label(struct inode *inode,
};
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR],
- .rpc_argp = &args,
+ .rpc_argp = &arg,
.rpc_resp = &res,
};
int status;
- nfs4_stateid_copy(&args.stateid, &zero_stateid);
+ nfs4_stateid_copy(&arg.stateid, &zero_stateid);
- status = rpc_call_sync(server->client, &msg, 0);
+ status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (status)
dprintk("%s failed: %d\n", __func__, status);
@@ -4757,8 +4754,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
- task->tk_status = 0;
- return -EAGAIN;
+ goto wait_on_recovery;
#endif /* CONFIG_NFS_V4_1 */
case -NFS4ERR_DELAY:
nfs_inc_server_stats(server, NFSIOS_DELAY);
@@ -4943,11 +4939,17 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status);
switch (task->tk_status) {
- case -NFS4ERR_STALE_STATEID:
- case -NFS4ERR_EXPIRED:
case 0:
renew_lease(data->res.server, data->timestamp);
break;
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_DELEG_REVOKED:
+ case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_OLD_STATEID:
+ case -NFS4ERR_STALE_STATEID:
+ case -NFS4ERR_EXPIRED:
+ task->tk_status = 0;
+ break;
default:
if (nfs4_async_handle_error(task, data->res.server, NULL) ==
-EAGAIN) {
@@ -5106,6 +5108,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
status = 0;
}
request->fl_ops->fl_release_private(request);
+ request->fl_ops = NULL;
out:
return status;
}
@@ -5777,21 +5780,20 @@ struct nfs_release_lockowner_data {
struct nfs4_lock_state *lsp;
struct nfs_server *server;
struct nfs_release_lockowner_args args;
- struct nfs4_sequence_args seq_args;
- struct nfs4_sequence_res seq_res;
+ struct nfs_release_lockowner_res res;
};
static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_release_lockowner_data *data = calldata;
nfs40_setup_sequence(data->server,
- &data->seq_args, &data->seq_res, task);
+ &data->args.seq_args, &data->res.seq_res, task);
}
static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata)
{
struct nfs_release_lockowner_data *data = calldata;
- nfs40_sequence_done(task, &data->seq_res);
+ nfs40_sequence_done(task, &data->res.seq_res);
}
static void nfs4_release_lockowner_release(void *calldata)
@@ -5820,7 +5822,6 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st
data = kmalloc(sizeof(*data), GFP_NOFS);
if (!data)
return -ENOMEM;
- nfs4_init_sequence(&data->seq_args, &data->seq_res, 0);
data->lsp = lsp;
data->server = server;
data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
@@ -5828,6 +5829,8 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st
data->args.lock_owner.s_dev = server->s_dev;
msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
return 0;
}
@@ -7056,9 +7059,9 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layout_hdr *lo;
struct nfs4_state *state = NULL;
- unsigned long timeo, giveup;
+ unsigned long timeo, now, giveup;
- dprintk("--> %s\n", __func__);
+ dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
if (!nfs41_sequence_done(task, &lgp->res.seq_res))
goto out;
@@ -7066,12 +7069,38 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) {
case 0:
goto out;
+ /*
+ * NFS4ERR_LAYOUTTRYLATER is a conflict with another client
+ * (or clients) writing to the same RAID stripe
+ */
case -NFS4ERR_LAYOUTTRYLATER:
+ /*
+ * NFS4ERR_RECALLCONFLICT is when conflict with self (must recall
+ * existing layout before getting a new one).
+ */
case -NFS4ERR_RECALLCONFLICT:
timeo = rpc_get_timeout(task->tk_client);
giveup = lgp->args.timestamp + timeo;
- if (time_after(giveup, jiffies))
- task->tk_status = -NFS4ERR_DELAY;
+ now = jiffies;
+ if (time_after(giveup, now)) {
+ unsigned long delay;
+
+ /* Delay for:
+ * - Not less then NFS4_POLL_RETRY_MIN.
+ * - One last time a jiffie before we give up
+ * - exponential backoff (time_now minus start_attempt)
+ */
+ delay = max_t(unsigned long, NFS4_POLL_RETRY_MIN,
+ min((giveup - now - 1),
+ now - lgp->args.timestamp));
+
+ dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n",
+ __func__, delay);
+ rpc_delay(task, delay);
+ task->tk_status = 0;
+ rpc_restart_call_prepare(task);
+ goto out; /* Do not call nfs4_async_handle_error() */
+ }
break;
case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID:
@@ -7245,7 +7274,14 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
return;
server = NFS_SERVER(lrp->args.inode);
- if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
+ switch (task->tk_status) {
+ default:
+ task->tk_status = 0;
+ case 0:
+ break;
+ case -NFS4ERR_DELAY:
+ if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN)
+ break;
rpc_restart_call_prepare(task);
return;
}
@@ -7560,7 +7596,7 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
switch (err) {
case 0:
case -NFS4ERR_WRONGSEC:
- case -NFS4ERR_NOTSUPP:
+ case -ENOTSUPP:
goto out;
default:
err = nfs4_handle_exception(server, err, &exception);
@@ -7594,7 +7630,7 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
* Fall back on "guess and check" method if
* the server doesn't support SECINFO_NO_NAME
*/
- if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) {
+ if (err == -NFS4ERR_WRONGSEC || err == -ENOTSUPP) {
err = nfs4_find_root_sec(server, fhandle, info);
goto out_freepage;
}
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index cf883c7..e799dc3 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -231,14 +231,23 @@ out:
return ret;
}
+/*
+ * nfs4_release_slot_table - release all slot table entries
+ */
+static void nfs4_release_slot_table(struct nfs4_slot_table *tbl)
+{
+ nfs4_shrink_slot_table(tbl, 0);
+}
+
/**
- * nfs4_release_slot_table - release resources attached to a slot table
+ * nfs4_shutdown_slot_table - release resources attached to a slot table
* @tbl: slot table to shut down
*
*/
-void nfs4_release_slot_table(struct nfs4_slot_table *tbl)
+void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl)
{
- nfs4_shrink_slot_table(tbl, 0);
+ nfs4_release_slot_table(tbl);
+ rpc_destroy_wait_queue(&tbl->slot_tbl_waitq);
}
/**
@@ -422,7 +431,7 @@ void nfs41_update_target_slotid(struct nfs4_slot_table *tbl,
spin_unlock(&tbl->slot_tbl_lock);
}
-static void nfs4_destroy_session_slot_tables(struct nfs4_session *session)
+static void nfs4_release_session_slot_tables(struct nfs4_session *session)
{
nfs4_release_slot_table(&session->fc_slot_table);
nfs4_release_slot_table(&session->bc_slot_table);
@@ -450,7 +459,7 @@ int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
if (status && tbl->slots == NULL)
/* Fore and back channel share a connection so get
* both slot tables or neither */
- nfs4_destroy_session_slot_tables(ses);
+ nfs4_release_session_slot_tables(ses);
return status;
}
@@ -470,6 +479,12 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
return session;
}
+static void nfs4_destroy_session_slot_tables(struct nfs4_session *session)
+{
+ nfs4_shutdown_slot_table(&session->fc_slot_table);
+ nfs4_shutdown_slot_table(&session->bc_slot_table);
+}
+
void nfs4_destroy_session(struct nfs4_session *session)
{
struct rpc_xprt *xprt;
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index 2323061..b34ada9 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -74,7 +74,7 @@ enum nfs4_session_state {
extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
unsigned int max_reqs, const char *queue);
-extern void nfs4_release_slot_table(struct nfs4_slot_table *tbl);
+extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl);
extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index cc14cbb..26c07f9 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1015,8 +1015,11 @@ int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
if (ret == -EIO)
/* A lost lock - don't even consider delegations */
goto out;
- if (nfs4_copy_delegation_stateid(dst, state->inode, fmode))
+ /* returns true if delegation stateid found and copied */
+ if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) {
+ ret = 0;
goto out;
+ }
if (ret != -ENOENT)
/* nfs4_copy_delegation_stateid() didn't over-write
* dst, so it still has the lock stateid which we now
@@ -1422,7 +1425,7 @@ restart:
if (status >= 0) {
status = nfs4_reclaim_locks(state, ops);
if (status >= 0) {
- if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) {
+ if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) {
spin_lock(&state->state_lock);
list_for_each_entry(lock, &state->lock_states, ls_locks) {
if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
@@ -1881,10 +1884,15 @@ again:
nfs4_root_machine_cred(clp);
goto again;
}
- if (i > 2)
+ if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX)
break;
case -NFS4ERR_CLID_INUSE:
case -NFS4ERR_WRONGSEC:
+ /* No point in retrying if we already used RPC_AUTH_UNIX */
+ if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX) {
+ status = -EPERM;
+ break;
+ }
clnt = rpc_clone_client_set_auth(clnt, RPC_AUTH_UNIX);
if (IS_ERR(clnt)) {
status = PTR_ERR(clnt);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 79210d2..b2f842d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3053,7 +3053,8 @@ out_overflow:
return -EIO;
}
-static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected,
+ int *nfs_retval)
{
__be32 *p;
uint32_t opnum;
@@ -3063,19 +3064,32 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
if (unlikely(!p))
goto out_overflow;
opnum = be32_to_cpup(p++);
- if (opnum != expected) {
- dprintk("nfs: Server returned operation"
- " %d but we issued a request for %d\n",
- opnum, expected);
- return -EIO;
- }
+ if (unlikely(opnum != expected))
+ goto out_bad_operation;
nfserr = be32_to_cpup(p);
- if (nfserr != NFS_OK)
- return nfs4_stat_to_errno(nfserr);
- return 0;
+ if (nfserr == NFS_OK)
+ *nfs_retval = 0;
+ else
+ *nfs_retval = nfs4_stat_to_errno(nfserr);
+ return true;
+out_bad_operation:
+ dprintk("nfs: Server returned operation"
+ " %d but we issued a request for %d\n",
+ opnum, expected);
+ *nfs_retval = -EREMOTEIO;
+ return false;
out_overflow:
print_overflow_msg(__func__, xdr);
- return -EIO;
+ *nfs_retval = -EIO;
+ return false;
+}
+
+static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+{
+ int retval;
+
+ __decode_op_hdr(xdr, expected, &retval);
+ return retval;
}
/* Dummy routine */
@@ -4957,11 +4971,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
uint32_t savewords, bmlen, i;
int status;
- status = decode_op_hdr(xdr, OP_OPEN);
- if (status != -EIO)
- nfs_increment_open_seqid(status, res->seqid);
- if (!status)
- status = decode_stateid(xdr, &res->stateid);
+ if (!__decode_op_hdr(xdr, OP_OPEN, &status))
+ return status;
+ nfs_increment_open_seqid(status, res->seqid);
+ if (status)
+ return status;
+ status = decode_stateid(xdr, &res->stateid);
if (unlikely(status))
return status;
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 89fe741..59f838c 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -36,6 +36,7 @@
__print_flags(v, "|", \
{ 1 << NFS_INO_ADVISE_RDPLUS, "ADVISE_RDPLUS" }, \
{ 1 << NFS_INO_STALE, "STALE" }, \
+ { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \
{ 1 << NFS_INO_FLUSHING, "FLUSHING" }, \
{ 1 << NFS_INO_FSCACHE, "FSCACHE" }, \
{ 1 << NFS_INO_COMMIT, "COMMIT" }, \
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ac1dc33..c6aa89f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -909,9 +909,14 @@ bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx)
*/
static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
{
+ struct nfs_inode *nfsi = NFS_I(inode);
+
if (nfs_have_delegated_attributes(inode))
goto out;
- if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE))
+ if (nfsi->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE))
+ return false;
+ smp_rmb();
+ if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
return false;
out:
return PageUptodate(page) != 0;
@@ -922,19 +927,20 @@ out:
* extend the write to cover the entire page in order to avoid fragmentation
* inefficiencies.
*
- * If the file is opened for synchronous writes or if we have a write delegation
- * from the server then we can just skip the rest of the checks.
+ * If the file is opened for synchronous writes then we can just skip the rest
+ * of the checks.
*/
static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
{
if (file->f_flags & O_DSYNC)
return 0;
+ if (!nfs_write_pageuptodate(page, inode))
+ return 0;
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
return 1;
- if (nfs_write_pageuptodate(page, inode) && (inode->i_flock == NULL ||
- (inode->i_flock->fl_start == 0 &&
+ if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 &&
inode->i_flock->fl_end == OFFSET_MAX &&
- inode->i_flock->fl_type != F_RDLCK)))
+ inode->i_flock->fl_type != F_RDLCK))
return 1;
return 0;
}
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 5f38ea3..af51cf9 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -536,16 +536,12 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
if (err)
goto out3;
exp.ex_anon_uid= make_kuid(&init_user_ns, an_int);
- if (!uid_valid(exp.ex_anon_uid))
- goto out3;
/* anon gid */
err = get_int(&mesg, &an_int);
if (err)
goto out3;
exp.ex_anon_gid= make_kgid(&init_user_ns, an_int);
- if (!gid_valid(exp.ex_anon_gid))
- goto out3;
/* fsid */
err = get_int(&mesg, &an_int);
@@ -583,6 +579,17 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
exp.ex_uuid);
if (err)
goto out4;
+ /*
+ * For some reason exportfs has been passing down an
+ * invalid (-1) uid & gid on the "dummy" export which it
+ * uses to test export support. To make sure exportfs
+ * sees errors from check_export we therefore need to
+ * delay these checks till after check_export:
+ */
+ if (!uid_valid(exp.ex_anon_uid))
+ goto out4;
+ if (!gid_valid(exp.ex_anon_gid))
+ goto out4;
}
expp = svc_export_lookup(&exp);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d9454fe..ecc735e 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -141,8 +141,8 @@ xdr_error: \
static void next_decode_page(struct nfsd4_compoundargs *argp)
{
- argp->pagelist++;
argp->p = page_address(argp->pagelist[0]);
+ argp->pagelist++;
if (argp->pagelen < PAGE_SIZE) {
argp->end = argp->p + (argp->pagelen>>2);
argp->pagelen = 0;
@@ -411,6 +411,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
label->data = kzalloc(dummy32 + 1, GFP_KERNEL);
if (!label->data)
return nfserr_jukebox;
+ label->len = dummy32;
defer_free(argp, kfree, label->data);
memcpy(label->data, buf, dummy32);
}
@@ -1208,6 +1209,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
len -= pages * PAGE_SIZE;
argp->p = (__be32 *)page_address(argp->pagelist[0]);
+ argp->pagelist++;
argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE);
}
argp->p += XDR_QUADLEN(len);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 9186c7c..b6af150 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -132,6 +132,13 @@ nfsd_reply_cache_alloc(void)
}
static void
+nfsd_reply_cache_unhash(struct svc_cacherep *rp)
+{
+ hlist_del_init(&rp->c_hash);
+ list_del_init(&rp->c_lru);
+}
+
+static void
nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
{
if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
@@ -417,7 +424,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru);
if (nfsd_cache_entry_expired(rp) ||
num_drc_entries >= max_drc_entries) {
- lru_put_end(rp);
+ nfsd_reply_cache_unhash(rp);
prune_cache_entries();
goto search_cache;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c827acb..72cb28e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -298,41 +298,12 @@ commit_metadata(struct svc_fh *fhp)
}
/*
- * Set various file attributes.
- * N.B. After this call fhp needs an fh_put
+ * Go over the attributes and take care of the small differences between
+ * NFS semantics and what Linux expects.
*/
-__be32
-nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
- int check_guard, time_t guardtime)
+static void
+nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
{
- struct dentry *dentry;
- struct inode *inode;
- int accmode = NFSD_MAY_SATTR;
- umode_t ftype = 0;
- __be32 err;
- int host_err;
- int size_change = 0;
-
- if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
- accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
- if (iap->ia_valid & ATTR_SIZE)
- ftype = S_IFREG;
-
- /* Get inode */
- err = fh_verify(rqstp, fhp, ftype, accmode);
- if (err)
- goto out;
-
- dentry = fhp->fh_dentry;
- inode = dentry->d_inode;
-
- /* Ignore any mode updates on symlinks */
- if (S_ISLNK(inode->i_mode))
- iap->ia_valid &= ~ATTR_MODE;
-
- if (!iap->ia_valid)
- goto out;
-
/*
* NFSv2 does not differentiate between "set-[ac]time-to-now"
* which only requires access, and "set-[ac]time-to-X" which
@@ -342,8 +313,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
* convert to "set to now" instead of "set to explicit time"
*
* We only call inode_change_ok as the last test as technically
- * it is not an interface that we should be using. It is only
- * valid if the filesystem does not define it's own i_op->setattr.
+ * it is not an interface that we should be using.
*/
#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
#define MAX_TOUCH_TIME_ERROR (30*60)
@@ -369,30 +339,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
iap->ia_valid &= ~BOTH_TIME_SET;
}
}
-
- /*
- * The size case is special.
- * It changes the file as well as the attributes.
- */
- if (iap->ia_valid & ATTR_SIZE) {
- if (iap->ia_size < inode->i_size) {
- err = nfsd_permission(rqstp, fhp->fh_export, dentry,
- NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
- if (err)
- goto out;
- }
-
- host_err = get_write_access(inode);
- if (host_err)
- goto out_nfserr;
-
- size_change = 1;
- host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
- if (host_err) {
- put_write_access(inode);
- goto out_nfserr;
- }
- }
/* sanitize the mode change */
if (iap->ia_valid & ATTR_MODE) {
@@ -415,32 +361,111 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
}
}
+}
- /* Change the attributes. */
+static __be32
+nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct iattr *iap)
+{
+ struct inode *inode = fhp->fh_dentry->d_inode;
+ int host_err;
- iap->ia_valid |= ATTR_CTIME;
+ if (iap->ia_size < inode->i_size) {
+ __be32 err;
- err = nfserr_notsync;
- if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
- host_err = nfsd_break_lease(inode);
- if (host_err)
- goto out_nfserr;
- fh_lock(fhp);
+ err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
+ NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE);
+ if (err)
+ return err;
+ }
- host_err = notify_change(dentry, iap);
- err = nfserrno(host_err);
- fh_unlock(fhp);
+ host_err = get_write_access(inode);
+ if (host_err)
+ goto out_nfserrno;
+
+ host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
+ if (host_err)
+ goto out_put_write_access;
+ return 0;
+
+out_put_write_access:
+ put_write_access(inode);
+out_nfserrno:
+ return nfserrno(host_err);
+}
+
+/*
+ * Set various file attributes. After this call fhp needs an fh_put.
+ */
+__be32
+nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
+ int check_guard, time_t guardtime)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+ int accmode = NFSD_MAY_SATTR;
+ umode_t ftype = 0;
+ __be32 err;
+ int host_err;
+ int size_change = 0;
+
+ if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
+ accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
+ if (iap->ia_valid & ATTR_SIZE)
+ ftype = S_IFREG;
+
+ /* Get inode */
+ err = fh_verify(rqstp, fhp, ftype, accmode);
+ if (err)
+ goto out;
+
+ dentry = fhp->fh_dentry;
+ inode = dentry->d_inode;
+
+ /* Ignore any mode updates on symlinks */
+ if (S_ISLNK(inode->i_mode))
+ iap->ia_valid &= ~ATTR_MODE;
+
+ if (!iap->ia_valid)
+ goto out;
+
+ nfsd_sanitize_attrs(inode, iap);
+
+ /*
+ * The size case is special, it changes the file in addition to the
+ * attributes.
+ */
+ if (iap->ia_valid & ATTR_SIZE) {
+ err = nfsd_get_write_access(rqstp, fhp, iap);
+ if (err)
+ goto out;
+ size_change = 1;
}
+
+ iap->ia_valid |= ATTR_CTIME;
+
+ if (check_guard && guardtime != inode->i_ctime.tv_sec) {
+ err = nfserr_notsync;
+ goto out_put_write_access;
+ }
+
+ host_err = nfsd_break_lease(inode);
+ if (host_err)
+ goto out_put_write_access_nfserror;
+
+ fh_lock(fhp);
+ host_err = notify_change(dentry, iap);
+ fh_unlock(fhp);
+
+out_put_write_access_nfserror:
+ err = nfserrno(host_err);
+out_put_write_access:
if (size_change)
put_write_access(inode);
if (!err)
commit_metadata(fhp);
out:
return err;
-
-out_nfserr:
- err = nfserrno(host_err);
- goto out;
}
#if defined(CONFIG_NFSD_V2_ACL) || \
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 9f6b486..a1a1916 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1440,17 +1440,19 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
nilfs_clear_logs(&sci->sc_segbufs);
- err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
- if (unlikely(err))
- return err;
-
if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
sci->sc_freesegs,
sci->sc_nfreesegs,
NULL);
WARN_ON(err); /* do not happen */
+ sci->sc_stage.flags &= ~NILFS_CF_SUFREED;
}
+
+ err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
+ if (unlikely(err))
+ return err;
+
nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
sci->sc_stage = prev_stage;
}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index e44cb64..6663511 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -888,9 +888,9 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark,
{
return sys_fanotify_mark(fanotify_fd, flags,
#ifdef __BIG_ENDIAN
- ((__u64)mask1 << 32) | mask0,
-#else
((__u64)mask0 << 32) | mask1,
+#else
+ ((__u64)mask1 << 32) | mask0,
#endif
dfd, pathname);
}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index d71903c..f079411 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2371,8 +2371,8 @@ out_dio:
if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
((file->f_flags & O_DIRECT) && !direct_io)) {
- ret = filemap_fdatawrite_range(file->f_mapping, pos,
- pos + count - 1);
+ ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
+ *ppos + count - 1);
if (ret < 0)
written = ret;
@@ -2385,8 +2385,8 @@ out_dio:
}
if (!ret)
- ret = filemap_fdatawait_range(file->f_mapping, pos,
- pos + count - 1);
+ ret = filemap_fdatawait_range(file->f_mapping, *ppos,
+ *ppos + count - 1);
}
/*
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index aaa5061..d7b5108 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -717,6 +717,12 @@ static int ocfs2_release_dquot(struct dquot *dquot)
*/
if (status < 0)
mlog_errno(status);
+ /*
+ * Clear dq_off so that we search for the structure in quota file next
+ * time we acquire it. The structure might be deleted and reallocated
+ * elsewhere by another node while our dquot structure is on freelist.
+ */
+ dquot->dq_off = 0;
clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
out_trans:
ocfs2_commit_trans(osb, handle);
@@ -756,16 +762,17 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
status = ocfs2_lock_global_qf(info, 1);
if (status < 0)
goto out;
- if (!test_bit(DQ_READ_B, &dquot->dq_flags)) {
- status = ocfs2_qinfo_lock(info, 0);
- if (status < 0)
- goto out_dq;
- status = qtree_read_dquot(&info->dqi_gi, dquot);
- ocfs2_qinfo_unlock(info, 0);
- if (status < 0)
- goto out_dq;
- }
- set_bit(DQ_READ_B, &dquot->dq_flags);
+ status = ocfs2_qinfo_lock(info, 0);
+ if (status < 0)
+ goto out_dq;
+ /*
+ * We always want to read dquot structure from disk because we don't
+ * know what happened with it while it was on freelist.
+ */
+ status = qtree_read_dquot(&info->dqi_gi, dquot);
+ ocfs2_qinfo_unlock(info, 0);
+ if (status < 0)
+ goto out_dq;
OCFS2_DQUOT(dquot)->dq_use_count++;
OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 2e4344b..2001862 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -1303,10 +1303,6 @@ int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot)
ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
out:
- /* Clear the read bit so that next time someone uses this
- * dquot he reads fresh info from disk and allocates local
- * dquot structure */
- clear_bit(DQ_READ_B, &dquot->dq_flags);
return status;
}
diff --git a/fs/pipe.c b/fs/pipe.c
index d2c45e1..0e0752e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -726,11 +726,25 @@ pipe_poll(struct file *filp, poll_table *wait)
return mask;
}
+static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
+{
+ int kill = 0;
+
+ spin_lock(&inode->i_lock);
+ if (!--pipe->files) {
+ inode->i_pipe = NULL;
+ kill = 1;
+ }
+ spin_unlock(&inode->i_lock);
+
+ if (kill)
+ free_pipe_info(pipe);
+}
+
static int
pipe_release(struct inode *inode, struct file *file)
{
- struct pipe_inode_info *pipe = inode->i_pipe;
- int kill = 0;
+ struct pipe_inode_info *pipe = file->private_data;
__pipe_lock(pipe);
if (file->f_mode & FMODE_READ)
@@ -743,17 +757,9 @@ pipe_release(struct inode *inode, struct file *file)
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
- spin_lock(&inode->i_lock);
- if (!--pipe->files) {
- inode->i_pipe = NULL;
- kill = 1;
- }
- spin_unlock(&inode->i_lock);
__pipe_unlock(pipe);
- if (kill)
- free_pipe_info(pipe);
-
+ put_pipe_info(inode, pipe);
return 0;
}
@@ -1014,7 +1020,6 @@ static int fifo_open(struct inode *inode, struct file *filp)
{
struct pipe_inode_info *pipe;
bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
- int kill = 0;
int ret;
filp->f_version = 0;
@@ -1130,15 +1135,9 @@ err_wr:
goto err;
err:
- spin_lock(&inode->i_lock);
- if (!--pipe->files) {
- inode->i_pipe = NULL;
- kill = 1;
- }
- spin_unlock(&inode->i_lock);
__pipe_unlock(pipe);
- if (kill)
- free_pipe_info(pipe);
+
+ put_pipe_info(inode, pipe);
return ret;
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1485e38..c35eaa4 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1813,6 +1813,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
if (rc)
goto out_mmput;
+ rc = -ENOENT;
down_read(&mm->mmap_sem);
vma = find_exact_vma(mm, vm_start, vm_end);
if (vma && vma->vm_file) {
diff --git a/fs/proc/page.c b/fs/proc/page.c
index b8730d9..2a8cc94 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -121,7 +121,7 @@ u64 stable_page_flags(struct page *page)
* just checks PG_head/PG_tail, so we need to check PageLRU to make
* sure a given page is a thp, not a non-huge compound page.
*/
- else if (PageTransCompound(page) && PageLRU(compound_trans_head(page)))
+ else if (PageTransCompound(page) && PageLRU(compound_head(page)))
u |= 1 << KPF_THP;
/*
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 106a835..9fa2154 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -235,6 +235,7 @@ void __init proc_device_tree_init(void)
return;
root = of_find_node_by_path("/");
if (root == NULL) {
+ remove_proc_entry("device-tree", NULL);
pr_debug("/proc/device-tree: can't find root\n");
return;
}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 831d49a..cfc8dcc 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -581,9 +581,17 @@ int dquot_scan_active(struct super_block *sb,
dqstats_inc(DQST_LOOKUPS);
dqput(old_dquot);
old_dquot = dquot;
- ret = fn(dquot, priv);
- if (ret < 0)
- goto out;
+ /*
+ * ->release_dquot() can be racing with us. Our reference
+ * protects us from new calls to it so just wait for any
+ * outstanding call and recheck the DQ_ACTIVE_B after that.
+ */
+ wait_on_dquot(dquot);
+ if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
+ ret = fn(dquot, priv);
+ if (ret < 0)
+ goto out;
+ }
spin_lock(&dq_list_lock);
/* We are safe to continue now because our dquot could not
* be moved out of the inuse list while we hold the reference */
diff --git a/fs/read_write.c b/fs/read_write.c
index e3cd280..3889dcc 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -977,9 +977,9 @@ out:
return ret;
}
-COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd,
+COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
- unsigned long, vlen)
+ compat_ulong_t, vlen)
{
struct fd f = fdget(fd);
ssize_t ret;
@@ -1014,9 +1014,9 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
return ret;
}
-COMPAT_SYSCALL_DEFINE5(preadv, unsigned long, fd,
+COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
- unsigned long, vlen, u32, pos_low, u32, pos_high)
+ compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
return compat_sys_preadv64(fd, vec, vlen, pos);
@@ -1044,9 +1044,9 @@ out:
return ret;
}
-COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd,
+COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
const struct compat_iovec __user *, vec,
- unsigned long, vlen)
+ compat_ulong_t, vlen)
{
struct fd f = fdget(fd);
ssize_t ret;
@@ -1081,9 +1081,9 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
return ret;
}
-COMPAT_SYSCALL_DEFINE5(pwritev, unsigned long, fd,
+COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
- unsigned long, vlen, u32, pos_low, u32, pos_high)
+ compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
return compat_sys_pwritev64(fd, vec, vlen, pos);
diff --git a/fs/splice.c b/fs/splice.c
index 3b7ee65..84f810d 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -555,6 +555,24 @@ static const struct pipe_buf_operations default_pipe_buf_ops = {
.get = generic_pipe_buf_get,
};
+static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ return 1;
+}
+
+/* Pipe buffer operations for a socket and similar. */
+const struct pipe_buf_operations nosteal_pipe_buf_ops = {
+ .can_merge = 0,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .confirm = generic_pipe_buf_confirm,
+ .release = generic_pipe_buf_release,
+ .steal = generic_pipe_buf_nosteal,
+ .get = generic_pipe_buf_get,
+};
+EXPORT_SYMBOL(nosteal_pipe_buf_ops);
+
static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
unsigned long vlen, loff_t offset)
{
diff --git a/fs/stat.c b/fs/stat.c
index d0ea7ef..ae0c3ce 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -37,14 +37,21 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
EXPORT_SYMBOL(generic_fillattr);
-int vfs_getattr(struct path *path, struct kstat *stat)
+/**
+ * vfs_getattr_nosec - getattr without security checks
+ * @path: file to get attributes from
+ * @stat: structure to return attributes in
+ *
+ * Get attributes without calling security_inode_getattr.
+ *
+ * Currently the only caller other than vfs_getattr is internal to the
+ * filehandle lookup code, which uses only the inode number and returns
+ * no attributes to any user. Any other code probably wants
+ * vfs_getattr.
+ */
+int vfs_getattr_nosec(struct path *path, struct kstat *stat)
{
struct inode *inode = path->dentry->d_inode;
- int retval;
-
- retval = security_inode_getattr(path->mnt, path->dentry);
- if (retval)
- return retval;
if (inode->i_op->getattr)
return inode->i_op->getattr(path->mnt, path->dentry, stat);
@@ -53,6 +60,18 @@ int vfs_getattr(struct path *path, struct kstat *stat)
return 0;
}
+EXPORT_SYMBOL(vfs_getattr_nosec);
+
+int vfs_getattr(struct path *path, struct kstat *stat)
+{
+ int retval;
+
+ retval = security_inode_getattr(path->mnt, path->dentry);
+ if (retval)
+ return retval;
+ return vfs_getattr_nosec(path, stat);
+}
+
EXPORT_SYMBOL(vfs_getattr);
int vfs_fstat(unsigned int fd, struct kstat *stat)
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index e64ee52..c888040 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -217,6 +217,8 @@ xfs_growfs_data_private(
*/
nfree = 0;
for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {
+ __be32 *agfl_bno;
+
/*
* AG freespace header block
*/
@@ -276,8 +278,10 @@ xfs_growfs_data_private(
agfl->agfl_seqno = cpu_to_be32(agno);
uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid);
}
+
+ agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
- agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
+ agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 668e8f4..8c8ef24 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -443,7 +443,8 @@ xfs_attrlist_by_handle(
return -XFS_ERROR(EPERM);
if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
return -XFS_ERROR(EFAULT);
- if (al_hreq.buflen > XATTR_LIST_MAX)
+ if (al_hreq.buflen < sizeof(struct attrlist) ||
+ al_hreq.buflen > XATTR_LIST_MAX)
return -XFS_ERROR(EINVAL);
/*
@@ -1717,6 +1718,12 @@ xfs_file_ioctl(
if (mp->m_flags & XFS_MOUNT_RDONLY)
return -XFS_ERROR(EROFS);
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (mp->m_flags & XFS_MOUNT_RDONLY)
+ return -XFS_ERROR(EROFS);
+
if (copy_from_user(&eofb, arg, sizeof(eofb)))
return -XFS_ERROR(EFAULT);
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index f671f7e..53365c6 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -357,7 +357,8 @@ xfs_compat_attrlist_by_handle(
if (copy_from_user(&al_hreq, arg,
sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
return -XFS_ERROR(EFAULT);
- if (al_hreq.buflen > XATTR_LIST_MAX)
+ if (al_hreq.buflen < sizeof(struct attrlist) ||
+ al_hreq.buflen > XATTR_LIST_MAX)
return -XFS_ERROR(EINVAL);
/*
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 3e6c2e6..4688a62 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -137,8 +137,6 @@ xfs_qm_dqpurge(
{
struct xfs_mount *mp = dqp->q_mount;
struct xfs_quotainfo *qi = mp->m_quotainfo;
- struct xfs_dquot *gdqp = NULL;
- struct xfs_dquot *pdqp = NULL;
xfs_dqlock(dqp);
if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
@@ -146,21 +144,6 @@ xfs_qm_dqpurge(
return EAGAIN;
}
- /*
- * If this quota has a hint attached, prepare for releasing it now.
- */
- gdqp = dqp->q_gdquot;
- if (gdqp) {
- xfs_dqlock(gdqp);
- dqp->q_gdquot = NULL;
- }
-
- pdqp = dqp->q_pdquot;
- if (pdqp) {
- xfs_dqlock(pdqp);
- dqp->q_pdquot = NULL;
- }
-
dqp->dq_flags |= XFS_DQ_FREEING;
xfs_dqflock(dqp);
@@ -209,11 +192,47 @@ xfs_qm_dqpurge(
XFS_STATS_DEC(xs_qm_dquot_unused);
xfs_qm_dqdestroy(dqp);
+ return 0;
+}
+
+/*
+ * Release the group or project dquot pointers the user dquots maybe carrying
+ * around as a hint, and proceed to purge the user dquot cache if requested.
+*/
+STATIC int
+xfs_qm_dqpurge_hints(
+ struct xfs_dquot *dqp,
+ void *data)
+{
+ struct xfs_dquot *gdqp = NULL;
+ struct xfs_dquot *pdqp = NULL;
+ uint flags = *((uint *)data);
+ xfs_dqlock(dqp);
+ if (dqp->dq_flags & XFS_DQ_FREEING) {
+ xfs_dqunlock(dqp);
+ return EAGAIN;
+ }
+
+ /* If this quota has a hint attached, prepare for releasing it now */
+ gdqp = dqp->q_gdquot;
if (gdqp)
- xfs_qm_dqput(gdqp);
+ dqp->q_gdquot = NULL;
+
+ pdqp = dqp->q_pdquot;
if (pdqp)
- xfs_qm_dqput(pdqp);
+ dqp->q_pdquot = NULL;
+
+ xfs_dqunlock(dqp);
+
+ if (gdqp)
+ xfs_qm_dqrele(gdqp);
+ if (pdqp)
+ xfs_qm_dqrele(pdqp);
+
+ if (flags & XFS_QMOPT_UQUOTA)
+ return xfs_qm_dqpurge(dqp, NULL);
+
return 0;
}
@@ -225,8 +244,18 @@ xfs_qm_dqpurge_all(
struct xfs_mount *mp,
uint flags)
{
- if (flags & XFS_QMOPT_UQUOTA)
- xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL);
+ /*
+ * We have to release group/project dquot hint(s) from the user dquot
+ * at first if they are there, otherwise we would run into an infinite
+ * loop while walking through radix tree to purge other type of dquots
+ * since their refcount is not zero if the user dquot refers to them
+ * as hint.
+ *
+ * Call the special xfs_qm_dqpurge_hints() will end up go through the
+ * general xfs_qm_dqpurge() against user dquot cache if requested.
+ */
+ xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge_hints, &flags);
+
if (flags & XFS_QMOPT_GQUOTA)
xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL);
if (flags & XFS_QMOPT_PQUOTA)
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
index a5b59d9..0397081 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c
@@ -596,6 +596,11 @@ xfs_sb_verify(
* single bit error could clear the feature bit and unused parts of the
* superblock are supposed to be zero. Hence a non-null crc field indicates that
* we've potentially lost a feature bit and we should check it anyway.
+ *
+ * However, past bugs (i.e. in growfs) left non-zeroed regions beyond the
+ * last field in V4 secondary superblocks. So for secondary superblocks,
+ * we are more forgiving, and ignore CRC failures if the primary doesn't
+ * indicate that the fs version is V5.
*/
static void
xfs_sb_read_verify(
@@ -616,8 +621,12 @@ xfs_sb_read_verify(
if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize),
offsetof(struct xfs_sb, sb_crc))) {
- error = EFSCORRUPTED;
- goto out_error;
+ /* Only fail bad secondaries on a known V5 filesystem */
+ if (bp->b_bn != XFS_SB_DADDR &&
+ xfs_sb_version_hascrc(&mp->m_sb)) {
+ error = EFSCORRUPTED;
+ goto out_error;
+ }
}
}
error = xfs_sb_verify(bp, true);