summaryrefslogtreecommitdiff
path: root/fs/ceph/caps.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r--fs/ceph/caps.c118
1 files changed, 73 insertions, 45 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index db122bb..0c16818 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3,6 +3,7 @@
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/writeback.h>
@@ -1204,6 +1205,12 @@ retry:
if (capsnap->dirty_pages || capsnap->writing)
continue;
+ /*
+ * if cap writeback already occurred, we should have dropped
+ * the capsnap in ceph_put_wrbuffer_cap_refs.
+ */
+ BUG_ON(capsnap->dirty == 0);
+
/* pick mds, take s_mutex */
mds = __ceph_get_cap_mds(ci, &mseq);
if (session && session->s_mds != mds) {
@@ -1407,6 +1414,7 @@ static int try_nonblocking_invalidate(struct inode *inode)
*/
void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_mds_session *session)
+ __releases(session->s_mutex)
{
struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode);
struct ceph_mds_client *mdsc = &client->mdsc;
@@ -1414,7 +1422,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_cap *cap;
int file_wanted, used;
int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */
- int drop_session_lock = session ? 0 : 1;
int issued, implemented, want, retain, revoking, flushing = 0;
int mds = -1; /* keep track of how far we've gone through i_caps list
to avoid an infinite loop on retry */
@@ -1639,7 +1646,7 @@ ack:
if (queue_invalidate)
ceph_queue_invalidate(inode);
- if (session && drop_session_lock)
+ if (session)
mutex_unlock(&session->s_mutex);
if (took_snap_rwsem)
up_read(&mdsc->snap_rwsem);
@@ -1854,8 +1861,8 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
} else {
pr_err("%p auth cap %p not mds%d ???\n", inode,
cap, session->s_mds);
- spin_unlock(&inode->i_lock);
}
+ spin_unlock(&inode->i_lock);
}
}
@@ -2117,8 +2124,8 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
}
spin_unlock(&inode->i_lock);
- dout("put_cap_refs %p had %s %s\n", inode, ceph_cap_string(had),
- last ? "last" : "");
+ dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
+ last ? " last" : "", put ? " put" : "");
if (last && !flushsnaps)
ceph_check_caps(ci, 0, NULL);
@@ -2142,7 +2149,8 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
{
struct inode *inode = &ci->vfs_inode;
int last = 0;
- int last_snap = 0;
+ int complete_capsnap = 0;
+ int drop_capsnap = 0;
int found = 0;
struct ceph_cap_snap *capsnap = NULL;
@@ -2165,19 +2173,32 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
if (capsnap->context == snapc) {
found = 1;
- capsnap->dirty_pages -= nr;
- last_snap = !capsnap->dirty_pages;
break;
}
}
BUG_ON(!found);
+ capsnap->dirty_pages -= nr;
+ if (capsnap->dirty_pages == 0) {
+ complete_capsnap = 1;
+ if (capsnap->dirty == 0)
+ /* cap writeback completed before we created
+ * the cap_snap; no FLUSHSNAP is needed */
+ drop_capsnap = 1;
+ }
dout("put_wrbuffer_cap_refs on %p cap_snap %p "
- " snap %lld %d/%d -> %d/%d %s%s\n",
+ " snap %lld %d/%d -> %d/%d %s%s%s\n",
inode, capsnap, capsnap->context->seq,
ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
ci->i_wrbuffer_ref, capsnap->dirty_pages,
last ? " (wrbuffer last)" : "",
- last_snap ? " (capsnap last)" : "");
+ complete_capsnap ? " (complete capsnap)" : "",
+ drop_capsnap ? " (drop capsnap)" : "");
+ if (drop_capsnap) {
+ ceph_put_snap_context(capsnap->context);
+ list_del(&capsnap->ci_item);
+ list_del(&capsnap->flushing_item);
+ ceph_put_cap_snap(capsnap);
+ }
}
spin_unlock(&inode->i_lock);
@@ -2185,28 +2206,31 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
if (last) {
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
iput(inode);
- } else if (last_snap) {
+ } else if (complete_capsnap) {
ceph_flush_snaps(ci);
wake_up(&ci->i_cap_wq);
}
+ if (drop_capsnap)
+ iput(inode);
}
/*
* Handle a cap GRANT message from the MDS. (Note that a GRANT may
* actually be a revocation if it specifies a smaller cap set.)
*
- * caller holds s_mutex.
+ * caller holds s_mutex and i_lock, we drop both.
+ *
* return value:
* 0 - ok
* 1 - check_caps on auth cap only (writeback)
* 2 - check_caps (ack revoke)
*/
-static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
- struct ceph_mds_session *session,
- struct ceph_cap *cap,
- struct ceph_buffer *xattr_buf)
+static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
+ struct ceph_mds_session *session,
+ struct ceph_cap *cap,
+ struct ceph_buffer *xattr_buf)
__releases(inode->i_lock)
-
+ __releases(session->s_mutex)
{
struct ceph_inode_info *ci = ceph_inode(inode);
int mds = session->s_mds;
@@ -2216,7 +2240,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
u64 size = le64_to_cpu(grant->size);
u64 max_size = le64_to_cpu(grant->max_size);
struct timespec mtime, atime, ctime;
- int reply = 0;
+ int check_caps = 0;
int wake = 0;
int writeback = 0;
int revoked_rdcache = 0;
@@ -2329,11 +2353,12 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER)
writeback = 1; /* will delay ack */
else if (dirty & ~newcaps)
- reply = 1; /* initiate writeback in check_caps */
+ check_caps = 1; /* initiate writeback in check_caps */
else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 ||
revoked_rdcache)
- reply = 2; /* send revoke ack in check_caps */
+ check_caps = 2; /* send revoke ack in check_caps */
cap->issued = newcaps;
+ cap->implemented |= newcaps;
} else if (cap->issued == newcaps) {
dout("caps unchanged: %s -> %s\n",
ceph_cap_string(cap->issued), ceph_cap_string(newcaps));
@@ -2346,6 +2371,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
* pending revocation */
wake = 1;
}
+ BUG_ON(cap->issued & ~cap->implemented);
spin_unlock(&inode->i_lock);
if (writeback)
@@ -2359,7 +2385,14 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
ceph_queue_invalidate(inode);
if (wake)
wake_up(&ci->i_cap_wq);
- return reply;
+
+ if (check_caps == 1)
+ ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
+ session);
+ else if (check_caps == 2)
+ ceph_check_caps(ci, CHECK_CAPS_NODELAY, session);
+ else
+ mutex_unlock(&session->s_mutex);
}
/*
@@ -2454,8 +2487,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
break;
}
WARN_ON(capsnap->dirty_pages || capsnap->writing);
- dout(" removing cap_snap %p follows %lld\n",
- capsnap, follows);
+ dout(" removing %p cap_snap %p follows %lld\n",
+ inode, capsnap, follows);
ceph_put_snap_context(capsnap->context);
list_del(&capsnap->ci_item);
list_del(&capsnap->flushing_item);
@@ -2548,9 +2581,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
ci->i_cap_exporting_issued = cap->issued;
}
__ceph_remove_cap(cap);
- } else {
- WARN_ON(!cap);
}
+ /* else, we already released it */
spin_unlock(&inode->i_lock);
}
@@ -2621,9 +2653,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
u64 cap_id;
u64 size, max_size;
u64 tid;
- int check_caps = 0;
void *snaptrace;
- int r;
dout("handle_caps from mds%d\n", mds);
@@ -2668,8 +2698,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
case CEPH_CAP_OP_IMPORT:
handle_cap_import(mdsc, inode, h, session,
snaptrace, le32_to_cpu(h->snap_trace_len));
- check_caps = 1; /* we may have sent a RELEASE to the old auth */
- goto done;
+ ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY,
+ session);
+ goto done_unlocked;
}
/* the rest require a cap */
@@ -2686,16 +2717,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
switch (op) {
case CEPH_CAP_OP_REVOKE:
case CEPH_CAP_OP_GRANT:
- r = handle_cap_grant(inode, h, session, cap, msg->middle);
- if (r == 1)
- ceph_check_caps(ceph_inode(inode),
- CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
- session);
- else if (r == 2)
- ceph_check_caps(ceph_inode(inode),
- CHECK_CAPS_NODELAY,
- session);
- break;
+ handle_cap_grant(inode, h, session, cap, msg->middle);
+ goto done_unlocked;
case CEPH_CAP_OP_FLUSH_ACK:
handle_cap_flush_ack(inode, tid, h, session, cap);
@@ -2713,9 +2736,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
done:
mutex_unlock(&session->s_mutex);
-
- if (check_caps)
- ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, NULL);
+done_unlocked:
if (inode)
iput(inode);
return;
@@ -2838,11 +2859,18 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
struct ceph_cap *cap;
struct ceph_mds_request_release *rel = *p;
int ret = 0;
-
- dout("encode_inode_release %p mds%d drop %s unless %s\n", inode,
- mds, ceph_cap_string(drop), ceph_cap_string(unless));
+ int used = 0;
spin_lock(&inode->i_lock);
+ used = __ceph_caps_used(ci);
+
+ dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode,
+ mds, ceph_cap_string(used), ceph_cap_string(drop),
+ ceph_cap_string(unless));
+
+ /* only drop unused caps */
+ drop &= ~used;
+
cap = __get_cap_for_mds(ci, mds);
if (cap && __cap_is_valid(cap)) {
if (force ||