From 0aa12fb439838a85802ab8b7fbb9bcfc3e6e05cb Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 29 May 2010 09:12:30 -0700 Subject: sched: add wait_for_completion_killable_timeout Add missing _killable_timeout variant for wait_for_completion that will return when a timeout expires or the task is killed. CC: Ingo Molnar CC: Andreas Herrmann CC: Thomas Gleixner CC: Mike Galbraith Acked-by: Peter Zijlstra Signed-off-by: Sage Weil diff --git a/include/linux/completion.h b/include/linux/completion.h index 4a6b604..51e3145 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -83,6 +83,8 @@ extern unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout); extern unsigned long wait_for_completion_interruptible_timeout( struct completion *x, unsigned long timeout); +extern unsigned long wait_for_completion_killable_timeout( + struct completion *x, unsigned long timeout); extern bool try_wait_for_completion(struct completion *x); extern bool completion_done(struct completion *x); diff --git a/kernel/sched.c b/kernel/sched.c index 3c2a54f..4d051c7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4161,6 +4161,23 @@ int __sched wait_for_completion_killable(struct completion *x) EXPORT_SYMBOL(wait_for_completion_killable); /** + * wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable)) + * @x: holds the state of this particular completion + * @timeout: timeout value in jiffies + * + * This waits for either a completion of a specific task to be + * signaled or for a specified timeout to expire. It can be + * interrupted by a kill signal. The timeout is in jiffies. + */ +unsigned long __sched +wait_for_completion_killable_timeout(struct completion *x, + unsigned long timeout) +{ + return wait_for_common(x, timeout, TASK_KILLABLE); +} +EXPORT_SYMBOL(wait_for_completion_killable_timeout); + +/** * try_wait_for_completion - try to decrement a completion without blocking * @x: completion structure * -- cgit v0.10.2 From aa91647c898d62e869fcf35e977ab3c533be8fc1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 24 May 2010 11:15:51 -0700 Subject: ceph: make mds requests killable, not interruptible The underlying problem is that many mds requests can't be restarted. For example, a restarted create() would return -EEXIST if the original request succeeds. However, we do not want a hung MDS to hang the client too. So, use the _killable wait_for_completion variants to abort on SIGKILL but nothing else. Signed-off-by: Sage Weil diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 885aa57..08413c8 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1768,12 +1768,12 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, mutex_unlock(&mdsc->mutex); dout("do_request waiting\n"); if (req->r_timeout) { - err = (long)wait_for_completion_interruptible_timeout( + err = (long)wait_for_completion_killable_timeout( &req->r_completion, req->r_timeout); if (err == 0) err = -EIO; } else { - err = wait_for_completion_interruptible(&req->r_completion); + err = wait_for_completion_killable(&req->r_completion); } dout("do_request waited, got %d\n", err); mutex_lock(&mdsc->mutex); -- cgit v0.10.2 From e95e9a7ae4c1e7655a0438579f891b3c60178d77 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 25 May 2010 09:24:42 -0700 Subject: ceph: avoid possible null dereference ac->ops may be null; use protocol id in error message instead. Reported-by: Dan Carpenter Signed-off-by: Sage Weil diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 9f46de2..01c4a1c 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c @@ -217,8 +217,8 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, if (ac->protocol != protocol) { ret = ceph_auth_init_protocol(ac, protocol); if (ret) { - pr_err("error %d on auth method %s init\n", - ret, ac->ops->name); + pr_err("error %d on auth protocol %d init\n", + ret, protocol); goto out; } } -- cgit v0.10.2 From 984c76908efd3c6795aa03dff16a8fc3496af99f Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sun, 23 May 2010 21:47:58 +0200 Subject: ceph: removed duplicated #includes fs/ceph/auth.c: linux/slab.h is included more than once. fs/ceph/super.h: linux/slab.h is included more than once. Acked-by: Christoph Lameter Signed-off-by: Andrea Gelmini Signed-off-by: Sage Weil diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 01c4a1c..a28ebdf 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c @@ -1,7 +1,6 @@ #include "ceph_debug.h" #include -#include #include #include diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 3725c9e..d84a252 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include -- cgit v0.10.2 From 09c4d6a7d40dd26c1b35674c582382b7ea551368 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 25 May 2010 15:38:06 -0700 Subject: ceph: do not resend mon requests on auth ticket renewal We only want to send pending mon requests when we successfully authenticate. If we are already authenticated, like when we renew our ticket, there is no need to resend pending requests. Signed-off-by: Sage Weil diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index f6510a4..21c62e9 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -704,8 +704,11 @@ static void handle_auth_reply(struct ceph_mon_client *monc, struct ceph_msg *msg) { int ret; + int was_auth = 0; mutex_lock(&monc->mutex); + if (monc->auth->ops) + was_auth = monc->auth->ops->is_authenticated(monc->auth); monc->pending_auth = 0; ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base, msg->front.iov_len, @@ -716,7 +719,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc, wake_up(&monc->client->auth_wq); } else if (ret > 0) { __send_prepared_auth_request(monc, ret); - } else if (monc->auth->ops->is_authenticated(monc->auth)) { + } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { dout("authenticated, starting session\n"); monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; -- cgit v0.10.2 From a41359fa355e7b450c610ed8e913d5d75c3c9c3b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 25 May 2010 15:39:06 -0700 Subject: ceph: renew auth tickets before they expire We were only requesting renewal after our tickets expire; do so before that. Most of the low-level logic for this was already there; just use it. Signed-off-by: Sage Weil diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index a28ebdf..89490bea 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c @@ -246,7 +246,7 @@ int ceph_build_auth(struct ceph_auth_client *ac, if (!ac->protocol) return ceph_auth_build_hello(ac, msg_buf, msg_len); BUG_ON(!ac->ops); - if (!ac->ops->is_authenticated(ac)) + if (ac->ops->should_authenticate(ac)) return ceph_build_auth_request(ac, msg_buf, msg_len); return 0; } diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h index 4429a70..d38a2fb 100644 --- a/fs/ceph/auth.h +++ b/fs/ceph/auth.h @@ -24,6 +24,12 @@ struct ceph_auth_client_ops { int (*is_authenticated)(struct ceph_auth_client *ac); /* + * true if we should (re)authenticate, e.g., when our tickets + * are getting old and crusty. + */ + int (*should_authenticate)(struct ceph_auth_client *ac); + + /* * build requests and process replies during monitor * handshake. if handle_reply returns -EAGAIN, we build * another request. diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c index 24407c1..ad1dc21 100644 --- a/fs/ceph/auth_none.c +++ b/fs/ceph/auth_none.c @@ -31,6 +31,13 @@ static int is_authenticated(struct ceph_auth_client *ac) return !xi->starting; } +static int should_authenticate(struct ceph_auth_client *ac) +{ + struct ceph_auth_none_info *xi = ac->private; + + return xi->starting; +} + /* * the generic auth code decode the global_id, and we carry no actual * authenticate state, so nothing happens here. @@ -98,6 +105,7 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = { .reset = reset, .destroy = destroy, .is_authenticated = is_authenticated, + .should_authenticate = should_authenticate, .handle_reply = handle_reply, .create_authorizer = ceph_auth_none_create_authorizer, .destroy_authorizer = ceph_auth_none_destroy_authorizer, diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 7b20623..83d4d27 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c @@ -27,6 +27,17 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac) return (ac->want_keys & xi->have_keys) == ac->want_keys; } +static int ceph_x_should_authenticate(struct ceph_auth_client *ac) +{ + struct ceph_x_info *xi = ac->private; + int need; + + ceph_x_validate_tickets(ac, &need); + dout("ceph_x_should_authenticate want=%d need=%d have=%d\n", + ac->want_keys, need, xi->have_keys); + return need != 0; +} + static int ceph_x_encrypt_buflen(int ilen) { return sizeof(struct ceph_x_encrypt_header) + ilen + 16 + @@ -620,6 +631,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, static const struct ceph_auth_client_ops ceph_x_ops = { .name = "x", .is_authenticated = ceph_x_is_authenticated, + .should_authenticate = ceph_x_should_authenticate, .build_request = ceph_x_build_request, .handle_reply = ceph_x_handle_reply, .create_authorizer = ceph_x_create_authorizer, -- cgit v0.10.2 From 7e34bc524ecae3a04d8cc427ee76ddad826a937b Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 22 May 2010 12:01:14 +0200 Subject: fs/ceph: Use ERR_CAST Use ERR_CAST(x) rather than ERR_PTR(PTR_ERR(x)). The former makes more clear what is the purpose of the operation, which otherwise looks like a no-op. In the case of fs/ceph/inode.c, ERR_CAST is not needed, because the type of the returned value is the same as the type of the enclosing function. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ type T; T x; identifier f; @@ T f (...) { <+... - ERR_PTR(PTR_ERR(x)) + x ...+> } @@ expression x; @@ - ERR_PTR(PTR_ERR(x)) + ERR_CAST(x) // Signed-off-by: Julia Lawall Signed-off-by: Sage Weil diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 4fd3090..a86c1d5 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -587,7 +587,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_dentry = dget(dentry); req->r_num_caps = 2; /* we only need inode linkage */ diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 1744764..4480cb1 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -133,7 +133,7 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH, USE_ANY_MDS); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_ino1 = vino; req->r_ino2.ino = cfh->parent_ino; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 0611ec3..f06f902 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -230,7 +230,7 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, /* do the open */ req = prepare_open_request(dir->i_sb, flags, mode); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_dentry = dget(dentry); req->r_num_caps = 2; if (flags & O_CREAT) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index a81b8b6..226f5a5 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -69,7 +69,7 @@ struct inode *ceph_get_snapdir(struct inode *parent) BUG_ON(!S_ISDIR(parent->i_mode)); if (IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); + return inode; inode->i_mode = parent->i_mode; inode->i_uid = parent->i_uid; inode->i_gid = parent->i_gid; diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index cfdd8f4..ddc656f 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c @@ -706,7 +706,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, len, *p, end); newcrush = crush_decode(*p, min(*p+len, end)); if (IS_ERR(newcrush)) - return ERR_PTR(PTR_ERR(newcrush)); + return ERR_CAST(newcrush); } /* new flags? */ diff --git a/fs/ceph/super.c b/fs/ceph/super.c index bac1389..9b46bb9 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -738,7 +738,7 @@ static struct dentry *open_root_dentry(struct ceph_client *client, dout("open_root_inode opening '%s'\n", path); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_path1 = kstrdup(path, GFP_NOFS); req->r_ino1.ino = CEPH_INO_ROOT; req->r_ino1.snap = CEPH_NOSNAP; -- cgit v0.10.2 From dd1c9057366f329911180e9000e2b425f23fc287 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 25 May 2010 16:45:25 -0700 Subject: ceph: make lease code DN specific The lease code includes a mask in the CEPH_LOCK_* namespace, but that namespace is changing, and only one mask (formerly _DN == 1) is used, so hard code for that value for now. If we ever extend this code to handle leases over different data types we can extend it accordingly. Signed-off-by: Sage Weil diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 3b9eeed..2fa992e 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h @@ -265,16 +265,17 @@ extern const char *ceph_mds_state_name(int s); * - they also define the lock ordering by the MDS * - a few of these are internal to the mds */ -#define CEPH_LOCK_DN 1 -#define CEPH_LOCK_ISNAP 2 -#define CEPH_LOCK_IVERSION 4 /* mds internal */ -#define CEPH_LOCK_IFILE 8 /* mds internal */ -#define CEPH_LOCK_IAUTH 32 -#define CEPH_LOCK_ILINK 64 -#define CEPH_LOCK_IDFT 128 /* dir frag tree */ -#define CEPH_LOCK_INEST 256 /* mds internal */ -#define CEPH_LOCK_IXATTR 512 -#define CEPH_LOCK_INO 2048 /* immutable inode bits; not a lock */ +#define CEPH_LOCK_DVERSION 1 +#define CEPH_LOCK_DN 2 +#define CEPH_LOCK_ISNAP 16 +#define CEPH_LOCK_IVERSION 32 /* mds internal */ +#define CEPH_LOCK_IFILE 64 +#define CEPH_LOCK_IAUTH 128 +#define CEPH_LOCK_ILINK 256 +#define CEPH_LOCK_IDFT 512 /* dir frag tree */ +#define CEPH_LOCK_INEST 1024 /* mds internal */ +#define CEPH_LOCK_IXATTR 2048 +#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ /* client_session ops */ enum { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 08413c8..5a88b7b 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2541,7 +2541,7 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, return; lease = msg->front.iov_base; lease->action = action; - lease->mask = cpu_to_le16(CEPH_LOCK_DN); + lease->mask = cpu_to_le16(1); lease->ino = cpu_to_le64(ceph_vino(inode).ino); lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap); lease->seq = cpu_to_le32(seq); @@ -2571,7 +2571,7 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, BUG_ON(inode == NULL); BUG_ON(dentry == NULL); - BUG_ON(mask != CEPH_LOCK_DN); + BUG_ON(mask == 0); /* is dentry lease valid? */ spin_lock(&dentry->d_lock); -- cgit v0.10.2 From a922d38fd10d55d5033f10df15baf966e8f5b18c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 29 May 2010 09:41:23 -0700 Subject: ceph: close out mds, osd connections before stopping auth The auth module (part of the mon_client) is needed to free any ceph_authorizer(s) used by the mds and osd connections. Flush the msgr workqueue before stopping monc to ensure that the destroy_authorizer auth op is available when those connections are closed out. Signed-off-by: Sage Weil diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 60b7483..64b8b1f 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -120,6 +120,12 @@ void ceph_msgr_exit(void) destroy_workqueue(ceph_msgr_wq); } +void ceph_msgr_flush() +{ + flush_workqueue(ceph_msgr_wq); +} + + /* * socket callback functions */ diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index 00a9430..76fbc95 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -213,6 +213,7 @@ extern int ceph_parse_ips(const char *c, const char *end, extern int ceph_msgr_init(void); extern void ceph_msgr_exit(void); +extern void ceph_msgr_flush(void); extern struct ceph_messenger *ceph_messenger_create( struct ceph_entity_addr *myaddr); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9b46bb9..5cf6fba 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -669,9 +669,17 @@ static void ceph_destroy_client(struct ceph_client *client) /* unmount */ ceph_mdsc_stop(&client->mdsc); - ceph_monc_stop(&client->monc); ceph_osdc_stop(&client->osdc); + /* + * make sure mds and osd connections close out before destroying + * the auth module, which is needed to free those connections' + * ceph_authorizers. + */ + ceph_msgr_flush(); + + ceph_monc_stop(&client->monc); + ceph_adjust_min_caps(-client->min_caps); ceph_debugfs_client_cleanup(client); -- cgit v0.10.2 From 79494d1b9b92259eb40ea6e939ba5aff4b8de5f1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 27 May 2010 14:15:49 -0700 Subject: ceph: fix leak of osd authorizer Release the ceph_authorizer when releasing osd state. Signed-off-by: Sage Weil diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index afa7bb3..d25b4ad 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -361,8 +361,13 @@ static void put_osd(struct ceph_osd *osd) { dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref), atomic_read(&osd->o_ref) - 1); - if (atomic_dec_and_test(&osd->o_ref)) + if (atomic_dec_and_test(&osd->o_ref)) { + struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth; + + if (osd->o_authorizer) + ac->ops->destroy_authorizer(ac, osd->o_authorizer); kfree(osd); + } } /* -- cgit v0.10.2 From 2a8e5e3637e2fc058798f5d3626f525729ffaaaf Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 28 May 2010 16:43:16 -0700 Subject: ceph: clean up on forwarded aborted mds request If an mds request is aborted (timeout, SIGKILL), it is left registered to keep our state in sync with the mds. If we get a forward notification, though, we know the request didn't succeed and we can unregister it safely. We were trying to resend it, but then bailing out (and not unregistering) in __do_request. Signed-off-by: Sage Weil diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 5a88b7b..b49f128 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2014,16 +2014,21 @@ static void handle_forward(struct ceph_mds_client *mdsc, mutex_lock(&mdsc->mutex); req = __lookup_request(mdsc, tid); if (!req) { - dout("forward %llu to mds%d - req dne\n", tid, next_mds); + dout("forward tid %llu to mds%d - req dne\n", tid, next_mds); goto out; /* dup reply? */ } - if (fwd_seq <= req->r_num_fwd) { - dout("forward %llu to mds%d - old seq %d <= %d\n", + if (req->r_aborted) { + dout("forward tid %llu aborted, unregistering\n", tid); + __unregister_request(mdsc, req); + } else if (fwd_seq <= req->r_num_fwd) { + dout("forward tid %llu to mds%d - old seq %d <= %d\n", tid, next_mds, req->r_num_fwd, fwd_seq); } else { /* resend. forward race not possible; mds would drop */ - dout("forward %llu to mds%d (we resend)\n", tid, next_mds); + dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); + BUG_ON(req->r_err); + BUG_ON(req->r_got_result); req->r_num_fwd = fwd_seq; req->r_resend_mds = next_mds; put_request_session(req); -- cgit v0.10.2