summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/ceph/mds_client.c3
-rw-r--r--fs/ceph/super.c2
-rw-r--r--include/linux/ceph/ceph_features.h2
-rw-r--r--include/linux/ceph/ceph_fs.h4
-rw-r--r--include/linux/ceph/mon_client.h28
-rw-r--r--net/ceph/debugfs.c17
-rw-r--r--net/ceph/mon_client.c210
-rw-r--r--net/ceph/osd_client.c3
8 files changed, 174 insertions, 95 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 911d64d..b43399d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3764,7 +3764,6 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
dout("handle_map epoch %u len %d\n", epoch, (int)maplen);
/* do we need it? */
- ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch);
mutex_lock(&mdsc->mutex);
if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) {
dout("handle_map epoch %u <= our %u\n",
@@ -3791,6 +3790,8 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
__wake_requests(mdsc, &mdsc->waiting_for_map);
+ ceph_monc_got_map(&mdsc->fsc->client->monc, CEPH_SUB_MDSMAP,
+ mdsc->mdsmap->m_epoch);
mutex_unlock(&mdsc->mutex);
schedule_delayed(mdsc);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index ca4d5e8..c941fd1 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -530,7 +530,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
goto fail;
}
fsc->client->extra_mon_dispatch = extra_mon_dispatch;
- fsc->client->monc.want_mdsmap = 1;
+ ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 0, true);
fsc->mount_options = fsopt;
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 15151f3..ae2f668 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -105,6 +105,7 @@ static inline u64 ceph_sanitize_features(u64 features)
*/
#define CEPH_FEATURES_SUPPORTED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
+ CEPH_FEATURE_SUBSCRIBE2 | \
CEPH_FEATURE_RECONNECT_SEQ | \
CEPH_FEATURE_PGID64 | \
CEPH_FEATURE_PGPOOL3 | \
@@ -127,6 +128,7 @@ static inline u64 ceph_sanitize_features(u64 features)
#define CEPH_FEATURES_REQUIRED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
+ CEPH_FEATURE_SUBSCRIBE2 | \
CEPH_FEATURE_RECONNECT_SEQ | \
CEPH_FEATURE_PGID64 | \
CEPH_FEATURE_PGPOOL3 | \
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index d7d072a..bf74005 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -198,8 +198,8 @@ struct ceph_client_mount {
#define CEPH_SUBSCRIBE_ONETIME 1 /* i want only 1 update after have */
struct ceph_mon_subscribe_item {
- __le64 have_version; __le64 have;
- __u8 onetime;
+ __le64 start;
+ __u8 flags;
} __attribute__ ((packed));
struct ceph_mon_subscribe_ack {
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index 81810dc..8b2d2f0 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -68,7 +68,8 @@ struct ceph_mon_client {
bool hunting;
int cur_mon; /* last monitor i contacted */
- unsigned long sub_sent, sub_renew_after;
+ unsigned long sub_renew_after;
+ unsigned long sub_renew_sent;
struct ceph_connection con;
/* pending generic requests */
@@ -76,10 +77,12 @@ struct ceph_mon_client {
int num_generic_requests;
u64 last_tid;
- /* mds/osd map */
- int want_mdsmap;
- int want_next_osdmap; /* 1 = want, 2 = want+asked */
- u32 have_osdmap, have_mdsmap;
+ /* subs, indexed with CEPH_SUB_* */
+ struct {
+ struct ceph_mon_subscribe_item item;
+ bool want;
+ u32 have; /* epoch */
+ } subs[3];
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs_file;
@@ -93,14 +96,23 @@ extern int ceph_monmap_contains(struct ceph_monmap *m,
extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
extern void ceph_monc_stop(struct ceph_mon_client *monc);
+enum {
+ CEPH_SUB_MDSMAP = 0,
+ CEPH_SUB_MONMAP,
+ CEPH_SUB_OSDMAP,
+};
+
+extern const char *ceph_sub_str[];
+
/*
* The model here is to indicate that we need a new map of at least
- * epoch @want, and also call in when we receive a map. We will
+ * epoch @epoch, and also call in when we receive a map. We will
* periodically rerequest the map from the monitor cluster until we
* get what we want.
*/
-extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have);
-extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have);
+bool ceph_monc_want_map(struct ceph_mon_client *monc, int sub, u32 epoch,
+ bool continuous);
+void ceph_monc_got_map(struct ceph_mon_client *monc, int sub, u32 epoch);
extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 593dc2e..b902fbc 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -112,15 +112,20 @@ static int monc_show(struct seq_file *s, void *p)
struct ceph_mon_generic_request *req;
struct ceph_mon_client *monc = &client->monc;
struct rb_node *rp;
+ int i;
mutex_lock(&monc->mutex);
- if (monc->have_mdsmap)
- seq_printf(s, "have mdsmap %u\n", (unsigned int)monc->have_mdsmap);
- if (monc->have_osdmap)
- seq_printf(s, "have osdmap %u\n", (unsigned int)monc->have_osdmap);
- if (monc->want_next_osdmap)
- seq_printf(s, "want next osdmap\n");
+ for (i = 0; i < ARRAY_SIZE(monc->subs); i++) {
+ seq_printf(s, "have %s %u", ceph_sub_str[i],
+ monc->subs[i].have);
+ if (monc->subs[i].want)
+ seq_printf(s, " want %llu%s",
+ le64_to_cpu(monc->subs[i].item.start),
+ (monc->subs[i].item.flags &
+ CEPH_SUBSCRIBE_ONETIME ? "" : "+"));
+ seq_putc(s, '\n');
+ }
for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
__u16 op;
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index d6af6ca..8902991 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -140,9 +140,8 @@ static int __open_session(struct ceph_mon_client *monc)
monc->cur_mon = r % monc->monmap->num_mon;
dout("open_session num=%d r=%d -> mon%d\n",
monc->monmap->num_mon, r, monc->cur_mon);
- monc->sub_sent = 0;
monc->sub_renew_after = jiffies; /* i.e., expired */
- monc->want_next_osdmap = !!monc->want_next_osdmap;
+ monc->sub_renew_sent = 0;
dout("open_session mon%d opening\n", monc->cur_mon);
ceph_con_open(&monc->con,
@@ -189,59 +188,58 @@ static void __schedule_delayed(struct ceph_mon_client *monc)
round_jiffies_relative(delay));
}
+const char *ceph_sub_str[] = {
+ [CEPH_SUB_MDSMAP] = "mdsmap",
+ [CEPH_SUB_MONMAP] = "monmap",
+ [CEPH_SUB_OSDMAP] = "osdmap",
+};
+
/*
- * Send subscribe request for mdsmap and/or osdmap.
+ * Send subscribe request for one or more maps, according to
+ * monc->subs.
*/
static void __send_subscribe(struct ceph_mon_client *monc)
{
- dout("__send_subscribe sub_sent=%u exp=%u want_osd=%d\n",
- (unsigned int)monc->sub_sent, __sub_expired(monc),
- monc->want_next_osdmap);
- if ((__sub_expired(monc) && !monc->sub_sent) ||
- monc->want_next_osdmap == 1) {
- struct ceph_msg *msg = monc->m_subscribe;
- struct ceph_mon_subscribe_item *i;
- void *p, *end;
- int num;
-
- p = msg->front.iov_base;
- end = p + msg->front_alloc_len;
-
- num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap;
- ceph_encode_32(&p, num);
-
- if (monc->want_next_osdmap) {
- dout("__send_subscribe to 'osdmap' %u\n",
- (unsigned int)monc->have_osdmap);
- ceph_encode_string(&p, end, "osdmap", 6);
- i = p;
- i->have = cpu_to_le64(monc->have_osdmap);
- i->onetime = 1;
- p += sizeof(*i);
- monc->want_next_osdmap = 2; /* requested */
- }
- if (monc->want_mdsmap) {
- dout("__send_subscribe to 'mdsmap' %u+\n",
- (unsigned int)monc->have_mdsmap);
- ceph_encode_string(&p, end, "mdsmap", 6);
- i = p;
- i->have = cpu_to_le64(monc->have_mdsmap);
- i->onetime = 0;
- p += sizeof(*i);
- }
- ceph_encode_string(&p, end, "monmap", 6);
- i = p;
- i->have = 0;
- i->onetime = 0;
- p += sizeof(*i);
-
- msg->front.iov_len = p - msg->front.iov_base;
- msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
- ceph_msg_revoke(msg);
- ceph_con_send(&monc->con, ceph_msg_get(msg));
-
- monc->sub_sent = jiffies | 1; /* never 0 */
+ struct ceph_msg *msg = monc->m_subscribe;
+ void *p = msg->front.iov_base;
+ void *const end = p + msg->front_alloc_len;
+ int num = 0;
+ int i;
+
+ dout("%s sent %lu\n", __func__, monc->sub_renew_sent);
+
+ BUG_ON(monc->cur_mon < 0);
+
+ if (!monc->sub_renew_sent)
+ monc->sub_renew_sent = jiffies | 1; /* never 0 */
+
+ msg->hdr.version = cpu_to_le16(2);
+
+ for (i = 0; i < ARRAY_SIZE(monc->subs); i++) {
+ if (monc->subs[i].want)
+ num++;
}
+ BUG_ON(num < 1); /* monmap sub is always there */
+ ceph_encode_32(&p, num);
+ for (i = 0; i < ARRAY_SIZE(monc->subs); i++) {
+ const char *s = ceph_sub_str[i];
+
+ if (!monc->subs[i].want)
+ continue;
+
+ dout("%s %s start %llu flags 0x%x\n", __func__, s,
+ le64_to_cpu(monc->subs[i].item.start),
+ monc->subs[i].item.flags);
+ ceph_encode_string(&p, end, s, strlen(s));
+ memcpy(p, &monc->subs[i].item, sizeof(monc->subs[i].item));
+ p += sizeof(monc->subs[i].item);
+ }
+
+ BUG_ON(p != (end - 35 - (ARRAY_SIZE(monc->subs) - num) * 19));
+ msg->front.iov_len = p - msg->front.iov_base;
+ msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+ ceph_msg_revoke(msg);
+ ceph_con_send(&monc->con, ceph_msg_get(msg));
}
static void handle_subscribe_ack(struct ceph_mon_client *monc,
@@ -255,9 +253,16 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
seconds = le32_to_cpu(h->duration);
mutex_lock(&monc->mutex);
- dout("handle_subscribe_ack after %d seconds\n", seconds);
- monc->sub_renew_after = monc->sub_sent + (seconds >> 1)*HZ - 1;
- monc->sub_sent = 0;
+ if (monc->sub_renew_sent) {
+ monc->sub_renew_after = monc->sub_renew_sent +
+ (seconds >> 1) * HZ - 1;
+ dout("%s sent %lu duration %d renew after %lu\n", __func__,
+ monc->sub_renew_sent, seconds, monc->sub_renew_after);
+ monc->sub_renew_sent = 0;
+ } else {
+ dout("%s sent %lu renew after %lu, ignoring\n", __func__,
+ monc->sub_renew_sent, monc->sub_renew_after);
+ }
mutex_unlock(&monc->mutex);
return;
bad:
@@ -266,36 +271,82 @@ bad:
}
/*
- * Keep track of which maps we have
+ * Register interest in a map
+ *
+ * @sub: one of CEPH_SUB_*
+ * @epoch: X for "every map since X", or 0 for "just the latest"
*/
-int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got)
+static bool __ceph_monc_want_map(struct ceph_mon_client *monc, int sub,
+ u32 epoch, bool continuous)
+{
+ __le64 start = cpu_to_le64(epoch);
+ u8 flags = !continuous ? CEPH_SUBSCRIBE_ONETIME : 0;
+
+ dout("%s %s epoch %u continuous %d\n", __func__, ceph_sub_str[sub],
+ epoch, continuous);
+
+ if (monc->subs[sub].want &&
+ monc->subs[sub].item.start == start &&
+ monc->subs[sub].item.flags == flags)
+ return false;
+
+ monc->subs[sub].item.start = start;
+ monc->subs[sub].item.flags = flags;
+ monc->subs[sub].want = true;
+
+ return true;
+}
+
+bool ceph_monc_want_map(struct ceph_mon_client *monc, int sub, u32 epoch,
+ bool continuous)
{
+ bool need_request;
+
mutex_lock(&monc->mutex);
- monc->have_mdsmap = got;
+ need_request = __ceph_monc_want_map(monc, sub, epoch, continuous);
mutex_unlock(&monc->mutex);
- return 0;
+
+ return need_request;
}
-EXPORT_SYMBOL(ceph_monc_got_mdsmap);
+EXPORT_SYMBOL(ceph_monc_want_map);
-int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got)
+/*
+ * Keep track of which maps we have
+ *
+ * @sub: one of CEPH_SUB_*
+ */
+static void __ceph_monc_got_map(struct ceph_mon_client *monc, int sub,
+ u32 epoch)
+{
+ dout("%s %s epoch %u\n", __func__, ceph_sub_str[sub], epoch);
+
+ if (monc->subs[sub].want) {
+ if (monc->subs[sub].item.flags & CEPH_SUBSCRIBE_ONETIME)
+ monc->subs[sub].want = false;
+ else
+ monc->subs[sub].item.start = cpu_to_le64(epoch + 1);
+ }
+
+ monc->subs[sub].have = epoch;
+}
+
+void ceph_monc_got_map(struct ceph_mon_client *monc, int sub, u32 epoch)
{
mutex_lock(&monc->mutex);
- monc->have_osdmap = got;
- monc->want_next_osdmap = 0;
+ __ceph_monc_got_map(monc, sub, epoch);
mutex_unlock(&monc->mutex);
- return 0;
}
+EXPORT_SYMBOL(ceph_monc_got_map);
/*
* Register interest in the next osdmap
*/
void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
{
- dout("request_next_osdmap have %u\n", monc->have_osdmap);
+ dout("%s have %u\n", __func__, monc->subs[CEPH_SUB_OSDMAP].have);
mutex_lock(&monc->mutex);
- if (!monc->want_next_osdmap)
- monc->want_next_osdmap = 1;
- if (monc->want_next_osdmap < 2)
+ if (__ceph_monc_want_map(monc, CEPH_SUB_OSDMAP,
+ monc->subs[CEPH_SUB_OSDMAP].have + 1, false))
__send_subscribe(monc);
mutex_unlock(&monc->mutex);
}
@@ -314,15 +365,15 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
long ret;
mutex_lock(&monc->mutex);
- while (monc->have_osdmap < epoch) {
+ while (monc->subs[CEPH_SUB_OSDMAP].have < epoch) {
mutex_unlock(&monc->mutex);
if (timeout && time_after_eq(jiffies, started + timeout))
return -ETIMEDOUT;
ret = wait_event_interruptible_timeout(monc->client->auth_wq,
- monc->have_osdmap >= epoch,
- ceph_timeout_jiffies(timeout));
+ monc->subs[CEPH_SUB_OSDMAP].have >= epoch,
+ ceph_timeout_jiffies(timeout));
if (ret < 0)
return ret;
@@ -335,11 +386,14 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
EXPORT_SYMBOL(ceph_monc_wait_osdmap);
/*
- *
+ * Open a session with a random monitor. Request monmap and osdmap,
+ * which are waited upon in __ceph_open_session().
*/
int ceph_monc_open_session(struct ceph_mon_client *monc)
{
mutex_lock(&monc->mutex);
+ __ceph_monc_want_map(monc, CEPH_SUB_MONMAP, 0, true);
+ __ceph_monc_want_map(monc, CEPH_SUB_OSDMAP, 0, false);
__open_session(monc);
__schedule_delayed(monc);
mutex_unlock(&monc->mutex);
@@ -375,6 +429,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
client->monc.monmap = monmap;
kfree(old);
+ __ceph_monc_got_map(monc, CEPH_SUB_MONMAP, monc->monmap->epoch);
client->have_fsid = true;
out:
@@ -725,8 +780,14 @@ static void delayed_work(struct work_struct *work)
__validate_auth(monc);
}
- if (is_auth)
- __send_subscribe(monc);
+ if (is_auth) {
+ unsigned long now = jiffies;
+
+ dout("%s renew subs? now %lu renew after %lu\n",
+ __func__, now, monc->sub_renew_after);
+ if (time_after_eq(now, monc->sub_renew_after))
+ __send_subscribe(monc);
+ }
}
__schedule_delayed(monc);
mutex_unlock(&monc->mutex);
@@ -815,16 +876,13 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
monc->cur_mon = -1;
monc->hunting = true;
monc->sub_renew_after = jiffies;
- monc->sub_sent = 0;
+ monc->sub_renew_sent = 0;
INIT_DELAYED_WORK(&monc->delayed_work, delayed_work);
monc->generic_request_tree = RB_ROOT;
monc->num_generic_requests = 0;
monc->last_tid = 0;
- monc->have_mdsmap = 0;
- monc->have_osdmap = 0;
- monc->want_next_osdmap = 1;
return 0;
out_auth_reply:
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 5bc0537..3309112 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2187,7 +2187,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
goto bad;
done:
downgrade_write(&osdc->map_sem);
- ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
+ ceph_monc_got_map(&osdc->client->monc, CEPH_SUB_OSDMAP,
+ osdc->osdmap->epoch);
/*
* subscribe to subsequent osdmap updates if full to ensure