From 5bdca4e0768d3e0f4efa43d9a2cc8210aeb91ab9 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@inktank.com>
Date: Tue, 10 Jul 2012 11:53:34 -0700
Subject: libceph: fix messenger retry

In ancient times, the messenger could both initiate and accept connections.
An artifact if that was data structures to store/process an incoming
ceph_msg_connect request and send an outgoing ceph_msg_connect_reply.
Sadly, the negotiation code was referencing those structures and ignoring
important information (like the peer's connect_seq) from the correct ones.

Among other things, this fixes tight reconnect loops where the server sends
RETRY_SESSION and we (the client) retries with the same connect_seq as last
time.  This bug pretty easily triggered by injecting socket failures on the
MDS and running some fs workload like workunits/direct_io/test_sync_io.

Signed-off-by: Sage Weil <sage@inktank.com>

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 2521a95..44c87e7 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -163,16 +163,8 @@ struct ceph_connection {
 
 	/* connection negotiation temps */
 	char in_banner[CEPH_BANNER_MAX_LEN];
-	union {
-		struct {  /* outgoing connection */
-			struct ceph_msg_connect out_connect;
-			struct ceph_msg_connect_reply in_reply;
-		};
-		struct {  /* incoming */
-			struct ceph_msg_connect in_connect;
-			struct ceph_msg_connect_reply out_reply;
-		};
-	};
+	struct ceph_msg_connect out_connect;
+	struct ceph_msg_connect_reply in_reply;
 	struct ceph_entity_addr actual_peer_addr;
 
 	/* message out temps */
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index b332c3d..10255e8 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1423,7 +1423,7 @@ static int process_connect(struct ceph_connection *con)
 		 * dropped messages.
 		 */
 		dout("process_connect got RESET peer seq %u\n",
-		     le32_to_cpu(con->in_connect.connect_seq));
+		     le32_to_cpu(con->in_reply.connect_seq));
 		pr_err("%s%lld %s connection reset\n",
 		       ENTITY_NAME(con->peer_name),
 		       ceph_pr_addr(&con->peer_addr.in_addr));
@@ -1450,10 +1450,10 @@ static int process_connect(struct ceph_connection *con)
 		 * If we sent a smaller connect_seq than the peer has, try
 		 * again with a larger value.
 		 */
-		dout("process_connect got RETRY my seq = %u, peer_seq = %u\n",
+		dout("process_connect got RETRY_SESSION my seq %u, peer %u\n",
 		     le32_to_cpu(con->out_connect.connect_seq),
-		     le32_to_cpu(con->in_connect.connect_seq));
-		con->connect_seq = le32_to_cpu(con->in_connect.connect_seq);
+		     le32_to_cpu(con->in_reply.connect_seq));
+		con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
 		ceph_con_out_kvec_reset(con);
 		ret = prepare_write_connect(con);
 		if (ret < 0)
@@ -1468,9 +1468,9 @@ static int process_connect(struct ceph_connection *con)
 		 */
 		dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n",
 		     con->peer_global_seq,
-		     le32_to_cpu(con->in_connect.global_seq));
+		     le32_to_cpu(con->in_reply.global_seq));
 		get_global_seq(con->msgr,
-			       le32_to_cpu(con->in_connect.global_seq));
+			       le32_to_cpu(con->in_reply.global_seq));
 		ceph_con_out_kvec_reset(con);
 		ret = prepare_write_connect(con);
 		if (ret < 0)
-- 
cgit v0.10.2


From 236df3755d944c33120d77e84b5ff6f3917eb307 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Wed, 6 Jun 2012 09:15:33 -0500
Subject: rbd: Fix ceph_snap_context size calculation

ceph_snap_context->snaps is an u64 array

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
Reviewed-by: Alex Elder <elder@inktank.com>
(cherry picked from commit f9f9a1904467816452fc70740165030e84c2c659)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 65665c9..8b9c173 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -499,7 +499,7 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
 			 / sizeof (*ondisk))
 		return -EINVAL;
 	header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
-				snap_count * sizeof (*ondisk),
+				snap_count * sizeof(u64),
 				gfp_flags);
 	if (!header->snapc)
 		return -ENOMEM;
-- 
cgit v0.10.2


From 6a3ca4f18873f950895cb64ddefafb51a732e3f7 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 6 Jun 2012 09:15:33 -0500
Subject: rbd: endian bug in rbd_req_cb()

Sparse complains about this because:
drivers/block/rbd.c:996:20: warning: cast to restricted __le32
drivers/block/rbd.c:996:20: warning: cast from restricted __le16

These are set in osd_req_encode_op() and they are le16.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Alex Elder <elder@inktank.com>
(cherry picked from commit 895cfcc810e53d7d36639969c71efb9087221167)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 8b9c173..8f428a8 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -977,7 +977,7 @@ static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
 	op = (void *)(replyhead + 1);
 	rc = le32_to_cpu(replyhead->result);
 	bytes = le64_to_cpu(op->extent.length);
-	read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ);
+	read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ);
 
 	dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc);
 
-- 
cgit v0.10.2