From 634696b197411e7a95b346d6e5c21841f29fcedd Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 15 Apr 2016 13:33:03 -0400 Subject: tipc: guarantee peer bearer id exchange after reboot When a link endpoint is going down locally, e.g., because its interface is being stopped, it will spontaneously send out a RESET message to its peer, informing it about this fact. This saves the peer from detecting the failure via probing, and hence gives both speedier and less resource consuming failure detection on the peer side. According to the link FSM, a receiver of a RESET message, ignoring the reason for it, must now consider the sender ready to come back up, and starts periodically sending out ACTIVATE messages to the peer in order to re-establish the link. Also, according to the FSM, the receiver of an ACTIVATE message can now go directly to state ESTABLISHED and start sending regular traffic packets. This is a well-proven and robust FSM. However, in the case of a reboot, there is a small possibilty that link endpoint on the rebooted node may have been re-created with a new bearer identity between the moment it sent its (pre-boot) RESET and the moment it receives the ACTIVATE from the peer. The new bearer identity cannot be known by the peer according to this scenario, since traffic headers don't convey such information. This is a problem, because both endpoints need to know the correct value of the peer's bearer id at any moment in time in order to be able to produce correct link events for their users. The only way to guarantee this is to enforce a full setup message exchange (RESET + ACTIVATE) even after the reboot, since those messages carry the bearer idientity in their header. In this commit we do this by introducing and setting a "stopping" bit in the header of the spontaneously generated RESET messages, informing the peer that the sender will not be immediately ready to re-establish the link. A receiver seeing this bit must act as if this were a locally detected connectivity failure, and hence has to go through a full two- way setup message exchange before any link can be re-established. Although never reported, this problem seems to have always been around. This protocol addition is fully backwards compatible. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/link.c b/net/tipc/link.c index 7d2bb3e..8b98faf 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1140,11 +1140,17 @@ int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { int mtyp = RESET_MSG; + struct sk_buff *skb; if (l->state == LINK_ESTABLISHING) mtyp = ACTIVATE_MSG; tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq); + + /* Inform peer that this endpoint is going down if applicable */ + skb = skb_peek_tail(xmitq); + if (skb && (l->state == LINK_RESET)) + msg_set_peer_stopping(buf_msg(skb), 1); } /* tipc_link_build_nack_msg: prepare link nack message for transmission @@ -1411,7 +1417,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, l->priority = peers_prio; /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ - if ((mtyp == RESET_MSG) || !link_is_up(l)) + if (msg_peer_stopping(hdr)) + rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + else if ((mtyp == RESET_MSG) || !link_is_up(l)) rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); /* ACTIVATE_MSG takes up link if it was already locally reset */ diff --git a/net/tipc/msg.h b/net/tipc/msg.h index f34f639..58bf515 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -715,6 +715,16 @@ static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r) msg_set_bits(m, 5, 12, 0x1, r); } +static inline u32 msg_peer_stopping(struct tipc_msg *m) +{ + return msg_bits(m, 5, 13, 0x1); +} + +static inline void msg_set_peer_stopping(struct tipc_msg *m, u32 s) +{ + msg_set_bits(m, 5, 13, 0x1, s); +} + static inline char *msg_media_addr(struct tipc_msg *m) { return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; -- cgit v0.10.2 From 88e8ac7000dc7ccf99975cc4070907e26a1027f9 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 15 Apr 2016 13:33:04 -0400 Subject: tipc: reduce transmission rate of reset messages when link is down When a link is down, it will continuously try to re-establish contact with the peer by sending out a RESET or an ACTIVATE message at each timeout interval. The default value for this interval is currently 375 ms. This is wasteful, and may become a problem in very large clusters with dozens or hundreds of nodes being down simultaneously. We now introduce a simple backoff algorithm for these cases. The first five messages are sent at default rate; thereafter a message is sent only each 16th timer interval. This will cover the vast majority of link recycling cases, since the endpoint starting last will transmit at the higher speed, and the link should normally be established well be before the rate needs to be reduced. The only case where we will see a degradation of link re-establishment times is when the endpoints remain intact, and a glitch in the transmission media is causing the link reset. We will then experience a worst-case re-establishing time of 6 seconds, something we deem acceptable. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/link.c b/net/tipc/link.c index 8b98faf..238b125 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -140,6 +140,7 @@ struct tipc_link { char if_name[TIPC_MAX_IF_NAME]; u32 priority; char net_plane; + u16 rst_cnt; /* Failover/synch */ u16 drop_point; @@ -701,8 +702,6 @@ static void link_profile_stats(struct tipc_link *l) /* tipc_link_timeout - perform periodic task as instructed from node timeout */ -/* tipc_link_timeout - perform periodic task as instructed from node timeout - */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { int rc = 0; @@ -730,7 +729,8 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) l->silent_intv_cnt++; break; case LINK_RESET: - xmit = true; + xmit = l->rst_cnt++ <= 4; + xmit |= !(l->rst_cnt % 16); mtyp = RESET_MSG; break; case LINK_ESTABLISHING: @@ -833,6 +833,7 @@ void tipc_link_reset(struct tipc_link *l) l->rcv_nxt = 1; l->acked = 0; l->silent_intv_cnt = 0; + l->rst_cnt = 0; l->stats.recv_info = 0; l->stale_count = 0; l->bc_peer_is_up = false; -- cgit v0.10.2 From 42b18f605feaf7aa1825b35656bb7d6fdc132b45 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 15 Apr 2016 13:33:05 -0400 Subject: tipc: refactor function tipc_link_timeout() The function tipc_link_timeout() is unnecessary complex, and can easily be made more readable. We do that with this commit. The only functional change is that we remove a redundant test for whether the broadcast link is up or not. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/link.c b/net/tipc/link.c index 238b125..774ad3c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -704,37 +704,33 @@ static void link_profile_stats(struct tipc_link *l) */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { - int rc = 0; - int mtyp = STATE_MSG; - bool xmit = false; - bool prb = false; + int mtyp, rc = 0; + bool state = false; + bool probe = false; + bool setup = false; u16 bc_snt = l->bc_sndlink->snd_nxt - 1; u16 bc_acked = l->bc_rcvlink->acked; - bool bc_up = link_is_up(l->bc_rcvlink); link_profile_stats(l); switch (l->state) { case LINK_ESTABLISHED: case LINK_SYNCHING: - if (!l->silent_intv_cnt) { - if (bc_up && (bc_acked != bc_snt)) - xmit = true; - } else if (l->silent_intv_cnt <= l->abort_limit) { - xmit = true; - prb = true; - } else { - rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT); - } - l->silent_intv_cnt++; + if (l->silent_intv_cnt > l->abort_limit) + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + mtyp = STATE_MSG; + state = bc_acked != bc_snt; + probe = l->silent_intv_cnt; + if (probe) + l->silent_intv_cnt++; break; case LINK_RESET: - xmit = l->rst_cnt++ <= 4; - xmit |= !(l->rst_cnt % 16); + setup = l->rst_cnt++ <= 4; + setup |= !(l->rst_cnt % 16); mtyp = RESET_MSG; break; case LINK_ESTABLISHING: - xmit = true; + setup = true; mtyp = ACTIVATE_MSG; break; case LINK_PEER_RESET: @@ -745,8 +741,8 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) break; } - if (xmit) - tipc_link_build_proto_msg(l, mtyp, prb, 0, 0, 0, xmitq); + if (state || probe || setup) + tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, xmitq); return rc; } -- cgit v0.10.2 From de7e07f9ee14f47d05aa43046404c2904f0247dc Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 15 Apr 2016 13:33:06 -0400 Subject: tipc: ensure that first packets on link are sent in order In some link establishment scenarios we see that packet #2 may be sent out before packet #1, forcing the receiver to demand retransmission of the missing packet. This is harmless, but may cause confusion among people tracing the packet flow. Since this is extremely easy to fix, we do so by adding en extra send call to the bearer immediately after the link has come up. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/node.c b/net/tipc/node.c index ace178f..b00e12c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -581,8 +581,12 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, static void tipc_node_link_up(struct tipc_node *n, int bearer_id, struct sk_buff_head *xmitq) { + struct tipc_media_addr *maddr; + tipc_node_write_lock(n); __tipc_node_link_up(n, bearer_id, xmitq); + maddr = &n->links[bearer_id].maddr; + tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr); tipc_node_write_unlock(n); } -- cgit v0.10.2 From 34b9cd64c889d41eb990aec33fc185cab706c9b0 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 15 Apr 2016 13:33:07 -0400 Subject: tipc: let first message on link be a state message According to the link FSM, a received traffic packet can take a link from state ESTABLISHING to ESTABLISHED, but the link can still not be fully set up in one atomic operation. This means that even if the the very first packet on the link is a traffic packet with sequence number 1 (one), it has to be dropped and retransmitted. This can be avoided if we let the mentioned packet be preceded by a LINK_PROTOCOL/STATE message, which takes up the endpoint before the arrival of the traffic. We add this small feature in this commit. This is a fully compatible change. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/link.c b/net/tipc/link.c index 774ad3c..2e28a7d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1107,12 +1107,12 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) return released; } -/* tipc_link_build_ack_msg: prepare link acknowledge message for transmission +/* tipc_link_build_state_msg: prepare link state message for transmission * * Note that sending of broadcast ack is coordinated among nodes, to reduce * risk of ack storms towards the sender */ -int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) +int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { if (!l) return 0; @@ -1222,7 +1222,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, if (!tipc_data_input(l, skb, l->inputq)) rc |= tipc_link_input(l, skb, l->inputq); if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) - rc |= tipc_link_build_ack_msg(l, xmitq); + rc |= tipc_link_build_state_msg(l, xmitq); if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK)) break; } while ((skb = __skb_dequeue(defq))); diff --git a/net/tipc/link.h b/net/tipc/link.h index 6a94175..d7e9d42 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -123,7 +123,7 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); -int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); +int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq); void tipc_link_add_bc_peer(struct tipc_link *snd_l, struct tipc_link *uc_l, struct sk_buff_head *xmitq); diff --git a/net/tipc/node.c b/net/tipc/node.c index b00e12c..68d9f7b 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -545,6 +545,9 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, pr_debug("Established link <%s> on network plane %c\n", tipc_link_name(nl), tipc_link_plane(nl)); + /* Ensure that a STATE message goes first */ + tipc_link_build_state_msg(nl, xmitq); + /* First link? => give it both slots */ if (!ol) { *slot0 = bearer_id; @@ -1283,7 +1286,7 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id /* Broadcast ACKs are sent on a unicast link */ if (rc & TIPC_LINK_SND_BC_ACK) { tipc_node_read_lock(n); - tipc_link_build_ack_msg(le->link, &xmitq); + tipc_link_build_state_msg(le->link, &xmitq); tipc_node_read_unlock(n); } -- cgit v0.10.2