diff options
Diffstat (limited to 'net/rds/tcp_listen.c')
-rw-r--r-- | net/rds/tcp_listen.c | 76 |
1 files changed, 64 insertions, 12 deletions
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 245542c..e0b23fb 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -70,6 +70,52 @@ bail: return ret; } +/* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the + * client's ipaddr < server's ipaddr. Otherwise, close the accepted + * socket and force a reconneect from smaller -> larger ip addr. The reason + * we special case cp_index 0 is to allow the rds probe ping itself to itself + * get through efficiently. + * Since reconnects are only initiated from the node with the numerically + * smaller ip address, we recycle conns in RDS_CONN_ERROR on the passive side + * by moving them to CONNECTING in this function. + */ +struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) +{ + int i; + bool peer_is_smaller = (conn->c_faddr < conn->c_laddr); + int npaths = conn->c_npaths; + + if (npaths <= 1) { + struct rds_conn_path *cp = &conn->c_path[0]; + int ret; + + ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, + RDS_CONN_CONNECTING); + if (!ret) + rds_conn_path_transition(cp, RDS_CONN_ERROR, + RDS_CONN_CONNECTING); + return cp->cp_transport_data; + } + + /* for mprds, paths with cp_index > 0 MUST be initiated by the peer + * with the smaller address. + */ + if (!peer_is_smaller) + return NULL; + + for (i = 1; i < npaths; i++) { + struct rds_conn_path *cp = &conn->c_path[i]; + + if (rds_conn_path_transition(cp, RDS_CONN_DOWN, + RDS_CONN_CONNECTING) || + rds_conn_path_transition(cp, RDS_CONN_ERROR, + RDS_CONN_CONNECTING)) { + return cp->cp_transport_data; + } + } + return NULL; +} + int rds_tcp_accept_one(struct socket *sock) { struct socket *new_sock = NULL; @@ -78,6 +124,7 @@ int rds_tcp_accept_one(struct socket *sock) struct inet_sock *inet; struct rds_tcp_connection *rs_tcp = NULL; int conn_state; + struct rds_conn_path *cp; if (!sock) /* module unload or netns delete in progress */ return -ENETUNREACH; @@ -118,11 +165,14 @@ int rds_tcp_accept_one(struct socket *sock) * If the client reboots, this conn will need to be cleaned up. * rds_tcp_state_change() will do that cleanup */ - rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data; - rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING); - mutex_lock(&rs_tcp->t_conn_lock); - conn_state = rds_conn_state(conn); - if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP) + rs_tcp = rds_tcp_accept_one_path(conn); + if (!rs_tcp) + goto rst_nsk; + mutex_lock(&rs_tcp->t_conn_path_lock); + cp = rs_tcp->t_cpath; + conn_state = rds_conn_path_state(cp); + if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP && + conn_state != RDS_CONN_ERROR) goto rst_nsk; if (rs_tcp->t_sock) { /* Need to resolve a duelling SYN between peers. @@ -132,17 +182,17 @@ int rds_tcp_accept_one(struct socket *sock) * c_transport_data. */ if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr) || - !conn->c_outgoing) { + !cp->cp_outgoing) { goto rst_nsk; } else { - rds_tcp_reset_callbacks(new_sock, conn); - conn->c_outgoing = 0; + rds_tcp_reset_callbacks(new_sock, cp); + cp->cp_outgoing = 0; /* rds_connect_path_complete() marks RDS_CONN_UP */ - rds_connect_path_complete(conn, RDS_CONN_RESETTING); + rds_connect_path_complete(cp, RDS_CONN_RESETTING); } } else { - rds_tcp_set_callbacks(new_sock, conn); - rds_connect_path_complete(conn, RDS_CONN_CONNECTING); + rds_tcp_set_callbacks(new_sock, cp); + rds_connect_path_complete(cp, RDS_CONN_CONNECTING); } new_sock = NULL; ret = 0; @@ -153,7 +203,7 @@ rst_nsk: ret = 0; out: if (rs_tcp) - mutex_unlock(&rs_tcp->t_conn_lock); + mutex_unlock(&rs_tcp->t_conn_path_lock); if (new_sock) sock_release(new_sock); return ret; @@ -180,6 +230,8 @@ void rds_tcp_listen_data_ready(struct sock *sk) */ if (sk->sk_state == TCP_LISTEN) rds_tcp_accept_work(sk); + else + ready = rds_tcp_listen_sock_def_readable(sock_net(sk)); out: read_unlock_bh(&sk->sk_callback_lock); |