diff options
4 files changed, 125 insertions, 73 deletions
/*
 * Patch summary (review annotation; placed before the first "diff --git"
 * header so it is not part of any hunk):
 *
 * Touches o2iblnd.c, o2iblnd.h, o2iblnd_cb.c and o2iblnd_modparams.c.
 * The settings map_on_demand, fmr_pool_size, fmr_flush_trigger, fmr_cache,
 * peercredits_hiw and concurrent_sends are removed from the global
 * kib_tunables_t and are instead read from the per-NI structure
 * struct lnet_ioctl_config_o2iblnd_tunables, reached through
 * ni->ni_lnd_tunables->lt_tun_u.lt_o2ib.
 *
 * kiblnd_tunables_setup() allocates ni->ni_lnd_tunables when it is NULL
 * (returning -ENOMEM on failure), copies default_tunables into it, sets
 * lnd_version to 0, and fills lnd_peercredits_hiw, lnd_fmr_pool_size,
 * lnd_fmr_flush_trigger and lnd_fmr_cache from the module parameters when
 * the per-NI value is zero.
 *
 * kiblnd_find_rd_dma_mr() now takes the lnet_ni instead of the HCA device,
 * and kiblnd_init_fmr_poolset() now takes ncpts plus the tunables and
 * computes pool size / flush trigger itself; fps_cache is a new poolset
 * field consumed by kiblnd_alloc_fmr_pool().
 *
 * Change relative to the scraped text: IBLND_CREDITS_HIGHWATER(t, v) now
 * expands its first argument as (t) so that a non-trivial expression passed
 * as t cannot bind incorrectly to the -> operator. The edit stays on the
 * same added line, so hunk line counts are unchanged.
 *
 * NOTE(review): "if (!tunables->lnd_fmr_cache) tunables->lnd_fmr_cache =
 * fmr_cache;" means a per-NI value of 0 is replaced by the module parameter,
 * so 0 cannot be used per NI to turn the cache off while the module
 * parameter is non-zero - confirm this is intended.
 *
 * NOTE(review): kiblnd_net_init_pools() changes "int rc = 0;" to "int rc;".
 * The code following the create_tx_pool label is outside the hunk context
 * shown here - confirm every path to the final return assigns rc.
 */
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c index 89c879d..d99b4fa 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c @@ -1283,16 +1283,22 @@ static void kiblnd_map_tx_pool(kib_tx_pool_t *tpo) } } -struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd, +struct ib_mr *kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd, int negotiated_nfrags) { - __u16 nfrags = (negotiated_nfrags != -1) ? - negotiated_nfrags : *kiblnd_tunables.kib_map_on_demand; + kib_net_t *net = ni->ni_data; + kib_hca_dev_t *hdev = net->ibn_dev->ibd_hdev; + struct lnet_ioctl_config_o2iblnd_tunables *tunables; + __u16 nfrags; + int mod; + + tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + mod = tunables->lnd_map_on_demand; + nfrags = (negotiated_nfrags != -1) ? negotiated_nfrags : mod; LASSERT(hdev->ibh_mrs); - if (*kiblnd_tunables.kib_map_on_demand > 0 && - nfrags <= rd->rd_nfrags) + if (mod > 0 && nfrags <= rd->rd_nfrags) return NULL; return hdev->ibh_mrs; @@ -1337,16 +1343,20 @@ static void kiblnd_destroy_fmr_pool_list(struct list_head *head) } } -static int kiblnd_fmr_pool_size(int ncpts) +static int +kiblnd_fmr_pool_size(struct lnet_ioctl_config_o2iblnd_tunables *tunables, + int ncpts) { - int size = *kiblnd_tunables.kib_fmr_pool_size / ncpts; + int size = tunables->lnd_fmr_pool_size / ncpts; return max(IBLND_FMR_POOL, size); } -static int kiblnd_fmr_flush_trigger(int ncpts) +static int +kiblnd_fmr_flush_trigger(struct lnet_ioctl_config_o2iblnd_tunables *tunables, + int ncpts) { - int size = *kiblnd_tunables.kib_fmr_flush_trigger / ncpts; + int size = tunables->lnd_fmr_flush_trigger / ncpts; return max(IBLND_FMR_POOL_FLUSH, size); } @@ -1362,7 +1372,7 @@ static int kiblnd_alloc_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo) .dirty_watermark = fps->fps_flush_trigger, .flush_function = NULL, .flush_arg =
NULL, - .cache = !!*kiblnd_tunables.kib_fmr_cache}; + .cache = !!fps->fps_cache }; int rc = 0; fpo->fmr.fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd, @@ -1508,9 +1518,10 @@ static void kiblnd_fini_fmr_poolset(kib_fmr_poolset_t *fps) } } -static int kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, - kib_net_t *net, int pool_size, - int flush_trigger) +static int +kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, int ncpts, + kib_net_t *net, + struct lnet_ioctl_config_o2iblnd_tunables *tunables) { kib_fmr_pool_t *fpo; int rc; @@ -1519,8 +1530,11 @@ static int kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, fps->fps_net = net; fps->fps_cpt = cpt; - fps->fps_pool_size = pool_size; - fps->fps_flush_trigger = flush_trigger; + + fps->fps_pool_size = kiblnd_fmr_pool_size(tunables, ncpts); + fps->fps_flush_trigger = kiblnd_fmr_flush_trigger(tunables, ncpts); + fps->fps_cache = tunables->lnd_fmr_cache; + spin_lock_init(&fps->fps_lock); INIT_LIST_HEAD(&fps->fps_pool_list); INIT_LIST_HEAD(&fps->fps_failed_pool_list); @@ -2150,25 +2164,28 @@ static void kiblnd_net_fini_pools(kib_net_t *net) } } -static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts) +static int kiblnd_net_init_pools(kib_net_t *net, lnet_ni_t *ni, __u32 *cpts, + int ncpts) { + struct lnet_ioctl_config_o2iblnd_tunables *tunables; unsigned long flags; int cpt; - int rc = 0; + int rc; int i; + tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - if (!*kiblnd_tunables.kib_map_on_demand) { + if (!tunables->lnd_map_on_demand) { read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); goto create_tx_pool; } read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - if (*kiblnd_tunables.kib_fmr_pool_size < - *kiblnd_tunables.kib_ntx / 4) { + if (tunables->lnd_fmr_pool_size < *kiblnd_tunables.kib_ntx / 4) { CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n", - *kiblnd_tunables.kib_fmr_pool_size, +
tunables->lnd_fmr_pool_size, *kiblnd_tunables.kib_ntx / 4); rc = -EINVAL; goto failed; @@ -2198,9 +2215,8 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts) for (i = 0; i < ncpts; i++) { cpt = !cpts ? i : cpts[i]; - rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, net, - kiblnd_fmr_pool_size(ncpts), - kiblnd_fmr_flush_trigger(ncpts)); + rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, ncpts, + net, tunables); if (rc) { CERROR("Can't initialize FMR pool for CPT %d: %d\n", cpt, rc); @@ -2961,7 +2977,7 @@ static int kiblnd_startup(lnet_ni_t *ni) if (rc) goto failed; - rc = kiblnd_net_init_pools(net, ni->ni_cpts, ni->ni_ncpts); + rc = kiblnd_net_init_pools(net, ni, ni->ni_cpts, ni->ni_ncpts); if (rc) { CERROR("Failed to initialize NI pools: %d\n", rc); goto failed; diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h index fffae0c..d458773b 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h @@ -87,18 +87,10 @@ typedef struct { int *kib_timeout; /* comms timeout (seconds) */ int *kib_keepalive; /* keepalive timeout (seconds) */ int *kib_ntx; /* # tx descs */ - int *kib_peercredits_hiw; /* # when eagerly to return credits */ char **kib_default_ipif; /* default IPoIB interface */ int *kib_retry_count; int *kib_rnr_retry_count; - int *kib_concurrent_sends; /* send work queue sizing */ int *kib_ib_mtu; /* IB MTU */ - int *kib_map_on_demand; /* map-on-demand if RD has more */ - /* fragments than this value, 0 */ - /* disable map-on-demand */ - int *kib_fmr_pool_size; /* # FMRs in pool */ - int *kib_fmr_flush_trigger; /* When to trigger FMR flush */ - int *kib_fmr_cache; /* enable FMR pool cache?
*/ int *kib_require_priv_port; /* accept only privileged ports */ int *kib_use_priv_port; /* use privileged port for active connect */ int *kib_nscheds; /* # threads on each CPT */ @@ -112,9 +104,10 @@ extern kib_tunables_t kiblnd_tunables; #define IBLND_CREDITS_DEFAULT 8 /* default # of peer credits */ #define IBLND_CREDITS_MAX ((typeof(((kib_msg_t *) 0)->ibm_credits)) - 1) /* Max # of peer credits */ -#define IBLND_CREDITS_HIGHWATER(v) ((v) == IBLND_MSG_VERSION_1 ? \ - IBLND_CREDIT_HIGHWATER_V1 : \ - *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */ +/* when eagerly to return credits */ +#define IBLND_CREDITS_HIGHWATER(t, v) ((v) == IBLND_MSG_VERSION_1 ? \ + IBLND_CREDIT_HIGHWATER_V1 : \ + (t)->lnd_peercredits_hiw) #define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(&init_net, \ cb, dev, \ @@ -260,6 +253,7 @@ typedef struct { int fps_cpt; /* CPT id */ int fps_pool_size; int fps_flush_trigger; + int fps_cache; int fps_increasing; /* is allocating new pool */ unsigned long fps_next_retry; /* time stamp for retry if*/ /* failed to allocate */ @@ -614,7 +608,11 @@ int kiblnd_msg_queue_size(int version, struct lnet_ni *ni); static inline int kiblnd_cfg_rdma_frags(struct lnet_ni *ni) { - int mod = *kiblnd_tunables.kib_map_on_demand; + struct lnet_ioctl_config_o2iblnd_tunables *tunables; + int mod; + + tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + mod = tunables->lnd_map_on_demand; return mod ?
mod : IBLND_MAX_RDMA_FRAGS; } @@ -629,9 +627,11 @@ kiblnd_rdma_frags(int version, struct lnet_ni *ni) static inline int kiblnd_concurrent_sends(int version, struct lnet_ni *ni) { + struct lnet_ioctl_config_o2iblnd_tunables *tunables; int concurrent_sends; - concurrent_sends = *kiblnd_tunables.kib_concurrent_sends; + tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + concurrent_sends = tunables->lnd_concurrent_sends; if (version == IBLND_MSG_VERSION_1) { if (concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2) @@ -766,10 +766,14 @@ kiblnd_send_keepalive(kib_conn_t *conn) static inline int kiblnd_need_noop(kib_conn_t *conn) { + struct lnet_ioctl_config_o2iblnd_tunables *tunables; + lnet_ni_t *ni = conn->ibc_peer->ibp_ni; + LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED); + tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; if (conn->ibc_outstanding_credits < - IBLND_CREDITS_HIGHWATER(conn->ibc_version) && + IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) && !kiblnd_send_keepalive(conn)) return 0; /* No need to send NOOP */ @@ -977,8 +981,7 @@ static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev, #define KIBLND_CONN_PARAM(e) ((e)->param.conn.private_data) #define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len) -struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, - kib_rdma_desc_t *rd, +struct ib_mr *kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd, int negotiated_nfrags); void kiblnd_map_rx_descs(kib_conn_t *conn); void kiblnd_unmap_rx_descs(kib_conn_t *conn); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index 5d4a35b..52ee6f9 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -612,8 +612,8 @@ static void kiblnd_unmap_tx(lnet_ni_t *ni, kib_tx_t *tx) static int kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, int nfrags) { - kib_hca_dev_t *hdev =
tx->tx_pool->tpo_hdev; kib_net_t *net = ni->ni_data; + kib_hca_dev_t *hdev = net->ibn_dev->ibd_hdev; struct ib_mr *mr = NULL; __u32 nob; int i; @@ -636,7 +636,7 @@ static int kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, nob += rd->rd_frags[i].rf_nob; } - mr = kiblnd_find_rd_dma_mr(hdev, rd, tx->tx_conn ? + mr = kiblnd_find_rd_dma_mr(ni, rd, tx->tx_conn ? tx->tx_conn->ibc_max_frags : -1); if (mr) { /* found pre-mapping MR */ @@ -2577,12 +2577,15 @@ kiblnd_check_reconnect(kib_conn_t *conn, int version, reason = "Unknown"; break; - case IBLND_REJECT_RDMA_FRAGS: + case IBLND_REJECT_RDMA_FRAGS: { + struct lnet_ioctl_config_lnd_tunables *tunables; + if (!cp) { reason = "can't negotiate max frags"; goto out; } - if (!*kiblnd_tunables.kib_map_on_demand) { + tunables = peer->ibp_ni->ni_lnd_tunables; + if (!tunables->lt_tun_u.lt_o2ib.lnd_map_on_demand) { reason = "map_on_demand must be enabled"; goto out; } @@ -2594,7 +2597,7 @@ kiblnd_check_reconnect(kib_conn_t *conn, int version, peer->ibp_max_frags = frag_num; reason = "rdma fragments"; break; - + } case IBLND_REJECT_MSG_QUEUE_SIZE: if (!cp) { reason = "can't negotiate queue depth"; diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c index e50a9cf..f8fdd4a 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c @@ -152,16 +152,10 @@ kib_tunables_t kiblnd_tunables = { .kib_timeout = &timeout, .kib_keepalive = &keepalive, .kib_ntx = &ntx, - .kib_peercredits_hiw = &peer_credits_hiw, .kib_default_ipif = &ipif_name, .kib_retry_count = &retry_count, .kib_rnr_retry_count = &rnr_retry_count, - .kib_concurrent_sends = &concurrent_sends, .kib_ib_mtu = &ib_mtu, - .kib_map_on_demand = &map_on_demand, - .kib_fmr_pool_size = &fmr_pool_size, - .kib_fmr_flush_trigger = &fmr_flush_trigger, - .kib_fmr_cache = &fmr_cache, .kib_require_priv_port =
&require_privileged_port, .kib_use_priv_port = &use_privileged_port, .kib_nscheds = &nscheds @@ -182,6 +176,26 @@ int kiblnd_msg_queue_size(int version, lnet_ni_t *ni) int kiblnd_tunables_setup(struct lnet_ni *ni) { + struct lnet_ioctl_config_o2iblnd_tunables *tunables; + + /* + * if there was no tunables specified, setup the tunables to be + * defaulted + */ + if (!ni->ni_lnd_tunables) { + LIBCFS_ALLOC(ni->ni_lnd_tunables, + sizeof(*ni->ni_lnd_tunables)); + if (!ni->ni_lnd_tunables) + return -ENOMEM; + + memcpy(&ni->ni_lnd_tunables->lt_tun_u.lt_o2ib, + &default_tunables, sizeof(*tunables)); + } + tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + + /* Current API version */ + tunables->lnd_version = 0; + if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) { CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n", *kiblnd_tunables.kib_ib_mtu); @@ -209,38 +223,54 @@ int kiblnd_tunables_setup(struct lnet_ni *ni) if (ni->ni_peertxcredits > credits) ni->ni_peertxcredits = credits; - if (*kiblnd_tunables.kib_peercredits_hiw < ni->ni_peertxcredits / 2) - *kiblnd_tunables.kib_peercredits_hiw = ni->ni_peertxcredits / 2; + if (!tunables->lnd_peercredits_hiw) + tunables->lnd_peercredits_hiw = peer_credits_hiw; - if (*kiblnd_tunables.kib_peercredits_hiw >= ni->ni_peertxcredits) - *kiblnd_tunables.kib_peercredits_hiw = ni->ni_peertxcredits - 1; + if (tunables->lnd_peercredits_hiw < ni->ni_peertxcredits / 2) + tunables->lnd_peercredits_hiw = ni->ni_peertxcredits / 2; - if (*kiblnd_tunables.kib_map_on_demand < 0 || - *kiblnd_tunables.kib_map_on_demand > IBLND_MAX_RDMA_FRAGS) - *kiblnd_tunables.kib_map_on_demand = 0; /* disable map-on-demand */ + if (tunables->lnd_peercredits_hiw >= ni->ni_peertxcredits) + tunables->lnd_peercredits_hiw = ni->ni_peertxcredits - 1; - if (*kiblnd_tunables.kib_map_on_demand == 1) - *kiblnd_tunables.kib_map_on_demand = 2; /* don't make sense to create map if only one fragment */ + if (tunables->lnd_map_on_demand < 0 || +
tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) { + /* disable map-on-demand */ + tunables->lnd_map_on_demand = 0; + } - if (!*kiblnd_tunables.kib_concurrent_sends) { - if (*kiblnd_tunables.kib_map_on_demand > 0 && - *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) - *kiblnd_tunables.kib_concurrent_sends = ni->ni_peertxcredits * 2; - else - *kiblnd_tunables.kib_concurrent_sends = ni->ni_peertxcredits; + if (tunables->lnd_map_on_demand == 1) { + /* don't make sense to create map if only one fragment */ + tunables->lnd_map_on_demand = 2; } - if (*kiblnd_tunables.kib_concurrent_sends > ni->ni_peertxcredits * 2) - *kiblnd_tunables.kib_concurrent_sends = ni->ni_peertxcredits * 2; + if (!tunables->lnd_concurrent_sends) { + if (tunables->lnd_map_on_demand > 0 && + tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) { + tunables->lnd_concurrent_sends = + ni->ni_peertxcredits * 2; + } else { + tunables->lnd_concurrent_sends = ni->ni_peertxcredits; + } + } + + if (tunables->lnd_concurrent_sends > ni->ni_peertxcredits * 2) + tunables->lnd_concurrent_sends = ni->ni_peertxcredits * 2; - if (*kiblnd_tunables.kib_concurrent_sends < ni->ni_peertxcredits / 2) - *kiblnd_tunables.kib_concurrent_sends = ni->ni_peertxcredits / 2; + if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits / 2) + tunables->lnd_concurrent_sends = ni->ni_peertxcredits / 2; - if (*kiblnd_tunables.kib_concurrent_sends < ni->ni_peertxcredits) { + if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits) { CWARN("Concurrent sends %d is lower than message queue size: %d, performance may drop slightly.\n", - *kiblnd_tunables.kib_concurrent_sends, ni->ni_peertxcredits); + tunables->lnd_concurrent_sends, ni->ni_peertxcredits); } + if (!tunables->lnd_fmr_pool_size) + tunables->lnd_fmr_pool_size = fmr_pool_size; + if (!tunables->lnd_fmr_flush_trigger) + tunables->lnd_fmr_flush_trigger = fmr_flush_trigger; + if (!tunables->lnd_fmr_cache) + tunables->lnd_fmr_cache = fmr_cache; + return 0; } |