From 7486216b3a0bd26375b17b2cc168a311106cea70 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 9 Jun 2016 15:11:34 +0300 Subject: {net,IB}/mlx5: mlx5_ifc updates Introducing mlx5_ifc updates for upcoming ConnectX-4 features. Needed bits and hardware structures for mlx5e netdev: - MLX5_CQ_PERIOD_NUM_MODES for adaptive moderation support - QoS rate limiting - SQ context rate limiting - Auto negotiation fields in PTYS register - Source SQN field in flow table entry match structure - DCBX parameters Needed bits and hardware structures for IB: - New XRQ opcodes, commands and capabilities layout - Extend q counters definition to support IB. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9a05cd7..209add9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -123,6 +123,10 @@ enum { MLX5_CMD_OP_DRAIN_DCT = 0x712, MLX5_CMD_OP_QUERY_DCT = 0x713, MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION = 0x714, + MLX5_CMD_OP_CREATE_XRQ = 0x717, + MLX5_CMD_OP_DESTROY_XRQ = 0x718, + MLX5_CMD_OP_QUERY_XRQ = 0x719, + MLX5_CMD_OP_ARM_XRQ = 0x71a, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, @@ -139,6 +143,8 @@ enum { MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, + MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, + MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, MLX5_CMD_OP_ALLOC_PD = 0x800, MLX5_CMD_OP_DEALLOC_PD = 0x801, MLX5_CMD_OP_ALLOC_UAR = 0x802, @@ -361,7 +367,8 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { }; struct mlx5_ifc_fte_match_set_misc_bits { - u8 reserved_at_0[0x20]; + u8 reserved_at_0[0x8]; + u8 source_sqn[0x18]; u8 reserved_at_20[0x10]; u8 source_port[0x10]; @@ -505,6 +512,17 @@ struct mlx5_ifc_e_switch_cap_bits { u8 reserved_at_20[0x7e0]; }; +struct mlx5_ifc_qos_cap_bits { + u8 packet_pacing[0x1]; + u8 reserved_0[0x1f]; + u8 reserved_1[0x20]; + u8 packet_pacing_max_rate[0x20]; + u8 packet_pacing_min_rate[0x20]; + u8 reserved_2[0x10]; + u8 packet_pacing_rate_table_size[0x10]; + u8 reserved_3[0x760]; +}; + struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 csum_cap[0x1]; u8 vlan_cap[0x1]; @@ -744,7 +762,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 out_of_seq_cnt[0x1]; u8 vport_counters[0x1]; - u8 reserved_at_182[0x4]; + u8 retransmission_q_counters[0x1]; + u8 reserved_at_183[0x3]; u8 max_qp_cnt[0xa]; u8 pkey_table_size[0x10]; @@ -771,7 +790,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_msg[0x5]; u8 reserved_at_1c8[0x4]; u8 max_tc[0x4]; - u8 reserved_at_1d0[0x6]; + u8 reserved_at_1d0[0x1]; + u8 dcbx[0x1]; + u8 reserved_at_1d2[0x4]; u8 rol_s[0x1]; u8 rol_g[0x1]; u8 reserved_at_1d8[0x1]; @@ -803,7 +824,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 tph[0x1]; u8 rf[0x1]; u8 dct[0x1]; - u8 reserved_at_21b[0x1]; + u8 qos[0x1]; u8 eth_net_offloads[0x1]; u8 roce[0x1]; u8 atomic[0x1]; @@ -929,7 +950,15 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cqe_compression_timeout[0x10]; u8 cqe_compression_max_num[0x10]; - u8 reserved_at_5e0[0x220]; + u8 reserved_at_5e0[0x10]; + u8 tag_matching[0x1]; + u8 rndv_offload_rc[0x1]; + u8 rndv_offload_dc[0x1]; + u8 log_tag_matching_list_sz[0x5]; + u8 reserved_at_5e8[0x3]; + u8 log_max_xrq[0x5]; + + u8 reserved_at_5f0[0x200]; }; enum mlx5_flow_destination_type { @@ -1967,7 +1996,7 @@ struct mlx5_ifc_qpc_bits { u8 reserved_at_560[0x5]; u8 rq_type[0x3]; - u8 srqn_rmpn[0x18]; + u8 
srqn_rmpn_xrqn[0x18]; u8 reserved_at_580[0x8]; u8 rmsn[0x18]; @@ -2018,6 +2047,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; struct mlx5_ifc_e_switch_cap_bits e_switch_cap; struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap; + struct mlx5_ifc_qos_cap_bits qos_cap; u8 reserved_at_0[0x8000]; }; @@ -2244,8 +2274,9 @@ struct mlx5_ifc_sqc_bits { u8 reserved_at_40[0x8]; u8 cqn[0x18]; - u8 reserved_at_60[0xa0]; + u8 reserved_at_60[0x90]; + u8 packet_pacing_rate_limit_index[0x10]; u8 tis_lst_sz[0x10]; u8 reserved_at_110[0x10]; @@ -2593,7 +2624,7 @@ struct mlx5_ifc_dctc_bits { u8 reserved_at_98[0x8]; u8 reserved_at_a0[0x8]; - u8 srqn[0x18]; + u8 srqn_xrqn[0x18]; u8 reserved_at_c0[0x8]; u8 pd[0x18]; @@ -2645,6 +2676,7 @@ enum { enum { MLX5_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, MLX5_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, + MLX5_CQ_PERIOD_NUM_MODES }; struct mlx5_ifc_cqc_bits { @@ -2722,6 +2754,54 @@ struct mlx5_ifc_query_adapter_param_block_bits { u8 vsd_contd_psid[16][0x8]; }; +enum { + MLX5_XRQC_STATE_GOOD = 0x0, + MLX5_XRQC_STATE_ERROR = 0x1, +}; + +enum { + MLX5_XRQC_TOPOLOGY_NO_SPECIAL_TOPOLOGY = 0x0, + MLX5_XRQC_TOPOLOGY_TAG_MATCHING = 0x1, +}; + +enum { + MLX5_XRQC_OFFLOAD_RNDV = 0x1, +}; + +struct mlx5_ifc_tag_matching_topology_context_bits { + u8 log_matching_list_sz[0x4]; + u8 reserved_at_4[0xc]; + u8 append_next_index[0x10]; + + u8 sw_phase_cnt[0x10]; + u8 hw_phase_cnt[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_xrqc_bits { + u8 state[0x4]; + u8 rlkey[0x1]; + u8 reserved_at_5[0xf]; + u8 topology[0x4]; + u8 reserved_at_18[0x4]; + u8 offload[0x4]; + + u8 reserved_at_20[0x8]; + u8 user_index[0x18]; + + u8 reserved_at_40[0x8]; + u8 cqn[0x18]; + + u8 reserved_at_60[0xa0]; + + struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context; + + u8 reserved_at_180[0x180]; + + struct mlx5_ifc_wq_bits wq; +}; + union mlx5_ifc_modify_field_select_resize_field_select_auto_bits { struct mlx5_ifc_modify_field_select_bits modify_field_select; struct mlx5_ifc_resize_field_select_bits resize_field_select; @@ -3144,6 +3224,30 @@ struct mlx5_ifc_rst2init_qp_in_bits { u8 reserved_at_800[0x80]; }; +struct mlx5_ifc_query_xrq_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_xrqc_bits xrq_context; +}; + +struct mlx5_ifc_query_xrq_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 xrqn[0x18]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_query_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -3547,7 +3651,27 @@ struct mlx5_ifc_query_q_counter_out_bits { u8 out_of_sequence[0x20]; - u8 reserved_at_1e0[0x620]; + u8 reserved_at_1e0[0x20]; + + u8 duplicate_request[0x20]; + + u8 reserved_at_220[0x20]; + + u8 rnr_nak_retry_err[0x20]; + + u8 reserved_at_260[0x20]; + + u8 packet_seq_err[0x20]; + + u8 reserved_at_2a0[0x20]; + + u8 implied_nak_seq_err[0x20]; + + u8 reserved_at_2e0[0x20]; + + u8 local_ack_timeout_err[0x20]; + + u8 reserved_at_320[0x4e0]; }; struct mlx5_ifc_query_q_counter_in_bits { @@ -4998,6 +5122,28 @@ struct mlx5_ifc_detach_from_mcg_in_bits { u8 multicast_gid[16][0x8]; }; +struct mlx5_ifc_destroy_xrq_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_destroy_xrq_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 
op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 xrqn[0x18]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_destroy_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -5583,6 +5729,30 @@ struct mlx5_ifc_dealloc_flow_counter_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_create_xrq_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x8]; + u8 xrqn[0x18]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_create_xrq_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_xrqc_bits xrq_context; +}; + struct mlx5_ifc_create_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6124,6 +6294,29 @@ struct mlx5_ifc_attach_to_mcg_in_bits { u8 multicast_gid[16][0x8]; }; +struct mlx5_ifc_arm_xrq_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_arm_xrq_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 xrqn[0x18]; + + u8 reserved_at_60[0x10]; + u8 lwm[0x10]; +}; + struct mlx5_ifc_arm_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6161,7 +6354,8 @@ struct mlx5_ifc_arm_rq_out_bits { }; enum { - MLX5_ARM_RQ_IN_OP_MOD_SRQ_ = 0x1, + MLX5_ARM_RQ_IN_OP_MOD_SRQ = 0x1, + MLX5_ARM_RQ_IN_OP_MOD_XRQ = 0x2, }; struct mlx5_ifc_arm_rq_in_bits { @@ -6354,6 +6548,30 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits { u8 vxlan_udp_port[0x10]; }; +struct mlx5_ifc_set_rate_limit_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_set_rate_limit_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 rate_limit_index[0x10]; + + u8 reserved_at_60[0x20]; + + u8 rate_limit[0x20]; +}; + struct mlx5_ifc_access_register_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6478,12 +6696,15 @@ struct mlx5_ifc_pude_reg_bits { }; struct mlx5_ifc_ptys_reg_bits { - u8 reserved_at_0[0x8]; + u8 an_disable_cap[0x1]; + u8 an_disable_admin[0x1]; + u8 reserved_at_2[0x6]; u8 local_port[0x8]; u8 reserved_at_10[0xd]; u8 proto_mask[0x3]; - u8 reserved_at_20[0x40]; + u8 an_status[0x4]; + u8 reserved_at_24[0x3c]; u8 eth_proto_capability[0x20]; @@ -7444,4 +7665,34 @@ struct mlx5_ifc_mcia_reg_bits { u8 dword_11[0x20]; }; +struct mlx5_ifc_dcbx_param_bits { + u8 dcbx_cee_cap[0x1]; + u8 dcbx_ieee_cap[0x1]; + u8 dcbx_standby_cap[0x1]; + u8 reserved_at_0[0x5]; + u8 port_number[0x8]; + u8 reserved_at_10[0xa]; + u8 max_application_table_size[6]; + u8 reserved_at_20[0x15]; + u8 version_oper[0x3]; + u8 reserved_at_38[5]; + u8 version_admin[0x3]; + u8 willing_admin[0x1]; + u8 reserved_at_41[0x3]; + u8 pfc_cap_oper[0x4]; + u8 reserved_at_48[0x4]; + u8 pfc_cap_admin[0x4]; + u8 reserved_at_50[0x4]; + u8 num_of_tc_oper[0x4]; + u8 reserved_at_58[0x4]; + u8 num_of_tc_admin[0x4]; + u8 remote_willing[0x1]; + u8 reserved_at_61[3]; + u8 remote_pfc_cap[4]; + u8 reserved_at_68[0x14]; + u8 remote_num_of_tc[0x4]; + u8 reserved_at_80[0x18]; + u8 error[0x8]; + u8 reserved_at_a0[0x160]; +}; #endif /* MLX5_IFC_H */ -- cgit v0.10.2 From 3d4e79949cf9e8032f0cd1f91564c6ce3304755c Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 10 Jun 2016 01:05:11 +0530 Subject: RDMA/iw_cxgb4: only read markers_enabled mod param once markers_enabled should be read only once 
during MPA negotiation. The present code does read markers_enabled twice during negotiation which results in setting wrong recv/xmit markers if the markers_enabled is changed in the middle of negotiation. With this change the markers_enabled is read only once during MPA negotiation. recv markers are set based on markers enabled module parameter and xmit markers are set based on markers flag from the MPA_START_REQ/MPA_START_REP. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a3a6721..2ba3403 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -992,9 +992,19 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, mpa = (struct mpa_message *)(req + 1); memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); - mpa->flags = (crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? MPA_MARKERS : 0) | - (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0); + + mpa->flags = 0; + if (crc_enabled) + mpa->flags |= MPA_CRC; + if (markers_enabled) { + mpa->flags |= MPA_MARKERS; + ep->mpa_attr.recv_marker_enabled = 1; + } else { + ep->mpa_attr.recv_marker_enabled = 0; + } + if (mpa_rev_to_use == 2) + mpa->flags |= MPA_ENHANCED_RDMA_CONN; + mpa->private_data_size = htons(ep->plen); mpa->revision = mpa_rev_to_use; if (mpa_rev_to_use == 1) { @@ -1169,8 +1179,11 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) mpa = (struct mpa_message *)(req + 1); memset(mpa, 0, sizeof(*mpa)); memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); - mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? MPA_MARKERS : 0); + mpa->flags = 0; + if (ep->mpa_attr.crc_enabled) + mpa->flags |= MPA_CRC; + if (ep->mpa_attr.recv_marker_enabled) + mpa->flags |= MPA_MARKERS; mpa->revision = ep->mpa_attr.version; mpa->private_data_size = htons(plen); @@ -1555,7 +1568,6 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) */ __state_set(&ep->com, FPDU_MODE); ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; - ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; ep->mpa_attr.version = mpa->revision; ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; -- cgit v0.10.2 From 68cebcab59fbfbab5827fcc44e04cd31da02041b Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 10 Jun 2016 01:05:12 +0530 Subject: RDMA/iw_cxgb4: allocate enough space for debugfs "qps" dump With IPv6 addresses, the "qps" debugfs is running out of space and truncating the output. Bump the required size accordingly. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index ae2e8b2..071d733 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -317,7 +317,7 @@ static int qp_open(struct inode *inode, struct file *file) idr_for_each(&qpd->devp->qpidr, count_idrs, &count); spin_unlock_irq(&qpd->devp->lock); - qpd->bufsize = count * 128; + qpd->bufsize = count * 180; qpd->buf = vmalloc(qpd->bufsize); if (!qpd->buf) { kfree(qpd); -- cgit v0.10.2 From bce2841f5a0dd9caf327fb1bc0051ea09a9e7dfb Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 10 Jun 2016 01:05:13 +0530 Subject: RDMA/iw_cxgb4: clean up c4iw_reject_cr() Get rid of unneeded code, and refactor things a bit. 
For MPA version 0 we abort the connection. For > 0, we attempt to send an MPA_START/REJECT Reply, and then disconnect gracefully. If the send of the MPA message fails, then we abort the connection. We can ignore c4iw_ep_disconnect() errors here because it will clean up the endpoint if there are failures. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 2ba3403..25236fc 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3037,9 +3037,9 @@ out: int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { - int err = 0; - int disconnect = 0; + int abort; struct c4iw_ep *ep = to_ep(cm_id); + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); mutex_lock(&ep->com.mutex); @@ -3050,16 +3050,13 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) } set_bit(ULP_REJECT, &ep->com.history); if (mpa_rev == 0) - disconnect = 2; - else { - err = send_mpa_reject(ep, pdata, pdata_len); - disconnect = 1; - } + abort = 1; + else + abort = send_mpa_reject(ep, pdata, pdata_len); mutex_unlock(&ep->com.mutex); - if (disconnect) { - stop_ep_timer(ep); - err = c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL); - } + + stop_ep_timer(ep); + c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL); c4iw_put_ep(&ep->com); return 0; } -- cgit v0.10.2 From 4c72efefd9af015459ed63b164ff4b13cbfa3f61 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 10 Jun 2016 01:05:14 +0530 Subject: RDMA/iw_cxgb4: Add missing error codes for act open cmd Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 25236fc..4819314 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2016,12 +2016,17 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) } /* - * Return whether a failed active open has allocated a TID + * Some of the error codes above implicitly indicate that there is no TID + * allocated with the result of an ACT_OPEN. We use this predicate to make + * that explicit. */ static inline int act_open_has_tid(int status) { - return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && - status != CPL_ERR_ARP_MISS; + return (status != CPL_ERR_TCAM_PARITY && + status != CPL_ERR_TCAM_MISS && + status != CPL_ERR_TCAM_FULL && + status != CPL_ERR_CONN_EXIST_SYNRECV && + status != CPL_ERR_CONN_EXIST); } /* Returns whether a CPL status conveys negative advice. diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index 4705e2d..e0ebe13 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -104,6 +104,8 @@ enum { enum CPL_error { CPL_ERR_NONE = 0, + CPL_ERR_TCAM_PARITY = 1, + CPL_ERR_TCAM_MISS = 2, CPL_ERR_TCAM_FULL = 3, CPL_ERR_BAD_LENGTH = 15, CPL_ERR_BAD_ROUTE = 18, -- cgit v0.10.2 From 4a740838bf44ca4b03e26ff79aedb86b9d8cace4 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 10 Jun 2016 01:05:15 +0530 Subject: RDMA/iw_cxgb4: Low resource fixes for connection manager Pre-allocate buffers for sending various control messages to close connection, abort connection, etc so that we gracefully handle connections when system is running out of memory. 
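[ Editor's note: a minimal sketch of the pre-allocation pattern this
  patch adopts, not the patch code itself; the function name and queue
  depth here are illustrative, and the patch's own alloc_ep_skb_list()
  below is the authoritative version.  The point is to reserve the
  control-path skbs while memory is plentiful, so the close/abort paths
  only ever dequeue and never allocate:

	static int prealloc_ctrl_skbs(struct sk_buff_head *ep_skb_list,
				      unsigned int depth, size_t len)
	{
		unsigned int i;

		for (i = 0; i < depth; i++) {
			struct sk_buff *skb = alloc_skb(len, GFP_KERNEL);

			if (!skb) {
				/* free the partial set; caller fails setup */
				skb_queue_purge(ep_skb_list);
				return -ENOMEM;
			}
			skb_queue_tail(ep_skb_list, skb);
		}
		return 0;
	}

  Teardown paths then use skb_dequeue(&ep->com.ep_skb_list) in place of
  alloc_skb()/get_skb(), as send_halfclose() and send_abort() do in the
  hunks below. ]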
Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 4819314..81211db 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -294,6 +294,25 @@ static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) return; } +static int alloc_ep_skb_list(struct sk_buff_head *ep_skb_list, int size) +{ + struct sk_buff *skb; + unsigned int i; + size_t len; + + len = roundup(sizeof(union cpl_wr_size), 16); + for (i = 0; i < size; i++) { + skb = alloc_skb(len, GFP_KERNEL); + if (!skb) + goto fail; + skb_queue_tail(ep_skb_list, skb); + } + return 0; +fail: + skb_queue_purge(ep_skb_list); + return -ENOMEM; +} + static void *alloc_ep(int size, gfp_t gfp) { struct c4iw_ep_common *epc; @@ -384,6 +403,8 @@ void _c4iw_free_ep(struct kref *kref) if (ep->mpa_skb) kfree_skb(ep->mpa_skb); } + if (!skb_queue_empty(&ep->com.ep_skb_list)) + skb_queue_purge(&ep->com.ep_skb_list); kfree(ep); } @@ -620,25 +641,27 @@ static void abort_arp_failure(void *handle, struct sk_buff *skb) } } -static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) +static int send_flowc(struct c4iw_ep *ep) { - unsigned int flowclen = 80; struct fw_flowc_wr *flowc; + struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); int i; u16 vlan = ep->l2t->vlan; int nparams; + if (WARN_ON(!skb)) + return -ENOMEM; + if (vlan == CPL_L2T_VLAN_NONE) nparams = 8; else nparams = 9; - skb = get_skb(skb, flowclen, GFP_KERNEL); - flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen); + flowc = (struct fw_flowc_wr *)__skb_put(skb, FLOWC_LEN); flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | FW_FLOWC_WR_NPARAMS_V(nparams)); - flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen, + flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(FLOWC_LEN, 16)) | FW_WR_FLOWID_V(ep->hwtid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; @@ -679,18 +702,16 @@ static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) return c4iw_ofld_send(&ep->com.dev->rdev, skb); } -static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp) +static int send_halfclose(struct c4iw_ep *ep) { struct cpl_close_con_req *req; - struct sk_buff *skb; + struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); int wrlen = roundup(sizeof *req, 16); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - skb = get_skb(NULL, wrlen, gfp); - if (!skb) { - printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__); + if (WARN_ON(!skb)) return -ENOMEM; - } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); t4_set_arp_err_handler(skb, NULL, arp_failure_discard); req = (struct cpl_close_con_req *) skb_put(skb, wrlen); @@ -701,26 +722,24 @@ static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp) return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } -static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) +static int send_abort(struct c4iw_ep *ep) { struct cpl_abort_req *req; int wrlen = roundup(sizeof *req, 16); + struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - skb = get_skb(skb, wrlen, gfp); - if (!skb) { - printk(KERN_ERR MOD "%s - failed to alloc skb.\n", - __func__); + if (WARN_ON(!req_skb)) return -ENOMEM; - } - set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(skb, ep, abort_arp_failure); - req = (struct cpl_abort_req *) skb_put(skb, wrlen); + + set_wr_txq(req_skb, CPL_PRIORITY_DATA, 
ep->txq_idx); + t4_set_arp_err_handler(req_skb, ep, abort_arp_failure); + req = (struct cpl_abort_req *)skb_put(req_skb, wrlen); memset(req, 0, wrlen); INIT_TP_WR(req, ep->hwtid); OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); req->cmd = CPL_ABORT_SEND_RST; - return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); + return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); } static void best_mtu(const unsigned short *mtus, unsigned short mtu, @@ -1261,7 +1280,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) set_bit(ACT_ESTAB, &ep->com.history); /* start MPA negotiation */ - ret = send_flowc(ep, NULL); + ret = send_flowc(ep); if (ret) goto err; if (ep->retry_with_mpa_v1) @@ -2147,6 +2166,7 @@ out: static int c4iw_reconnect(struct c4iw_ep *ep) { int err = 0; + int size = 0; struct sockaddr_in *laddr = (struct sockaddr_in *) &ep->com.cm_id->m_local_addr; struct sockaddr_in *raddr = (struct sockaddr_in *) @@ -2162,6 +2182,21 @@ static int c4iw_reconnect(struct c4iw_ep *ep) init_timer(&ep->timer); c4iw_init_wr_wait(&ep->com.wr_wait); + /* When MPA revision is different on nodes, the node with MPA_rev=2 + * tries to reconnect with MPA_rev 1 for the same EP through + * c4iw_reconnect(), where the same EP is assigned with new tid for + * further connection establishment. As we are using the same EP pointer + * for reconnect, few skbs are used during the previous c4iw_connect(), + * which leaves the EP with inadequate skbs for further + * c4iw_reconnect(), Further causing an assert BUG_ON() due to empty + * skb_list() during peer_abort(). Allocate skbs which is already used. + */ + size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list)); + if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) { + err = -ENOMEM; + goto fail1; + } + /* * Allocate an active TID to initiate a TCP connection. */ @@ -2227,6 +2262,7 @@ fail2: * response of 1st connect request. 
*/ connect_reply_upcall(ep, -ECONNRESET); +fail1: c4iw_put_ep(&ep->com); out: return err; @@ -2593,6 +2629,10 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) if (peer_mss && child_ep->mtu > (peer_mss + hdrs)) child_ep->mtu = peer_mss + hdrs; + skb_queue_head_init(&child_ep->com.ep_skb_list); + if (alloc_ep_skb_list(&child_ep->com.ep_skb_list, CN_MAX_CON_BUF)) + goto fail; + state_set(&child_ep->com, CONNECTING); child_ep->com.dev = dev; child_ep->com.cm_id = NULL; @@ -2657,6 +2697,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) (const u32 *)&sin6->sin6_addr.s6_addr, 1); } goto out; +fail: + c4iw_put_ep(&child_ep->com); reject: reject_cr(dev, hwtid, skb); if (parent_ep) @@ -2687,7 +2729,7 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) ep->com.state = MPA_REQ_WAIT; start_ep_timer(ep); set_bit(PASS_ESTAB, &ep->com.history); - ret = send_flowc(ep, skb); + ret = send_flowc(ep); mutex_unlock(&ep->com.mutex); if (ret) c4iw_ep_disconnect(ep, 1, GFP_KERNEL); @@ -2888,10 +2930,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) } mutex_unlock(&ep->com.mutex); - rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); - if (!rpl_skb) { - printk(KERN_ERR MOD "%s - cannot allocate skb!\n", - __func__); + rpl_skb = skb_dequeue(&ep->com.ep_skb_list); + if (WARN_ON(!rpl_skb)) { release = 1; goto out; } @@ -3262,6 +3302,13 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = -ENOMEM; goto out; } + + skb_queue_head_init(&ep->com.ep_skb_list); + if (alloc_ep_skb_list(&ep->com.ep_skb_list, CN_MAX_CON_BUF)) { + err = -ENOMEM; + goto fail1; + } + init_timer(&ep->timer); ep->plen = conn_param->private_data_len; if (ep->plen) @@ -3280,7 +3327,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (!ep->com.qp) { PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn); err = -EINVAL; - goto fail1; + goto fail2; } ref_qp(ep); PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, @@ -3293,7 +3340,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ep->atid == -1) { printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); err = -ENOMEM; - goto fail1; + goto fail2; } insert_handle(dev, &dev->atid_idr, ep, ep->atid); @@ -3317,7 +3364,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) { err = pick_local_ipaddrs(dev, cm_id); if (err) - goto fail1; + goto fail2; } /* find a route */ @@ -3337,7 +3384,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) { err = pick_local_ip6addrs(dev, cm_id); if (err) - goto fail1; + goto fail2; } /* find a route */ @@ -3353,14 +3400,14 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (!ep->dst) { printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); err = -EHOSTUNREACH; - goto fail2; + goto fail3; } err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true, ep->com.dev->rdev.lldi.adapter_type, cm_id->tos); if (err) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); - goto fail3; + goto fail4; } PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", @@ -3376,13 +3423,15 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto out; cxgb4_l2t_release(ep->l2t); -fail3: +fail4: dst_release(ep->dst); -fail2: +fail3: 
remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); -fail1: +fail2: + skb_queue_purge(&ep->com.ep_skb_list); deref_cm_id(&ep->com); +fail1: c4iw_put_ep(&ep->com); out: return err; @@ -3475,6 +3524,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) err = -ENOMEM; goto fail1; } + skb_queue_head_init(&ep->com.ep_skb_list); PDBG("%s ep %p\n", __func__, ep); ep->com.cm_id = cm_id; ref_cm_id(&ep->com); @@ -3591,6 +3641,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) case MPA_REQ_RCVD: case MPA_REP_SENT: case FPDU_MODE: + case CONNECTING: close = 1; if (abrupt) ep->com.state = ABORTING; @@ -3625,10 +3676,10 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (abrupt) { set_bit(EP_DISC_ABORT, &ep->com.history); close_complete_upcall(ep, -ECONNRESET); - ret = send_abort(ep, NULL, gfp); + ret = send_abort(ep); } else { set_bit(EP_DISC_CLOSE, &ep->com.history); - ret = send_halfclose(ep, gfp); + ret = send_halfclose(ep); } if (ret) { set_bit(EP_DISC_FAIL, &ep->com.history); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index f6f34a7..ce42d09 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -789,10 +789,29 @@ enum c4iw_ep_history { CM_ID_DEREFED = 28, }; +enum conn_pre_alloc_buffers { + CN_ABORT_REQ_BUF, + CN_ABORT_RPL_BUF, + CN_CLOSE_CON_REQ_BUF, + CN_DESTROY_BUF, + CN_FLOWC_BUF, + CN_MAX_CON_BUF +}; + +#define FLOWC_LEN 80 +union cpl_wr_size { + struct cpl_abort_req abrt_req; + struct cpl_abort_rpl abrt_rpl; + struct fw_ri_wr ri_req; + struct cpl_close_con_req close_req; + char flowc_buf[FLOWC_LEN]; +}; + struct c4iw_ep_common { struct iw_cm_id *cm_id; struct c4iw_qp *qp; struct c4iw_dev *dev; + struct sk_buff_head ep_skb_list; enum c4iw_ep_state state; struct kref kref; struct mutex mutex; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index e8993e4..b3441af 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1081,9 +1081,10 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, qhp->ep->hwtid); - skb = alloc_skb(sizeof *wqe, gfp); - if (!skb) + skb = skb_dequeue(&qhp->ep->com.ep_skb_list); + if (WARN_ON(!skb)) return; + set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); @@ -1202,9 +1203,10 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, ep->hwtid); - skb = alloc_skb(sizeof *wqe, GFP_KERNEL); - if (!skb) + skb = skb_dequeue(&ep->com.ep_skb_list); + if (WARN_ON(!skb)) return -ENOMEM; + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); -- cgit v0.10.2 From 0f8ab0b6e91b4d5302ceee5ec12ce54d81297635 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 10 Jun 2016 01:05:16 +0530 Subject: RDMA/iw_cxgb4: Low resource fixes for Memory registration Pre-allocate buffers for deregistering memory region and memory window during RDMA connection close, when system is running out of memory. 
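[ Editor's note: a hedged sketch of the scheme, assuming the patch's
  struct c4iw_mr with its new dereg_skb member.  Each registration
  reserves, up front, the skb that deregistration will later consume,
  so dereg_mem() never depends on alloc_skb() succeeding at teardown:

	static int reserve_dereg_skb(struct c4iw_mr *mhp)
	{
		mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
		if (!mhp->dereg_skb)
			return -ENOMEM;
		return 0;
	}

  On the dereg path the reserved skb is handed down through
  write_tpt_entry() instead of a freshly allocated one, as the hunks
  below show. ]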
Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index ce42d09..e94ec0d 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -384,6 +384,7 @@ struct c4iw_mr { struct ib_mr ibmr; struct ib_umem *umem; struct c4iw_dev *rhp; + struct sk_buff *dereg_skb; u64 kva; struct tpt_attributes attr; u64 *mpl; @@ -400,6 +401,7 @@ static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr) struct c4iw_mw { struct ib_mw ibmw; struct c4iw_dev *rhp; + struct sk_buff *dereg_skb; u64 kva; struct tpt_attributes attr; }; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 55d0651..5d0aa55 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -59,9 +59,9 @@ static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length) } static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, - u32 len, dma_addr_t data, int wait) + u32 len, dma_addr_t data, + int wait, struct sk_buff *skb) { - struct sk_buff *skb; struct ulp_mem_io *req; struct ulptx_sgl *sgl; u8 wr_len; @@ -74,9 +74,11 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, c4iw_init_wr_wait(&wr_wait); wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16); - skb = alloc_skb(wr_len, GFP_KERNEL); - if (!skb) - return -ENOMEM; + if (!skb) { + skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + return -ENOMEM; + } set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); req = (struct ulp_mem_io *)__skb_put(skb, wr_len); @@ -108,9 +110,8 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, } static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, - void *data) + void *data, struct sk_buff *skb) { - struct sk_buff *skb; struct ulp_mem_io *req; struct ulptx_idata *sc; u8 wr_len, *to_dp, *from_dp; @@ -134,9 +135,11 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, wr_len = roundup(sizeof *req + sizeof *sc + roundup(copy_len, T4_ULPTX_MIN_IO), 16); - skb = alloc_skb(wr_len, GFP_KERNEL); - if (!skb) - return -ENOMEM; + if (!skb) { + skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + return -ENOMEM; + } set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); req = (struct ulp_mem_io *)__skb_put(skb, wr_len); @@ -173,6 +176,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, memset(to_dp + copy_len, 0, T4_ULPTX_MIN_IO - (copy_len % T4_ULPTX_MIN_IO)); ret = c4iw_ofld_send(rdev, skb); + skb = NULL; if (ret) return ret; len -= C4IW_MAX_INLINE_SIZE; @@ -182,7 +186,8 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, return ret; } -static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data) +static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, + void *data, struct sk_buff *skb) { u32 remain = len; u32 dmalen; @@ -205,7 +210,7 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void * dmalen = T4_ULPTX_MAX_DMA; remain -= dmalen; ret = _c4iw_write_mem_dma_aligned(rdev, addr, dmalen, daddr, - !remain); + !remain, skb); if (ret) goto out; addr += dmalen >> 5; @@ -213,7 +218,7 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void * daddr += dmalen; } if (remain) - ret = _c4iw_write_mem_inline(rdev, addr, remain, data); + ret = _c4iw_write_mem_inline(rdev, addr, remain, 
data, skb); out: dma_unmap_single(&rdev->lldi.pdev->dev, save, len, DMA_TO_DEVICE); return ret; @@ -224,23 +229,25 @@ out: * If data is NULL, clear len byte of memory to zero. */ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, - void *data) + void *data, struct sk_buff *skb) { if (is_t5(rdev->lldi.adapter_type) && use_dsgl) { if (len > inline_threshold) { - if (_c4iw_write_mem_dma(rdev, addr, len, data)) { + if (_c4iw_write_mem_dma(rdev, addr, len, data, skb)) { printk_ratelimited(KERN_WARNING "%s: dma map" " failure (non fatal)\n", pci_name(rdev->lldi.pdev)); return _c4iw_write_mem_inline(rdev, addr, len, - data); - } else + data, skb); + } else { return 0; + } } else - return _c4iw_write_mem_inline(rdev, addr, len, data); + return _c4iw_write_mem_inline(rdev, addr, + len, data, skb); } else - return _c4iw_write_mem_inline(rdev, addr, len, data); + return _c4iw_write_mem_inline(rdev, addr, len, data, skb); } /* @@ -253,7 +260,8 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, u32 *stag, u8 stag_state, u32 pdid, enum fw_ri_stag_type type, enum fw_ri_mem_perms perm, int bind_enabled, u32 zbva, u64 to, - u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr) + u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr, + struct sk_buff *skb) { int err; struct fw_ri_tpte tpt; @@ -307,7 +315,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, } err = write_adapter_mem(rdev, stag_idx + (rdev->lldi.vr->stag.start >> 5), - sizeof(tpt), &tpt); + sizeof(tpt), &tpt, skb); if (reset_tpt_entry) { c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); @@ -327,28 +335,29 @@ static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl, __func__, pbl_addr, rdev->lldi.vr->pbl.start, pbl_size); - err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl); + err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl, NULL); return err; } static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size, - u32 pbl_addr) + u32 pbl_addr, struct sk_buff *skb) { return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, - pbl_size, pbl_addr); + pbl_size, pbl_addr, skb); } static int allocate_window(struct c4iw_rdev *rdev, u32 * stag, u32 pdid) { *stag = T4_STAG_UNSET; return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0, - 0UL, 0, 0, 0, 0); + 0UL, 0, 0, 0, 0, NULL); } -static int deallocate_window(struct c4iw_rdev *rdev, u32 stag) +static int deallocate_window(struct c4iw_rdev *rdev, u32 stag, + struct sk_buff *skb) { return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0, - 0); + 0, skb); } static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, @@ -356,7 +365,7 @@ static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, { *stag = T4_STAG_UNSET; return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_NSMR, 0, 0, 0, - 0UL, 0, 0, pbl_size, pbl_addr); + 0UL, 0, 0, pbl_size, pbl_addr, NULL); } static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) @@ -383,14 +392,16 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php, mhp->attr.mw_bind_enable, mhp->attr.zbva, mhp->attr.va_fbo, mhp->attr.len ? 
mhp->attr.len : -1, shift - 12, - mhp->attr.pbl_size, mhp->attr.pbl_addr); + mhp->attr.pbl_size, mhp->attr.pbl_addr, NULL); if (ret) return ret; ret = finish_mem_reg(mhp, stag); - if (ret) + if (ret) { dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); + mhp->attr.pbl_addr, mhp->dereg_skb); + mhp->dereg_skb = NULL; + } return ret; } @@ -423,6 +434,12 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) if (!mhp) return ERR_PTR(-ENOMEM); + mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); + if (!mhp->dereg_skb) { + ret = -ENOMEM; + goto err0; + } + mhp->rhp = rhp; mhp->attr.pdid = php->pdid; mhp->attr.perms = c4iw_ib_to_tpt_access(acc); @@ -435,7 +452,8 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid, FW_RI_STAG_NSMR, mhp->attr.perms, - mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0); + mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0, + NULL); if (ret) goto err1; @@ -445,8 +463,10 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) return &mhp->ibmr; err2: dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); + mhp->attr.pbl_addr, mhp->dereg_skb); err1: + kfree_skb(mhp->dereg_skb); +err0: kfree(mhp); return ERR_PTR(ret); } @@ -481,11 +501,18 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mhp) return ERR_PTR(-ENOMEM); + mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); + if (!mhp->dereg_skb) { + kfree(mhp); + return ERR_PTR(-ENOMEM); + } + mhp->rhp = rhp; mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(mhp->umem)) { err = PTR_ERR(mhp->umem); + kfree_skb(mhp->dereg_skb); kfree(mhp); return ERR_PTR(err); } @@ -550,6 +577,7 @@ err_pbl: err: ib_umem_release(mhp->umem); + kfree_skb(mhp->dereg_skb); kfree(mhp); return ERR_PTR(err); } @@ -572,8 +600,16 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); if (!mhp) return ERR_PTR(-ENOMEM); + + mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); + if (!mhp->dereg_skb) { + kfree(mhp); + return ERR_PTR(-ENOMEM); + } + ret = allocate_window(&rhp->rdev, &stag, php->pdid); if (ret) { + kfree(mhp->dereg_skb); kfree(mhp); return ERR_PTR(ret); } @@ -584,7 +620,8 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, mmid = (stag) >> 8; mhp->ibmw.rkey = stag; if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { - deallocate_window(&rhp->rdev, mhp->attr.stag); + deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb); + kfree(mhp->dereg_skb); kfree(mhp); return ERR_PTR(-ENOMEM); } @@ -602,7 +639,7 @@ int c4iw_dealloc_mw(struct ib_mw *mw) rhp = mhp->rhp; mmid = (mw->rkey) >> 8; remove_handle(rhp, &rhp->mmidr, mmid); - deallocate_window(&rhp->rdev, mhp->attr.stag); + deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb); kfree(mhp); PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); return 0; @@ -666,7 +703,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, return &(mhp->ibmr); err3: dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); + mhp->attr.pbl_addr, mhp->dereg_skb); err2: c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, mhp->attr.pbl_size << 3); @@ -717,7 +754,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); + mhp->attr.pbl_addr, mhp->dereg_skb); 
if (mhp->attr.pbl_size) c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, mhp->attr.pbl_size << 3); -- cgit v0.10.2 From dd6b0241260d0f16c4051da1c5b09d4da8992ef2 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 10 Jun 2016 01:05:17 +0530 Subject: RDMA/iw_cxgb4: Low resource fixes for Completion queue Pre-allocate buffers to deallocate completion queue, so that completion queue is deallocated during RDMA termination when system is running out of memory. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index b0b9557..812ab72 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -33,19 +33,15 @@ #include "iw_cxgb4.h" static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, - struct c4iw_dev_ucontext *uctx) + struct c4iw_dev_ucontext *uctx, struct sk_buff *skb) { struct fw_ri_res_wr *res_wr; struct fw_ri_res *res; int wr_len; struct c4iw_wr_wait wr_wait; - struct sk_buff *skb; int ret; wr_len = sizeof *res_wr + sizeof *res; - skb = alloc_skb(wr_len, GFP_KERNEL); - if (!skb) - return -ENOMEM; set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); @@ -863,7 +859,9 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq) ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context) : NULL; destroy_cq(&chp->rhp->rdev, &chp->cq, - ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx); + ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx, + chp->destroy_skb); + chp->destroy_skb = NULL; kfree(chp); return 0; } @@ -879,7 +877,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, struct c4iw_cq *chp; struct c4iw_create_cq_resp uresp; struct c4iw_ucontext *ucontext = NULL; - int ret; + int ret, wr_len; size_t memsize, hwentries; struct c4iw_mm_entry *mm, *mm2; @@ -896,6 +894,13 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (!chp) return ERR_PTR(-ENOMEM); + wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res); + chp->destroy_skb = alloc_skb(wr_len, GFP_KERNEL); + if (!chp->destroy_skb) { + ret = -ENOMEM; + goto err1; + } + if (ib_context) ucontext = to_c4iw_ucontext(ib_context); @@ -936,7 +941,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, ret = create_cq(&rhp->rdev, &chp->cq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); if (ret) - goto err1; + goto err2; chp->rhp = rhp; chp->cq.size--; /* status page */ @@ -947,15 +952,15 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, init_waitqueue_head(&chp->wait); ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid); if (ret) - goto err2; + goto err3; if (ucontext) { mm = kmalloc(sizeof *mm, GFP_KERNEL); if (!mm) - goto err3; + goto err4; mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); if (!mm2) - goto err4; + goto err5; uresp.qid_mask = rhp->rdev.cqmask; uresp.cqid = chp->cq.cqid; @@ -970,7 +975,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp) - sizeof(uresp.reserved)); if (ret) - goto err5; + goto err6; mm->key = uresp.key; mm->addr = virt_to_phys(chp->cq.queue); @@ -986,15 +991,18 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, __func__, chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize, (unsigned long long) chp->cq.dma_addr); return &chp->ibcq; -err5: +err6: kfree(mm2); -err4: +err5: kfree(mm); -err3: +err4: remove_handle(rhp, &rhp->cqidr, chp->cq.cqid); -err2: +err3: destroy_cq(&chp->rhp->rdev, &chp->cq, - ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx); + ucontext ? &ucontext->uctx : &rhp->rdev.uctx, + chp->destroy_skb); +err2: + kfree_skb(chp->destroy_skb); err1: kfree(chp); return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index e94ec0d..c5f5881 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -414,6 +414,7 @@ static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw) struct c4iw_cq { struct ib_cq ibcq; struct c4iw_dev *rhp; + struct sk_buff *destroy_skb; struct t4_cq cq; spinlock_t lock; spinlock_t comp_handler_lock; -- cgit v0.10.2 From 16bd020147abeb37dd32cc6442cee2d32b1c1af0 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 May 2016 15:20:47 +0300 Subject: net/mlx5: Export required core functions to support RSS In order to support RSS QPs, we need to create Ethernet based objects. This is done by create_rq, destroy_rq, create_rqt and destroy_rqt mlx5_core functions. We export these functions. Signed-off-by: Yishai Hadas Signed-off-by: Matan Barak Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index 03a5093..28274a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -85,6 +85,7 @@ int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn) return err; } +EXPORT_SYMBOL(mlx5_core_create_rq); int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen) { @@ -110,6 +111,7 @@ void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn) mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_core_destroy_rq); int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out) { @@ -430,6 +432,7 @@ int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, return err; } +EXPORT_SYMBOL(mlx5_core_create_rqt); int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, int inlen) @@ -455,3 +458,4 @@ void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn) mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_core_destroy_rqt); -- cgit v0.10.2 From 5fd251c8b4c52da0d0916470a67fbb77b972125e Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 May 2016 15:20:48 +0300 Subject: IB/core: Introduce Work Queue object and its verbs Introduce Work Queue object and its create/destroy/modify verbs. QP can be created without internal WQs "packaged" inside it, this QP can be configured to use "external" WQ object as its receive/send queue. WQ is a necessary component for RSS technology since RSS mechanism is supposed to distribute the traffic between multiple Receive Work Queues. WQ associated (many to one) with Completion Queue and it owns WQ properties (PD, WQ size, etc.). WQ has a type, this patch introduces the IB_WQT_RQ (i.e.receive queue), it may be extend to others such as IB_WQT_SQ. (send queue). WQ from type IB_WQT_RQ contains receive work requests. PD is an attribute of a work queue (i.e. send/receive queue), it's used by the hardware for security validation before scattering to a memory region which is pointed by the WQ. For that, an external WQ object needs a PD, letting the hardware makes that validation. When accessing a memory region that is pointed by the WQ its PD is used and not the QP's PD, this behavior is similar to a SRQ and a QP. 
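[ Editor's note: a brief usage sketch of the new verb, assuming the
  caller already owns a PD and a CQ; the queue sizes are illustrative
  only.  Note that the hardware validates memory access against wq->pd,
  per the paragraph above, regardless of which QP later consumes the WQ:

	static struct ib_wq *create_rx_wq(struct ib_pd *pd, struct ib_cq *cq)
	{
		struct ib_wq_init_attr attr = {
			.wq_type = IB_WQT_RQ,
			.max_wr  = 256,		/* illustrative depth */
			.max_sge = 1,		/* illustrative sge count */
			.cq      = cq,
		};

		return ib_create_wq(pd, &attr);	/* ERR_PTR() on failure */
	}

  The WQ returned here starts out in IB_WQS_RESET, per the state rules
  described next. ]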
WQ context is subject to a well-defined state transitions done by the modify_wq verb. When WQ is created its initial state becomes IB_WQS_RESET. >From IB_WQS_RESET it can be modified to itself or to IB_WQS_RDY. >From IB_WQS_RDY it can be modified to itself, to IB_WQS_RESET or to IB_WQS_ERR. >From IB_WQS_ERR it can be modified to IB_WQS_RESET. Note: transition to IB_WQS_ERR might occur implicitly in case there was some HW error. Signed-off-by: Yishai Hadas Signed-off-by: Matan Barak Reviewed-by: Sagi Grimberg Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 1d7d4cf..c096cad 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1554,6 +1554,88 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd) } EXPORT_SYMBOL(ib_dealloc_xrcd); +/** + * ib_create_wq - Creates a WQ associated with the specified protection + * domain. + * @pd: The protection domain associated with the WQ. + * @wq_init_attr: A list of initial attributes required to create the + * WQ. If WQ creation succeeds, then the attributes are updated to + * the actual capabilities of the created WQ. + * + * wq_init_attr->max_wr and wq_init_attr->max_sge determine + * the requested size of the WQ, and set to the actual values allocated + * on return. + * If ib_create_wq() succeeds, then max_wr and max_sge will always be + * at least as large as the requested values. + */ +struct ib_wq *ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *wq_attr) +{ + struct ib_wq *wq; + + if (!pd->device->create_wq) + return ERR_PTR(-ENOSYS); + + wq = pd->device->create_wq(pd, wq_attr, NULL); + if (!IS_ERR(wq)) { + wq->event_handler = wq_attr->event_handler; + wq->wq_context = wq_attr->wq_context; + wq->wq_type = wq_attr->wq_type; + wq->cq = wq_attr->cq; + wq->device = pd->device; + wq->pd = pd; + wq->uobject = NULL; + atomic_inc(&pd->usecnt); + atomic_inc(&wq_attr->cq->usecnt); + atomic_set(&wq->usecnt, 0); + } + return wq; +} +EXPORT_SYMBOL(ib_create_wq); + +/** + * ib_destroy_wq - Destroys the specified WQ. + * @wq: The WQ to destroy. + */ +int ib_destroy_wq(struct ib_wq *wq) +{ + int err; + struct ib_cq *cq = wq->cq; + struct ib_pd *pd = wq->pd; + + if (atomic_read(&wq->usecnt)) + return -EBUSY; + + err = wq->device->destroy_wq(wq); + if (!err) { + atomic_dec(&pd->usecnt); + atomic_dec(&cq->usecnt); + } + return err; +} +EXPORT_SYMBOL(ib_destroy_wq); + +/** + * ib_modify_wq - Modifies the specified WQ. + * @wq: The WQ to modify. + * @wq_attr: On input, specifies the WQ attributes to modify. + * @wq_attr_mask: A bit-mask used to specify which attributes of the WQ + * are being modified. + * On output, the current values of selected WQ attributes are returned. 
+ */ +int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask) +{ + int err; + + if (!wq->device->modify_wq) + return -ENOSYS; + + err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL); + return err; +} +EXPORT_SYMBOL(ib_modify_wq); + struct ib_flow *ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7e440d4..f2d954a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1428,6 +1428,48 @@ struct ib_srq { } ext; }; +enum ib_wq_type { + IB_WQT_RQ +}; + +enum ib_wq_state { + IB_WQS_RESET, + IB_WQS_RDY, + IB_WQS_ERR +}; + +struct ib_wq { + struct ib_device *device; + struct ib_uobject *uobject; + void *wq_context; + void (*event_handler)(struct ib_event *, void *); + struct ib_pd *pd; + struct ib_cq *cq; + u32 wq_num; + enum ib_wq_state state; + enum ib_wq_type wq_type; + atomic_t usecnt; +}; + +struct ib_wq_init_attr { + void *wq_context; + enum ib_wq_type wq_type; + u32 max_wr; + u32 max_sge; + struct ib_cq *cq; + void (*event_handler)(struct ib_event *, void *); +}; + +enum ib_wq_attr_mask { + IB_WQ_STATE = 1 << 0, + IB_WQ_CUR_STATE = 1 << 1, +}; + +struct ib_wq_attr { + enum ib_wq_state wq_state; + enum ib_wq_state curr_wq_state; +}; + struct ib_qp { struct ib_device *device; struct ib_pd *pd; @@ -1921,7 +1963,14 @@ struct ib_device { struct ifla_vf_stats *stats); int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, int type); - + struct ib_wq * (*create_wq)(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata); + int (*destroy_wq)(struct ib_wq *wq); + int (*modify_wq)(struct ib_wq *wq, + struct ib_wq_attr *attr, + u32 wq_attr_mask, + struct ib_udata *udata); struct ib_dma_mapping_ops *dma_ops; struct module *owner; @@ -3167,6 +3216,11 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr); +struct ib_wq *ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr); +int ib_destroy_wq(struct ib_wq *wq); +int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr, + u32 wq_attr_mask); int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size); -- cgit v0.10.2 From f213c05272100f385912372fff678d0af4d7f8ad Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 May 2016 15:20:49 +0300 Subject: IB/uverbs: Add WQ support User space applications which use RSS functionality need to create a work queue object (WQ). The lifetime of such an object is: * Create a WQ * Modify the WQ from reset to init state. * Use the WQ (by downstream patches). * Destroy the WQ. These commands are added to the uverbs API. 
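[ Editor's note: a hedged sketch of the "modify from reset to init"
  step of the lifecycle above (IB_WQS_RESET to IB_WQS_RDY in enum
  terms), expressed with the kernel verbs from the previous patch; the
  uverbs MODIFY_WQ handler is equivalent in effect, marshalling the
  same attributes from user space:

	static int ready_wq(struct ib_wq *wq)
	{
		struct ib_wq_attr attr = {
			.wq_state      = IB_WQS_RDY,
			.curr_wq_state = IB_WQS_RESET,
		};

		return ib_modify_wq(wq, &attr, IB_WQ_STATE | IB_WQ_CUR_STATE);
	}
  ]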
Signed-off-by: Yishai Hadas Signed-off-by: Matan Barak Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 612ccfd..74776c6 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -162,6 +162,10 @@ struct ib_uqp_object { struct ib_uxrcd_object *uxrcd; }; +struct ib_uwq_object { + struct ib_uevent_object uevent; +}; + struct ib_ucq_object { struct ib_uobject uobject; struct ib_uverbs_file *uverbs_file; @@ -181,6 +185,7 @@ extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; extern struct idr ib_uverbs_xrcd_idr; extern struct idr ib_uverbs_rule_idr; +extern struct idr ib_uverbs_wq_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); @@ -199,6 +204,7 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file, void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); @@ -275,5 +281,8 @@ IB_UVERBS_DECLARE_EX_CMD(destroy_flow); IB_UVERBS_DECLARE_EX_CMD(query_device); IB_UVERBS_DECLARE_EX_CMD(create_cq); IB_UVERBS_DECLARE_EX_CMD(create_qp); +IB_UVERBS_DECLARE_EX_CMD(create_wq); +IB_UVERBS_DECLARE_EX_CMD(modify_wq); +IB_UVERBS_DECLARE_EX_CMD(destroy_wq); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 1a8babb..22e6173 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -57,6 +57,7 @@ static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; +static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" }; /* * The ib_uobject locking scheme is as follows: @@ -243,6 +244,16 @@ static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); } +static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0); +} + +static void put_wq_read(struct ib_wq *wq) +{ + put_uobj_read(wq->uobject); +} + static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) { struct ib_uobject *uobj; @@ -326,6 +337,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->qp_list); INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); + INIT_LIST_HEAD(&ucontext->wq_list); INIT_LIST_HEAD(&ucontext->xrcd_list); INIT_LIST_HEAD(&ucontext->rule_list); rcu_read_lock(); @@ -3056,6 +3068,237 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, return 0; } +int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_wq cmd = {}; + struct ib_uverbs_ex_create_wq_resp resp = {}; + struct ib_uwq_object *obj; + int err = 0; + struct ib_cq *cq; + struct ib_pd *pd; + struct ib_wq *wq; + struct 
ib_wq_init_attr wq_init_attr = {}; + size_t required_cmd_sz; + size_t required_resp_len; + + required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge); + required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn); + + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->outlen < required_resp_len) + return -ENOSPC; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (err) + return err; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + obj = kmalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, + &wq_lock_class); + down_write(&obj->uevent.uobject.mutex); + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { + err = -EINVAL; + goto err_uobj; + } + + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + if (!cq) { + err = -EINVAL; + goto err_put_pd; + } + + wq_init_attr.cq = cq; + wq_init_attr.max_sge = cmd.max_sge; + wq_init_attr.max_wr = cmd.max_wr; + wq_init_attr.wq_context = file; + wq_init_attr.wq_type = cmd.wq_type; + wq_init_attr.event_handler = ib_uverbs_wq_event_handler; + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); + wq = pd->device->create_wq(pd, &wq_init_attr, uhw); + if (IS_ERR(wq)) { + err = PTR_ERR(wq); + goto err_put_cq; + } + + wq->uobject = &obj->uevent.uobject; + obj->uevent.uobject.object = wq; + wq->wq_type = wq_init_attr.wq_type; + wq->cq = cq; + wq->pd = pd; + wq->device = pd->device; + wq->wq_context = wq_init_attr.wq_context; + atomic_set(&wq->usecnt, 0); + atomic_inc(&pd->usecnt); + atomic_inc(&cq->usecnt); + wq->uobject = &obj->uevent.uobject; + obj->uevent.uobject.object = wq; + err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); + if (err) + goto destroy_wq; + + memset(&resp, 0, sizeof(resp)); + resp.wq_handle = obj->uevent.uobject.id; + resp.max_sge = wq_init_attr.max_sge; + resp.max_wr = wq_init_attr.max_wr; + resp.wqn = wq->wq_num; + resp.response_length = required_resp_len; + err = ib_copy_to_udata(ucore, + &resp, resp.response_length); + if (err) + goto err_copy; + + put_pd_read(pd); + put_cq_read(cq); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list); + mutex_unlock(&file->mutex); + + obj->uevent.uobject.live = 1; + up_write(&obj->uevent.uobject.mutex); + return 0; + +err_copy: + idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); +destroy_wq: + ib_destroy_wq(wq); +err_put_cq: + put_cq_read(cq); +err_put_pd: + put_pd_read(pd); +err_uobj: + put_uobj_write(&obj->uevent.uobject); + + return err; +} + +int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_destroy_wq cmd = {}; + struct ib_uverbs_ex_destroy_wq_resp resp = {}; + struct ib_wq *wq; + struct ib_uobject *uobj; + struct ib_uwq_object *obj; + size_t required_cmd_sz; + size_t required_resp_len; + int ret; + + required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle); + required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->outlen < required_resp_len) + return -ENOSPC; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = 
ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + resp.response_length = required_resp_len; + uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle, + file->ucontext); + if (!uobj) + return -EINVAL; + + wq = uobj->object; + obj = container_of(uobj, struct ib_uwq_object, uevent.uobject); + ret = ib_destroy_wq(wq); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_wq_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + ib_uverbs_release_uevent(file, &obj->uevent); + resp.events_reported = obj->uevent.events_reported; + put_uobj(uobj); + + ret = ib_copy_to_udata(ucore, &resp, resp.response_length); + if (ret) + return ret; + + return 0; +} + +int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_modify_wq cmd = {}; + struct ib_wq *wq; + struct ib_wq_attr wq_attr = {}; + size_t required_cmd_sz; + int ret; + + required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state); + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (!cmd.attr_mask) + return -EINVAL; + + if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE)) + return -EINVAL; + + wq = idr_read_wq(cmd.wq_handle, file->ucontext); + if (!wq) + return -EINVAL; + + wq_attr.curr_wq_state = cmd.curr_wq_state; + wq_attr.wq_state = cmd.wq_state; + ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); + put_wq_read(wq); + return ret; +} + int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 31f422a..91cb36f 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -76,6 +76,7 @@ DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); DEFINE_IDR(ib_uverbs_xrcd_idr); DEFINE_IDR(ib_uverbs_rule_idr); +DEFINE_IDR(ib_uverbs_wq_idr); static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -130,6 +131,9 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp, + [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq, + [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq, + [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, }; static void ib_uverbs_add_one(struct ib_device *device); @@ -265,6 +269,17 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uqp); } + list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) { + struct ib_wq *wq = uobj->object; + struct ib_uwq_object *uwq = + container_of(uobj, struct ib_uwq_object, uevent.uobject); + + idr_remove_uobj(&ib_uverbs_wq_idr, uobj); + ib_destroy_wq(wq); + ib_uverbs_release_uevent(file, &uwq->uevent); + kfree(uwq); + } + list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { struct ib_srq *srq = uobj->object; struct ib_uevent_object *uevent = @@ -568,6 +583,16 @@ 
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) &uobj->events_reported); } +void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr) +{ + struct ib_uevent_object *uobj = container_of(event->element.wq->uobject, + struct ib_uevent_object, uobject); + + ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, + event->event, &uobj->event_list, + &uobj->events_reported); +} + void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index f2d954a..0c1956a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -562,6 +562,7 @@ enum ib_event_type { IB_EVENT_QP_LAST_WQE_REACHED, IB_EVENT_CLIENT_REREGISTER, IB_EVENT_GID_CHANGE, + IB_EVENT_WQ_FATAL, }; const char *__attribute_const__ ib_event_msg(enum ib_event_type event); @@ -572,6 +573,7 @@ struct ib_event { struct ib_cq *cq; struct ib_qp *qp; struct ib_srq *srq; + struct ib_wq *wq; u8 port_num; } element; enum ib_event_type event; @@ -1323,6 +1325,7 @@ struct ib_ucontext { struct list_head ah_list; struct list_head xrcd_list; struct list_head rule_list; + struct list_head wq_list; int closing; struct pid *tgid; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index b6543d7..c9470e5 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -95,6 +95,9 @@ enum { IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP, IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, IB_USER_VERBS_EX_CMD_DESTROY_FLOW, + IB_USER_VERBS_EX_CMD_CREATE_WQ, + IB_USER_VERBS_EX_CMD_MODIFY_WQ, + IB_USER_VERBS_EX_CMD_DESTROY_WQ, }; /* @@ -946,4 +949,42 @@ struct ib_uverbs_destroy_srq_resp { __u32 events_reported; }; +struct ib_uverbs_ex_create_wq { + __u32 comp_mask; + __u32 wq_type; + __u64 user_handle; + __u32 pd_handle; + __u32 cq_handle; + __u32 max_wr; + __u32 max_sge; +}; + +struct ib_uverbs_ex_create_wq_resp { + __u32 comp_mask; + __u32 response_length; + __u32 wq_handle; + __u32 max_wr; + __u32 max_sge; + __u32 wqn; +}; + +struct ib_uverbs_ex_destroy_wq { + __u32 comp_mask; + __u32 wq_handle; +}; + +struct ib_uverbs_ex_destroy_wq_resp { + __u32 comp_mask; + __u32 response_length; + __u32 events_reported; + __u32 reserved; +}; + +struct ib_uverbs_ex_modify_wq { + __u32 attr_mask; + __u32 wq_handle; + __u32 wq_state; + __u32 curr_wq_state; +}; + #endif /* IB_USER_VERBS_H */ -- cgit v0.10.2 From 79b20a6c3014c789253fcb1ac4f09f8bdee2e94b Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 May 2016 15:20:50 +0300 Subject: IB/mlx5: Add receive Work Queue verbs A QP can be created without internal WQs "packaged" inside it; such a QP can be configured to use an "external" WQ object as its receive/send queue. A WQ is a necessary component for RSS technology, since the RSS mechanism is supposed to distribute traffic between multiple Receive Work Queues. Receive WQs are implemented by RQs. Implement the WQ creation, modification and destruction verbs. 
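For illustration only, a minimal kernel-side consumer of these verbs might look like the sketch below. This is not part of the patch; the helper name and the capacity values are assumptions, and it relies on the core ib_create_wq() verb introduced earlier in this series.

	/* Sketch: create a receive WQ; receive WQs are backed by RQs. */
	static struct ib_wq *example_create_rq_wq(struct ib_pd *pd,
						  struct ib_cq *cq)
	{
		struct ib_wq_init_attr attr = {
			.wq_type = IB_WQT_RQ,	/* the only type implemented here */
			.max_wr	 = 128,		/* illustrative capacities */
			.max_sge = 1,
			.cq	 = cq,
		};

		/* returns ERR_PTR() on failure, like the other create verbs */
		return ib_create_wq(pd, &attr);
	}

A WQ created this way starts in the RESET state, is later transitioned with ib_modify_wq() and released with ib_destroy_wq().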
Signed-off-by: Yishai Hadas Signed-off-by: Matan Barak Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b48ad85..b3589b7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2450,9 +2450,15 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) IB_LINK_LAYER_ETHERNET) { dev->ib_dev.create_flow = mlx5_ib_create_flow; dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; + dev->ib_dev.create_wq = mlx5_ib_create_wq; + dev->ib_dev.modify_wq = mlx5_ib_modify_wq; + dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ); } err = init_node_data(dev); if (err) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index c4a9825..62d4e13 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -217,12 +217,36 @@ struct mlx5_ib_wq { void *qend; }; +struct mlx5_ib_rwq { + struct ib_wq ibwq; + u32 rqn; + u32 rq_num_pas; + u32 log_rq_stride; + u32 log_rq_size; + u32 rq_page_offset; + u32 log_page_size; + struct ib_umem *umem; + size_t buf_size; + unsigned int page_shift; + int create_type; + struct mlx5_db db; + u32 user_index; + u32 wqe_count; + u32 wqe_shift; + int wq_sig; +}; + enum { MLX5_QP_USER, MLX5_QP_KERNEL, MLX5_QP_EMPTY }; +enum { + MLX5_WQ_USER, + MLX5_WQ_KERNEL +}; + /* * Connect-IB can trigger up to four concurrent pagefaults * per-QP. @@ -628,6 +652,11 @@ static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp) return container_of(ibqp, struct mlx5_ib_qp, ibqp); } +static inline struct mlx5_ib_rwq *to_mrwq(struct ib_wq *ibwq) +{ + return container_of(ibwq, struct mlx5_ib_rwq, ibwq); +} + static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq) { return container_of(msrq, struct mlx5_ib_srq, msrq); @@ -762,6 +791,12 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); +struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata); +int mlx5_ib_destroy_wq(struct ib_wq *wq); +int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask, struct ib_udata *udata); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING extern struct workqueue_struct *mlx5_ib_page_fault_wq; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index ce43422..43d45e3 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -649,6 +649,71 @@ err_umem: return err; } +static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq) +{ + struct mlx5_ib_ucontext *context; + + context = to_mucontext(pd->uobject->context); + mlx5_ib_db_unmap_user(context, &rwq->db); + if (rwq->umem) + ib_umem_release(rwq->umem); +} + +static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_rwq *rwq, + struct mlx5_ib_create_wq *ucmd) +{ + struct mlx5_ib_ucontext *context; + int page_shift = 0; + int npages; + u32 offset = 0; + int ncont = 0; + int err; + + if (!ucmd->buf_addr) + return -EINVAL; + + 
context = to_mucontext(pd->uobject->context); + rwq->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr, + rwq->buf_size, 0, 0); + if (IS_ERR(rwq->umem)) { + mlx5_ib_dbg(dev, "umem_get failed\n"); + err = PTR_ERR(rwq->umem); + return err; + } + + mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, &npages, &page_shift, + &ncont, NULL); + err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, + &rwq->rq_page_offset); + if (err) { + mlx5_ib_warn(dev, "bad offset\n"); + goto err_umem; + } + + rwq->rq_num_pas = ncont; + rwq->page_shift = page_shift; + rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE); + + mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n", + (unsigned long long)ucmd->buf_addr, rwq->buf_size, + npages, page_shift, ncont, offset); + + err = mlx5_ib_db_map_user(context, ucmd->db_addr, &rwq->db); + if (err) { + mlx5_ib_dbg(dev, "map failed\n"); + goto err_umem; + } + + rwq->create_type = MLX5_WQ_USER; + return 0; + +err_umem: + ib_umem_release(rwq->umem); + return err; +} + static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_qp *qp, struct ib_udata *udata, struct ib_qp_init_attr *attr, @@ -4163,3 +4228,244 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) return 0; } + +static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, + struct ib_wq_init_attr *init_attr) +{ + struct mlx5_ib_dev *dev; + __be64 *rq_pas0; + void *in; + void *rqc; + void *wq; + int inlen; + int err; + + dev = to_mdev(pd->device); + + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + MLX5_SET(rqc, rqc, mem_rq_type, + MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE); + MLX5_SET(rqc, rqc, user_index, rwq->user_index); + MLX5_SET(rqc, rqc, cqn, to_mcq(init_attr->cq)->mcq.cqn); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, flush_in_error_en, 1); + wq = MLX5_ADDR_OF(rqc, rqc, wq); + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); + MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride); + MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size); + MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn); + MLX5_SET(wq, wq, page_offset, rwq->rq_page_offset); + MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size); + MLX5_SET(wq, wq, wq_signature, rwq->wq_sig); + MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma); + rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); + mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); + err = mlx5_core_create_rq(dev->mdev, in, inlen, &rwq->rqn); + kvfree(in); + return err; +} + +static int set_user_rq_size(struct mlx5_ib_dev *dev, + struct ib_wq_init_attr *wq_init_attr, + struct mlx5_ib_create_wq *ucmd, + struct mlx5_ib_rwq *rwq) +{ + /* Sanity check RQ size before proceeding */ + if (wq_init_attr->max_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_wq_sz))) + return -EINVAL; + + if (!ucmd->rq_wqe_count) + return -EINVAL; + + rwq->wqe_count = ucmd->rq_wqe_count; + rwq->wqe_shift = ucmd->rq_wqe_shift; + rwq->buf_size = (rwq->wqe_count << rwq->wqe_shift); + rwq->log_rq_stride = rwq->wqe_shift; + rwq->log_rq_size = ilog2(rwq->wqe_count); + return 0; +} + +static int prepare_user_rq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata, + struct mlx5_ib_rwq *rwq) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_create_wq ucmd = {}; + 
int err; + size_t required_cmd_sz; + + required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved); + if (udata->inlen < required_cmd_sz) { + mlx5_ib_dbg(dev, "invalid inlen\n"); + return -EINVAL; + } + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) { + mlx5_ib_dbg(dev, "inlen is not supported\n"); + return -EOPNOTSUPP; + } + + if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) { + mlx5_ib_dbg(dev, "copy failed\n"); + return -EFAULT; + } + + if (ucmd.comp_mask) { + mlx5_ib_dbg(dev, "invalid comp mask\n"); + return -EOPNOTSUPP; + } + + if (ucmd.reserved) { + mlx5_ib_dbg(dev, "invalid reserved\n"); + return -EOPNOTSUPP; + } + + err = set_user_rq_size(dev, init_attr, &ucmd, rwq); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + return err; + } + + err = create_user_rq(dev, pd, rwq, &ucmd); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + return err; + } + + rwq->user_index = ucmd.user_index; + return 0; +} + +struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev; + struct mlx5_ib_rwq *rwq; + struct mlx5_ib_create_wq_resp resp = {}; + size_t min_resp_len; + int err; + + if (!udata) + return ERR_PTR(-ENOSYS); + + min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + if (udata->outlen && udata->outlen < min_resp_len) + return ERR_PTR(-EINVAL); + + dev = to_mdev(pd->device); + switch (init_attr->wq_type) { + case IB_WQT_RQ: + rwq = kzalloc(sizeof(*rwq), GFP_KERNEL); + if (!rwq) + return ERR_PTR(-ENOMEM); + err = prepare_user_rq(pd, init_attr, udata, rwq); + if (err) + goto err; + err = create_rq(rwq, pd, init_attr); + if (err) + goto err_user_rq; + break; + default: + mlx5_ib_dbg(dev, "unsupported wq type %d\n", + init_attr->wq_type); + return ERR_PTR(-EINVAL); + } + + rwq->ibwq.wq_num = rwq->rqn; + rwq->ibwq.state = IB_WQS_RESET; + if (udata->outlen) { + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + err = ib_copy_to_udata(udata, &resp, resp.response_length); + if (err) + goto err_copy; + } + + return &rwq->ibwq; + +err_copy: + mlx5_core_destroy_rq(dev->mdev, rwq->rqn); +err_user_rq: + destroy_user_rq(pd, rwq); +err: + kfree(rwq); + return ERR_PTR(err); +} + +int mlx5_ib_destroy_wq(struct ib_wq *wq) +{ + struct mlx5_ib_dev *dev = to_mdev(wq->device); + struct mlx5_ib_rwq *rwq = to_mrwq(wq); + + mlx5_core_destroy_rq(dev->mdev, rwq->rqn); + destroy_user_rq(wq->pd, rwq); + kfree(rwq); + + return 0; +} + +int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask, struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(wq->device); + struct mlx5_ib_rwq *rwq = to_mrwq(wq); + struct mlx5_ib_modify_wq ucmd = {}; + size_t required_cmd_sz; + int curr_wq_state; + int wq_state; + int inlen; + int err; + void *rqc; + void *in; + + required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved); + if (udata->inlen < required_cmd_sz) + return -EINVAL; + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) + return -EOPNOTSUPP; + + if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) + return -EFAULT; + + if (ucmd.comp_mask || ucmd.reserved) + return -EOPNOTSUPP; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + 
curr_wq_state = (wq_attr_mask & IB_WQ_CUR_STATE) ? + wq_attr->curr_wq_state : wq->state; + wq_state = (wq_attr_mask & IB_WQ_STATE) ? + wq_attr->wq_state : curr_wq_state; + if (curr_wq_state == IB_WQS_ERR) + curr_wq_state = MLX5_RQC_STATE_ERR; + if (wq_state == IB_WQS_ERR) + wq_state = MLX5_RQC_STATE_ERR; + MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state); + MLX5_SET(rqc, rqc, state, wq_state); + + err = mlx5_core_modify_rq(dev->mdev, rwq->rqn, in, inlen); + kvfree(in); + if (!err) + rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state; + + return err; +} diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index 61bc308..3e66f93 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -46,6 +46,10 @@ enum { MLX5_SRQ_FLAG_SIGNATURE = 1 << 0, }; +enum { + MLX5_WQ_FLAG_SIGNATURE = 1 << 0, +}; + /* Increment this value if any changes that break userspace ABI * compatibility are made. @@ -159,6 +163,27 @@ struct mlx5_ib_alloc_mw { __u16 reserved2; }; +struct mlx5_ib_create_wq { + __u64 buf_addr; + __u64 db_addr; + __u32 rq_wqe_count; + __u32 rq_wqe_shift; + __u32 user_index; + __u32 flags; + __u32 comp_mask; + __u32 reserved; +}; + +struct mlx5_ib_create_wq_resp { + __u32 response_length; + __u32 reserved; +}; + +struct mlx5_ib_modify_wq { + __u32 comp_mask; + __u32 reserved; +}; + static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, struct mlx5_ib_create_qp *ucmd, int inlen, -- cgit v0.10.2 From 6d39786bf116e476d75eca91f7cfa22586a32e5f Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 May 2016 15:20:51 +0300 Subject: IB/core: Introduce Receive Work Queue indirection table Introduce the Receive Work Queue (WQ) indirection table. This object can be used to spread incoming traffic to different receive Work Queues. A Receive WQ indirection table points to a variable number of WQs. This table is given to a QP in downstream patches. Signed-off-by: Yishai Hadas Signed-off-by: Matan Barak Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index c096cad..6b548d7 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1636,6 +1636,68 @@ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, } EXPORT_SYMBOL(ib_modify_wq); +/* + * ib_create_rwq_ind_table - Creates an RQ Indirection Table. + * @device: The device on which to create the rwq indirection table. + * @ib_rwq_ind_table_init_attr: A list of initial attributes required to + * create the Indirection Table. + * + * Note: The lifetime of ib_rwq_ind_table_init_attr->ind_tbl must not be + * shorter than that of the created ib_rwq_ind_table object; the caller is + * responsible for its memory allocation/free. 
+ */ +struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr) +{ + struct ib_rwq_ind_table *rwq_ind_table; + int i; + u32 table_size; + + if (!device->create_rwq_ind_table) + return ERR_PTR(-ENOSYS); + + table_size = (1 << init_attr->log_ind_tbl_size); + rwq_ind_table = device->create_rwq_ind_table(device, + init_attr, NULL); + if (IS_ERR(rwq_ind_table)) + return rwq_ind_table; + + rwq_ind_table->ind_tbl = init_attr->ind_tbl; + rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size; + rwq_ind_table->device = device; + rwq_ind_table->uobject = NULL; + atomic_set(&rwq_ind_table->usecnt, 0); + + for (i = 0; i < table_size; i++) + atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt); + + return rwq_ind_table; +} +EXPORT_SYMBOL(ib_create_rwq_ind_table); + +/* + * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table. + * @rwq_ind_table: The Indirection Table to destroy. +*/ +int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table) +{ + int err, i; + u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size); + struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl; + + if (atomic_read(&rwq_ind_table->usecnt)) + return -EBUSY; + + err = rwq_ind_table->device->destroy_rwq_ind_table(rwq_ind_table); + if (!err) { + for (i = 0; i < table_size; i++) + atomic_dec(&ind_tbl[i]->usecnt); + } + + return err; +} +EXPORT_SYMBOL(ib_destroy_rwq_ind_table); + struct ib_flow *ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0c1956a..fa2e018 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1473,6 +1473,21 @@ struct ib_wq_attr { enum ib_wq_state curr_wq_state; }; +struct ib_rwq_ind_table { + struct ib_device *device; + struct ib_uobject *uobject; + atomic_t usecnt; + u32 ind_tbl_num; + u32 log_ind_tbl_size; + struct ib_wq **ind_tbl; +}; + +struct ib_rwq_ind_table_init_attr { + u32 log_ind_tbl_size; + /* Each entry is a pointer to Receive Work Queue */ + struct ib_wq **ind_tbl; +}; + struct ib_qp { struct ib_device *device; struct ib_pd *pd; @@ -1974,6 +1989,10 @@ struct ib_device { struct ib_wq_attr *attr, u32 wq_attr_mask, struct ib_udata *udata); + struct ib_rwq_ind_table * (*create_rwq_ind_table)(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); + int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table); struct ib_dma_mapping_ops *dma_ops; struct module *owner; @@ -3224,6 +3243,10 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd, int ib_destroy_wq(struct ib_wq *wq); int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask); +struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr* + wq_ind_table_init_attr); +int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size); -- cgit v0.10.2 From de019a94049d579608a5511f8c50652faf125182 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 May 2016 15:20:52 +0300 Subject: IB/uverbs: Introduce RWQ Indirection table User applications that want to spread traffic over several WQs need to create an indirection table from already created WQs. Add a uverbs API to create and destroy this table. 
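As a usage sketch (illustrative only; the helper name and table size are assumptions), a consumer builds the table from WQs it has already created:

	/* Sketch: an 8-entry RWQ indirection table from existing WQs. */
	static struct ib_rwq_ind_table *
	example_build_ind_tbl(struct ib_device *dev, struct ib_wq **wqs)
	{
		struct ib_rwq_ind_table_init_attr attr = {
			.log_ind_tbl_size = 3,	/* 1 << 3 == 8 entries */
			.ind_tbl	  = wqs,	/* must outlive the table */
		};

		return ib_create_rwq_ind_table(dev, &attr);
	}

Note that log_ind_tbl_size is bounded by IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE on the uverbs path below.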
Signed-off-by: Yishai Hadas Signed-off-by: Matan Barak Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 74776c6..6c22923 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -186,6 +186,7 @@ extern struct idr ib_uverbs_srq_idr; extern struct idr ib_uverbs_xrcd_idr; extern struct idr ib_uverbs_rule_idr; extern struct idr ib_uverbs_wq_idr; +extern struct idr ib_uverbs_rwq_ind_tbl_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); @@ -284,5 +285,7 @@ IB_UVERBS_DECLARE_EX_CMD(create_qp); IB_UVERBS_DECLARE_EX_CMD(create_wq); IB_UVERBS_DECLARE_EX_CMD(modify_wq); IB_UVERBS_DECLARE_EX_CMD(destroy_wq); +IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); +IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 22e6173..327a56c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -58,6 +58,7 @@ static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" }; +static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" }; /* * The ib_uobject locking scheme is as follows: @@ -338,6 +339,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); INIT_LIST_HEAD(&ucontext->wq_list); + INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list); INIT_LIST_HEAD(&ucontext->xrcd_list); INIT_LIST_HEAD(&ucontext->rule_list); rcu_read_lock(); @@ -3299,6 +3301,214 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, return ret; } +int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_rwq_ind_table cmd = {}; + struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {}; + struct ib_uobject *uobj; + int err = 0; + struct ib_rwq_ind_table_init_attr init_attr = {}; + struct ib_rwq_ind_table *rwq_ind_tbl; + struct ib_wq **wqs = NULL; + u32 *wqs_handles = NULL; + struct ib_wq *wq = NULL; + int i, j, num_read_wqs; + u32 num_wq_handles; + u32 expected_in_size; + size_t required_cmd_sz_header; + size_t required_resp_len; + + required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size); + required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num); + + if (ucore->inlen < required_cmd_sz_header) + return -EINVAL; + + if (ucore->outlen < required_resp_len) + return -ENOSPC; + + err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header); + if (err) + return err; + + ucore->inbuf += required_cmd_sz_header; + ucore->inlen -= required_cmd_sz_header; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + if (cmd.log_ind_tbl_size > IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE) + return -EINVAL; + + num_wq_handles = 1 << cmd.log_ind_tbl_size; + expected_in_size = num_wq_handles * sizeof(__u32); + if (num_wq_handles == 1) + /* input size for wq handles is u64 aligned */ + expected_in_size += sizeof(__u32); + + if (ucore->inlen < expected_in_size) + return -EINVAL; + + if (ucore->inlen > expected_in_size && + !ib_is_udata_cleared(ucore, expected_in_size, + 
ucore->inlen - expected_in_size)) + return -EOPNOTSUPP; + + wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles), + GFP_KERNEL); + if (!wqs_handles) + return -ENOMEM; + + err = ib_copy_from_udata(wqs_handles, ucore, + num_wq_handles * sizeof(__u32)); + if (err) + goto err_free; + + wqs = kcalloc(num_wq_handles, sizeof(*wqs), GFP_KERNEL); + if (!wqs) { + err = -ENOMEM; + goto err_free; + } + + for (num_read_wqs = 0; num_read_wqs < num_wq_handles; + num_read_wqs++) { + wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext); + if (!wq) { + err = -EINVAL; + goto put_wqs; + } + + wqs[num_read_wqs] = wq; + } + + uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); + if (!uobj) { + err = -ENOMEM; + goto put_wqs; + } + + init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class); + down_write(&uobj->mutex); + init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; + init_attr.ind_tbl = wqs; + rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw); + + if (IS_ERR(rwq_ind_tbl)) { + err = PTR_ERR(rwq_ind_tbl); + goto err_uobj; + } + + rwq_ind_tbl->ind_tbl = wqs; + rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size; + rwq_ind_tbl->uobject = uobj; + uobj->object = rwq_ind_tbl; + rwq_ind_tbl->device = ib_dev; + atomic_set(&rwq_ind_tbl->usecnt, 0); + + for (i = 0; i < num_wq_handles; i++) + atomic_inc(&wqs[i]->usecnt); + + err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + if (err) + goto destroy_ind_tbl; + + resp.ind_tbl_handle = uobj->id; + resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num; + resp.response_length = required_resp_len; + + err = ib_copy_to_udata(ucore, + &resp, resp.response_length); + if (err) + goto err_copy; + + kfree(wqs_handles); + + for (j = 0; j < num_read_wqs; j++) + put_wq_read(wqs[j]); + + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + return 0; + +err_copy: + idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); +destroy_ind_tbl: + ib_destroy_rwq_ind_table(rwq_ind_tbl); +err_uobj: + put_uobj_write(uobj); +put_wqs: + for (j = 0; j < num_read_wqs; j++) + put_wq_read(wqs[j]); +err_free: + kfree(wqs_handles); + kfree(wqs); + return err; +} + +int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; + struct ib_rwq_ind_table *rwq_ind_tbl; + struct ib_uobject *uobj; + int ret; + struct ib_wq **ind_tbl; + size_t required_cmd_sz; + + required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle); + + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle, + file->ucontext); + if (!uobj) + return -EINVAL; + rwq_ind_tbl = uobj->object; + ind_tbl = rwq_ind_tbl->ind_tbl; + + ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + kfree(ind_tbl); + return ret; +} + int ib_uverbs_ex_create_flow(struct 
ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 91cb36f..426e0ac 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -77,6 +77,7 @@ DEFINE_IDR(ib_uverbs_srq_idr); DEFINE_IDR(ib_uverbs_xrcd_idr); DEFINE_IDR(ib_uverbs_rule_idr); DEFINE_IDR(ib_uverbs_wq_idr); +DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr); static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -134,6 +135,8 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq, [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq, [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, + [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, + [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, }; static void ib_uverbs_add_one(struct ib_device *device); @@ -269,6 +272,16 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uqp); } + list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) { + struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object; + struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; + + idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + ib_destroy_rwq_ind_table(rwq_ind_tbl); + kfree(ind_tbl); + kfree(uobj); + } + list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) { struct ib_wq *wq = uobj->object; struct ib_uwq_object *uwq = diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index fa2e018..e305c9a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1326,6 +1326,7 @@ struct ib_ucontext { struct list_head xrcd_list; struct list_head rule_list; struct list_head wq_list; + struct list_head rwq_ind_tbl_list; int closing; struct pid *tgid; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index c9470e5..2cf7c95 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -98,6 +98,8 @@ enum { IB_USER_VERBS_EX_CMD_CREATE_WQ, IB_USER_VERBS_EX_CMD_MODIFY_WQ, IB_USER_VERBS_EX_CMD_DESTROY_WQ, + IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, + IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL }; /* @@ -987,4 +989,28 @@ struct ib_uverbs_ex_modify_wq { __u32 curr_wq_state; }; +/* Limit chosen to prevent excessive memory allocation, not the max expected table size */ +#define IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE 0x0d +struct ib_uverbs_ex_create_rwq_ind_table { + __u32 comp_mask; + __u32 log_ind_tbl_size; + /* Following are the wq handles according to log_ind_tbl_size + * wq_handle1 + * wq_handle2 + */ + __u32 wq_handles[0]; +}; + +struct ib_uverbs_ex_create_rwq_ind_table_resp { + __u32 comp_mask; + __u32 response_length; + __u32 ind_tbl_handle; + __u32 ind_tbl_num; +}; + +struct ib_uverbs_ex_destroy_rwq_ind_table { + __u32 comp_mask; + __u32 ind_tbl_handle; +}; + #endif /* IB_USER_VERBS_H */ -- cgit v0.10.2 From c5f9092936fe88b39e2eddccedeb1c51883fcd31 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 May 2016 15:20:53 +0300 Subject: IB/mlx5: Add Receive Work Queue Indirection table operations Some mlx5-based hardware supports an RQ table object. This RQ table points to a few RQ objects. We implement the receive work queue indirection table API (create and destroy) using this hardware object. 
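To make the firmware command sizing concrete, here is a sketch of the arithmetic used below (structure names from mlx5_ifc; the helper itself is illustrative, not part of the patch):

	/* Sketch: input length of CREATE_RQT for a given table size. */
	static int example_rqt_inlen(int log_ind_tbl_size)
	{
		int sz = 1 << log_ind_tbl_size;	/* number of RQ entries */

		/* fixed create_rqt_in layout plus one u32 rq_num per entry */
		return MLX5_ST_SZ_BYTES(create_rqt_in) + sz * sizeof(u32);
	}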
Signed-off-by: Yishai Hadas Signed-off-by: Matan Barak Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b3589b7..82be194 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2453,12 +2453,16 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.create_wq = mlx5_ib_create_wq; dev->ib_dev.modify_wq = mlx5_ib_modify_wq; dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; + dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; + dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ); + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); } err = init_node_data(dev); if (err) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 62d4e13..cd3d620 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -247,6 +247,11 @@ enum { MLX5_WQ_KERNEL }; +struct mlx5_ib_rwq_ind_table { + struct ib_rwq_ind_table ib_rwq_ind_tbl; + u32 rqtn; +}; + /* * Connect-IB can trigger up to four concurrent pagefaults * per-QP. @@ -657,6 +662,11 @@ static inline struct mlx5_ib_rwq *to_mrwq(struct ib_wq *ibwq) return container_of(ibwq, struct mlx5_ib_rwq, ibwq); } +static inline struct mlx5_ib_rwq_ind_table *to_mrwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) +{ + return container_of(ib_rwq_ind_tbl, struct mlx5_ib_rwq_ind_table, ib_rwq_ind_tbl); +} + static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq) { return container_of(msrq, struct mlx5_ib_srq, msrq); @@ -797,6 +807,10 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, int mlx5_ib_destroy_wq(struct ib_wq *wq); int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); +struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); +int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING extern struct workqueue_struct *mlx5_ib_page_fault_wq; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 43d45e3..1c0e332 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4415,6 +4415,84 @@ int mlx5_ib_destroy_wq(struct ib_wq *wq) return 0; } +struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_ib_rwq_ind_table *rwq_ind_tbl; + int sz = 1 << init_attr->log_ind_tbl_size; + struct mlx5_ib_create_rwq_ind_tbl_resp resp = {}; + size_t min_resp_len; + int inlen; + int err; + int i; + u32 *in; + void *rqtc; + + if (udata->inlen > 0 && + !ib_is_udata_cleared(udata, 0, + udata->inlen)) + return ERR_PTR(-EOPNOTSUPP); + + min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + if (udata->outlen && udata->outlen < min_resp_len) + return ERR_PTR(-EINVAL); + + rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL); + if 
(!rwq_ind_tbl) + return ERR_PTR(-ENOMEM); + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; + in = mlx5_vzalloc(inlen); + if (!in) { + err = -ENOMEM; + goto err; + } + + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); + MLX5_SET(rqtc, rqtc, rqt_max_size, sz); + + for (i = 0; i < sz; i++) + MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num); + + err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn); + kvfree(in); + + if (err) + goto err; + + rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn; + if (udata->outlen) { + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + err = ib_copy_to_udata(udata, &resp, resp.response_length); + if (err) + goto err_copy; + } + + return &rwq_ind_tbl->ib_rwq_ind_tbl; + +err_copy: + mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn); +err: + kfree(rwq_ind_tbl); + return ERR_PTR(err); +} + +int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) +{ + struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl); + struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device); + + mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn); + + kfree(rwq_ind_tbl); + return 0; +} + int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata) { diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index 3e66f93..0f87955 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -179,6 +179,11 @@ struct mlx5_ib_create_wq_resp { __u32 reserved; }; +struct mlx5_ib_create_rwq_ind_tbl_resp { + __u32 response_length; + __u32 reserved; +}; + struct mlx5_ib_modify_wq { __u32 comp_mask; __u32 reserved; -- cgit v0.10.2 From a9017e232ff9eaabeb50eb89841d99310cfc98dc Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 May 2016 15:20:54 +0300 Subject: IB/core: Extend create QP to get indirection table Extend create QP to accept a Receive Work Queue (WQ) indirection table. A QP can be created with an external Receive WQ indirection table; in that case it is ready to receive immediately. Signed-off-by: Yishai Hadas Signed-off-by: Matan Barak Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 6b548d7..6916d5c 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -754,6 +754,12 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, struct ib_qp *qp; int ret; + if (qp_init_attr->rwq_ind_tbl && + (qp_init_attr->recv_cq || + qp_init_attr->srq || qp_init_attr->cap.max_recv_wr || + qp_init_attr->cap.max_recv_sge)) + return ERR_PTR(-EINVAL); + /* * If the callers is using the RDMA API calculate the resources * needed for the RDMA READ/WRITE operations. 
@@ -771,6 +777,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->real_qp = qp; qp->uobject = NULL; qp->qp_type = qp_init_attr->qp_type; + qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl; atomic_set(&qp->usecnt, 0); qp->mrs_used = 0; @@ -788,7 +795,8 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->srq = NULL; } else { qp->recv_cq = qp_init_attr->recv_cq; - atomic_inc(&qp_init_attr->recv_cq->usecnt); + if (qp_init_attr->recv_cq) + atomic_inc(&qp_init_attr->recv_cq->usecnt); qp->srq = qp_init_attr->srq; if (qp->srq) atomic_inc(&qp_init_attr->srq->usecnt); @@ -799,7 +807,10 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->xrcd = NULL; atomic_inc(&pd->usecnt); - atomic_inc(&qp_init_attr->send_cq->usecnt); + if (qp_init_attr->send_cq) + atomic_inc(&qp_init_attr->send_cq->usecnt); + if (qp_init_attr->rwq_ind_tbl) + atomic_inc(&qp->rwq_ind_tbl->usecnt); if (qp_init_attr->cap.max_rdma_ctxs) { ret = rdma_rw_init_mrs(qp, qp_init_attr); @@ -1279,6 +1290,7 @@ int ib_destroy_qp(struct ib_qp *qp) struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; + struct ib_rwq_ind_table *ind_tbl; int ret; WARN_ON_ONCE(qp->mrs_used > 0); @@ -1293,6 +1305,7 @@ int ib_destroy_qp(struct ib_qp *qp) scq = qp->send_cq; rcq = qp->recv_cq; srq = qp->srq; + ind_tbl = qp->rwq_ind_tbl; if (!qp->uobject) rdma_rw_cleanup_mrs(qp); @@ -1307,6 +1320,8 @@ int ib_destroy_qp(struct ib_qp *qp) atomic_dec(&rcq->usecnt); if (srq) atomic_dec(&srq->usecnt); + if (ind_tbl) + atomic_dec(&ind_tbl->usecnt); } return ret; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e305c9a..9b2fafe 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1017,6 +1017,7 @@ struct ib_qp_init_attr { * Only needed for special QP types, or when using the RW API. */ u8 port_num; + struct ib_rwq_ind_table *rwq_ind_tbl; }; struct ib_qp_open_attr { @@ -1511,6 +1512,7 @@ struct ib_qp { void *qp_context; u32 qp_num; enum ib_qp_type qp_type; + struct ib_rwq_ind_table *rwq_ind_tbl; }; struct ib_mr { -- cgit v0.10.2 From c70285f880e88cb4f73effb722065a182ba5936f Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 May 2016 15:20:55 +0300 Subject: IB/uverbs: Extend create QP to get RWQ indirection table User applications that want to spread incoming traffic between several WQs should create a QP which contains an indirection table. When such a QP is created, the other receive-side parameters are not valid and must not be given. Its send side is optional, and is assumed active when the max_send_wr capability value is non-zero. Extend create QP to work accordingly. 
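For illustration, the init-attr shape such a QP takes on the kernel verbs layer (a sketch; the CQ/PD variables and the capacity value are assumptions):

	/* Sketch: QP whose receive side is an RWQ indirection table. */
	struct ib_qp_init_attr attr = {
		.qp_type	 = IB_QPT_RAW_PACKET,
		.send_cq	 = scq,		/* send side is optional */
		.cap.max_send_wr = 64,		/* non-zero implies an SQ */
		.rwq_ind_tbl	 = ind_tbl,	/* excludes recv_cq/srq/max_recv_* */
	};
	struct ib_qp *qp = ib_create_qp(pd, &attr);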
Signed-off-by: Yishai Hadas Signed-off-by: Matan Barak Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 327a56c..65ab209 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -255,6 +255,17 @@ static void put_wq_read(struct ib_wq *wq) put_uobj_read(wq->uobject); } +static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle, + struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0); +} + +static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table) +{ + put_uobj_read(ind_table->uobject); +} + static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) { struct ib_uobject *uobj; @@ -1761,9 +1772,11 @@ static int create_qp(struct ib_uverbs_file *file, struct ib_srq *srq = NULL; struct ib_qp *qp; char *buf; - struct ib_qp_init_attr attr; + struct ib_qp_init_attr attr = {}; struct ib_uverbs_ex_create_qp_resp resp; int ret; + struct ib_rwq_ind_table *ind_tbl = NULL; + bool has_sq = true; if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; @@ -1775,6 +1788,32 @@ static int create_qp(struct ib_uverbs_file *file, init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &qp_lock_class); down_write(&obj->uevent.uobject.mutex); + if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + + sizeof(cmd->rwq_ind_tbl_handle) && + (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { + ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle, + file->ucontext); + if (!ind_tbl) { + ret = -EINVAL; + goto err_put; + } + + attr.rwq_ind_tbl = ind_tbl; + } + + if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) + + sizeof(cmd->reserved1)) && cmd->reserved1) { + ret = -EOPNOTSUPP; + goto err_put; + } + + if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) { + ret = -EINVAL; + goto err_put; + } + + if (ind_tbl && !cmd->max_send_wr) + has_sq = false; if (cmd->qp_type == IB_QPT_XRC_TGT) { xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext, @@ -1798,20 +1837,24 @@ static int create_qp(struct ib_uverbs_file *file, } } - if (cmd->recv_cq_handle != cmd->send_cq_handle) { - rcq = idr_read_cq(cmd->recv_cq_handle, - file->ucontext, 0); - if (!rcq) { - ret = -EINVAL; - goto err_put; + if (!ind_tbl) { + if (cmd->recv_cq_handle != cmd->send_cq_handle) { + rcq = idr_read_cq(cmd->recv_cq_handle, + file->ucontext, 0); + if (!rcq) { + ret = -EINVAL; + goto err_put; + } } } } - scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); - rcq = rcq ?: scq; + if (has_sq) + scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); + if (!ind_tbl) + rcq = rcq ?: scq; pd = idr_read_pd(cmd->pd_handle, file->ucontext); - if (!pd || !scq) { + if (!pd || (!scq && has_sq)) { ret = -EINVAL; goto err_put; } @@ -1878,16 +1921,20 @@ static int create_qp(struct ib_uverbs_file *file, qp->send_cq = attr.send_cq; qp->recv_cq = attr.recv_cq; qp->srq = attr.srq; + qp->rwq_ind_tbl = ind_tbl; qp->event_handler = attr.event_handler; qp->qp_context = attr.qp_context; qp->qp_type = attr.qp_type; atomic_set(&qp->usecnt, 0); atomic_inc(&pd->usecnt); - atomic_inc(&attr.send_cq->usecnt); + if (attr.send_cq) + atomic_inc(&attr.send_cq->usecnt); if (attr.recv_cq) atomic_inc(&attr.recv_cq->usecnt); if (attr.srq) atomic_inc(&attr.srq->usecnt); + if (ind_tbl) + atomic_inc(&ind_tbl->usecnt); } qp->uobject = &obj->uevent.uobject; @@ 
-1927,6 +1974,8 @@ static int create_qp(struct ib_uverbs_file *file, put_cq_read(rcq); if (srq) put_srq_read(srq); + if (ind_tbl) + put_rwq_indirection_table_read(ind_tbl); mutex_lock(&file->mutex); list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); @@ -1954,6 +2003,8 @@ err_put: put_cq_read(rcq); if (srq) put_srq_read(srq); + if (ind_tbl) + put_rwq_indirection_table_read(ind_tbl); put_uobj_write(&obj->uevent.uobject); return ret; @@ -2047,7 +2098,7 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, if (err) return err; - if (cmd.comp_mask) + if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK) return -EINVAL; if (cmd.reserved) diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 2cf7c95..2c8bca8 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -523,6 +523,14 @@ struct ib_uverbs_create_qp { __u64 driver_data[0]; }; +enum ib_uverbs_create_qp_mask { + IB_UVERBS_CREATE_QP_MASK_IND_TABLE = 1UL << 0, +}; + +enum { + IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE, +}; + struct ib_uverbs_ex_create_qp { __u64 user_handle; __u32 pd_handle; @@ -540,6 +548,8 @@ struct ib_uverbs_ex_create_qp { __u8 reserved; __u32 comp_mask; __u32 create_flags; + __u32 rwq_ind_tbl_handle; + __u32 reserved1; }; struct ib_uverbs_open_qp { -- cgit v0.10.2 From 28d6137008b2aa09e35750c604394e363dbfca94 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 23 May 2016 15:20:56 +0300 Subject: IB/mlx5: Add RSS QP support Add support for Raw Ethernet RX HASH QP. Currently, creation and destruction of such a QP are supported. This QP is implemented as a simple TIR object which points to the receive RQ indirection table. The given hashing configuration is used to configure the TIR, which in turn selects the right RQ from the RQ indirection table. 
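To make the hashing configuration concrete, the driver-private command a user library could pass via udata might be filled as below (a sketch; the 40-byte key length is an assumption about the TIR Toeplitz key size, and toeplitz_key is a caller-provided array):

	/* Sketch: Toeplitz hashing over the IPv4/TCP 4-tuple. */
	struct mlx5_ib_create_qp_rss ucmd = {
		.rx_hash_function    = MLX5_RX_HASH_FUNC_TOEPLITZ,
		.rx_key_len	     = 40,	/* assumed; must match the TIR key field */
		.rx_hash_fields_mask = MLX5_RX_HASH_SRC_IPV4 | MLX5_RX_HASH_DST_IPV4 |
				       MLX5_RX_HASH_SRC_PORT_TCP | MLX5_RX_HASH_DST_PORT_TCP,
	};
	memcpy(ucmd.rx_hash_key, toeplitz_key, ucmd.rx_key_len);

Note that mixing IPv4 with IPv6 selectors, or TCP with UDP port selectors, is rejected by the validation code in the patch.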
Signed-off-by: Yishai Hadas Signed-off-by: Matan Barak Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index cd3d620..7ac4647 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -295,6 +295,10 @@ struct mlx5_ib_qp_trans { u8 resp_depth; }; +struct mlx5_ib_rss_qp { + u32 tirn; +}; + struct mlx5_ib_rq { struct mlx5_ib_qp_base base; struct mlx5_ib_wq *rq; @@ -323,6 +327,7 @@ struct mlx5_ib_qp { union { struct mlx5_ib_qp_trans trans_qp; struct mlx5_ib_raw_packet_qp raw_packet_qp; + struct mlx5_ib_rss_qp rss_qp; }; struct mlx5_buf buf; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 1c0e332..f9df4b52 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1266,6 +1266,187 @@ static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp, rq->doorbell = &qp->db; } +static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) +{ + mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn); +} + +static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct ib_uobject *uobj = pd->uobject; + struct ib_ucontext *ucontext = uobj->context; + struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext); + struct mlx5_ib_create_qp_resp resp = {}; + int inlen; + int err; + u32 *in; + void *tirc; + void *hfso; + u32 selected_fields = 0; + size_t min_resp_len; + u32 tdn = mucontext->tdn; + struct mlx5_ib_create_qp_rss ucmd = {}; + size_t required_cmd_sz; + + if (init_attr->qp_type != IB_QPT_RAW_PACKET) + return -EOPNOTSUPP; + + if (init_attr->create_flags || init_attr->send_cq) + return -EINVAL; + + min_resp_len = offsetof(typeof(resp), uuar_index) + sizeof(resp.uuar_index); + if (udata->outlen < min_resp_len) + return -EINVAL; + + required_cmd_sz = offsetof(typeof(ucmd), reserved1) + sizeof(ucmd.reserved1); + if (udata->inlen < required_cmd_sz) { + mlx5_ib_dbg(dev, "invalid inlen\n"); + return -EINVAL; + } + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) { + mlx5_ib_dbg(dev, "inlen is not supported\n"); + return -EOPNOTSUPP; + } + + if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) { + mlx5_ib_dbg(dev, "copy failed\n"); + return -EFAULT; + } + + if (ucmd.comp_mask) { + mlx5_ib_dbg(dev, "invalid comp mask\n"); + return -EOPNOTSUPP; + } + + if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)) || ucmd.reserved1) { + mlx5_ib_dbg(dev, "invalid reserved\n"); + return -EOPNOTSUPP; + } + + err = ib_copy_to_udata(udata, &resp, min_resp_len); + if (err) { + mlx5_ib_dbg(dev, "copy failed\n"); + return -EINVAL; + } + + inlen = MLX5_ST_SZ_BYTES(create_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + MLX5_SET(tirc, tirc, disp_type, + MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, + init_attr->rwq_ind_tbl->ind_tbl_num); + MLX5_SET(tirc, tirc, transport_domain, tdn); + + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + switch (ucmd.rx_hash_function) { + case MLX5_RX_HASH_FUNC_TOEPLITZ: + { + void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); + size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); + + if (len != ucmd.rx_key_len) { + err = -EINVAL; + goto err; + } + + MLX5_SET(tirc, tirc, 
rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + memcpy(rss_key, ucmd.rx_hash_key, len); + break; + } + default: + err = -EOPNOTSUPP; + goto err; + } + + if (!ucmd.rx_hash_fields_mask) { + /* special case when this TIR serves as steering entry without hashing */ + if (!init_attr->rwq_ind_tbl->log_ind_tbl_size) + goto create_tir; + err = -EINVAL; + goto err; + } + + if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) && + ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) { + err = -EINVAL; + goto err; + } + + /* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */ + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6)) + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + + if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) && + ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))) { + err = -EINVAL; + goto err; + } + + /* If none of TCP & UDP SRC/DST was set - this bit field is ignored */ + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_TCP); + else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_UDP); + + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6)) + selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP; + + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6)) + selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP; + + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP)) + selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT; + + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) + selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT; + + MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields); + +create_tir: + err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn); + + if (err) + goto err; + + kvfree(in); + /* qpn is reserved for that QP */ + qp->trans_qp.base.mqp.qpn = 0; + return 0; + +err: + kvfree(in); + return err; +} + static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, struct mlx5_ib_qp *qp) @@ -1292,6 +1473,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); + if (init_attr->rwq_ind_tbl) { + if (!udata) + return -ENOSYS; + + err = create_rss_raw_qp_tir(dev, qp, pd, init_attr, udata); + return err; + } + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { if (!MLX5_CAP_GEN(mdev, block_lb_mc)) { mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); @@ -1644,6 
+1833,11 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) struct mlx5_modify_qp_mbox_in *in; int err; + if (qp->ibqp.rwq_ind_tbl) { + destroy_rss_raw_qp_tir(dev, qp); + return; + } + base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ? &qp->raw_packet_qp.rq.base : &qp->trans_qp.base; @@ -2504,6 +2698,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int port; enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; + if (ibqp->rwq_ind_tbl) + return -ENOSYS; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); @@ -4119,6 +4316,9 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int err = 0; u8 raw_packet_qp_state; + if (ibqp->rwq_ind_tbl) + return -ENOSYS; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr); diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index 0f87955..33c54fb 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -152,6 +152,40 @@ struct mlx5_ib_create_qp { __u64 sq_buf_addr; }; +/* RX Hash function flags */ +enum mlx5_rx_hash_function_flags { + MLX5_RX_HASH_FUNC_TOEPLITZ = 1 << 0, +}; + +/* + * RX Hash flags: these flags allow selecting which fields of an incoming + * packet participate in RX Hash. Each flag represents a certain packet + * field; when the flag is set, the field it represents participates in + * the RX Hash calculation. + * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP, + * and *TCP and *UDP flags can't be enabled together on the same QP. +*/ +enum mlx5_rx_hash_fields { + MLX5_RX_HASH_SRC_IPV4 = 1 << 0, + MLX5_RX_HASH_DST_IPV4 = 1 << 1, + MLX5_RX_HASH_SRC_IPV6 = 1 << 2, + MLX5_RX_HASH_DST_IPV6 = 1 << 3, + MLX5_RX_HASH_SRC_PORT_TCP = 1 << 4, + MLX5_RX_HASH_DST_PORT_TCP = 1 << 5, + MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6, + MLX5_RX_HASH_DST_PORT_UDP = 1 << 7 +}; + +struct mlx5_ib_create_qp_rss { + __u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */ + __u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */ + __u8 rx_key_len; /* valid only for Toeplitz */ + __u8 reserved[6]; + __u8 rx_hash_key[128]; /* valid only for Toeplitz */ + __u32 comp_mask; + __u32 reserved1; +}; + struct mlx5_ib_create_qp_resp { __u32 uuar_index; }; -- cgit v0.10.2 From 7c2344c3bbf97eb5dfa732d5098285d15d3bf9bf Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 17 Jun 2016 14:56:44 +0300 Subject: IB/mlx5: Implements disassociate_ucontext API Implement the IB core disassociate_ucontext API. The driver detaches the HW resources for a given user context to prevent a dependency between application termination and device disconnect. This is done by managing the VMAs that were mapped to the HW BARs, such as the doorbell and blueflame. When a detach is needed, remap them to an arbitrary kernel page returned by the zap API. 
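The core of the detach is the zap step; in isolation it amounts to the sketch below (locking and VMA tracking as described in the patch that follows):

	/* Sketch: tear down one tracked BAR mapping. zap_vma_ptes()
	 * clears the PTEs covering the mapping so userspace can no
	 * longer reach the HW page; mmap_sem guards against a
	 * concurrent vma close.
	 */
	down_read(&mm->mmap_sem);
	ret = zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
	WARN_ONCE(ret, "zap_vma_ptes failed");
	up_read(&mm->mmap_sem);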
Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 82be194..21acee4 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -42,11 +42,13 @@ #include #endif #include +#include #include #include #include #include #include +#include #include #include #include @@ -983,6 +985,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, goto out_uars; } + INIT_LIST_HEAD(&context->vma_private_list); INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); @@ -1086,6 +1089,125 @@ static int get_index(unsigned long offset) return get_arg(offset); } +static void mlx5_ib_vma_open(struct vm_area_struct *area) +{ + /* vma_open is called when a new VMA is created on top of our VMA. This + * is done through either mremap flow or split_vma (usually due to + * mlock, madvise, munmap, etc.) We do not support a clone of the VMA, + * as this VMA is strongly hardware related. Therefore we set the + * vm_ops of the newly created/cloned VMA to NULL, to prevent it from + * calling us again and trying to do incorrect actions. We assume that + * the original VMA size is exactly a single page, and therefore all + * "splitting" operation will not happen to it. + */ + area->vm_ops = NULL; +} + +static void mlx5_ib_vma_close(struct vm_area_struct *area) +{ + struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data; + + /* It's guaranteed that all VMAs opened on a FD are closed before the + * file itself is closed, therefore no sync is needed with the regular + * closing flow. (e.g. mlx5 ib_dealloc_ucontext) + * However need a sync with accessing the vma as part of + * mlx5_ib_disassociate_ucontext. + * The close operation is usually called under mm->mmap_sem except when + * process is exiting. + * The exiting case is handled explicitly as part of + * mlx5_ib_disassociate_ucontext. + */ + mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data; + + /* setting the vma context pointer to null in the mlx5_ib driver's + * private data, to protect a race condition in + * mlx5_ib_disassociate_ucontext(). 
+ */ + mlx5_ib_vma_priv_data->vma = NULL; + list_del(&mlx5_ib_vma_priv_data->list); + kfree(mlx5_ib_vma_priv_data); +} + +static const struct vm_operations_struct mlx5_ib_vm_ops = { + .open = mlx5_ib_vma_open, + .close = mlx5_ib_vma_close +}; + +static int mlx5_ib_set_vma_data(struct vm_area_struct *vma, + struct mlx5_ib_ucontext *ctx) +{ + struct mlx5_ib_vma_private_data *vma_prv; + struct list_head *vma_head = &ctx->vma_private_list; + + vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL); + if (!vma_prv) + return -ENOMEM; + + vma_prv->vma = vma; + vma->vm_private_data = vma_prv; + vma->vm_ops = &mlx5_ib_vm_ops; + + list_add(&vma_prv->list, vma_head); + + return 0; +} + +static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ + int ret; + struct vm_area_struct *vma; + struct mlx5_ib_vma_private_data *vma_private, *n; + struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); + struct task_struct *owning_process = NULL; + struct mm_struct *owning_mm = NULL; + + owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); + if (!owning_process) + return; + + owning_mm = get_task_mm(owning_process); + if (!owning_mm) { + pr_info("no mm, disassociate ucontext is pending task termination\n"); + while (1) { + put_task_struct(owning_process); + usleep_range(1000, 2000); + owning_process = get_pid_task(ibcontext->tgid, + PIDTYPE_PID); + if (!owning_process || + owning_process->state == TASK_DEAD) { + pr_info("disassociate ucontext done, task was terminated\n"); + /* in case task was dead need to release the + * task struct. + */ + if (owning_process) + put_task_struct(owning_process); + return; + } + } + } + + /* need to protect from a race on closing the vma as part of + * mlx5_ib_vma_close. + */ + down_read(&owning_mm->mmap_sem); + list_for_each_entry_safe(vma_private, n, &context->vma_private_list, + list) { + vma = vma_private->vma; + ret = zap_vma_ptes(vma, vma->vm_start, + PAGE_SIZE); + WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__); + /* context going to be destroyed, should + * not access ops any more. 
+ */ + vma->vm_ops = NULL; + list_del(&vma_private->list); + kfree(vma_private); + } + up_read(&owning_mm->mmap_sem); + mmput(owning_mm); + put_task_struct(owning_process); +} + static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) { switch (cmd) { @@ -1101,8 +1223,10 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) } static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, - struct vm_area_struct *vma, struct mlx5_uuar_info *uuari) + struct vm_area_struct *vma, + struct mlx5_ib_ucontext *context) { + struct mlx5_uuar_info *uuari = &context->uuari; int err; unsigned long idx; phys_addr_t pfn, pa; @@ -1152,14 +1276,13 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd), vma->vm_start, &pa); - return 0; + return mlx5_ib_set_vma_data(vma, context); } static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); - struct mlx5_uuar_info *uuari = &context->uuari; unsigned long command; phys_addr_t pfn; @@ -1168,7 +1291,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm case MLX5_IB_MMAP_WC_PAGE: case MLX5_IB_MMAP_NC_PAGE: case MLX5_IB_MMAP_REGULAR_PAGE: - return uar_mmap(dev, command, vma, uuari); + return uar_mmap(dev, command, vma, context); case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: return -ENOSYS; @@ -2428,6 +2551,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid; } + dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext; + mlx5_ib_internal_fill_odp_caps(dev); if (MLX5_CAP_GEN(mdev, imaicl)) { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 7ac4647..391588e 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -105,6 +105,11 @@ enum { MLX5_CQE_VERSION_V1, }; +struct mlx5_ib_vma_private_data { + struct list_head list; + struct vm_area_struct *vma; +}; + struct mlx5_ib_ucontext { struct ib_ucontext ibucontext; struct list_head db_page_list; @@ -116,6 +121,7 @@ struct mlx5_ib_ucontext { u8 cqe_version; /* Transport Domain number */ u32 tdn; + struct list_head vma_private_list; }; static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) -- cgit v0.10.2 From 89ea94a7b6c40eb423c144aef1caceebaff79c8d Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 17 Jun 2016 15:01:38 +0300 Subject: IB/mlx5: Reset flow support for IB kernel ULPs The driver exposes interfaces that directly relate to HW state. Upon fatal error, consumers of these interfaces (ULPs) that rely on completion of all their posted work-request could hang, thereby introducing dependencies in shutdown order. To prevent this from happening, we manage the relevant resources (CQs, QPs) that are used by the device. Upon a fatal error, we now generate simulated completions for outstanding WQEs that were not completed at the time the HW was reset. It includes invoking the completion event handler for all involved CQs so that the ULPs will poll those CQs. When polled we return simulated CQEs with IB_WC_WR_FLUSH_ERR return code enabling ULPs to clean up their resources and not wait forever for completions upon receiving remove_one. 
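A simulated completion is just an ib_wc filled in by software: the
WQE's wr_id is preserved while the status is forced to
IB_WC_WR_FLUSH_ERR, so the ULP's normal poll loop can reclaim the
request. An illustrative helper (a sketch of the idea, not the exact
driver code):

	/* Fabricate one flushed completion for an outstanding WR. */
	static void fill_sw_flush_comp(struct mlx5_ib_qp *qp, u64 wr_id,
				       struct ib_wc *wc)
	{
		memset(wc, 0, sizeof(*wc));
		wc->wr_id = wr_id;
		wc->qp = &qp->ibqp;
		wc->status = IB_WC_WR_FLUSH_ERR;
	}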
The above change requires an extra check in the data path to make sure
that when the device is in an error state, the simulated CQEs are
returned and no further WQEs are posted.

Signed-off-by: Maor Gottlieb
Signed-off-by: Leon Romanovsky
Signed-off-by: Doug Ledford

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 9c0e67b..308a358 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -424,6 +424,83 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
 	item->key = be32_to_cpu(cqe->mkey);
 }
 
+static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries,
+			 struct ib_wc *wc, int *npolled)
+{
+	struct mlx5_ib_wq *wq;
+	unsigned int cur;
+	unsigned int idx;
+	int np;
+	int i;
+
+	wq = &qp->sq;
+	cur = wq->head - wq->tail;
+	np = *npolled;
+
+	if (cur == 0)
+		return;
+
+	for (i = 0; i < cur && np < num_entries; i++) {
+		idx = wq->last_poll & (wq->wqe_cnt - 1);
+		wc->wr_id = wq->wrid[idx];
+		wc->status = IB_WC_WR_FLUSH_ERR;
+		wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
+		wq->tail++;
+		np++;
+		wc->qp = &qp->ibqp;
+		wc++;
+		wq->last_poll = wq->w_list[idx].next;
+	}
+	*npolled = np;
+}
+
+static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries,
+			 struct ib_wc *wc, int *npolled)
+{
+	struct mlx5_ib_wq *wq;
+	unsigned int cur;
+	int np;
+	int i;
+
+	wq = &qp->rq;
+	cur = wq->head - wq->tail;
+	np = *npolled;
+
+	if (cur == 0)
+		return;
+
+	for (i = 0; i < cur && np < num_entries; i++) {
+		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+		wc->status = IB_WC_WR_FLUSH_ERR;
+		wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
+		wq->tail++;
+		np++;
+		wc->qp = &qp->ibqp;
+		wc++;
+	}
+	*npolled = np;
+}
+
+static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
+				 struct ib_wc *wc, int *npolled)
+{
+	struct mlx5_ib_qp *qp;
+
+	*npolled = 0;
+	/* Find uncompleted WQEs belonging to that cq and return simulated
+	 * completions for them.
+	 */
+	list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
+		sw_send_comp(qp, num_entries, wc + *npolled, npolled);
+		if (*npolled >= num_entries)
+			return;
+	}
+
+	list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
+		sw_recv_comp(qp, num_entries, wc + *npolled, npolled);
+		if (*npolled >= num_entries)
+			return;
+	}
+}
+
 static int mlx5_poll_one(struct mlx5_ib_cq *cq,
 			 struct mlx5_ib_qp **cur_qp,
 			 struct ib_wc *wc)
@@ -594,12 +671,18 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 {
 	struct mlx5_ib_cq *cq = to_mcq(ibcq);
 	struct mlx5_ib_qp *cur_qp = NULL;
+	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
+	struct mlx5_core_dev *mdev = dev->mdev;
 	unsigned long flags;
 	int soft_polled = 0;
 	int npolled;
 	int err = 0;
 
 	spin_lock_irqsave(&cq->lock, flags);
+	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled);
+		goto out;
+	}
 
 	if (unlikely(!list_empty(&cq->wc_list)))
 		soft_polled = poll_soft_wc(cq, num_entries, wc);
@@ -612,7 +695,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 
 	if (npolled)
 		mlx5_cq_set_ci(&cq->mcq);
-
+out:
 	spin_unlock_irqrestore(&cq->lock, flags);
 
 	if (err == 0 || err == -EAGAIN)
@@ -843,6 +926,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
 	cq->resize_buf = NULL;
 	cq->resize_umem = NULL;
 	cq->create_flags = attr->flags;
+	INIT_LIST_HEAD(&cq->list_send_qp);
+	INIT_LIST_HEAD(&cq->list_recv_qp);
 
 	if (context) {
 		err = create_cq_user(dev, udata, context, cq, entries,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 21acee4..9b6d283 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1980,6 +1980,65 @@ static void pkey_change_handler(struct work_struct *work)
 	mutex_unlock(&ports->devr->mutex);
 }
 
+static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
+{
+	struct mlx5_ib_qp *mqp;
+	struct mlx5_ib_cq *send_mcq, *recv_mcq;
+	struct mlx5_core_cq *mcq;
+	struct list_head cq_armed_list;
+	unsigned long flags_qp;
+	unsigned long flags_cq;
+	unsigned long flags;
+
+	INIT_LIST_HEAD(&cq_armed_list);
+
+	/* Go over the qp list residing on that ibdev; sync with
+	 * create/destroy qp.
+	 */
+	spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
+	list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
+		spin_lock_irqsave(&mqp->sq.lock, flags_qp);
+		if (mqp->sq.tail != mqp->sq.head) {
+			send_mcq = to_mcq(mqp->ibqp.send_cq);
+			spin_lock_irqsave(&send_mcq->lock, flags_cq);
+			if (send_mcq->mcq.comp &&
+			    mqp->ibqp.send_cq->comp_handler) {
+				if (!send_mcq->mcq.reset_notify_added) {
+					send_mcq->mcq.reset_notify_added = 1;
+					list_add_tail(&send_mcq->mcq.reset_notify,
+						      &cq_armed_list);
+				}
+			}
+			spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
+		}
+		spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
+		spin_lock_irqsave(&mqp->rq.lock, flags_qp);
+		/* no handling is needed for SRQ */
+		if (!mqp->ibqp.srq) {
+			if (mqp->rq.tail != mqp->rq.head) {
+				recv_mcq = to_mcq(mqp->ibqp.recv_cq);
+				spin_lock_irqsave(&recv_mcq->lock, flags_cq);
+				if (recv_mcq->mcq.comp &&
+				    mqp->ibqp.recv_cq->comp_handler) {
+					if (!recv_mcq->mcq.reset_notify_added) {
+						recv_mcq->mcq.reset_notify_added = 1;
+						list_add_tail(&recv_mcq->mcq.reset_notify,
+							      &cq_armed_list);
+					}
+				}
+				spin_unlock_irqrestore(&recv_mcq->lock,
+						       flags_cq);
+			}
+		}
+		spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
+	}
+	/* At this point all in-flight post-send operations are guaranteed to
+	 * have been flushed by the lock/unlock cycle above. Now we need to
+	 * arm all involved CQs.
+ */ + list_for_each_entry(mcq, &cq_armed_list, reset_notify) { + mcq->comp(mcq); + } + spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags); +} + static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param) { @@ -1992,6 +2051,7 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, case MLX5_DEV_EVENT_SYS_ERROR: ibdev->ib_active = false; ibev.event = IB_EVENT_DEVICE_FATAL; + mlx5_ib_handle_internal_error(ibdev); break; case MLX5_DEV_EVENT_PORT_UP: @@ -2595,6 +2655,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) mutex_init(&dev->flow_db.lock); mutex_init(&dev->cap_mask_mutex); + INIT_LIST_HEAD(&dev->qp_list); + spin_lock_init(&dev->reset_flow_resource_lock); if (ll == IB_LINK_LAYER_ETHERNET) { err = mlx5_enable_roce(dev); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 391588e..0001ed5 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -380,6 +380,9 @@ struct mlx5_ib_qp { spinlock_t disable_page_faults_lock; struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS]; #endif + struct list_head qps_list; + struct list_head cq_recv_list; + struct list_head cq_send_list; }; struct mlx5_ib_cq_buf { @@ -441,6 +444,8 @@ struct mlx5_ib_cq { struct mlx5_ib_cq_buf *resize_buf; struct ib_umem *resize_umem; int cqe_size; + struct list_head list_send_qp; + struct list_head list_recv_qp; u32 create_flags; struct list_head wc_list; enum ib_cq_notify_flags notify_flags; @@ -621,6 +626,9 @@ struct mlx5_ib_dev { struct srcu_struct mr_srcu; #endif struct mlx5_ib_flow_db flow_db; + /* protect resources needed as part of reset flow */ + spinlock_t reset_flow_resource_lock; + struct list_head qp_list; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 8cf2ce5..4b02130 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1193,12 +1193,16 @@ error: static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { + struct mlx5_core_dev *mdev = dev->mdev; struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; struct mlx5_umr_wr umrwr = {}; struct ib_send_wr *bad; int err; + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + return 0; + mlx5_ib_init_umr_context(&umr_context); umrwr.wr.wr_cqe = &umr_context.cqe; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index f9df4b52..9004905 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -77,6 +77,10 @@ struct mlx5_wqe_eth_pad { u8 rsvd0[16]; }; +static void get_cqs(enum ib_qp_type qp_type, + struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq, + struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq); + static int is_qp0(enum ib_qp_type qp_type) { return qp_type == IB_QPT_SMI; @@ -609,6 +613,11 @@ static int to_mlx5_st(enum ib_qp_type type) } } +static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, + struct mlx5_ib_cq *recv_cq); +static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, + struct mlx5_ib_cq *recv_cq); + static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn) { return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index; @@ -1457,6 +1466,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_create_qp_resp resp; struct mlx5_create_qp_mbox_in *in; struct mlx5_ib_create_qp ucmd; + struct 
mlx5_ib_cq *send_cq; + struct mlx5_ib_cq *recv_cq; + unsigned long flags; int inlen = sizeof(*in); int err; u32 uidx = MLX5_IB_DEFAULT_UIDX; @@ -1714,6 +1726,23 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, base->container_mibqp = qp; base->mqp.event = mlx5_ib_qp_event; + get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq, + &send_cq, &recv_cq); + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + mlx5_ib_lock_cqs(send_cq, recv_cq); + /* Maintain device to QPs access, needed for further handling via reset + * flow + */ + list_add_tail(&qp->qps_list, &dev->qp_list); + /* Maintain CQ to QPs access, needed for further handling via reset flow + */ + if (send_cq) + list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp); + if (recv_cq) + list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp); + mlx5_ib_unlock_cqs(send_cq, recv_cq); + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); + return 0; err_create: @@ -1732,23 +1761,23 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv if (send_cq) { if (recv_cq) { if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } else { - spin_lock_irq(&recv_cq->lock); + spin_lock(&recv_cq->lock); spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); } } else { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } } else if (recv_cq) { - spin_lock_irq(&recv_cq->lock); + spin_lock(&recv_cq->lock); __acquire(&send_cq->lock); } else { __acquire(&send_cq->lock); @@ -1763,21 +1792,21 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *re if (recv_cq) { if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { spin_unlock(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) { __release(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } else { spin_unlock(&send_cq->lock); - spin_unlock_irq(&recv_cq->lock); + spin_unlock(&recv_cq->lock); } } else { __release(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } } else if (recv_cq) { __release(&send_cq->lock); - spin_unlock_irq(&recv_cq->lock); + spin_unlock(&recv_cq->lock); } else { __release(&recv_cq->lock); __release(&send_cq->lock); @@ -1789,17 +1818,18 @@ static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp) return to_mpd(qp->ibqp.pd); } -static void get_cqs(struct mlx5_ib_qp *qp, +static void get_cqs(enum ib_qp_type qp_type, + struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq, struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq) { - switch (qp->ibqp.qp_type) { + switch (qp_type) { case IB_QPT_XRC_TGT: *send_cq = NULL; *recv_cq = NULL; break; case MLX5_IB_QPT_REG_UMR: case IB_QPT_XRC_INI: - *send_cq = to_mcq(qp->ibqp.send_cq); + *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL; *recv_cq = NULL; break; @@ -1811,8 +1841,8 @@ static void get_cqs(struct mlx5_ib_qp *qp, case IB_QPT_RAW_IPV6: case IB_QPT_RAW_ETHERTYPE: case IB_QPT_RAW_PACKET: - *send_cq = to_mcq(qp->ibqp.send_cq); - *recv_cq = to_mcq(qp->ibqp.recv_cq); + *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL; + *recv_cq = ib_recv_cq ? 
to_mcq(ib_recv_cq) : NULL; break; case IB_QPT_MAX: @@ -1831,6 +1861,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_ib_qp_base *base = &qp->trans_qp.base; struct mlx5_modify_qp_mbox_in *in; + unsigned long flags; int err; if (qp->ibqp.rwq_ind_tbl) { @@ -1861,17 +1892,28 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) base->mqp.qpn); } - get_cqs(qp, &send_cq, &recv_cq); + get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq, + &send_cq, &recv_cq); + + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + mlx5_ib_lock_cqs(send_cq, recv_cq); + /* del from lists under both locks above to protect reset flow paths */ + list_del(&qp->qps_list); + if (send_cq) + list_del(&qp->cq_send_list); + + if (recv_cq) + list_del(&qp->cq_recv_list); if (qp->create_type == MLX5_QP_KERNEL) { - mlx5_ib_lock_cqs(send_cq, recv_cq); __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); if (send_cq != recv_cq) __mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL); - mlx5_ib_unlock_cqs(send_cq, recv_cq); } + mlx5_ib_unlock_cqs(send_cq, recv_cq); + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { destroy_raw_packet_qp(dev, qp); @@ -2559,7 +2601,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } pd = get_pd(qp); - get_cqs(qp, &send_cq, &recv_cq); + get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq, + &send_cq, &recv_cq); context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn); context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0; @@ -3658,6 +3701,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_qp *qp; struct mlx5_ib_mr *mr; struct mlx5_wqe_data_seg *dpseg; @@ -3685,6 +3729,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qp->sq.lock, flags); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + err = -EIO; + *bad_wr = wr; + nreq = 0; + goto out; + } + for (nreq = 0; wr; nreq++, wr = wr->next) { if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { mlx5_ib_warn(dev, "\n"); @@ -3986,6 +4037,8 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_wqe_data_seg *scat; struct mlx5_rwqe_sig *sig; + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_core_dev *mdev = dev->mdev; unsigned long flags; int err = 0; int nreq; @@ -3997,6 +4050,13 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qp->rq.lock, flags); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + err = -EIO; + *bad_wr = wr; + nreq = 0; + goto out; + } + ind = qp->rq.head & (qp->rq.wqe_cnt - 1); for (nreq = 0; wr; nreq++, wr = wr->next) { diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 3b2ddd6..55efb34 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -458,6 +458,8 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct mlx5_ib_srq *srq = to_msrq(ibsrq); struct mlx5_wqe_srq_next_seg *next; struct mlx5_wqe_data_seg *scat; + struct mlx5_ib_dev *dev = to_mdev(ibsrq->device); + struct mlx5_core_dev *mdev = dev->mdev; unsigned long flags; int err = 0; 
int nreq; @@ -465,6 +467,12 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, spin_lock_irqsave(&srq->lock, flags); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + err = -EIO; + *bad_wr = wr; + goto out; + } + for (nreq = 0; wr; nreq++, wr = wr->next) { if (unlikely(wr->num_sge > srq->msrq.max_gs)) { err = -EINVAL; @@ -507,7 +515,7 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, *srq->db.db = cpu_to_be32(srq->wqe_ctr); } - +out: spin_unlock_irqrestore(&srq->lock, flags); return err; diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 2be976d..2566f6d 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -58,6 +58,8 @@ struct mlx5_core_cq { void (*comp)(struct mlx5_core_cq *); void *priv; } tasklet_ctx; + int reset_notify_added; + struct list_head reset_notify; }; -- cgit v0.10.2 From 4c2aae712cb024f9d30a1fa62e3ba2ff785c6a3e Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 17 Jun 2016 15:14:50 +0300 Subject: IB/core: Add IPv6 support to flow steering Add IPv6 flow specification support. Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 6c22923..b7f3b8d 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -226,6 +226,7 @@ struct ib_uverbs_flow_spec { struct ib_uverbs_flow_spec_eth eth; struct ib_uverbs_flow_spec_ipv4 ipv4; struct ib_uverbs_flow_spec_tcp_udp tcp_udp; + struct ib_uverbs_flow_spec_ipv6 ipv6; }; }; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 65ab209..f664731 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3105,6 +3105,15 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask, sizeof(struct ib_flow_ipv4_filter)); break; + case IB_FLOW_SPEC_IPV6: + ib_spec->ipv6.size = sizeof(struct ib_flow_spec_ipv6); + if (ib_spec->ipv6.size != kern_spec->ipv6.size) + return -EINVAL; + memcpy(&ib_spec->ipv6.val, &kern_spec->ipv6.val, + sizeof(struct ib_flow_ipv6_filter)); + memcpy(&ib_spec->ipv6.mask, &kern_spec->ipv6.mask, + sizeof(struct ib_flow_ipv6_filter)); + break; case IB_FLOW_SPEC_TCP: case IB_FLOW_SPEC_UDP: ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9b2fafe..9bbca68 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1569,6 +1569,7 @@ enum ib_flow_spec_type { IB_FLOW_SPEC_IB = 0x22, /* L3 header*/ IB_FLOW_SPEC_IPV4 = 0x30, + IB_FLOW_SPEC_IPV6 = 0x31, /* L4 headers*/ IB_FLOW_SPEC_TCP = 0x40, IB_FLOW_SPEC_UDP = 0x41 @@ -1630,6 +1631,18 @@ struct ib_flow_spec_ipv4 { struct ib_flow_ipv4_filter mask; }; +struct ib_flow_ipv6_filter { + u8 src_ip[16]; + u8 dst_ip[16]; +}; + +struct ib_flow_spec_ipv6 { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_ipv6_filter val; + struct ib_flow_ipv6_filter mask; +}; + struct ib_flow_tcp_udp_filter { __be16 dst_port; __be16 src_port; @@ -1651,6 +1664,7 @@ union ib_flow_spec { struct ib_flow_spec_ib ib; struct ib_flow_spec_ipv4 ipv4; struct ib_flow_spec_tcp_udp tcp_udp; + struct ib_flow_spec_ipv6 ipv6; }; struct ib_flow_attr { diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 2c8bca8..7f035f4b 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ 
-867,6 +867,24 @@ struct ib_uverbs_flow_spec_tcp_udp {
 	struct ib_uverbs_flow_tcp_udp_filter mask;
 };
 
+struct ib_uverbs_flow_ipv6_filter {
+	__u8 src_ip[16];
+	__u8 dst_ip[16];
+};
+
+struct ib_uverbs_flow_spec_ipv6 {
+	union {
+		struct ib_uverbs_flow_spec_hdr hdr;
+		struct {
+			__u32 type;
+			__u16 size;
+			__u16 reserved;
+		};
+	};
+	struct ib_uverbs_flow_ipv6_filter val;
+	struct ib_uverbs_flow_ipv6_filter mask;
+};
+
 struct ib_uverbs_flow_attr {
 	__u32 type;
 	__u16 size;
-- 
cgit v0.10.2

From 026bae0cb428102228d110780d90e6ae44bbe4c7 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb
Date: Fri, 17 Jun 2016 15:14:51 +0300
Subject: IB/mlx5: Enable flow steering for IPv6 traffic

Enable flow steering for IPv6 traffic by using an IPv6 spec.

Signed-off-by: Maor Gottlieb
Signed-off-by: Leon Romanovsky
Signed-off-by: Doug Ledford

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 9b6d283..8f832fc 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1454,6 +1454,32 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
 		       &ib_spec->ipv4.val.dst_ip,
 		       sizeof(ib_spec->ipv4.val.dst_ip));
 		break;
+	case IB_FLOW_SPEC_IPV6:
+		if (ib_spec->size != sizeof(ib_spec->ipv6))
+			return -EINVAL;
+
+		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+			 ethertype, 0xffff);
+		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+			 ethertype, ETH_P_IPV6);
+
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
+		       &ib_spec->ipv6.mask.src_ip,
+		       sizeof(ib_spec->ipv6.mask.src_ip));
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
+		       &ib_spec->ipv6.val.src_ip,
+		       sizeof(ib_spec->ipv6.val.src_ip));
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+		       &ib_spec->ipv6.mask.dst_ip,
+		       sizeof(ib_spec->ipv6.mask.dst_ip));
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+		       &ib_spec->ipv6.val.dst_ip,
+		       sizeof(ib_spec->ipv6.val.dst_ip));
+		break;
 	case IB_FLOW_SPEC_TCP:
 		if (ib_spec->size != sizeof(ib_spec->tcp_udp))
 			return -EINVAL;
-- 
cgit v0.10.2

From 402ca53644ff1928f2ea68cce6fe41b2b5f38510 Mon Sep 17 00:00:00 2001
From: Bodong Wang
Date: Fri, 17 Jun 2016 15:02:20 +0300
Subject: IB/mlx5: Report mlx5 TSO capabilities when querying device

Enable mlx5-based hardware to report TCP segmentation offload (TSO)
capabilities from kernel to user space. A TSO-enabled NIC accepts
chunks of data larger than the MTU for TCP traffic; the TSO engine
breaks the data into separate packets and inserts the headers
automatically.

The capabilities are exposed to user space through query_device,
directly via uhw. The following capabilities are reported:

1. The maximum payload size in bytes supported for segmentation by the
   TSO engine.

2. A bitmap showing which QP types are supported by the TSO operation.
   The bitmap is built from the members of 'enum ib_qp_type'. For
   example, the following is set if the UD QP type is supported:
   supported_qpts |= 1 << IB_QPT_UD

To make the user-space library aware of whether the kernel supports uhw
or not, a new flag, cmds_supp_uhw, is returned to user space through
alloc_ucontext.
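From the application side the handshake is straightforward: if the
vendor part of the query_device response is long enough to carry
tso_caps, test the QP type bitmap and read the maximum payload. A
hypothetical user-space fragment (print_tso_caps is an illustrative
name; it assumes the mlx5_ib_query_device_resp layout from user.h was
copied out of the uhw buffer, and uses the kernel QP type naming as in
the text above):

	void print_tso_caps(const struct mlx5_ib_query_device_resp *resp)
	{
		/* one bit per member of enum ib_qp_type */
		if (resp->tso_caps.supported_qpts & (1 << IB_QPT_RAW_PACKET))
			printf("TSO on raw packet QPs, max payload %u bytes\n",
			       resp->tso_caps.max_tso);
		else
			printf("TSO not reported by the device\n");
	}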
Signed-off-by: Bodong Wang Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8f832fc..a7cc6d7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -459,8 +459,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, int max_rq_sg; int max_sq_sg; u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); + struct mlx5_ib_query_device_resp resp = {}; + size_t resp_len; + u64 max_tso; - if (uhw->inlen || uhw->outlen) + resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length); + if (uhw->outlen && uhw->outlen < resp_len) + return -EINVAL; + else + resp.response_length = resp_len; + + if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) return -EINVAL; memset(props, 0, sizeof(*props)); @@ -513,10 +522,21 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, block_lb_mc)) props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; - if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && - (MLX5_CAP_ETH(dev->mdev, csum_cap))) + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) { + if (MLX5_CAP_ETH(mdev, csum_cap)) props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; + if (field_avail(typeof(resp), tso_caps, uhw->outlen)) { + max_tso = MLX5_CAP_ETH(mdev, max_lso_cap); + if (max_tso) { + resp.tso_caps.max_tso = 1 << max_tso; + resp.tso_caps.supported_qpts |= + 1 << IB_QPT_RAW_PACKET; + resp.response_length += sizeof(resp.tso_caps); + } + } + } + if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; props->device_cap_flags |= IB_DEVICE_UD_TSO; @@ -578,6 +598,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (!mlx5_core_is_pf(mdev)) props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; + if (uhw->outlen) { + err = ib_copy_to_udata(uhw, &resp, resp.response_length); + + if (err) + return err; + } + return 0; } @@ -995,6 +1022,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (field_avail(typeof(resp), cqe_version, udata->outlen)) resp.response_length += sizeof(resp.cqe_version); + if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) { + resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE; + resp.response_length += sizeof(resp.cmds_supp_uhw); + } + /* * We don't want to expose information from the PCI bar that is located * after 4096 bytes, so if the arch only supports larger pages, let's @@ -1009,8 +1041,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE; resp.response_length += sizeof(resp.hca_core_clock_offset) + - sizeof(resp.reserved2) + - sizeof(resp.reserved3); + sizeof(resp.reserved2); } err = ib_copy_to_udata(udata, &resp, resp.response_length); diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index 33c54fb..188dac4 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -83,6 +83,10 @@ enum mlx5_ib_alloc_ucontext_resp_mask { MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, }; +enum mlx5_user_cmds_supp_uhw { + MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0, +}; + struct mlx5_ib_alloc_ucontext_resp { __u32 qp_tab_size; __u32 bf_reg_size; @@ -98,8 +102,8 @@ struct mlx5_ib_alloc_ucontext_resp { __u32 comp_mask; __u32 response_length; __u8 cqe_version; - __u8 reserved2; - __u16 reserved3; + __u8 cmds_supp_uhw; + __u16 reserved2; __u64 
hca_core_clock_offset; }; @@ -107,6 +111,22 @@ struct mlx5_ib_alloc_pd_resp { __u32 pdn; }; +struct mlx5_ib_tso_caps { + __u32 max_tso; /* Maximum tso payload size in bytes */ + + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_UD + */ + __u32 supported_qpts; +}; + +struct mlx5_ib_query_device_resp { + __u32 comp_mask; + __u32 response_length; + struct mlx5_ib_tso_caps tso_caps; +}; + struct mlx5_ib_create_cq { __u64 buf_addr; __u64 db_addr; -- cgit v0.10.2 From e3353c268b06236d6c40fa1714c114f21f44451c Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Fri, 17 Jun 2016 15:33:31 +0300 Subject: IB/mlx5: Fix MODIFY_QP command input structure Make MODIFY_QP command input structure compliant to specification Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 266320f..acb28c9 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -555,9 +555,9 @@ struct mlx5_destroy_qp_mbox_out { struct mlx5_modify_qp_mbox_in { struct mlx5_inbox_hdr hdr; __be32 qpn; - u8 rsvd1[4]; - __be32 optparam; u8 rsvd0[4]; + __be32 optparam; + u8 rsvd1[4]; struct mlx5_qp_context ctx; u8 rsvd2[16]; }; -- cgit v0.10.2 From af1ba291c5e498973cc325c501dd8da80b234571 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Fri, 17 Jun 2016 15:33:32 +0300 Subject: {net, IB}/mlx5: Refactor internal SRQ API Currently, the SRQ API uses the obsolete mlx5_*_srq_mbox_{in,out} structs which limit the ability to pass the SRQ attributes between net and IB parts of the driver. This patch changes the SRQ API so as to use auto-generated structs and provides a better way to pass attributes which will be in use by coming features. 
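In condensed form, a caller now fills a plain mlx5_srq_attr instead of
hand-packing a firmware mailbox. An illustrative fragment for a basic
SRQ (a sketch only; the values mirror the mlx5_ib conversion below):

	struct mlx5_srq_attr in = {0};

	in.type      = IB_SRQT_BASIC;
	in.log_size  = ilog2(srq->msrq.max);	/* log2 of the WQE count */
	in.wqe_shift = srq->msrq.wqe_shift - 4;
	in.pd        = to_mpd(pd)->pdn;
	in.cqn       = to_mcq(dev->devr.c0)->mcq.cqn;
	in.db_record = srq->db.dma;
	in.pas       = pas;			/* page list filled elsewhere */

	err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in);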
Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 55efb34..ed6ac52 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -74,14 +74,12 @@ static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type) } static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, - struct mlx5_create_srq_mbox_in **in, - struct ib_udata *udata, int buf_size, int *inlen, - int is_xrc) + struct mlx5_srq_attr *in, + struct ib_udata *udata, int buf_size) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_create_srq ucmd = {}; size_t ucmdlen; - void *xsrqc; int err; int npages; int page_shift; @@ -104,7 +102,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, udata->inlen - sizeof(ucmd))) return -EINVAL; - if (is_xrc) { + if (in->type == IB_SRQT_XRC) { err = get_srq_user_index(to_mucontext(pd->uobject->context), &ucmd, udata->inlen, &uidx); if (err) @@ -130,14 +128,13 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, goto err_umem; } - *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; - *in = mlx5_vzalloc(*inlen); - if (!(*in)) { + in->pas = mlx5_vzalloc(sizeof(*in->pas) * ncont); + if (!in->pas) { err = -ENOMEM; goto err_umem; } - mlx5_ib_populate_pas(dev, srq->umem, page_shift, (*in)->pas, 0); + mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0); err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), ucmd.db_addr, &srq->db); @@ -146,20 +143,16 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, goto err_in; } - (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; - (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26); - - if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) && - is_xrc){ - xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in, - xrc_srq_context_entry); - MLX5_SET(xrc_srqc, xsrqc, user_index, uidx); - } + in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + in->page_offset = offset; + if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && + in->type == IB_SRQT_XRC) + in->user_index = uidx; return 0; err_in: - kvfree(*in); + kvfree(in->pas); err_umem: ib_umem_release(srq->umem); @@ -168,15 +161,13 @@ err_umem: } static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, - struct mlx5_create_srq_mbox_in **in, int buf_size, - int *inlen, int is_xrc) + struct mlx5_srq_attr *in, int buf_size) { int err; int i; struct mlx5_wqe_srq_next_seg *next; int page_shift; int npages; - void *xsrqc; err = mlx5_db_alloc(dev->mdev, &srq->db); if (err) { @@ -204,13 +195,12 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT)); mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n", buf_size, page_shift, srq->buf.npages, npages); - *inlen = sizeof(**in) + sizeof(*(*in)->pas) * npages; - *in = mlx5_vzalloc(*inlen); - if (!*in) { + in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages); + if (!in->pas) { err = -ENOMEM; goto err_buf; } - mlx5_fill_page_array(&srq->buf, (*in)->pas); + mlx5_fill_page_array(&srq->buf, in->pas); srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL); if (!srq->wrid) { @@ -221,20 +211,15 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, } srq->wq_sig = !!srq_signature; - (*in)->ctx.log_pg_sz = page_shift - 
MLX5_ADAPTER_PAGE_SHIFT; - - if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) && - is_xrc){ - xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in, - xrc_srq_context_entry); - /* 0xffffff means we ask to work with cqe version 0 */ - MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX); - } + in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && + in->type == IB_SRQT_XRC) + in->user_index = MLX5_IB_DEFAULT_UIDX; return 0; err_in: - kvfree(*in); + kvfree(in->pas); err_buf: mlx5_buf_free(dev->mdev, &srq->buf); @@ -267,10 +252,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, int desc_size; int buf_size; int err; - struct mlx5_create_srq_mbox_in *uninitialized_var(in); - int uninitialized_var(inlen); - int is_xrc; - u32 flgs, xrcdn; + struct mlx5_srq_attr in = {0}; __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); /* Sanity check SRQ size before proceeding */ @@ -302,14 +284,10 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, srq->msrq.max_avail_gather); - is_xrc = (init_attr->srq_type == IB_SRQT_XRC); - if (pd->uobject) - err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen, - is_xrc); + err = create_srq_user(pd, srq, &in, udata, buf_size); else - err = create_srq_kernel(dev, srq, &in, buf_size, &inlen, - is_xrc); + err = create_srq_kernel(dev, srq, &in, buf_size); if (err) { mlx5_ib_warn(dev, "create srq %s failed, err %d\n", @@ -317,23 +295,23 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, goto err_srq; } - in->ctx.state_log_sz = ilog2(srq->msrq.max); - flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24; - xrcdn = 0; - if (is_xrc) { - xrcdn = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; - in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(init_attr->ext.xrc.cq)->mcq.cqn); + in.type = init_attr->srq_type; + in.log_size = ilog2(srq->msrq.max); + in.wqe_shift = srq->msrq.wqe_shift - 4; + if (srq->wq_sig) + in.flags |= MLX5_SRQ_FLAG_WQ_SIG; + if (init_attr->srq_type == IB_SRQT_XRC) { + in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; + in.cqn = to_mcq(init_attr->ext.xrc.cq)->mcq.cqn; } else if (init_attr->srq_type == IB_SRQT_BASIC) { - xrcdn = to_mxrcd(dev->devr.x0)->xrcdn; - in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(dev->devr.c0)->mcq.cqn); + in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn; + in.cqn = to_mcq(dev->devr.c0)->mcq.cqn; } - in->ctx.flags_xrcd = cpu_to_be32((flgs & 0xFF000000) | (xrcdn & 0xFFFFFF)); - - in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn); - in->ctx.db_record = cpu_to_be64(srq->db.dma); - err = mlx5_core_create_srq(dev->mdev, &srq->msrq, in, inlen, is_xrc); - kvfree(in); + in.pd = to_mpd(pd)->pdn; + in.db_record = srq->db.dma; + err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in); + kvfree(in.pas); if (err) { mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); goto err_usr_kern_srq; @@ -401,7 +379,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) struct mlx5_ib_dev *dev = to_mdev(ibsrq->device); struct mlx5_ib_srq *srq = to_msrq(ibsrq); int ret; - struct mlx5_query_srq_mbox_out *out; + struct mlx5_srq_attr *out; out = kzalloc(sizeof(*out), GFP_KERNEL); if (!out) @@ -411,7 +389,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) if (ret) goto out_box; - srq_attr->srq_limit = be16_to_cpu(out->ctx.lwm); + srq_attr->srq_limit = out->lwm; srq_attr->max_wr = srq->msrq.max - 1; srq_attr->max_sge = srq->msrq.max_gs; diff 
--git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 04bc522..c07f4d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -63,12 +63,12 @@ void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) complete(&srq->free); } -static int get_pas_size(void *srqc) +static int get_pas_size(struct mlx5_srq_attr *in) { - u32 log_page_size = MLX5_GET(srqc, srqc, log_page_size) + 12; - u32 log_srq_size = MLX5_GET(srqc, srqc, log_srq_size); - u32 log_rq_stride = MLX5_GET(srqc, srqc, log_rq_stride); - u32 page_offset = MLX5_GET(srqc, srqc, page_offset); + u32 log_page_size = in->log_page_size + 12; + u32 log_srq_size = in->log_size; + u32 log_rq_stride = in->wqe_shift; + u32 page_offset = in->page_offset; u32 po_quanta = 1 << (log_page_size - 6); u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); u32 page_size = 1 << log_page_size; @@ -78,57 +78,58 @@ static int get_pas_size(void *srqc) return rq_num_pas * sizeof(u64); } -static void rmpc_srqc_reformat(void *srqc, void *rmpc, bool srqc_to_rmpc) +static void set_wq(void *wq, struct mlx5_srq_attr *in) { - void *wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - if (srqc_to_rmpc) { - switch (MLX5_GET(srqc, srqc, state)) { - case MLX5_SRQC_STATE_GOOD: - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - break; - case MLX5_SRQC_STATE_ERROR: - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_ERR); - break; - default: - pr_warn("%s: %d: Unknown srq state = 0x%x\n", __func__, - __LINE__, MLX5_GET(srqc, srqc, state)); - MLX5_SET(rmpc, rmpc, state, MLX5_GET(srqc, srqc, state)); - } - - MLX5_SET(wq, wq, wq_signature, MLX5_GET(srqc, srqc, wq_signature)); - MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(srqc, srqc, log_page_size)); - MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(srqc, srqc, log_rq_stride) + 4); - MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(srqc, srqc, log_srq_size)); - MLX5_SET(wq, wq, page_offset, MLX5_GET(srqc, srqc, page_offset)); - MLX5_SET(wq, wq, lwm, MLX5_GET(srqc, srqc, lwm)); - MLX5_SET(wq, wq, pd, MLX5_GET(srqc, srqc, pd)); - MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(srqc, srqc, dbr_addr)); - } else { - switch (MLX5_GET(rmpc, rmpc, state)) { - case MLX5_RMPC_STATE_RDY: - MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_GOOD); - break; - case MLX5_RMPC_STATE_ERR: - MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_ERROR); - break; - default: - pr_warn("%s: %d: Unknown rmp state = 0x%x\n", - __func__, __LINE__, - MLX5_GET(rmpc, rmpc, state)); - MLX5_SET(srqc, srqc, state, - MLX5_GET(rmpc, rmpc, state)); - } - - MLX5_SET(srqc, srqc, wq_signature, MLX5_GET(wq, wq, wq_signature)); - MLX5_SET(srqc, srqc, log_page_size, MLX5_GET(wq, wq, log_wq_pg_sz)); - MLX5_SET(srqc, srqc, log_rq_stride, MLX5_GET(wq, wq, log_wq_stride) - 4); - MLX5_SET(srqc, srqc, log_srq_size, MLX5_GET(wq, wq, log_wq_sz)); - MLX5_SET(srqc, srqc, page_offset, MLX5_GET(wq, wq, page_offset)); - MLX5_SET(srqc, srqc, lwm, MLX5_GET(wq, wq, lwm)); - MLX5_SET(srqc, srqc, pd, MLX5_GET(wq, wq, pd)); - MLX5_SET64(srqc, srqc, dbr_addr, MLX5_GET64(wq, wq, dbr_addr)); - } + MLX5_SET(wq, wq, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size); + MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4); + MLX5_SET(wq, wq, log_wq_sz, in->log_size); + MLX5_SET(wq, wq, page_offset, in->page_offset); + MLX5_SET(wq, wq, lwm, in->lwm); + MLX5_SET(wq, wq, pd, in->pd); + MLX5_SET64(wq, wq, dbr_addr, in->db_record); +} + +static void set_srqc(void *srqc, struct 
mlx5_srq_attr *in) +{ + MLX5_SET(srqc, srqc, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(srqc, srqc, log_page_size, in->log_page_size); + MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift); + MLX5_SET(srqc, srqc, log_srq_size, in->log_size); + MLX5_SET(srqc, srqc, page_offset, in->page_offset); + MLX5_SET(srqc, srqc, lwm, in->lwm); + MLX5_SET(srqc, srqc, pd, in->pd); + MLX5_SET64(srqc, srqc, dbr_addr, in->db_record); + MLX5_SET(srqc, srqc, xrcd, in->xrcd); + MLX5_SET(srqc, srqc, cqn, in->cqn); +} + +static void get_wq(void *wq, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(wq, wq, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz); + in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4; + in->log_size = MLX5_GET(wq, wq, log_wq_sz); + in->page_offset = MLX5_GET(wq, wq, page_offset); + in->lwm = MLX5_GET(wq, wq, lwm); + in->pd = MLX5_GET(wq, wq, pd); + in->db_record = MLX5_GET64(wq, wq, dbr_addr); +} + +static void get_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(srqc, srqc, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(srqc, srqc, log_page_size); + in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride); + in->log_size = MLX5_GET(srqc, srqc, log_srq_size); + in->page_offset = MLX5_GET(srqc, srqc, page_offset); + in->lwm = MLX5_GET(srqc, srqc, lwm); + in->pd = MLX5_GET(srqc, srqc, pd); + in->db_record = MLX5_GET64(srqc, srqc, dbr_addr); } struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) @@ -149,19 +150,36 @@ struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) EXPORT_SYMBOL(mlx5_core_get_srq); static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int inlen) + struct mlx5_srq_attr *in) { - struct mlx5_create_srq_mbox_out out; + u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0}; + void *create_in; + void *srqc; + void *pas; + int pas_size; + int inlen; int err; - memset(&out, 0, sizeof(out)); + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; + create_in = mlx5_vzalloc(inlen); + if (!create_in) + return -ENOMEM; + + srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); + pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_SRQ); + set_srqc(srqc, in); + memcpy(pas, in->pas, pas_size); - err = mlx5_cmd_exec_check_status(dev, (u32 *)in, inlen, (u32 *)(&out), - sizeof(out)); + MLX5_SET(create_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_SRQ); - srq->srqn = be32_to_cpu(out.srqn) & 0xffffff; + err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) + srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); return err; } @@ -169,67 +187,75 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, static int destroy_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) { - struct mlx5_destroy_srq_mbox_in in; - struct mlx5_destroy_srq_mbox_out out; + u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); - in.srqn = cpu_to_be32(srq->srqn); + MLX5_SET(destroy_srq_in, srq_in, opcode, + MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); - return mlx5_cmd_exec_check_status(dev, (u32 
*)(&in), sizeof(in), - (u32 *)(&out), sizeof(out)); + return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); } static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq) { - struct mlx5_arm_srq_mbox_in in; - struct mlx5_arm_srq_mbox_out out; - - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); + /* arm_srq structs missing using identical xrc ones */ + u32 srq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ); - in.hdr.opmod = cpu_to_be16(!!is_srq); - in.srqn = cpu_to_be32(srq->srqn); - in.lwm = cpu_to_be16(lwm); + MLX5_SET(arm_xrc_srq_in, srq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, srq_in, xrc_srqn, srq->srqn); + MLX5_SET(arm_xrc_srq_in, srq_in, lwm, lwm); - return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), - sizeof(in), (u32 *)(&out), - sizeof(out)); + return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); } static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_query_srq_mbox_out *out) + struct mlx5_srq_attr *out) { - struct mlx5_query_srq_mbox_in in; + u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; + u32 *srq_out; + void *srqc; + int err; - memset(&in, 0, sizeof(in)); + srq_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_srq_out)); + if (!srq_out) + return -ENOMEM; - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ); - in.srqn = cpu_to_be32(srq->srqn); + MLX5_SET(query_srq_in, srq_in, opcode, + MLX5_CMD_OP_QUERY_SRQ); + MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); + err = mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), + srq_out, + MLX5_ST_SZ_BYTES(query_srq_out)); + if (err) + goto out; - return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in), - (u32 *)out, sizeof(*out)); + srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); + get_srqc(srqc, out); + if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; +out: + kvfree(srq_out); + return err; } static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, - int srq_inlen) + struct mlx5_srq_attr *in) { u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; void *create_in; - void *srqc; void *xrc_srqc; void *pas; int pas_size; int inlen; int err; - srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry); - pas_size = get_pas_size(srqc); + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; create_in = mlx5_vzalloc(inlen); if (!create_in) @@ -239,7 +265,8 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, xrc_srq_context_entry); pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); - memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc)); + set_srqc(xrc_srqc, in); + MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); memcpy(pas, in->pas, pas_size); MLX5_SET(create_xrc_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); @@ -293,11 +320,10 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_query_srq_mbox_out *out) + struct mlx5_srq_attr *out) { u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; u32 *xrcsrq_out; - void *srqc; void *xrc_srqc; int err; @@ -317,8 +343,9 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, 
xrc_srq_context_entry); - srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); - memcpy(srqc, xrc_srqc, MLX5_ST_SZ_BYTES(srqc)); + get_srqc(xrc_srqc, out); + if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; out: kvfree(xrcsrq_out); @@ -326,26 +353,27 @@ out: } static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int srq_inlen) + struct mlx5_srq_attr *in) { void *create_in; void *rmpc; - void *srqc; + void *wq; int pas_size; int inlen; int err; - srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry); - pas_size = get_pas_size(srqc); + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; create_in = mlx5_vzalloc(inlen); if (!create_in) return -ENOMEM; rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + set_wq(wq, in); memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); - rmpc_srqc_reformat(srqc, rmpc, true); err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); @@ -390,11 +418,10 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev, } static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_query_srq_mbox_out *out) + struct mlx5_srq_attr *out) { u32 *rmp_out; void *rmpc; - void *srqc; int err; rmp_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_rmp_out)); @@ -405,9 +432,10 @@ static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, if (err) goto out; - srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); - rmpc_srqc_reformat(srqc, rmpc, false); + get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out); + if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY) + out->flags |= MLX5_SRQ_FLAG_ERR; out: kvfree(rmp_out); @@ -416,15 +444,14 @@ out: static int create_srq_split(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, - int inlen, int is_xrc) + struct mlx5_srq_attr *in) { if (!dev->issi) - return create_srq_cmd(dev, srq, in, inlen); + return create_srq_cmd(dev, srq, in); else if (srq->common.res == MLX5_RES_XSRQ) - return create_xrc_srq_cmd(dev, srq, in, inlen); + return create_xrc_srq_cmd(dev, srq, in); else - return create_rmp_cmd(dev, srq, in, inlen); + return create_rmp_cmd(dev, srq, in); } static int destroy_srq_split(struct mlx5_core_dev *dev, @@ -439,15 +466,17 @@ static int destroy_srq_split(struct mlx5_core_dev *dev, } int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int inlen, - int is_xrc) + struct mlx5_srq_attr *in) { int err; struct mlx5_srq_table *table = &dev->priv.srq_table; - srq->common.res = is_xrc ? 
MLX5_RES_XSRQ : MLX5_RES_SRQ;
+	if (in->type == IB_SRQT_XRC)
+		srq->common.res = MLX5_RES_XSRQ;
+	else
+		srq->common.res = MLX5_RES_SRQ;
 
-	err = create_srq_split(dev, srq, in, inlen, is_xrc);
+	err = create_srq_split(dev, srq, in);
 	if (err)
 		return err;
 
@@ -502,7 +531,7 @@ int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
 EXPORT_SYMBOL(mlx5_core_destroy_srq);
 
 int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-			struct mlx5_query_srq_mbox_out *out)
+			struct mlx5_srq_attr *out)
 {
 	if (!dev->issi)
 		return query_srq_cmd(dev, srq, out);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 80776d0..ba93333 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -46,6 +46,7 @@
 #include
 #include
+#include
 
 enum {
 	MLX5_RQ_BITMASK_VSD = 1 << 1,
@@ -772,11 +773,10 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
 void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
 				 struct mlx5_cmd_mailbox *head);
 int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-			 struct mlx5_create_srq_mbox_in *in, int inlen,
-			 int is_xrc);
+			 struct mlx5_srq_attr *in);
 int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq);
 int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-			struct mlx5_query_srq_mbox_out *out);
+			struct mlx5_srq_attr *out);
 int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 		      u16 lwm, int is_srq);
 void mlx5_init_mkey_table(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h
index f43ed05..33c97dc 100644
--- a/include/linux/mlx5/srq.h
+++ b/include/linux/mlx5/srq.h
@@ -35,6 +35,31 @@
 
 #include
 
+enum {
+	MLX5_SRQ_FLAG_ERR	= (1 << 0),
+	MLX5_SRQ_FLAG_WQ_SIG	= (1 << 1),
+};
+
+struct mlx5_srq_attr {
+	u32 type;
+	u32 flags;
+	u32 log_size;
+	u32 wqe_shift;
+	u32 log_page_size;
+	u32 wqe_cnt;
+	u32 srqn;
+	u32 xrcd;
+	u32 page_offset;
+	u32 cqn;
+	u32 pd;
+	u32 lwm;
+	u32 user_index;
+	u64 db_record;
+	u64 *pas;
+};
+
+struct mlx5_core_dev;
+
 void mlx5_init_srq_table(struct mlx5_core_dev *dev);
 void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev);
-- 
cgit v0.10.2

From 0837e86a7a3422b85aa45c6f4631f6a3f74cbd01 Mon Sep 17 00:00:00 2001
From: Mark Bloch
Date: Fri, 17 Jun 2016 15:10:55 +0300
Subject: IB/mlx5: Add per port counters

In order to support statistics for ports, we attach each QP to a
counter set that is dedicated to its port.
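The attachment happens when a QP transitions from RESET to INIT: the QP
context is stamped with the counter set id that was allocated for the
QP's port, so the firmware accounts that QP's traffic to the port's
set. In condensed form (mirroring the qp.c hunk below):

	u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num : qp->port) - 1;

	context->qp_counter_set_usr_page |=
		cpu_to_be32(dev->port[port_num].q_cnt_id << 16);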
Signed-off-by: Mark Bloch Reviewed-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a7cc6d7..b29b841 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2541,6 +2541,41 @@ static void mlx5_disable_roce(struct mlx5_ib_dev *dev) unregister_netdevice_notifier(&dev->roce.nb); } +static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_ports; i++) + mlx5_core_dealloc_q_counter(dev->mdev, + dev->port[i].q_cnt_id); +} + +static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev) +{ + int i; + int ret; + + for (i = 0; i < dev->num_ports; i++) { + ret = mlx5_core_alloc_q_counter(dev->mdev, + &dev->port[i].q_cnt_id); + if (ret) { + mlx5_ib_warn(dev, + "couldn't allocate queue counter for port %d, err %d\n", + i + 1, ret); + goto dealloc_counters; + } + } + + return 0; + +dealloc_counters: + while (--i >= 0) + mlx5_core_dealloc_q_counter(dev->mdev, + dev->port[i].q_cnt_id); + + return ret; +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; @@ -2563,10 +2598,15 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->mdev = mdev; + dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port), + GFP_KERNEL); + if (!dev->port) + goto err_dealloc; + rwlock_init(&dev->roce.netdev_lock); err = get_port_caps(dev); if (err) - goto err_dealloc; + goto err_free_port; if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); @@ -2729,10 +2769,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (err) goto err_rsrc; - err = ib_register_device(&dev->ib_dev, NULL); + err = mlx5_ib_alloc_q_counters(dev); if (err) goto err_odp; + err = ib_register_device(&dev->ib_dev, NULL); + if (err) + goto err_q_cnt; + err = create_umr_res(dev); if (err) goto err_dev; @@ -2754,6 +2798,9 @@ err_umrc: err_dev: ib_unregister_device(&dev->ib_dev); +err_q_cnt: + mlx5_ib_dealloc_q_counters(dev); + err_odp: mlx5_ib_odp_remove_one(dev); @@ -2764,6 +2811,9 @@ err_disable_roce: if (ll == IB_LINK_LAYER_ETHERNET) mlx5_disable_roce(dev); +err_free_port: + kfree(dev->port); + err_dealloc: ib_dealloc_device((struct ib_device *)dev); @@ -2776,11 +2826,13 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); ib_unregister_device(&dev->ib_dev); + mlx5_ib_dealloc_q_counters(dev); destroy_umrc_res(dev); mlx5_ib_odp_remove_one(dev); destroy_dev_resources(&dev->devr); if (ll == IB_LINK_LAYER_ETHERNET) mlx5_disable_roce(dev); + kfree(dev->port); ib_dealloc_device(&dev->ib_dev); } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 0001ed5..372385d 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -591,6 +591,10 @@ struct mlx5_ib_resources { struct mutex mutex; }; +struct mlx5_ib_port { + u16 q_cnt_id; +}; + struct mlx5_roce { /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL * netdev pointer @@ -629,6 +633,8 @@ struct mlx5_ib_dev { /* protect resources needed as part of reset flow */ spinlock_t reset_flow_resource_lock; struct list_head qp_list; + /* Array with num_ports elements */ + struct mlx5_ib_port *port; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 9004905..5ca14a2 100644 --- 
a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2651,6 +2651,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, else sqd_event = 0; + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num : + qp->port) - 1; + struct mlx5_ib_port *mibport = &dev->port[port_num]; + + context->qp_counter_set_usr_page |= + cpu_to_be32(mibport->q_cnt_id << 16); + } + if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->sq_crq_size |= cpu_to_be16(1 << 4); -- cgit v0.10.2 From 0ad17a8f7fa000cbfc51eedd7fddd20f7664e4b6 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Fri, 17 Jun 2016 15:10:56 +0300 Subject: IB/mlx5: Add port protocol stats Expose new counters using the get protocol stats callback. We expose the following counters:

|------------------------|----|----|----------------------------------------------|
| Name                   | IB | EN | Description                                  |
|------------------------|----|----|----------------------------------------------|
| rx_write_requests      | +  | -  | Number of received WRITE requests for the    |
|                        |    |    | associated QP.                               |
|------------------------|----|----|----------------------------------------------|
| rx_read_requests       | +  | -  | Number of received READ requests for the     |
|                        |    |    | associated QP.                               |
|------------------------|----|----|----------------------------------------------|
| rx_atomic_requests     | +  | -  | Number of received ATOMIC requests for the   |
|                        |    |    | associated QP.                               |
|------------------------|----|----|----------------------------------------------|
| out_of_buffer          | +  | +  | Number of drops that occurred due to lack of |
|                        |    |    | a WQE for the associated QPs/RQs.            |
|------------------------|----|----|----------------------------------------------|
| out_of_sequence        | +  | -  | Number of errors in the packet transport     |
|                        |    |    | sequence number.                             |
|------------------------|----|----|----------------------------------------------|
| duplicate_request      | +  | +  | Number of received duplicated packets. A     |
|                        |    |    | request that was previously executed is      |
|                        |    |    | considered a duplicate.                      |
|------------------------|----|----|----------------------------------------------|
| rnr_nak_retry_err      | +  | +  | Number of received RNR NAK packets. The QP   |
|                        |    |    | retry limit was not exceeded.                |
|------------------------|----|----|----------------------------------------------|
| packet_seq_err         | +  | +  | Number of received NAK sequence error        |
|                        |    |    | packets. The QP retry limit was not          |
|                        |    |    | exceeded.                                    |
|------------------------|----|----|----------------------------------------------|
| implied_nak_seq_err    | +  | +  | Number of times the requester detected an    |
|                        |    |    | ACK with a PSN larger than the expected PSN  |
|                        |    |    | for an RDMA READ or ATOMIC response. The QP  |
|                        |    |    | retry limit was not exceeded.                |
|------------------------|----|----|----------------------------------------------|
| local_ack_timeout_err  | +  | -  | Number of times no ACK response was received |
|                        |    |    | from the responder within the timer          |
|                        |    |    | interval. The QP retry limit was not         |
|                        |    |    | exceeded.                                    |
|------------------------|----|----|----------------------------------------------|

The counters are exposed only if the device supports all of them.
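The driver reads the whole Q counter set with one QUERY_Q_COUNTER mailbox and then picks each statistic out of the output buffer as a 32-bit big-endian field at a fixed byte offset. A minimal kernel-context sketch of that extraction step, assuming the mlx5 accessors used in the hunk below (the helper name is illustrative):

	#include <linux/types.h>
	#include <asm/byteorder.h>
	#include <linux/mlx5/device.h>	/* MLX5_BYTE_OFF() */

	/* Sketch: each Q counter is a 32-bit big-endian field in the
	 * query_q_counter output mailbox; MLX5_BYTE_OFF() maps an mlx5_ifc
	 * field name to its byte offset inside that buffer. */
	static u64 read_q_counter(const void *out, size_t byte_off)
	{
		__be32 val = *(const __be32 *)((const char *)out + byte_off);

		return (u64)be32_to_cpu(val);
	}

	/* e.g.: read_q_counter(out, MLX5_BYTE_OFF(query_q_counter_out,
	 *			 out_of_sequence)) */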
Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Reviewed-by: Christoph Lameter Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b29b841..11e4866 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2576,6 +2576,78 @@ dealloc_counters: return ret; } +static const char * const names[] = { + "rx_write_requests", + "rx_read_requests", + "rx_atomic_requests", + "out_of_buffer", + "out_of_sequence", + "duplicate_request", + "rnr_nak_retry_err", + "packet_seq_err", + "implied_nak_seq_err", + "local_ack_timeout_err", +}; + +static const size_t stats_offsets[] = { + MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests), + MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests), + MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests), + MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer), + MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence), + MLX5_BYTE_OFF(query_q_counter_out, duplicate_request), + MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err), + MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err), + MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err), + MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err), +}; + +static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, + u8 port_num) +{ + BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets)); + + /* We support only per port stats */ + if (port_num == 0) + return NULL; + + return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names), + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u8 port, int index) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); + void *out; + __be32 val; + int ret; + int i; + + if (!port || !stats) + return -ENOSYS; + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + ret = mlx5_core_query_q_counter(dev->mdev, + dev->port[port - 1].q_cnt_id, 0, + out, outlen); + if (ret) + goto free; + + for (i = 0; i < ARRAY_SIZE(names); i++) { + val = *(__be32 *)(out + stats_offsets[i]); + stats->value[i] = (u64)be32_to_cpu(val); + } +free: + kvfree(out); + return ARRAY_SIZE(names); +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; @@ -2720,6 +2792,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); } + if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) && + MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { + dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats; + dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats; + } + if (MLX5_CAP_GEN(mdev, xrc)) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; -- cgit v0.10.2 From 5fa76c20458518ed6181adddef2e31c5afc0745c Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 15 Jun 2016 02:21:56 -0400 Subject: IB/core: Add get FW version string to the core Allow for a common core function to get firmware version strings from the individual devices. In later patches this format can then be used to pass a properly formatted version string through the IPoIB layer. The problem with the current code in the IPoIB layer is that it is specific to certain hardware types. Furthermore, this gives us a common function through which the core can provide a common sysfs entry.
Eventually we may want to remove the sysfs export but this provides for user space backwards compatibility. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 5c155fa..760ef60 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -311,6 +311,15 @@ static int read_port_immutable(struct ib_device *device) return 0; } +void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len) +{ + if (dev->get_dev_fw_str) + dev->get_dev_fw_str(dev, str, str_len); + else + str[0] = '\0'; +} +EXPORT_SYMBOL(ib_get_device_fw_str); + /** * ib_register_device - Register an IB device with IB core * @device:Device to register diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7e440d4..1dc3d0d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1956,6 +1956,7 @@ struct ib_device { * in fast paths. */ int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *); + void (*get_dev_fw_str)(struct ib_device *, char *str, size_t str_len); }; struct ib_client { @@ -1991,6 +1992,8 @@ struct ib_client { struct ib_device *ib_alloc_device(size_t size); void ib_dealloc_device(struct ib_device *device); +void ib_get_device_fw_str(struct ib_device *device, char *str, size_t str_len); + int ib_register_device(struct ib_device *device, int (*port_callback)(struct ib_device *, u8, struct kobject *)); -- cgit v0.10.2 From e18036942444f5c6e4befd08e2794a50a93a6e34 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 15 Jun 2016 02:21:57 -0400 Subject: IB/cxgb3: Support device FW version string Also remove fw_ver sysfs to be replaced by the common core one. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Reviewed-by: Steve Wise Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index bb1a839..3edb806 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1183,18 +1183,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); } -static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, - ibdev.dev); - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - - PDBG("%s dev 0x%p\n", __func__, dev); - lldev->ethtool_ops->get_drvinfo(lldev, &info); - return sprintf(buf, "%s\n", info.fw_version); -} - static ssize_t show_hca(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1334,13 +1322,11 @@ static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *iwch_class_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id, }; @@ -1362,6 +1348,18 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str, + size_t str_len) +{ + struct iwch_dev *iwch_dev = to_iwch_dev(ibdev); + struct ethtool_drvinfo info; + struct net_device *lldev = 
iwch_dev->rdev.t3cdev_p->lldev; + + PDBG("%s dev 0x%p\n", __func__, iwch_dev); + lldev->ethtool_ops->get_drvinfo(lldev, &info); + snprintf(str, str_len, "%s", info.fw_version); +} + int iwch_register_device(struct iwch_dev *dev) { int ret; @@ -1437,6 +1435,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.get_hw_stats = iwch_get_mib; dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; dev->ibdev.get_port_immutable = iwch_port_immutable; + dev->ibdev.get_dev_fw_str = get_dev_fw_ver_str; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) -- cgit v0.10.2 From ce1922435de3763e6097ffaa21f5f85d091818fd Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 15 Jun 2016 02:21:58 -0400 Subject: IB/cxgb4: Support device FW version string And remove sysfs fw_ver in favor of the core. Reviewed-by: Steve Wise Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index dd8a86b..df127ce 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -409,20 +409,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); } -static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, - ibdev.dev); - PDBG("%s dev 0x%p\n", __func__, dev); - - return sprintf(buf, "%u.%u.%u.%u\n", - FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers), - FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers), - FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers), - FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers)); -} - static ssize_t show_hca(struct device *dev, struct device_attribute *attr, char *buf) { @@ -502,13 +488,11 @@ static int c4iw_get_mib(struct ib_device *ibdev, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *c4iw_class_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id, }; @@ -530,6 +514,20 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_str(struct ib_device *dev, char *str, + size_t str_len) +{ + struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, + ibdev); + PDBG("%s dev 0x%p\n", __func__, dev); + + snprintf(str, str_len, "%u.%u.%u.%u", + FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers)); +} + int c4iw_register_device(struct c4iw_dev *dev) { int ret; @@ -605,6 +603,7 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.get_hw_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; dev->ibdev.get_port_immutable = c4iw_port_immutable; + dev->ibdev.get_dev_fw_str = get_dev_fw_str; dev->ibdev.drain_sq = c4iw_drain_sq; dev->ibdev.drain_rq = c4iw_drain_rq; -- cgit v0.10.2 From f65c52ca23550ab44b08f243a9a4067a911e9fd3 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 15 Jun 2016 02:21:59 -0400 Subject: IB/i40iw: Support device FW version string And remove sysfs support in favor of the core version. 
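Every driver conversion in this series has the same shape: delete the driver-private fw_ver sysfs attribute, format the version into the caller's buffer with snprintf(), and hook the new callback into the ib_device at registration time. A generic sketch of that contract, with hypothetical example_fw_major()/example_fw_minor() standing in for whatever version source a given driver has:

	#include <rdma/ib_verbs.h>

	/* hypothetical stand-ins for a driver's firmware version source */
	u32 example_fw_major(struct ib_device *ibdev);
	u32 example_fw_minor(struct ib_device *ibdev);

	/* Sketch: format into the caller-provided buffer, honoring its size. */
	static void example_get_dev_fw_str(struct ib_device *ibdev, char *str,
					   size_t str_len)
	{
		snprintf(str, str_len, "%u.%u",
			 example_fw_major(ibdev), example_fw_minor(ibdev));
	}

	/* wired up once at device setup:
	 *	ibdev->get_dev_fw_str = example_get_dev_fw_str;
	 */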
Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 02a735b..39096a2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1973,18 +1973,6 @@ static ssize_t i40iw_show_rev(struct device *dev, } /** - * i40iw_show_fw_ver - */ -static ssize_t i40iw_show_fw_ver(struct device *dev, - struct device_attribute *attr, char *buf) -{ - u32 firmware_version = I40IW_FW_VERSION; - - return sprintf(buf, "%u.%u\n", firmware_version, - (firmware_version & 0x000000ff)); -} - -/** * i40iw_show_hca */ static ssize_t i40iw_show_hca(struct device *dev, @@ -2004,13 +1992,11 @@ static ssize_t i40iw_show_board(struct device *dev, } static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, i40iw_show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL); static struct device_attribute *i40iw_dev_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id }; @@ -2427,6 +2413,15 @@ static const char * const i40iw_hw_stat_names[] = { "iwRdmaInv" }; +static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str, + size_t str_len) +{ + u32 firmware_version = I40IW_FW_VERSION; + + snprintf(str, str_len, "%u.%u", firmware_version, + (firmware_version & 0x000000ff)); +} + /** * i40iw_alloc_hw_stats - Allocate a hw stats structure * @ibdev: device pointer from stack @@ -2650,6 +2645,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name, sizeof(iwibdev->ibdev.iwcm->ifname)); iwibdev->ibdev.get_port_immutable = i40iw_port_immutable; + iwibdev->ibdev.get_dev_fw_str = i40iw_get_dev_fw_str; iwibdev->ibdev.poll_cq = i40iw_poll_cq; iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq; iwibdev->ibdev.post_send = i40iw_post_send; -- cgit v0.10.2 From e9db59fcd28ad6d7aceb74c2fb65a44499fdbf9a Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 15 Jun 2016 02:22:00 -0400 Subject: IB/mlx4: Support device FW version string And remove the sysfs in favor of common core version. 
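mlx4 (like mthca later in the series) keeps the firmware version packed in a single 64-bit word: major in bits 63:32, minor in bits 31:16, sub-minor in bits 15:0. A small self-contained worked example of the unpacking the diff below performs (the sample value is invented):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* invented sample: 12.28.1002 packed the way caps.fw_ver holds it */
		uint64_t fw_ver = (12ULL << 32) | (28ULL << 16) | 1002ULL;

		printf("%d.%d.%d\n",
		       (int)(fw_ver >> 32),
		       (int)(fw_ver >> 16) & 0xffff,
		       (int)fw_ver & 0xffff);	/* prints 12.28.1002 */
		return 0;
	}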
Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0eb09e1..1cbd075 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2022,16 +2022,6 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr, return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); } -static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct mlx4_ib_dev *dev = - container_of(device, struct mlx4_ib_dev, ib_dev.dev); - return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32), - (int) (dev->dev->caps.fw_ver >> 16) & 0xffff, - (int) dev->dev->caps.fw_ver & 0xffff); -} - static ssize_t show_rev(struct device *device, struct device_attribute *attr, char *buf) { @@ -2050,13 +2040,11 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id }; @@ -2277,6 +2265,17 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_fw_ver_str(struct ib_device *device, char *str, + size_t str_len) +{ + struct mlx4_ib_dev *dev = + container_of(device, struct mlx4_ib_dev, ib_dev); + snprintf(str, str_len, "%d.%d.%d", + (int) (dev->dev->caps.fw_ver >> 32), + (int) (dev->dev->caps.fw_ver >> 16) & 0xffff, + (int) dev->dev->caps.fw_ver & 0xffff); +} + static void *mlx4_ib_add(struct mlx4_dev *dev) { struct mlx4_ib_dev *ibdev; @@ -2410,6 +2409,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; ibdev->ib_dev.get_port_immutable = mlx4_port_immutable; + ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str; ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext; if (!mlx4_is_slave(ibdev->dev)) { -- cgit v0.10.2 From c73428230d98d1352bcc69cd8306c292a85e1e42 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 15 Jun 2016 02:22:01 -0400 Subject: IB/mlx5: Support device FW version string And remove sysfs entry in favor of the common code. 
Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b48ad85..3fa8557 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1804,15 +1804,6 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr, return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); } -static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); - return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev), - fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); -} - static ssize_t show_rev(struct device *device, struct device_attribute *attr, char *buf) { @@ -1831,7 +1822,6 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL); @@ -1839,7 +1829,6 @@ static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL); static struct device_attribute *mlx5_class_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id, &dev_attr_fw_pages, @@ -2275,6 +2264,15 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_str(struct ib_device *ibdev, char *str, + size_t str_len) +{ + struct mlx5_ib_dev *dev = + container_of(ibdev, struct mlx5_ib_dev, ib_dev); + snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(dev->mdev), + fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); +} + static int mlx5_enable_roce(struct mlx5_ib_dev *dev) { int err; @@ -2421,6 +2419,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; dev->ib_dev.get_port_immutable = mlx5_port_immutable; + dev->ib_dev.get_dev_fw_str = get_dev_fw_str; if (mlx5_core_is_pf(mdev)) { dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config; dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state; -- cgit v0.10.2 From 51ed03978e2c14e6cbe5458cd94f46fed108fda3 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 15 Jun 2016 02:22:02 -0400 Subject: IB/mthca: Support device FW version string And remove the sysfs entry in favor of the core support.
Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 9866c35..da2335f 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1081,16 +1081,6 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr, return sprintf(buf, "%x\n", dev->rev_id); } -static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct mthca_dev *dev = - container_of(device, struct mthca_dev, ib_dev.dev); - return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32), - (int) (dev->fw_ver >> 16) & 0xffff, - (int) dev->fw_ver & 0xffff); -} - static ssize_t show_hca(struct device *device, struct device_attribute *attr, char *buf) { @@ -1120,13 +1110,11 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *mthca_dev_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id }; @@ -1187,6 +1175,17 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_str(struct ib_device *device, char *str, + size_t str_len) +{ + struct mthca_dev *dev = + container_of(device, struct mthca_dev, ib_dev); + snprintf(str, str_len, "%d.%d.%d", + (int) (dev->fw_ver >> 32), + (int) (dev->fw_ver >> 16) & 0xffff, + (int) dev->fw_ver & 0xffff); +} + int mthca_register_device(struct mthca_dev *dev) { int ret; @@ -1266,6 +1265,7 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.reg_user_mr = mthca_reg_user_mr; dev->ib_dev.dereg_mr = mthca_dereg_mr; dev->ib_dev.get_port_immutable = mthca_port_immutable; + dev->ib_dev.get_dev_fw_str = get_dev_fw_str; if (dev->mthca_flags & MTHCA_FLAG_FMR) { dev->ib_dev.alloc_fmr = mthca_alloc_fmr; -- cgit v0.10.2 From 96357454eb8427732e8deea9f75a95e4c36a5e3b Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 15 Jun 2016 02:22:03 -0400 Subject: IB/nes: Support device FW version string And remove the sysfs in favor of the core version. 
Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 464d6da..bd69125 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -2606,23 +2606,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, /** - * show_fw_ver - */ -static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct nes_ib_device *nesibdev = - container_of(dev, struct nes_ib_device, ibdev.dev); - struct nes_vnic *nesvnic = nesibdev->nesvnic; - - nes_debug(NES_DBG_INIT, "\n"); - return sprintf(buf, "%u.%u\n", - (nesvnic->nesdev->nesadapter->firmware_version >> 16), - (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff)); -} - - -/** * show_hca */ static ssize_t show_hca(struct device *dev, struct device_attribute *attr, @@ -2645,13 +2628,11 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *nes_dev_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id }; @@ -3703,6 +3684,19 @@ static int nes_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_str(struct ib_device *dev, char *str, + size_t str_len) +{ + struct nes_ib_device *nesibdev = + container_of(dev, struct nes_ib_device, ibdev); + struct nes_vnic *nesvnic = nesibdev->nesvnic; + + nes_debug(NES_DBG_INIT, "\n"); + snprintf(str, str_len, "%u.%u", + (nesvnic->nesdev->nesadapter->firmware_version >> 16), + (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff)); +} + /** * nes_init_ofa_device */ @@ -3802,6 +3796,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.iwcm->create_listen = nes_create_listen; nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; nesibdev->ibdev.get_port_immutable = nes_port_immutable; + nesibdev->ibdev.get_dev_fw_str = get_dev_fw_str; memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name, sizeof(nesibdev->ibdev.iwcm->ifname)); -- cgit v0.10.2 From bd395005d2d9c2a4df59a861e359866b6c6f28f6 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 15 Jun 2016 02:22:04 -0400 Subject: IB/ocrdma: Support device FW version string And remove sysfs in favor of the core support. 
Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 3d75f65..07d0c6c 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -107,6 +107,14 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_str(struct ib_device *device, char *str, + size_t str_len) +{ + struct ocrdma_dev *dev = get_ocrdma_dev(device); + + snprintf(str, str_len, "%s", &dev->attr.fw_ver[0]); +} + static int ocrdma_register_device(struct ocrdma_dev *dev) { strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX); @@ -193,6 +201,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.process_mad = ocrdma_process_mad; dev->ibdev.get_port_immutable = ocrdma_port_immutable; + dev->ibdev.get_dev_fw_str = get_dev_fw_str; if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { dev->ibdev.uverbs_cmd_mask |= @@ -262,14 +271,6 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr, return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); } -static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct ocrdma_dev *dev = dev_get_drvdata(device); - - return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->attr.fw_ver[0]); -} - static ssize_t show_hca_type(struct device *device, struct device_attribute *attr, char *buf) { @@ -279,12 +280,10 @@ static ssize_t show_hca_type(struct device *device, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL); static struct device_attribute *ocrdma_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type }; -- cgit v0.10.2 From 15453e857a50b3fffa78151e2c84f325b776c0e0 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 15 Jun 2016 02:22:05 -0400 Subject: IB/usnic: Support device FW version string And remove sysfs file in favor of the common core. 
Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 565c881..c229b9f 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -331,6 +331,21 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void usnic_get_dev_fw_str(struct ib_device *device, + char *str, + size_t str_len) +{ + struct usnic_ib_dev *us_ibdev = + container_of(device, struct usnic_ib_dev, ib_dev); + struct ethtool_drvinfo info; + + mutex_lock(&us_ibdev->usdev_lock); + us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); + mutex_unlock(&us_ibdev->usdev_lock); + + snprintf(str, str_len, "%s", info.fw_version); +} + /* Start of PF discovery section */ static void *usnic_ib_device_add(struct pci_dev *dev) { @@ -414,6 +429,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq; us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr; us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable; + us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str; if (ib_register_device(&us_ibdev->ib_dev, NULL)) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index 3412ea0..80ef3f8 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -45,21 +45,6 @@ #include "usnic_ib_verbs.h" #include "usnic_log.h" -static ssize_t usnic_ib_show_fw_ver(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct usnic_ib_dev *us_ibdev = - container_of(device, struct usnic_ib_dev, ib_dev.dev); - struct ethtool_drvinfo info; - - mutex_lock(&us_ibdev->usdev_lock); - us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); - mutex_unlock(&us_ibdev->usdev_lock); - - return scnprintf(buf, PAGE_SIZE, "%s\n", info.fw_version); -} - static ssize_t usnic_ib_show_board(struct device *device, struct device_attribute *attr, char *buf) @@ -192,7 +177,6 @@ usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr, us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); } -static DEVICE_ATTR(fw_ver, S_IRUGO, usnic_ib_show_fw_ver, NULL); static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL); static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL); static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL); @@ -201,7 +185,6 @@ static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL); static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL); static struct device_attribute *usnic_class_attributes[] = { - &dev_attr_fw_ver, &dev_attr_board_id, &dev_attr_config, &dev_attr_iface, -- cgit v0.10.2 From 1a8632121a5e5bdebef00e59bd13951355682bab Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 15 Jun 2016 02:22:06 -0400 Subject: IB/ipoib: Use new device FW version string Using this allows devices to specify the format of their firmware version rather than forcing a format.
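On the consumer side the change is small: IPoIB stops decoding attrs.fw_ver itself and just hands the core a fixed-size buffer, as in this kernel-context sketch of the ethtool path (mirroring the hunk that follows; ca is the ULP's underlying ib_device):

	#include <linux/ethtool.h>
	#include <rdma/ib_verbs.h>

	/* Sketch: fill ethtool's fw_version from the core helper instead of
	 * open-coding a device-specific "maj.min.sub" decoding. */
	static void fill_fw_version(struct ib_device *ca,
				    struct ethtool_drvinfo *drvinfo)
	{
		ib_get_device_fw_str(ca, drvinfo->fw_version,
				     sizeof(drvinfo->fw_version));
	}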
Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 1502199..7b6d40f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -62,10 +62,8 @@ static void ipoib_get_drvinfo(struct net_device *netdev, { struct ipoib_dev_priv *priv = netdev_priv(netdev); - snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), - "%d.%d.%d", (int)(priv->ca->attrs.fw_ver >> 32), - (int)(priv->ca->attrs.fw_ver >> 16) & 0xffff, - (int)priv->ca->attrs.fw_ver & 0xffff); + ib_get_device_fw_str(priv->ca, drvinfo->fw_version, + sizeof(drvinfo->fw_version)); strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device), sizeof(drvinfo->bus_info)); -- cgit v0.10.2 From 41a6ae1ebd51d074a43d608b8ecfc9dd2b323d5e Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 15 Jun 2016 02:22:07 -0400 Subject: IB/core: Export a common fw_ver sysfs entry Now that all the devices have stopped exporting their own sysfs entry points we can have the core export this on their behalf. Eventually this may be removed but this provides for backwards compatibility. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index a5793c8..0d1ab73 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -1196,16 +1197,28 @@ static ssize_t set_node_desc(struct device *device, return count; } +static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct ib_device *dev = container_of(device, struct ib_device, dev); + + ib_get_device_fw_str(dev, buf, PAGE_SIZE); + strlcat(buf, "\n", PAGE_SIZE); + return strlen(buf); +} + static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL); static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL); static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static struct device_attribute *ib_class_attributes[] = { &dev_attr_node_type, &dev_attr_sys_image_guid, &dev_attr_node_guid, - &dev_attr_node_desc + &dev_attr_node_desc, + &dev_attr_fw_ver, }; static void free_port_list_attributes(struct ib_device *device) -- cgit v0.10.2 From 939b6ca873e7bbef4b26a1cffacf1a1bf2f3f362 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 15 Jun 2016 02:22:08 -0400 Subject: IB/hfi1: Add device FW version string Export the firmware version through the core. 
Acked-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 4417a0f..49a71e2 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1174,6 +1174,8 @@ struct hfi1_devdata { /* 8051 firmware version helper */ #define dc8051_ver(a, b) ((a) << 8 | (b)) +#define dc8051_ver_maj(a) ((a & 0xff00) >> 8) +#define dc8051_ver_min(a) (a & 0x00ff) /* f_put_tid types */ #define PT_EXPECTED 0 diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 849c4b9..dd4be3c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1291,9 +1291,12 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) static void hfi1_fill_device_attr(struct hfi1_devdata *dd) { struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; + u16 ver = dd->dc8051_ver; memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props)); + rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) | + (u64)dc8051_ver_min(ver); rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | @@ -1567,6 +1570,17 @@ static void init_ibport(struct hfi1_pportdata *ppd) RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); } +static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str, + size_t str_len) +{ + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + struct hfi1_ibdev *dev = dev_from_rdi(rdi); + u16 ver = dd_from_dev(dev)->dc8051_ver; + + snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver), + dc8051_ver_min(ver)); +} + /** * hfi1_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -1613,6 +1627,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) /* keep process mad in the driver */ ibdev->process_mad = hfi1_process_mad; + ibdev->get_dev_fw_str = hfi1_get_dev_fw_str; strncpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); -- cgit v0.10.2
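For reference, the dc8051 helpers above pack the 8051 firmware version with the major number in bits 15:8 and the minor in bits 7:0. A self-contained worked round-trip under those definitions (macro arguments are parenthesized here for hygiene; the sample version is invented):

	#include <stdint.h>
	#include <stdio.h>

	#define dc8051_ver(a, b)  (((a) << 8) | (b))
	#define dc8051_ver_maj(a) (((a) & 0xff00) >> 8)
	#define dc8051_ver_min(a) ((a) & 0x00ff)

	int main(void)
	{
		uint16_t ver = dc8051_ver(1, 27);	/* invented sample: 1.27 */

		/* prints "1.27", matching what hfi1_get_dev_fw_str() emits */
		printf("%u.%u\n", dc8051_ver_maj(ver), dc8051_ver_min(ver));
		return 0;
	}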