From 7235aa79f683db0d908dcb0c2b7062dfdd765196 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 21 Nov 2012 15:47:40 +0000 Subject: UAPI: (Scripted) Disintegrate include/rdma Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. McKenney Acked-by: Dave Jones Signed-off-by: Roland Dreier diff --git a/include/rdma/Kbuild b/include/rdma/Kbuild index ea56f76..e69de29 100644 --- a/include/rdma/Kbuild +++ b/include/rdma/Kbuild @@ -1,6 +0,0 @@ -header-y += ib_user_cm.h -header-y += ib_user_mad.h -header-y += ib_user_sa.h -header-y += ib_user_verbs.h -header-y += rdma_netlink.h -header-y += rdma_user_cm.h diff --git a/include/rdma/ib_user_cm.h b/include/rdma/ib_user_cm.h deleted file mode 100644 index f79014a..0000000 --- a/include/rdma/ib_user_cm.h +++ /dev/null @@ -1,325 +0,0 @@ -/* - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef IB_USER_CM_H -#define IB_USER_CM_H - -#include -#include - -#define IB_USER_CM_ABI_VERSION 5 - -enum { - IB_USER_CM_CMD_CREATE_ID, - IB_USER_CM_CMD_DESTROY_ID, - IB_USER_CM_CMD_ATTR_ID, - - IB_USER_CM_CMD_LISTEN, - IB_USER_CM_CMD_NOTIFY, - - IB_USER_CM_CMD_SEND_REQ, - IB_USER_CM_CMD_SEND_REP, - IB_USER_CM_CMD_SEND_RTU, - IB_USER_CM_CMD_SEND_DREQ, - IB_USER_CM_CMD_SEND_DREP, - IB_USER_CM_CMD_SEND_REJ, - IB_USER_CM_CMD_SEND_MRA, - IB_USER_CM_CMD_SEND_LAP, - IB_USER_CM_CMD_SEND_APR, - IB_USER_CM_CMD_SEND_SIDR_REQ, - IB_USER_CM_CMD_SEND_SIDR_REP, - - IB_USER_CM_CMD_EVENT, - IB_USER_CM_CMD_INIT_QP_ATTR, -}; -/* - * command ABI structures. - */ -struct ib_ucm_cmd_hdr { - __u32 cmd; - __u16 in; - __u16 out; -}; - -struct ib_ucm_create_id { - __u64 uid; - __u64 response; -}; - -struct ib_ucm_create_id_resp { - __u32 id; -}; - -struct ib_ucm_destroy_id { - __u64 response; - __u32 id; - __u32 reserved; -}; - -struct ib_ucm_destroy_id_resp { - __u32 events_reported; -}; - -struct ib_ucm_attr_id { - __u64 response; - __u32 id; - __u32 reserved; -}; - -struct ib_ucm_attr_id_resp { - __be64 service_id; - __be64 service_mask; - __be32 local_id; - __be32 remote_id; -}; - -struct ib_ucm_init_qp_attr { - __u64 response; - __u32 id; - __u32 qp_state; -}; - -struct ib_ucm_listen { - __be64 service_id; - __be64 service_mask; - __u32 id; - __u32 reserved; -}; - -struct ib_ucm_notify { - __u32 id; - __u32 event; -}; - -struct ib_ucm_private_data { - __u64 data; - __u32 id; - __u8 len; - __u8 reserved[3]; -}; - -struct ib_ucm_req { - __u32 id; - __u32 qpn; - __u32 qp_type; - __u32 psn; - __be64 sid; - __u64 data; - __u64 primary_path; - __u64 alternate_path; - __u8 len; - __u8 peer_to_peer; - __u8 responder_resources; - __u8 initiator_depth; - __u8 remote_cm_response_timeout; - __u8 flow_control; - __u8 local_cm_response_timeout; - __u8 retry_count; - __u8 rnr_retry_count; - __u8 max_cm_retries; - __u8 srq; - __u8 reserved[5]; -}; - -struct ib_ucm_rep { - __u64 uid; - __u64 data; - __u32 id; - __u32 qpn; - __u32 psn; - __u8 len; - __u8 responder_resources; - __u8 initiator_depth; - __u8 target_ack_delay; - __u8 failover_accepted; - __u8 flow_control; - __u8 rnr_retry_count; - __u8 srq; - __u8 reserved[4]; -}; - -struct ib_ucm_info { - __u32 id; - __u32 status; - __u64 info; - __u64 data; - __u8 info_len; - __u8 data_len; - __u8 reserved[6]; -}; - -struct ib_ucm_mra { - __u64 data; - __u32 id; - __u8 len; - __u8 timeout; - __u8 reserved[2]; -}; - -struct ib_ucm_lap { - __u64 path; - __u64 data; - __u32 id; - __u8 len; - __u8 reserved[3]; -}; - -struct ib_ucm_sidr_req { - __u32 id; - __u32 timeout; - __be64 sid; - __u64 data; - __u64 path; - __u16 reserved_pkey; - __u8 len; - __u8 max_cm_retries; - __u8 reserved[4]; -}; - -struct ib_ucm_sidr_rep { - __u32 id; - __u32 qpn; - __u32 qkey; - __u32 status; - __u64 info; - __u64 data; - __u8 info_len; - __u8 data_len; - __u8 reserved[6]; -}; -/* - * event notification ABI structures. - */ -struct ib_ucm_event_get { - __u64 response; - __u64 data; - __u64 info; - __u8 data_len; - __u8 info_len; - __u8 reserved[6]; -}; - -struct ib_ucm_req_event_resp { - struct ib_user_path_rec primary_path; - struct ib_user_path_rec alternate_path; - __be64 remote_ca_guid; - __u32 remote_qkey; - __u32 remote_qpn; - __u32 qp_type; - __u32 starting_psn; - __u8 responder_resources; - __u8 initiator_depth; - __u8 local_cm_response_timeout; - __u8 flow_control; - __u8 remote_cm_response_timeout; - __u8 retry_count; - __u8 rnr_retry_count; - __u8 srq; - __u8 port; - __u8 reserved[7]; -}; - -struct ib_ucm_rep_event_resp { - __be64 remote_ca_guid; - __u32 remote_qkey; - __u32 remote_qpn; - __u32 starting_psn; - __u8 responder_resources; - __u8 initiator_depth; - __u8 target_ack_delay; - __u8 failover_accepted; - __u8 flow_control; - __u8 rnr_retry_count; - __u8 srq; - __u8 reserved[5]; -}; - -struct ib_ucm_rej_event_resp { - __u32 reason; - /* ari in ib_ucm_event_get info field. */ -}; - -struct ib_ucm_mra_event_resp { - __u8 timeout; - __u8 reserved[3]; -}; - -struct ib_ucm_lap_event_resp { - struct ib_user_path_rec path; -}; - -struct ib_ucm_apr_event_resp { - __u32 status; - /* apr info in ib_ucm_event_get info field. */ -}; - -struct ib_ucm_sidr_req_event_resp { - __u16 pkey; - __u8 port; - __u8 reserved; -}; - -struct ib_ucm_sidr_rep_event_resp { - __u32 status; - __u32 qkey; - __u32 qpn; - /* info in ib_ucm_event_get info field. */ -}; - -#define IB_UCM_PRES_DATA 0x01 -#define IB_UCM_PRES_INFO 0x02 -#define IB_UCM_PRES_PRIMARY 0x04 -#define IB_UCM_PRES_ALTERNATE 0x08 - -struct ib_ucm_event_resp { - __u64 uid; - __u32 id; - __u32 event; - __u32 present; - __u32 reserved; - union { - struct ib_ucm_req_event_resp req_resp; - struct ib_ucm_rep_event_resp rep_resp; - struct ib_ucm_rej_event_resp rej_resp; - struct ib_ucm_mra_event_resp mra_resp; - struct ib_ucm_lap_event_resp lap_resp; - struct ib_ucm_apr_event_resp apr_resp; - - struct ib_ucm_sidr_req_event_resp sidr_req_resp; - struct ib_ucm_sidr_rep_event_resp sidr_rep_resp; - - __u32 send_status; - } u; -}; - -#endif /* IB_USER_CM_H */ diff --git a/include/rdma/ib_user_mad.h b/include/rdma/ib_user_mad.h deleted file mode 100644 index d6fce1c..0000000 --- a/include/rdma/ib_user_mad.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Voltaire, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef IB_USER_MAD_H -#define IB_USER_MAD_H - -#include -#include - -/* - * Increment this value if any changes that break userspace ABI - * compatibility are made. - */ -#define IB_USER_MAD_ABI_VERSION 5 - -/* - * Make sure that all structs defined in this file remain laid out so - * that they pack the same way on 32-bit and 64-bit architectures (to - * avoid incompatibility between 32-bit userspace and 64-bit kernels). - */ - -/** - * ib_user_mad_hdr_old - Old version of MAD packet header without pkey_index - * @id - ID of agent MAD received with/to be sent with - * @status - 0 on successful receive, ETIMEDOUT if no response - * received (transaction ID in data[] will be set to TID of original - * request) (ignored on send) - * @timeout_ms - Milliseconds to wait for response (unset on receive) - * @retries - Number of automatic retries to attempt - * @qpn - Remote QP number received from/to be sent to - * @qkey - Remote Q_Key to be sent with (unset on receive) - * @lid - Remote lid received from/to be sent to - * @sl - Service level received with/to be sent with - * @path_bits - Local path bits received with/to be sent with - * @grh_present - If set, GRH was received/should be sent - * @gid_index - Local GID index to send with (unset on receive) - * @hop_limit - Hop limit in GRH - * @traffic_class - Traffic class in GRH - * @gid - Remote GID in GRH - * @flow_label - Flow label in GRH - */ -struct ib_user_mad_hdr_old { - __u32 id; - __u32 status; - __u32 timeout_ms; - __u32 retries; - __u32 length; - __be32 qpn; - __be32 qkey; - __be16 lid; - __u8 sl; - __u8 path_bits; - __u8 grh_present; - __u8 gid_index; - __u8 hop_limit; - __u8 traffic_class; - __u8 gid[16]; - __be32 flow_label; -}; - -/** - * ib_user_mad_hdr - MAD packet header - * This layout allows specifying/receiving the P_Key index. To use - * this capability, an application must call the - * IB_USER_MAD_ENABLE_PKEY ioctl on the user MAD file handle before - * any other actions with the file handle. - * @id - ID of agent MAD received with/to be sent with - * @status - 0 on successful receive, ETIMEDOUT if no response - * received (transaction ID in data[] will be set to TID of original - * request) (ignored on send) - * @timeout_ms - Milliseconds to wait for response (unset on receive) - * @retries - Number of automatic retries to attempt - * @qpn - Remote QP number received from/to be sent to - * @qkey - Remote Q_Key to be sent with (unset on receive) - * @lid - Remote lid received from/to be sent to - * @sl - Service level received with/to be sent with - * @path_bits - Local path bits received with/to be sent with - * @grh_present - If set, GRH was received/should be sent - * @gid_index - Local GID index to send with (unset on receive) - * @hop_limit - Hop limit in GRH - * @traffic_class - Traffic class in GRH - * @gid - Remote GID in GRH - * @flow_label - Flow label in GRH - * @pkey_index - P_Key index - */ -struct ib_user_mad_hdr { - __u32 id; - __u32 status; - __u32 timeout_ms; - __u32 retries; - __u32 length; - __be32 qpn; - __be32 qkey; - __be16 lid; - __u8 sl; - __u8 path_bits; - __u8 grh_present; - __u8 gid_index; - __u8 hop_limit; - __u8 traffic_class; - __u8 gid[16]; - __be32 flow_label; - __u16 pkey_index; - __u8 reserved[6]; -}; - -/** - * ib_user_mad - MAD packet - * @hdr - MAD packet header - * @data - Contents of MAD - * - */ -struct ib_user_mad { - struct ib_user_mad_hdr hdr; - __u64 data[0]; -}; - -/* - * Earlier versions of this interface definition declared the - * method_mask[] member as an array of __u32 but treated it as a - * bitmap made up of longs in the kernel. This ambiguity meant that - * 32-bit big-endian applications that can run on both 32-bit and - * 64-bit kernels had no consistent ABI to rely on, and 64-bit - * big-endian applications that treated method_mask as being made up - * of 32-bit words would have their bitmap misinterpreted. - * - * To clear up this confusion, we change the declaration of - * method_mask[] to use unsigned long and handle the conversion from - * 32-bit userspace to 64-bit kernel for big-endian systems in the - * compat_ioctl method. Unfortunately, to keep the structure layout - * the same, we need the method_mask[] array to be aligned only to 4 - * bytes even when long is 64 bits, which forces us into this ugly - * typedef. - */ -typedef unsigned long __attribute__((aligned(4))) packed_ulong; -#define IB_USER_MAD_LONGS_PER_METHOD_MASK (128 / (8 * sizeof (long))) - -/** - * ib_user_mad_reg_req - MAD registration request - * @id - Set by the kernel; used to identify agent in future requests. - * @qpn - Queue pair number; must be 0 or 1. - * @method_mask - The caller will receive unsolicited MADs for any method - * where @method_mask = 1. - * @mgmt_class - Indicates which management class of MADs should be receive - * by the caller. This field is only required if the user wishes to - * receive unsolicited MADs, otherwise it should be 0. - * @mgmt_class_version - Indicates which version of MADs for the given - * management class to receive. - * @oui: Indicates IEEE OUI when mgmt_class is a vendor class - * in the range from 0x30 to 0x4f. Otherwise not used. - * @rmpp_version: If set, indicates the RMPP version used. - * - */ -struct ib_user_mad_reg_req { - __u32 id; - packed_ulong method_mask[IB_USER_MAD_LONGS_PER_METHOD_MASK]; - __u8 qpn; - __u8 mgmt_class; - __u8 mgmt_class_version; - __u8 oui[3]; - __u8 rmpp_version; -}; - -#define IB_IOCTL_MAGIC 0x1b - -#define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \ - struct ib_user_mad_reg_req) - -#define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32) - -#define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3) - -#endif /* IB_USER_MAD_H */ diff --git a/include/rdma/ib_user_sa.h b/include/rdma/ib_user_sa.h deleted file mode 100644 index cfc7c9b..0000000 --- a/include/rdma/ib_user_sa.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2005 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef IB_USER_SA_H -#define IB_USER_SA_H - -#include - -enum { - IB_PATH_GMP = 1, - IB_PATH_PRIMARY = (1<<1), - IB_PATH_ALTERNATE = (1<<2), - IB_PATH_OUTBOUND = (1<<3), - IB_PATH_INBOUND = (1<<4), - IB_PATH_INBOUND_REVERSE = (1<<5), - IB_PATH_BIDIRECTIONAL = IB_PATH_OUTBOUND | IB_PATH_INBOUND_REVERSE -}; - -struct ib_path_rec_data { - __u32 flags; - __u32 reserved; - __u32 path_rec[16]; -}; - -struct ib_user_path_rec { - __u8 dgid[16]; - __u8 sgid[16]; - __be16 dlid; - __be16 slid; - __u32 raw_traffic; - __be32 flow_label; - __u32 reversible; - __u32 mtu; - __be16 pkey; - __u8 hop_limit; - __u8 traffic_class; - __u8 numb_path; - __u8 sl; - __u8 mtu_selector; - __u8 rate_selector; - __u8 rate; - __u8 packet_life_time_selector; - __u8 packet_life_time; - __u8 preference; -}; - -#endif /* IB_USER_SA_H */ diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h deleted file mode 100644 index 81aba3a..0000000 --- a/include/rdma/ib_user_verbs.h +++ /dev/null @@ -1,734 +0,0 @@ -/* - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. - * Copyright (c) 2005 PathScale, Inc. All rights reserved. - * Copyright (c) 2006 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef IB_USER_VERBS_H -#define IB_USER_VERBS_H - -#include - -/* - * Increment this value if any changes that break userspace ABI - * compatibility are made. - */ -#define IB_USER_VERBS_ABI_VERSION 6 - -enum { - IB_USER_VERBS_CMD_GET_CONTEXT, - IB_USER_VERBS_CMD_QUERY_DEVICE, - IB_USER_VERBS_CMD_QUERY_PORT, - IB_USER_VERBS_CMD_ALLOC_PD, - IB_USER_VERBS_CMD_DEALLOC_PD, - IB_USER_VERBS_CMD_CREATE_AH, - IB_USER_VERBS_CMD_MODIFY_AH, - IB_USER_VERBS_CMD_QUERY_AH, - IB_USER_VERBS_CMD_DESTROY_AH, - IB_USER_VERBS_CMD_REG_MR, - IB_USER_VERBS_CMD_REG_SMR, - IB_USER_VERBS_CMD_REREG_MR, - IB_USER_VERBS_CMD_QUERY_MR, - IB_USER_VERBS_CMD_DEREG_MR, - IB_USER_VERBS_CMD_ALLOC_MW, - IB_USER_VERBS_CMD_BIND_MW, - IB_USER_VERBS_CMD_DEALLOC_MW, - IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL, - IB_USER_VERBS_CMD_CREATE_CQ, - IB_USER_VERBS_CMD_RESIZE_CQ, - IB_USER_VERBS_CMD_DESTROY_CQ, - IB_USER_VERBS_CMD_POLL_CQ, - IB_USER_VERBS_CMD_PEEK_CQ, - IB_USER_VERBS_CMD_REQ_NOTIFY_CQ, - IB_USER_VERBS_CMD_CREATE_QP, - IB_USER_VERBS_CMD_QUERY_QP, - IB_USER_VERBS_CMD_MODIFY_QP, - IB_USER_VERBS_CMD_DESTROY_QP, - IB_USER_VERBS_CMD_POST_SEND, - IB_USER_VERBS_CMD_POST_RECV, - IB_USER_VERBS_CMD_ATTACH_MCAST, - IB_USER_VERBS_CMD_DETACH_MCAST, - IB_USER_VERBS_CMD_CREATE_SRQ, - IB_USER_VERBS_CMD_MODIFY_SRQ, - IB_USER_VERBS_CMD_QUERY_SRQ, - IB_USER_VERBS_CMD_DESTROY_SRQ, - IB_USER_VERBS_CMD_POST_SRQ_RECV, - IB_USER_VERBS_CMD_OPEN_XRCD, - IB_USER_VERBS_CMD_CLOSE_XRCD, - IB_USER_VERBS_CMD_CREATE_XSRQ, - IB_USER_VERBS_CMD_OPEN_QP -}; - -/* - * Make sure that all structs defined in this file remain laid out so - * that they pack the same way on 32-bit and 64-bit architectures (to - * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * Specifically: - * - Do not use pointer types -- pass pointers in __u64 instead. - * - Make sure that any structure larger than 4 bytes is padded to a - * multiple of 8 bytes. Otherwise the structure size will be - * different between 32-bit and 64-bit architectures. - */ - -struct ib_uverbs_async_event_desc { - __u64 element; - __u32 event_type; /* enum ib_event_type */ - __u32 reserved; -}; - -struct ib_uverbs_comp_event_desc { - __u64 cq_handle; -}; - -/* - * All commands from userspace should start with a __u32 command field - * followed by __u16 in_words and out_words fields (which give the - * length of the command block and response buffer if any in 32-bit - * words). The kernel driver will read these fields first and read - * the rest of the command struct based on these value. - */ - -struct ib_uverbs_cmd_hdr { - __u32 command; - __u16 in_words; - __u16 out_words; -}; - -struct ib_uverbs_get_context { - __u64 response; - __u64 driver_data[0]; -}; - -struct ib_uverbs_get_context_resp { - __u32 async_fd; - __u32 num_comp_vectors; -}; - -struct ib_uverbs_query_device { - __u64 response; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_device_resp { - __u64 fw_ver; - __be64 node_guid; - __be64 sys_image_guid; - __u64 max_mr_size; - __u64 page_size_cap; - __u32 vendor_id; - __u32 vendor_part_id; - __u32 hw_ver; - __u32 max_qp; - __u32 max_qp_wr; - __u32 device_cap_flags; - __u32 max_sge; - __u32 max_sge_rd; - __u32 max_cq; - __u32 max_cqe; - __u32 max_mr; - __u32 max_pd; - __u32 max_qp_rd_atom; - __u32 max_ee_rd_atom; - __u32 max_res_rd_atom; - __u32 max_qp_init_rd_atom; - __u32 max_ee_init_rd_atom; - __u32 atomic_cap; - __u32 max_ee; - __u32 max_rdd; - __u32 max_mw; - __u32 max_raw_ipv6_qp; - __u32 max_raw_ethy_qp; - __u32 max_mcast_grp; - __u32 max_mcast_qp_attach; - __u32 max_total_mcast_qp_attach; - __u32 max_ah; - __u32 max_fmr; - __u32 max_map_per_fmr; - __u32 max_srq; - __u32 max_srq_wr; - __u32 max_srq_sge; - __u16 max_pkeys; - __u8 local_ca_ack_delay; - __u8 phys_port_cnt; - __u8 reserved[4]; -}; - -struct ib_uverbs_query_port { - __u64 response; - __u8 port_num; - __u8 reserved[7]; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_port_resp { - __u32 port_cap_flags; - __u32 max_msg_sz; - __u32 bad_pkey_cntr; - __u32 qkey_viol_cntr; - __u32 gid_tbl_len; - __u16 pkey_tbl_len; - __u16 lid; - __u16 sm_lid; - __u8 state; - __u8 max_mtu; - __u8 active_mtu; - __u8 lmc; - __u8 max_vl_num; - __u8 sm_sl; - __u8 subnet_timeout; - __u8 init_type_reply; - __u8 active_width; - __u8 active_speed; - __u8 phys_state; - __u8 link_layer; - __u8 reserved[2]; -}; - -struct ib_uverbs_alloc_pd { - __u64 response; - __u64 driver_data[0]; -}; - -struct ib_uverbs_alloc_pd_resp { - __u32 pd_handle; -}; - -struct ib_uverbs_dealloc_pd { - __u32 pd_handle; -}; - -struct ib_uverbs_open_xrcd { - __u64 response; - __u32 fd; - __u32 oflags; - __u64 driver_data[0]; -}; - -struct ib_uverbs_open_xrcd_resp { - __u32 xrcd_handle; -}; - -struct ib_uverbs_close_xrcd { - __u32 xrcd_handle; -}; - -struct ib_uverbs_reg_mr { - __u64 response; - __u64 start; - __u64 length; - __u64 hca_va; - __u32 pd_handle; - __u32 access_flags; - __u64 driver_data[0]; -}; - -struct ib_uverbs_reg_mr_resp { - __u32 mr_handle; - __u32 lkey; - __u32 rkey; -}; - -struct ib_uverbs_dereg_mr { - __u32 mr_handle; -}; - -struct ib_uverbs_create_comp_channel { - __u64 response; -}; - -struct ib_uverbs_create_comp_channel_resp { - __u32 fd; -}; - -struct ib_uverbs_create_cq { - __u64 response; - __u64 user_handle; - __u32 cqe; - __u32 comp_vector; - __s32 comp_channel; - __u32 reserved; - __u64 driver_data[0]; -}; - -struct ib_uverbs_create_cq_resp { - __u32 cq_handle; - __u32 cqe; -}; - -struct ib_uverbs_resize_cq { - __u64 response; - __u32 cq_handle; - __u32 cqe; - __u64 driver_data[0]; -}; - -struct ib_uverbs_resize_cq_resp { - __u32 cqe; - __u32 reserved; - __u64 driver_data[0]; -}; - -struct ib_uverbs_poll_cq { - __u64 response; - __u32 cq_handle; - __u32 ne; -}; - -struct ib_uverbs_wc { - __u64 wr_id; - __u32 status; - __u32 opcode; - __u32 vendor_err; - __u32 byte_len; - union { - __u32 imm_data; - __u32 invalidate_rkey; - } ex; - __u32 qp_num; - __u32 src_qp; - __u32 wc_flags; - __u16 pkey_index; - __u16 slid; - __u8 sl; - __u8 dlid_path_bits; - __u8 port_num; - __u8 reserved; -}; - -struct ib_uverbs_poll_cq_resp { - __u32 count; - __u32 reserved; - struct ib_uverbs_wc wc[0]; -}; - -struct ib_uverbs_req_notify_cq { - __u32 cq_handle; - __u32 solicited_only; -}; - -struct ib_uverbs_destroy_cq { - __u64 response; - __u32 cq_handle; - __u32 reserved; -}; - -struct ib_uverbs_destroy_cq_resp { - __u32 comp_events_reported; - __u32 async_events_reported; -}; - -struct ib_uverbs_global_route { - __u8 dgid[16]; - __u32 flow_label; - __u8 sgid_index; - __u8 hop_limit; - __u8 traffic_class; - __u8 reserved; -}; - -struct ib_uverbs_ah_attr { - struct ib_uverbs_global_route grh; - __u16 dlid; - __u8 sl; - __u8 src_path_bits; - __u8 static_rate; - __u8 is_global; - __u8 port_num; - __u8 reserved; -}; - -struct ib_uverbs_qp_attr { - __u32 qp_attr_mask; - __u32 qp_state; - __u32 cur_qp_state; - __u32 path_mtu; - __u32 path_mig_state; - __u32 qkey; - __u32 rq_psn; - __u32 sq_psn; - __u32 dest_qp_num; - __u32 qp_access_flags; - - struct ib_uverbs_ah_attr ah_attr; - struct ib_uverbs_ah_attr alt_ah_attr; - - /* ib_qp_cap */ - __u32 max_send_wr; - __u32 max_recv_wr; - __u32 max_send_sge; - __u32 max_recv_sge; - __u32 max_inline_data; - - __u16 pkey_index; - __u16 alt_pkey_index; - __u8 en_sqd_async_notify; - __u8 sq_draining; - __u8 max_rd_atomic; - __u8 max_dest_rd_atomic; - __u8 min_rnr_timer; - __u8 port_num; - __u8 timeout; - __u8 retry_cnt; - __u8 rnr_retry; - __u8 alt_port_num; - __u8 alt_timeout; - __u8 reserved[5]; -}; - -struct ib_uverbs_create_qp { - __u64 response; - __u64 user_handle; - __u32 pd_handle; - __u32 send_cq_handle; - __u32 recv_cq_handle; - __u32 srq_handle; - __u32 max_send_wr; - __u32 max_recv_wr; - __u32 max_send_sge; - __u32 max_recv_sge; - __u32 max_inline_data; - __u8 sq_sig_all; - __u8 qp_type; - __u8 is_srq; - __u8 reserved; - __u64 driver_data[0]; -}; - -struct ib_uverbs_open_qp { - __u64 response; - __u64 user_handle; - __u32 pd_handle; - __u32 qpn; - __u8 qp_type; - __u8 reserved[7]; - __u64 driver_data[0]; -}; - -/* also used for open response */ -struct ib_uverbs_create_qp_resp { - __u32 qp_handle; - __u32 qpn; - __u32 max_send_wr; - __u32 max_recv_wr; - __u32 max_send_sge; - __u32 max_recv_sge; - __u32 max_inline_data; - __u32 reserved; -}; - -/* - * This struct needs to remain a multiple of 8 bytes to keep the - * alignment of the modify QP parameters. - */ -struct ib_uverbs_qp_dest { - __u8 dgid[16]; - __u32 flow_label; - __u16 dlid; - __u16 reserved; - __u8 sgid_index; - __u8 hop_limit; - __u8 traffic_class; - __u8 sl; - __u8 src_path_bits; - __u8 static_rate; - __u8 is_global; - __u8 port_num; -}; - -struct ib_uverbs_query_qp { - __u64 response; - __u32 qp_handle; - __u32 attr_mask; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_qp_resp { - struct ib_uverbs_qp_dest dest; - struct ib_uverbs_qp_dest alt_dest; - __u32 max_send_wr; - __u32 max_recv_wr; - __u32 max_send_sge; - __u32 max_recv_sge; - __u32 max_inline_data; - __u32 qkey; - __u32 rq_psn; - __u32 sq_psn; - __u32 dest_qp_num; - __u32 qp_access_flags; - __u16 pkey_index; - __u16 alt_pkey_index; - __u8 qp_state; - __u8 cur_qp_state; - __u8 path_mtu; - __u8 path_mig_state; - __u8 sq_draining; - __u8 max_rd_atomic; - __u8 max_dest_rd_atomic; - __u8 min_rnr_timer; - __u8 port_num; - __u8 timeout; - __u8 retry_cnt; - __u8 rnr_retry; - __u8 alt_port_num; - __u8 alt_timeout; - __u8 sq_sig_all; - __u8 reserved[5]; - __u64 driver_data[0]; -}; - -struct ib_uverbs_modify_qp { - struct ib_uverbs_qp_dest dest; - struct ib_uverbs_qp_dest alt_dest; - __u32 qp_handle; - __u32 attr_mask; - __u32 qkey; - __u32 rq_psn; - __u32 sq_psn; - __u32 dest_qp_num; - __u32 qp_access_flags; - __u16 pkey_index; - __u16 alt_pkey_index; - __u8 qp_state; - __u8 cur_qp_state; - __u8 path_mtu; - __u8 path_mig_state; - __u8 en_sqd_async_notify; - __u8 max_rd_atomic; - __u8 max_dest_rd_atomic; - __u8 min_rnr_timer; - __u8 port_num; - __u8 timeout; - __u8 retry_cnt; - __u8 rnr_retry; - __u8 alt_port_num; - __u8 alt_timeout; - __u8 reserved[2]; - __u64 driver_data[0]; -}; - -struct ib_uverbs_modify_qp_resp { -}; - -struct ib_uverbs_destroy_qp { - __u64 response; - __u32 qp_handle; - __u32 reserved; -}; - -struct ib_uverbs_destroy_qp_resp { - __u32 events_reported; -}; - -/* - * The ib_uverbs_sge structure isn't used anywhere, since we assume - * the ib_sge structure is packed the same way on 32-bit and 64-bit - * architectures in both kernel and user space. It's just here to - * document the ABI. - */ -struct ib_uverbs_sge { - __u64 addr; - __u32 length; - __u32 lkey; -}; - -struct ib_uverbs_send_wr { - __u64 wr_id; - __u32 num_sge; - __u32 opcode; - __u32 send_flags; - union { - __u32 imm_data; - __u32 invalidate_rkey; - } ex; - union { - struct { - __u64 remote_addr; - __u32 rkey; - __u32 reserved; - } rdma; - struct { - __u64 remote_addr; - __u64 compare_add; - __u64 swap; - __u32 rkey; - __u32 reserved; - } atomic; - struct { - __u32 ah; - __u32 remote_qpn; - __u32 remote_qkey; - __u32 reserved; - } ud; - } wr; -}; - -struct ib_uverbs_post_send { - __u64 response; - __u32 qp_handle; - __u32 wr_count; - __u32 sge_count; - __u32 wqe_size; - struct ib_uverbs_send_wr send_wr[0]; -}; - -struct ib_uverbs_post_send_resp { - __u32 bad_wr; -}; - -struct ib_uverbs_recv_wr { - __u64 wr_id; - __u32 num_sge; - __u32 reserved; -}; - -struct ib_uverbs_post_recv { - __u64 response; - __u32 qp_handle; - __u32 wr_count; - __u32 sge_count; - __u32 wqe_size; - struct ib_uverbs_recv_wr recv_wr[0]; -}; - -struct ib_uverbs_post_recv_resp { - __u32 bad_wr; -}; - -struct ib_uverbs_post_srq_recv { - __u64 response; - __u32 srq_handle; - __u32 wr_count; - __u32 sge_count; - __u32 wqe_size; - struct ib_uverbs_recv_wr recv[0]; -}; - -struct ib_uverbs_post_srq_recv_resp { - __u32 bad_wr; -}; - -struct ib_uverbs_create_ah { - __u64 response; - __u64 user_handle; - __u32 pd_handle; - __u32 reserved; - struct ib_uverbs_ah_attr attr; -}; - -struct ib_uverbs_create_ah_resp { - __u32 ah_handle; -}; - -struct ib_uverbs_destroy_ah { - __u32 ah_handle; -}; - -struct ib_uverbs_attach_mcast { - __u8 gid[16]; - __u32 qp_handle; - __u16 mlid; - __u16 reserved; - __u64 driver_data[0]; -}; - -struct ib_uverbs_detach_mcast { - __u8 gid[16]; - __u32 qp_handle; - __u16 mlid; - __u16 reserved; - __u64 driver_data[0]; -}; - -struct ib_uverbs_create_srq { - __u64 response; - __u64 user_handle; - __u32 pd_handle; - __u32 max_wr; - __u32 max_sge; - __u32 srq_limit; - __u64 driver_data[0]; -}; - -struct ib_uverbs_create_xsrq { - __u64 response; - __u64 user_handle; - __u32 srq_type; - __u32 pd_handle; - __u32 max_wr; - __u32 max_sge; - __u32 srq_limit; - __u32 reserved; - __u32 xrcd_handle; - __u32 cq_handle; - __u64 driver_data[0]; -}; - -struct ib_uverbs_create_srq_resp { - __u32 srq_handle; - __u32 max_wr; - __u32 max_sge; - __u32 srqn; -}; - -struct ib_uverbs_modify_srq { - __u32 srq_handle; - __u32 attr_mask; - __u32 max_wr; - __u32 srq_limit; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_srq { - __u64 response; - __u32 srq_handle; - __u32 reserved; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_srq_resp { - __u32 max_wr; - __u32 max_sge; - __u32 srq_limit; - __u32 reserved; -}; - -struct ib_uverbs_destroy_srq { - __u64 response; - __u32 srq_handle; - __u32 reserved; -}; - -struct ib_uverbs_destroy_srq_resp { - __u32 events_reported; -}; - -#endif /* IB_USER_VERBS_H */ diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index bd3d8b2..e38de79 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -1,41 +1,9 @@ #ifndef _RDMA_NETLINK_H #define _RDMA_NETLINK_H -#include - -enum { - RDMA_NL_RDMA_CM = 1 -}; - -#define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10) -#define RDMA_NL_GET_OP(type) (type & ((1 << 10) - 1)) -#define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op) - -enum { - RDMA_NL_RDMA_CM_ID_STATS = 0, - RDMA_NL_RDMA_CM_NUM_OPS -}; - -enum { - RDMA_NL_RDMA_CM_ATTR_SRC_ADDR = 1, - RDMA_NL_RDMA_CM_ATTR_DST_ADDR, - RDMA_NL_RDMA_CM_NUM_ATTR, -}; - -struct rdma_cm_id_stats { - __u32 qp_num; - __u32 bound_dev_if; - __u32 port_space; - __s32 pid; - __u8 cm_state; - __u8 node_type; - __u8 port_num; - __u8 qp_type; -}; - -#ifdef __KERNEL__ #include +#include struct ibnl_client_cbs { int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb); @@ -88,6 +56,4 @@ void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, int len, void *data, int type); -#endif /* __KERNEL__ */ - #endif /* _RDMA_NETLINK_H */ diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h deleted file mode 100644 index 1ee9239..0000000 --- a/include/rdma/rdma_user_cm.h +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef RDMA_USER_CM_H -#define RDMA_USER_CM_H - -#include -#include -#include -#include - -#define RDMA_USER_CM_ABI_VERSION 4 - -#define RDMA_MAX_PRIVATE_DATA 256 - -enum { - RDMA_USER_CM_CMD_CREATE_ID, - RDMA_USER_CM_CMD_DESTROY_ID, - RDMA_USER_CM_CMD_BIND_ADDR, - RDMA_USER_CM_CMD_RESOLVE_ADDR, - RDMA_USER_CM_CMD_RESOLVE_ROUTE, - RDMA_USER_CM_CMD_QUERY_ROUTE, - RDMA_USER_CM_CMD_CONNECT, - RDMA_USER_CM_CMD_LISTEN, - RDMA_USER_CM_CMD_ACCEPT, - RDMA_USER_CM_CMD_REJECT, - RDMA_USER_CM_CMD_DISCONNECT, - RDMA_USER_CM_CMD_INIT_QP_ATTR, - RDMA_USER_CM_CMD_GET_EVENT, - RDMA_USER_CM_CMD_GET_OPTION, - RDMA_USER_CM_CMD_SET_OPTION, - RDMA_USER_CM_CMD_NOTIFY, - RDMA_USER_CM_CMD_JOIN_MCAST, - RDMA_USER_CM_CMD_LEAVE_MCAST, - RDMA_USER_CM_CMD_MIGRATE_ID -}; - -/* - * command ABI structures. - */ -struct rdma_ucm_cmd_hdr { - __u32 cmd; - __u16 in; - __u16 out; -}; - -struct rdma_ucm_create_id { - __u64 uid; - __u64 response; - __u16 ps; - __u8 qp_type; - __u8 reserved[5]; -}; - -struct rdma_ucm_create_id_resp { - __u32 id; -}; - -struct rdma_ucm_destroy_id { - __u64 response; - __u32 id; - __u32 reserved; -}; - -struct rdma_ucm_destroy_id_resp { - __u32 events_reported; -}; - -struct rdma_ucm_bind_addr { - __u64 response; - struct sockaddr_in6 addr; - __u32 id; -}; - -struct rdma_ucm_resolve_addr { - struct sockaddr_in6 src_addr; - struct sockaddr_in6 dst_addr; - __u32 id; - __u32 timeout_ms; -}; - -struct rdma_ucm_resolve_route { - __u32 id; - __u32 timeout_ms; -}; - -struct rdma_ucm_query_route { - __u64 response; - __u32 id; - __u32 reserved; -}; - -struct rdma_ucm_query_route_resp { - __u64 node_guid; - struct ib_user_path_rec ib_route[2]; - struct sockaddr_in6 src_addr; - struct sockaddr_in6 dst_addr; - __u32 num_paths; - __u8 port_num; - __u8 reserved[3]; -}; - -struct rdma_ucm_conn_param { - __u32 qp_num; - __u32 reserved; - __u8 private_data[RDMA_MAX_PRIVATE_DATA]; - __u8 private_data_len; - __u8 srq; - __u8 responder_resources; - __u8 initiator_depth; - __u8 flow_control; - __u8 retry_count; - __u8 rnr_retry_count; - __u8 valid; -}; - -struct rdma_ucm_ud_param { - __u32 qp_num; - __u32 qkey; - struct ib_uverbs_ah_attr ah_attr; - __u8 private_data[RDMA_MAX_PRIVATE_DATA]; - __u8 private_data_len; - __u8 reserved[7]; -}; - -struct rdma_ucm_connect { - struct rdma_ucm_conn_param conn_param; - __u32 id; - __u32 reserved; -}; - -struct rdma_ucm_listen { - __u32 id; - __u32 backlog; -}; - -struct rdma_ucm_accept { - __u64 uid; - struct rdma_ucm_conn_param conn_param; - __u32 id; - __u32 reserved; -}; - -struct rdma_ucm_reject { - __u32 id; - __u8 private_data_len; - __u8 reserved[3]; - __u8 private_data[RDMA_MAX_PRIVATE_DATA]; -}; - -struct rdma_ucm_disconnect { - __u32 id; -}; - -struct rdma_ucm_init_qp_attr { - __u64 response; - __u32 id; - __u32 qp_state; -}; - -struct rdma_ucm_notify { - __u32 id; - __u32 event; -}; - -struct rdma_ucm_join_mcast { - __u64 response; /* rdma_ucm_create_id_resp */ - __u64 uid; - struct sockaddr_in6 addr; - __u32 id; -}; - -struct rdma_ucm_get_event { - __u64 response; -}; - -struct rdma_ucm_event_resp { - __u64 uid; - __u32 id; - __u32 event; - __u32 status; - union { - struct rdma_ucm_conn_param conn; - struct rdma_ucm_ud_param ud; - } param; -}; - -/* Option levels */ -enum { - RDMA_OPTION_ID = 0, - RDMA_OPTION_IB = 1 -}; - -/* Option details */ -enum { - RDMA_OPTION_ID_TOS = 0, - RDMA_OPTION_ID_REUSEADDR = 1, - RDMA_OPTION_ID_AFONLY = 2, - RDMA_OPTION_IB_PATH = 1 -}; - -struct rdma_ucm_set_option { - __u64 optval; - __u32 id; - __u32 level; - __u32 optname; - __u32 optlen; -}; - -struct rdma_ucm_migrate_id { - __u64 response; - __u32 id; - __u32 fd; -}; - -struct rdma_ucm_migrate_resp { - __u32 events_reported; -}; - -#endif /* RDMA_USER_CM_H */ diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild index aafaa5a..687ae33 100644 --- a/include/uapi/rdma/Kbuild +++ b/include/uapi/rdma/Kbuild @@ -1 +1,7 @@ # UAPI Header export list +header-y += ib_user_cm.h +header-y += ib_user_mad.h +header-y += ib_user_sa.h +header-y += ib_user_verbs.h +header-y += rdma_netlink.h +header-y += rdma_user_cm.h diff --git a/include/uapi/rdma/ib_user_cm.h b/include/uapi/rdma/ib_user_cm.h new file mode 100644 index 0000000..f79014a --- /dev/null +++ b/include/uapi/rdma/ib_user_cm.h @@ -0,0 +1,325 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IB_USER_CM_H +#define IB_USER_CM_H + +#include +#include + +#define IB_USER_CM_ABI_VERSION 5 + +enum { + IB_USER_CM_CMD_CREATE_ID, + IB_USER_CM_CMD_DESTROY_ID, + IB_USER_CM_CMD_ATTR_ID, + + IB_USER_CM_CMD_LISTEN, + IB_USER_CM_CMD_NOTIFY, + + IB_USER_CM_CMD_SEND_REQ, + IB_USER_CM_CMD_SEND_REP, + IB_USER_CM_CMD_SEND_RTU, + IB_USER_CM_CMD_SEND_DREQ, + IB_USER_CM_CMD_SEND_DREP, + IB_USER_CM_CMD_SEND_REJ, + IB_USER_CM_CMD_SEND_MRA, + IB_USER_CM_CMD_SEND_LAP, + IB_USER_CM_CMD_SEND_APR, + IB_USER_CM_CMD_SEND_SIDR_REQ, + IB_USER_CM_CMD_SEND_SIDR_REP, + + IB_USER_CM_CMD_EVENT, + IB_USER_CM_CMD_INIT_QP_ATTR, +}; +/* + * command ABI structures. + */ +struct ib_ucm_cmd_hdr { + __u32 cmd; + __u16 in; + __u16 out; +}; + +struct ib_ucm_create_id { + __u64 uid; + __u64 response; +}; + +struct ib_ucm_create_id_resp { + __u32 id; +}; + +struct ib_ucm_destroy_id { + __u64 response; + __u32 id; + __u32 reserved; +}; + +struct ib_ucm_destroy_id_resp { + __u32 events_reported; +}; + +struct ib_ucm_attr_id { + __u64 response; + __u32 id; + __u32 reserved; +}; + +struct ib_ucm_attr_id_resp { + __be64 service_id; + __be64 service_mask; + __be32 local_id; + __be32 remote_id; +}; + +struct ib_ucm_init_qp_attr { + __u64 response; + __u32 id; + __u32 qp_state; +}; + +struct ib_ucm_listen { + __be64 service_id; + __be64 service_mask; + __u32 id; + __u32 reserved; +}; + +struct ib_ucm_notify { + __u32 id; + __u32 event; +}; + +struct ib_ucm_private_data { + __u64 data; + __u32 id; + __u8 len; + __u8 reserved[3]; +}; + +struct ib_ucm_req { + __u32 id; + __u32 qpn; + __u32 qp_type; + __u32 psn; + __be64 sid; + __u64 data; + __u64 primary_path; + __u64 alternate_path; + __u8 len; + __u8 peer_to_peer; + __u8 responder_resources; + __u8 initiator_depth; + __u8 remote_cm_response_timeout; + __u8 flow_control; + __u8 local_cm_response_timeout; + __u8 retry_count; + __u8 rnr_retry_count; + __u8 max_cm_retries; + __u8 srq; + __u8 reserved[5]; +}; + +struct ib_ucm_rep { + __u64 uid; + __u64 data; + __u32 id; + __u32 qpn; + __u32 psn; + __u8 len; + __u8 responder_resources; + __u8 initiator_depth; + __u8 target_ack_delay; + __u8 failover_accepted; + __u8 flow_control; + __u8 rnr_retry_count; + __u8 srq; + __u8 reserved[4]; +}; + +struct ib_ucm_info { + __u32 id; + __u32 status; + __u64 info; + __u64 data; + __u8 info_len; + __u8 data_len; + __u8 reserved[6]; +}; + +struct ib_ucm_mra { + __u64 data; + __u32 id; + __u8 len; + __u8 timeout; + __u8 reserved[2]; +}; + +struct ib_ucm_lap { + __u64 path; + __u64 data; + __u32 id; + __u8 len; + __u8 reserved[3]; +}; + +struct ib_ucm_sidr_req { + __u32 id; + __u32 timeout; + __be64 sid; + __u64 data; + __u64 path; + __u16 reserved_pkey; + __u8 len; + __u8 max_cm_retries; + __u8 reserved[4]; +}; + +struct ib_ucm_sidr_rep { + __u32 id; + __u32 qpn; + __u32 qkey; + __u32 status; + __u64 info; + __u64 data; + __u8 info_len; + __u8 data_len; + __u8 reserved[6]; +}; +/* + * event notification ABI structures. + */ +struct ib_ucm_event_get { + __u64 response; + __u64 data; + __u64 info; + __u8 data_len; + __u8 info_len; + __u8 reserved[6]; +}; + +struct ib_ucm_req_event_resp { + struct ib_user_path_rec primary_path; + struct ib_user_path_rec alternate_path; + __be64 remote_ca_guid; + __u32 remote_qkey; + __u32 remote_qpn; + __u32 qp_type; + __u32 starting_psn; + __u8 responder_resources; + __u8 initiator_depth; + __u8 local_cm_response_timeout; + __u8 flow_control; + __u8 remote_cm_response_timeout; + __u8 retry_count; + __u8 rnr_retry_count; + __u8 srq; + __u8 port; + __u8 reserved[7]; +}; + +struct ib_ucm_rep_event_resp { + __be64 remote_ca_guid; + __u32 remote_qkey; + __u32 remote_qpn; + __u32 starting_psn; + __u8 responder_resources; + __u8 initiator_depth; + __u8 target_ack_delay; + __u8 failover_accepted; + __u8 flow_control; + __u8 rnr_retry_count; + __u8 srq; + __u8 reserved[5]; +}; + +struct ib_ucm_rej_event_resp { + __u32 reason; + /* ari in ib_ucm_event_get info field. */ +}; + +struct ib_ucm_mra_event_resp { + __u8 timeout; + __u8 reserved[3]; +}; + +struct ib_ucm_lap_event_resp { + struct ib_user_path_rec path; +}; + +struct ib_ucm_apr_event_resp { + __u32 status; + /* apr info in ib_ucm_event_get info field. */ +}; + +struct ib_ucm_sidr_req_event_resp { + __u16 pkey; + __u8 port; + __u8 reserved; +}; + +struct ib_ucm_sidr_rep_event_resp { + __u32 status; + __u32 qkey; + __u32 qpn; + /* info in ib_ucm_event_get info field. */ +}; + +#define IB_UCM_PRES_DATA 0x01 +#define IB_UCM_PRES_INFO 0x02 +#define IB_UCM_PRES_PRIMARY 0x04 +#define IB_UCM_PRES_ALTERNATE 0x08 + +struct ib_ucm_event_resp { + __u64 uid; + __u32 id; + __u32 event; + __u32 present; + __u32 reserved; + union { + struct ib_ucm_req_event_resp req_resp; + struct ib_ucm_rep_event_resp rep_resp; + struct ib_ucm_rej_event_resp rej_resp; + struct ib_ucm_mra_event_resp mra_resp; + struct ib_ucm_lap_event_resp lap_resp; + struct ib_ucm_apr_event_resp apr_resp; + + struct ib_ucm_sidr_req_event_resp sidr_req_resp; + struct ib_ucm_sidr_rep_event_resp sidr_rep_resp; + + __u32 send_status; + } u; +}; + +#endif /* IB_USER_CM_H */ diff --git a/include/uapi/rdma/ib_user_mad.h b/include/uapi/rdma/ib_user_mad.h new file mode 100644 index 0000000..d6fce1c --- /dev/null +++ b/include/uapi/rdma/ib_user_mad.h @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IB_USER_MAD_H +#define IB_USER_MAD_H + +#include +#include + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define IB_USER_MAD_ABI_VERSION 5 + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + */ + +/** + * ib_user_mad_hdr_old - Old version of MAD packet header without pkey_index + * @id - ID of agent MAD received with/to be sent with + * @status - 0 on successful receive, ETIMEDOUT if no response + * received (transaction ID in data[] will be set to TID of original + * request) (ignored on send) + * @timeout_ms - Milliseconds to wait for response (unset on receive) + * @retries - Number of automatic retries to attempt + * @qpn - Remote QP number received from/to be sent to + * @qkey - Remote Q_Key to be sent with (unset on receive) + * @lid - Remote lid received from/to be sent to + * @sl - Service level received with/to be sent with + * @path_bits - Local path bits received with/to be sent with + * @grh_present - If set, GRH was received/should be sent + * @gid_index - Local GID index to send with (unset on receive) + * @hop_limit - Hop limit in GRH + * @traffic_class - Traffic class in GRH + * @gid - Remote GID in GRH + * @flow_label - Flow label in GRH + */ +struct ib_user_mad_hdr_old { + __u32 id; + __u32 status; + __u32 timeout_ms; + __u32 retries; + __u32 length; + __be32 qpn; + __be32 qkey; + __be16 lid; + __u8 sl; + __u8 path_bits; + __u8 grh_present; + __u8 gid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 gid[16]; + __be32 flow_label; +}; + +/** + * ib_user_mad_hdr - MAD packet header + * This layout allows specifying/receiving the P_Key index. To use + * this capability, an application must call the + * IB_USER_MAD_ENABLE_PKEY ioctl on the user MAD file handle before + * any other actions with the file handle. + * @id - ID of agent MAD received with/to be sent with + * @status - 0 on successful receive, ETIMEDOUT if no response + * received (transaction ID in data[] will be set to TID of original + * request) (ignored on send) + * @timeout_ms - Milliseconds to wait for response (unset on receive) + * @retries - Number of automatic retries to attempt + * @qpn - Remote QP number received from/to be sent to + * @qkey - Remote Q_Key to be sent with (unset on receive) + * @lid - Remote lid received from/to be sent to + * @sl - Service level received with/to be sent with + * @path_bits - Local path bits received with/to be sent with + * @grh_present - If set, GRH was received/should be sent + * @gid_index - Local GID index to send with (unset on receive) + * @hop_limit - Hop limit in GRH + * @traffic_class - Traffic class in GRH + * @gid - Remote GID in GRH + * @flow_label - Flow label in GRH + * @pkey_index - P_Key index + */ +struct ib_user_mad_hdr { + __u32 id; + __u32 status; + __u32 timeout_ms; + __u32 retries; + __u32 length; + __be32 qpn; + __be32 qkey; + __be16 lid; + __u8 sl; + __u8 path_bits; + __u8 grh_present; + __u8 gid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 gid[16]; + __be32 flow_label; + __u16 pkey_index; + __u8 reserved[6]; +}; + +/** + * ib_user_mad - MAD packet + * @hdr - MAD packet header + * @data - Contents of MAD + * + */ +struct ib_user_mad { + struct ib_user_mad_hdr hdr; + __u64 data[0]; +}; + +/* + * Earlier versions of this interface definition declared the + * method_mask[] member as an array of __u32 but treated it as a + * bitmap made up of longs in the kernel. This ambiguity meant that + * 32-bit big-endian applications that can run on both 32-bit and + * 64-bit kernels had no consistent ABI to rely on, and 64-bit + * big-endian applications that treated method_mask as being made up + * of 32-bit words would have their bitmap misinterpreted. + * + * To clear up this confusion, we change the declaration of + * method_mask[] to use unsigned long and handle the conversion from + * 32-bit userspace to 64-bit kernel for big-endian systems in the + * compat_ioctl method. Unfortunately, to keep the structure layout + * the same, we need the method_mask[] array to be aligned only to 4 + * bytes even when long is 64 bits, which forces us into this ugly + * typedef. + */ +typedef unsigned long __attribute__((aligned(4))) packed_ulong; +#define IB_USER_MAD_LONGS_PER_METHOD_MASK (128 / (8 * sizeof (long))) + +/** + * ib_user_mad_reg_req - MAD registration request + * @id - Set by the kernel; used to identify agent in future requests. + * @qpn - Queue pair number; must be 0 or 1. + * @method_mask - The caller will receive unsolicited MADs for any method + * where @method_mask = 1. + * @mgmt_class - Indicates which management class of MADs should be receive + * by the caller. This field is only required if the user wishes to + * receive unsolicited MADs, otherwise it should be 0. + * @mgmt_class_version - Indicates which version of MADs for the given + * management class to receive. + * @oui: Indicates IEEE OUI when mgmt_class is a vendor class + * in the range from 0x30 to 0x4f. Otherwise not used. + * @rmpp_version: If set, indicates the RMPP version used. + * + */ +struct ib_user_mad_reg_req { + __u32 id; + packed_ulong method_mask[IB_USER_MAD_LONGS_PER_METHOD_MASK]; + __u8 qpn; + __u8 mgmt_class; + __u8 mgmt_class_version; + __u8 oui[3]; + __u8 rmpp_version; +}; + +#define IB_IOCTL_MAGIC 0x1b + +#define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \ + struct ib_user_mad_reg_req) + +#define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32) + +#define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3) + +#endif /* IB_USER_MAD_H */ diff --git a/include/uapi/rdma/ib_user_sa.h b/include/uapi/rdma/ib_user_sa.h new file mode 100644 index 0000000..cfc7c9b --- /dev/null +++ b/include/uapi/rdma/ib_user_sa.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IB_USER_SA_H +#define IB_USER_SA_H + +#include + +enum { + IB_PATH_GMP = 1, + IB_PATH_PRIMARY = (1<<1), + IB_PATH_ALTERNATE = (1<<2), + IB_PATH_OUTBOUND = (1<<3), + IB_PATH_INBOUND = (1<<4), + IB_PATH_INBOUND_REVERSE = (1<<5), + IB_PATH_BIDIRECTIONAL = IB_PATH_OUTBOUND | IB_PATH_INBOUND_REVERSE +}; + +struct ib_path_rec_data { + __u32 flags; + __u32 reserved; + __u32 path_rec[16]; +}; + +struct ib_user_path_rec { + __u8 dgid[16]; + __u8 sgid[16]; + __be16 dlid; + __be16 slid; + __u32 raw_traffic; + __be32 flow_label; + __u32 reversible; + __u32 mtu; + __be16 pkey; + __u8 hop_limit; + __u8 traffic_class; + __u8 numb_path; + __u8 sl; + __u8 mtu_selector; + __u8 rate_selector; + __u8 rate; + __u8 packet_life_time_selector; + __u8 packet_life_time; + __u8 preference; +}; + +#endif /* IB_USER_SA_H */ diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h new file mode 100644 index 0000000..81aba3a --- /dev/null +++ b/include/uapi/rdma/ib_user_verbs.h @@ -0,0 +1,734 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * Copyright (c) 2006 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IB_USER_VERBS_H +#define IB_USER_VERBS_H + +#include + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define IB_USER_VERBS_ABI_VERSION 6 + +enum { + IB_USER_VERBS_CMD_GET_CONTEXT, + IB_USER_VERBS_CMD_QUERY_DEVICE, + IB_USER_VERBS_CMD_QUERY_PORT, + IB_USER_VERBS_CMD_ALLOC_PD, + IB_USER_VERBS_CMD_DEALLOC_PD, + IB_USER_VERBS_CMD_CREATE_AH, + IB_USER_VERBS_CMD_MODIFY_AH, + IB_USER_VERBS_CMD_QUERY_AH, + IB_USER_VERBS_CMD_DESTROY_AH, + IB_USER_VERBS_CMD_REG_MR, + IB_USER_VERBS_CMD_REG_SMR, + IB_USER_VERBS_CMD_REREG_MR, + IB_USER_VERBS_CMD_QUERY_MR, + IB_USER_VERBS_CMD_DEREG_MR, + IB_USER_VERBS_CMD_ALLOC_MW, + IB_USER_VERBS_CMD_BIND_MW, + IB_USER_VERBS_CMD_DEALLOC_MW, + IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL, + IB_USER_VERBS_CMD_CREATE_CQ, + IB_USER_VERBS_CMD_RESIZE_CQ, + IB_USER_VERBS_CMD_DESTROY_CQ, + IB_USER_VERBS_CMD_POLL_CQ, + IB_USER_VERBS_CMD_PEEK_CQ, + IB_USER_VERBS_CMD_REQ_NOTIFY_CQ, + IB_USER_VERBS_CMD_CREATE_QP, + IB_USER_VERBS_CMD_QUERY_QP, + IB_USER_VERBS_CMD_MODIFY_QP, + IB_USER_VERBS_CMD_DESTROY_QP, + IB_USER_VERBS_CMD_POST_SEND, + IB_USER_VERBS_CMD_POST_RECV, + IB_USER_VERBS_CMD_ATTACH_MCAST, + IB_USER_VERBS_CMD_DETACH_MCAST, + IB_USER_VERBS_CMD_CREATE_SRQ, + IB_USER_VERBS_CMD_MODIFY_SRQ, + IB_USER_VERBS_CMD_QUERY_SRQ, + IB_USER_VERBS_CMD_DESTROY_SRQ, + IB_USER_VERBS_CMD_POST_SRQ_RECV, + IB_USER_VERBS_CMD_OPEN_XRCD, + IB_USER_VERBS_CMD_CLOSE_XRCD, + IB_USER_VERBS_CMD_CREATE_XSRQ, + IB_USER_VERBS_CMD_OPEN_QP +}; + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * Specifically: + * - Do not use pointer types -- pass pointers in __u64 instead. + * - Make sure that any structure larger than 4 bytes is padded to a + * multiple of 8 bytes. Otherwise the structure size will be + * different between 32-bit and 64-bit architectures. + */ + +struct ib_uverbs_async_event_desc { + __u64 element; + __u32 event_type; /* enum ib_event_type */ + __u32 reserved; +}; + +struct ib_uverbs_comp_event_desc { + __u64 cq_handle; +}; + +/* + * All commands from userspace should start with a __u32 command field + * followed by __u16 in_words and out_words fields (which give the + * length of the command block and response buffer if any in 32-bit + * words). The kernel driver will read these fields first and read + * the rest of the command struct based on these value. + */ + +struct ib_uverbs_cmd_hdr { + __u32 command; + __u16 in_words; + __u16 out_words; +}; + +struct ib_uverbs_get_context { + __u64 response; + __u64 driver_data[0]; +}; + +struct ib_uverbs_get_context_resp { + __u32 async_fd; + __u32 num_comp_vectors; +}; + +struct ib_uverbs_query_device { + __u64 response; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_device_resp { + __u64 fw_ver; + __be64 node_guid; + __be64 sys_image_guid; + __u64 max_mr_size; + __u64 page_size_cap; + __u32 vendor_id; + __u32 vendor_part_id; + __u32 hw_ver; + __u32 max_qp; + __u32 max_qp_wr; + __u32 device_cap_flags; + __u32 max_sge; + __u32 max_sge_rd; + __u32 max_cq; + __u32 max_cqe; + __u32 max_mr; + __u32 max_pd; + __u32 max_qp_rd_atom; + __u32 max_ee_rd_atom; + __u32 max_res_rd_atom; + __u32 max_qp_init_rd_atom; + __u32 max_ee_init_rd_atom; + __u32 atomic_cap; + __u32 max_ee; + __u32 max_rdd; + __u32 max_mw; + __u32 max_raw_ipv6_qp; + __u32 max_raw_ethy_qp; + __u32 max_mcast_grp; + __u32 max_mcast_qp_attach; + __u32 max_total_mcast_qp_attach; + __u32 max_ah; + __u32 max_fmr; + __u32 max_map_per_fmr; + __u32 max_srq; + __u32 max_srq_wr; + __u32 max_srq_sge; + __u16 max_pkeys; + __u8 local_ca_ack_delay; + __u8 phys_port_cnt; + __u8 reserved[4]; +}; + +struct ib_uverbs_query_port { + __u64 response; + __u8 port_num; + __u8 reserved[7]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_port_resp { + __u32 port_cap_flags; + __u32 max_msg_sz; + __u32 bad_pkey_cntr; + __u32 qkey_viol_cntr; + __u32 gid_tbl_len; + __u16 pkey_tbl_len; + __u16 lid; + __u16 sm_lid; + __u8 state; + __u8 max_mtu; + __u8 active_mtu; + __u8 lmc; + __u8 max_vl_num; + __u8 sm_sl; + __u8 subnet_timeout; + __u8 init_type_reply; + __u8 active_width; + __u8 active_speed; + __u8 phys_state; + __u8 link_layer; + __u8 reserved[2]; +}; + +struct ib_uverbs_alloc_pd { + __u64 response; + __u64 driver_data[0]; +}; + +struct ib_uverbs_alloc_pd_resp { + __u32 pd_handle; +}; + +struct ib_uverbs_dealloc_pd { + __u32 pd_handle; +}; + +struct ib_uverbs_open_xrcd { + __u64 response; + __u32 fd; + __u32 oflags; + __u64 driver_data[0]; +}; + +struct ib_uverbs_open_xrcd_resp { + __u32 xrcd_handle; +}; + +struct ib_uverbs_close_xrcd { + __u32 xrcd_handle; +}; + +struct ib_uverbs_reg_mr { + __u64 response; + __u64 start; + __u64 length; + __u64 hca_va; + __u32 pd_handle; + __u32 access_flags; + __u64 driver_data[0]; +}; + +struct ib_uverbs_reg_mr_resp { + __u32 mr_handle; + __u32 lkey; + __u32 rkey; +}; + +struct ib_uverbs_dereg_mr { + __u32 mr_handle; +}; + +struct ib_uverbs_create_comp_channel { + __u64 response; +}; + +struct ib_uverbs_create_comp_channel_resp { + __u32 fd; +}; + +struct ib_uverbs_create_cq { + __u64 response; + __u64 user_handle; + __u32 cqe; + __u32 comp_vector; + __s32 comp_channel; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_cq_resp { + __u32 cq_handle; + __u32 cqe; +}; + +struct ib_uverbs_resize_cq { + __u64 response; + __u32 cq_handle; + __u32 cqe; + __u64 driver_data[0]; +}; + +struct ib_uverbs_resize_cq_resp { + __u32 cqe; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_poll_cq { + __u64 response; + __u32 cq_handle; + __u32 ne; +}; + +struct ib_uverbs_wc { + __u64 wr_id; + __u32 status; + __u32 opcode; + __u32 vendor_err; + __u32 byte_len; + union { + __u32 imm_data; + __u32 invalidate_rkey; + } ex; + __u32 qp_num; + __u32 src_qp; + __u32 wc_flags; + __u16 pkey_index; + __u16 slid; + __u8 sl; + __u8 dlid_path_bits; + __u8 port_num; + __u8 reserved; +}; + +struct ib_uverbs_poll_cq_resp { + __u32 count; + __u32 reserved; + struct ib_uverbs_wc wc[0]; +}; + +struct ib_uverbs_req_notify_cq { + __u32 cq_handle; + __u32 solicited_only; +}; + +struct ib_uverbs_destroy_cq { + __u64 response; + __u32 cq_handle; + __u32 reserved; +}; + +struct ib_uverbs_destroy_cq_resp { + __u32 comp_events_reported; + __u32 async_events_reported; +}; + +struct ib_uverbs_global_route { + __u8 dgid[16]; + __u32 flow_label; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 reserved; +}; + +struct ib_uverbs_ah_attr { + struct ib_uverbs_global_route grh; + __u16 dlid; + __u8 sl; + __u8 src_path_bits; + __u8 static_rate; + __u8 is_global; + __u8 port_num; + __u8 reserved; +}; + +struct ib_uverbs_qp_attr { + __u32 qp_attr_mask; + __u32 qp_state; + __u32 cur_qp_state; + __u32 path_mtu; + __u32 path_mig_state; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + + struct ib_uverbs_ah_attr ah_attr; + struct ib_uverbs_ah_attr alt_ah_attr; + + /* ib_qp_cap */ + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 en_sqd_async_notify; + __u8 sq_draining; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 reserved[5]; +}; + +struct ib_uverbs_create_qp { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 send_cq_handle; + __u32 recv_cq_handle; + __u32 srq_handle; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u8 sq_sig_all; + __u8 qp_type; + __u8 is_srq; + __u8 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_open_qp { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 qpn; + __u8 qp_type; + __u8 reserved[7]; + __u64 driver_data[0]; +}; + +/* also used for open response */ +struct ib_uverbs_create_qp_resp { + __u32 qp_handle; + __u32 qpn; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u32 reserved; +}; + +/* + * This struct needs to remain a multiple of 8 bytes to keep the + * alignment of the modify QP parameters. + */ +struct ib_uverbs_qp_dest { + __u8 dgid[16]; + __u32 flow_label; + __u16 dlid; + __u16 reserved; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 sl; + __u8 src_path_bits; + __u8 static_rate; + __u8 is_global; + __u8 port_num; +}; + +struct ib_uverbs_query_qp { + __u64 response; + __u32 qp_handle; + __u32 attr_mask; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_qp_resp { + struct ib_uverbs_qp_dest dest; + struct ib_uverbs_qp_dest alt_dest; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 qp_state; + __u8 cur_qp_state; + __u8 path_mtu; + __u8 path_mig_state; + __u8 sq_draining; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 sq_sig_all; + __u8 reserved[5]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_modify_qp { + struct ib_uverbs_qp_dest dest; + struct ib_uverbs_qp_dest alt_dest; + __u32 qp_handle; + __u32 attr_mask; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 qp_state; + __u8 cur_qp_state; + __u8 path_mtu; + __u8 path_mig_state; + __u8 en_sqd_async_notify; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 reserved[2]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_modify_qp_resp { +}; + +struct ib_uverbs_destroy_qp { + __u64 response; + __u32 qp_handle; + __u32 reserved; +}; + +struct ib_uverbs_destroy_qp_resp { + __u32 events_reported; +}; + +/* + * The ib_uverbs_sge structure isn't used anywhere, since we assume + * the ib_sge structure is packed the same way on 32-bit and 64-bit + * architectures in both kernel and user space. It's just here to + * document the ABI. + */ +struct ib_uverbs_sge { + __u64 addr; + __u32 length; + __u32 lkey; +}; + +struct ib_uverbs_send_wr { + __u64 wr_id; + __u32 num_sge; + __u32 opcode; + __u32 send_flags; + union { + __u32 imm_data; + __u32 invalidate_rkey; + } ex; + union { + struct { + __u64 remote_addr; + __u32 rkey; + __u32 reserved; + } rdma; + struct { + __u64 remote_addr; + __u64 compare_add; + __u64 swap; + __u32 rkey; + __u32 reserved; + } atomic; + struct { + __u32 ah; + __u32 remote_qpn; + __u32 remote_qkey; + __u32 reserved; + } ud; + } wr; +}; + +struct ib_uverbs_post_send { + __u64 response; + __u32 qp_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_send_wr send_wr[0]; +}; + +struct ib_uverbs_post_send_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_recv_wr { + __u64 wr_id; + __u32 num_sge; + __u32 reserved; +}; + +struct ib_uverbs_post_recv { + __u64 response; + __u32 qp_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_recv_wr recv_wr[0]; +}; + +struct ib_uverbs_post_recv_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_post_srq_recv { + __u64 response; + __u32 srq_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_recv_wr recv[0]; +}; + +struct ib_uverbs_post_srq_recv_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_create_ah { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 reserved; + struct ib_uverbs_ah_attr attr; +}; + +struct ib_uverbs_create_ah_resp { + __u32 ah_handle; +}; + +struct ib_uverbs_destroy_ah { + __u32 ah_handle; +}; + +struct ib_uverbs_attach_mcast { + __u8 gid[16]; + __u32 qp_handle; + __u16 mlid; + __u16 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_detach_mcast { + __u8 gid[16]; + __u32 qp_handle; + __u16 mlid; + __u16 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_srq { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 max_wr; + __u32 max_sge; + __u32 srq_limit; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_xsrq { + __u64 response; + __u64 user_handle; + __u32 srq_type; + __u32 pd_handle; + __u32 max_wr; + __u32 max_sge; + __u32 srq_limit; + __u32 reserved; + __u32 xrcd_handle; + __u32 cq_handle; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_srq_resp { + __u32 srq_handle; + __u32 max_wr; + __u32 max_sge; + __u32 srqn; +}; + +struct ib_uverbs_modify_srq { + __u32 srq_handle; + __u32 attr_mask; + __u32 max_wr; + __u32 srq_limit; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_srq { + __u64 response; + __u32 srq_handle; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_srq_resp { + __u32 max_wr; + __u32 max_sge; + __u32 srq_limit; + __u32 reserved; +}; + +struct ib_uverbs_destroy_srq { + __u64 response; + __u32 srq_handle; + __u32 reserved; +}; + +struct ib_uverbs_destroy_srq_resp { + __u32 events_reported; +}; + +#endif /* IB_USER_VERBS_H */ diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h new file mode 100644 index 0000000..8297285 --- /dev/null +++ b/include/uapi/rdma/rdma_netlink.h @@ -0,0 +1,37 @@ +#ifndef _UAPI_RDMA_NETLINK_H +#define _UAPI_RDMA_NETLINK_H + +#include + +enum { + RDMA_NL_RDMA_CM = 1 +}; + +#define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10) +#define RDMA_NL_GET_OP(type) (type & ((1 << 10) - 1)) +#define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op) + +enum { + RDMA_NL_RDMA_CM_ID_STATS = 0, + RDMA_NL_RDMA_CM_NUM_OPS +}; + +enum { + RDMA_NL_RDMA_CM_ATTR_SRC_ADDR = 1, + RDMA_NL_RDMA_CM_ATTR_DST_ADDR, + RDMA_NL_RDMA_CM_NUM_ATTR, +}; + +struct rdma_cm_id_stats { + __u32 qp_num; + __u32 bound_dev_if; + __u32 port_space; + __s32 pid; + __u8 cm_state; + __u8 node_type; + __u8 port_num; + __u8 qp_type; +}; + + +#endif /* _UAPI_RDMA_NETLINK_H */ diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h new file mode 100644 index 0000000..1ee9239 --- /dev/null +++ b/include/uapi/rdma/rdma_user_cm.h @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RDMA_USER_CM_H +#define RDMA_USER_CM_H + +#include +#include +#include +#include + +#define RDMA_USER_CM_ABI_VERSION 4 + +#define RDMA_MAX_PRIVATE_DATA 256 + +enum { + RDMA_USER_CM_CMD_CREATE_ID, + RDMA_USER_CM_CMD_DESTROY_ID, + RDMA_USER_CM_CMD_BIND_ADDR, + RDMA_USER_CM_CMD_RESOLVE_ADDR, + RDMA_USER_CM_CMD_RESOLVE_ROUTE, + RDMA_USER_CM_CMD_QUERY_ROUTE, + RDMA_USER_CM_CMD_CONNECT, + RDMA_USER_CM_CMD_LISTEN, + RDMA_USER_CM_CMD_ACCEPT, + RDMA_USER_CM_CMD_REJECT, + RDMA_USER_CM_CMD_DISCONNECT, + RDMA_USER_CM_CMD_INIT_QP_ATTR, + RDMA_USER_CM_CMD_GET_EVENT, + RDMA_USER_CM_CMD_GET_OPTION, + RDMA_USER_CM_CMD_SET_OPTION, + RDMA_USER_CM_CMD_NOTIFY, + RDMA_USER_CM_CMD_JOIN_MCAST, + RDMA_USER_CM_CMD_LEAVE_MCAST, + RDMA_USER_CM_CMD_MIGRATE_ID +}; + +/* + * command ABI structures. + */ +struct rdma_ucm_cmd_hdr { + __u32 cmd; + __u16 in; + __u16 out; +}; + +struct rdma_ucm_create_id { + __u64 uid; + __u64 response; + __u16 ps; + __u8 qp_type; + __u8 reserved[5]; +}; + +struct rdma_ucm_create_id_resp { + __u32 id; +}; + +struct rdma_ucm_destroy_id { + __u64 response; + __u32 id; + __u32 reserved; +}; + +struct rdma_ucm_destroy_id_resp { + __u32 events_reported; +}; + +struct rdma_ucm_bind_addr { + __u64 response; + struct sockaddr_in6 addr; + __u32 id; +}; + +struct rdma_ucm_resolve_addr { + struct sockaddr_in6 src_addr; + struct sockaddr_in6 dst_addr; + __u32 id; + __u32 timeout_ms; +}; + +struct rdma_ucm_resolve_route { + __u32 id; + __u32 timeout_ms; +}; + +struct rdma_ucm_query_route { + __u64 response; + __u32 id; + __u32 reserved; +}; + +struct rdma_ucm_query_route_resp { + __u64 node_guid; + struct ib_user_path_rec ib_route[2]; + struct sockaddr_in6 src_addr; + struct sockaddr_in6 dst_addr; + __u32 num_paths; + __u8 port_num; + __u8 reserved[3]; +}; + +struct rdma_ucm_conn_param { + __u32 qp_num; + __u32 reserved; + __u8 private_data[RDMA_MAX_PRIVATE_DATA]; + __u8 private_data_len; + __u8 srq; + __u8 responder_resources; + __u8 initiator_depth; + __u8 flow_control; + __u8 retry_count; + __u8 rnr_retry_count; + __u8 valid; +}; + +struct rdma_ucm_ud_param { + __u32 qp_num; + __u32 qkey; + struct ib_uverbs_ah_attr ah_attr; + __u8 private_data[RDMA_MAX_PRIVATE_DATA]; + __u8 private_data_len; + __u8 reserved[7]; +}; + +struct rdma_ucm_connect { + struct rdma_ucm_conn_param conn_param; + __u32 id; + __u32 reserved; +}; + +struct rdma_ucm_listen { + __u32 id; + __u32 backlog; +}; + +struct rdma_ucm_accept { + __u64 uid; + struct rdma_ucm_conn_param conn_param; + __u32 id; + __u32 reserved; +}; + +struct rdma_ucm_reject { + __u32 id; + __u8 private_data_len; + __u8 reserved[3]; + __u8 private_data[RDMA_MAX_PRIVATE_DATA]; +}; + +struct rdma_ucm_disconnect { + __u32 id; +}; + +struct rdma_ucm_init_qp_attr { + __u64 response; + __u32 id; + __u32 qp_state; +}; + +struct rdma_ucm_notify { + __u32 id; + __u32 event; +}; + +struct rdma_ucm_join_mcast { + __u64 response; /* rdma_ucm_create_id_resp */ + __u64 uid; + struct sockaddr_in6 addr; + __u32 id; +}; + +struct rdma_ucm_get_event { + __u64 response; +}; + +struct rdma_ucm_event_resp { + __u64 uid; + __u32 id; + __u32 event; + __u32 status; + union { + struct rdma_ucm_conn_param conn; + struct rdma_ucm_ud_param ud; + } param; +}; + +/* Option levels */ +enum { + RDMA_OPTION_ID = 0, + RDMA_OPTION_IB = 1 +}; + +/* Option details */ +enum { + RDMA_OPTION_ID_TOS = 0, + RDMA_OPTION_ID_REUSEADDR = 1, + RDMA_OPTION_ID_AFONLY = 2, + RDMA_OPTION_IB_PATH = 1 +}; + +struct rdma_ucm_set_option { + __u64 optval; + __u32 id; + __u32 level; + __u32 optname; + __u32 optlen; +}; + +struct rdma_ucm_migrate_id { + __u64 response; + __u32 id; + __u32 fd; +}; + +struct rdma_ucm_migrate_resp { + __u32 events_reported; +}; + +#endif /* RDMA_USER_CM_H */ -- cgit v0.10.2 From 3127e4ea54fc023e35adb1d9af29d49c6d582d12 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Fri, 2 Nov 2012 23:17:34 +0000 Subject: RDMA/nes: Fix incorrect address of IP header Fix for incorrect ip header address when forwarding fpdus to hardware. Signed-off-by: Tatyana Nikolova Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index 3ba7be3..8cf74fd 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -447,7 +447,7 @@ static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp) set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX, lower_32_bits(u64tmp)); set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_HIGH_IDX, - upper_32_bits(u64tmp >> 32)); + upper_32_bits(u64tmp)); set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX, lower_32_bits(fpdu_info->frags[0].physaddr)); -- cgit v0.10.2 From bff3976bef4f917554292d48ce43636f3d040182 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Fri, 2 Nov 2012 23:17:45 +0000 Subject: RDMA/nes: Fix for unlinking skbs from empty list Signed-off-by: Tatyana Nikolova Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index 8cf74fd..1f5d69e 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -210,6 +210,9 @@ static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp } while (1) { + if (skb_queue_empty(&nesqp->pau_list)) + goto out; + seq = nes_get_seq(skb, ack, wnd, fin_rcvd, rst_rcvd); if (seq == nextseq) { if (skb->len || processacks) @@ -218,14 +221,13 @@ static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp goto out; } - if (skb->next == (struct sk_buff *)&nesqp->pau_list) - goto out; - old_skb = skb; skb = skb->next; skb_unlink(old_skb, &nesqp->pau_list); nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE); nes_rem_ref_cm_node(nesqp->cm_node); + if (skb == (struct sk_buff *)&nesqp->pau_list) + goto out; } return skb; @@ -384,7 +386,8 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, if (frags[i].skb->len == 0) { /* Pull skb off the list - it will be freed in the callback */ spin_lock_irqsave(&nesqp->pau_lock, flags); - skb_unlink(frags[i].skb, &nesqp->pau_list); + if (!skb_queue_empty(&nesqp->pau_list)) + skb_unlink(frags[i].skb, &nesqp->pau_list); spin_unlock_irqrestore(&nesqp->pau_lock, flags); } else { /* Last skb still has data so update the seq */ -- cgit v0.10.2 From fc8d7547b1e19e1bb8f3206837ee0c7c6538e2e5 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Fri, 2 Nov 2012 23:17:54 +0000 Subject: RDMA/nes: Fix for sending fpdus in order to hardware Locking fix to prevent race conditions. Fpdus (per qp) need to be forwarded to hardware in the order of their sequence numbers. Signed-off-by: Tatyana Nikolova Signed-off-by: Donald Wood Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index 1f5d69e..4d6d77f 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -247,7 +247,6 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, struct nes_rskb_cb *cb; struct pau_fpdu_info *fpdu_info = NULL; struct pau_fpdu_frag frags[MAX_FPDU_FRAGS]; - unsigned long flags; u32 fpdu_len = 0; u32 tmp_len; int frag_cnt = 0; @@ -262,12 +261,10 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, *pau_fpdu_info = NULL; - spin_lock_irqsave(&nesqp->pau_lock, flags); skb = nes_get_next_skb(nesdev, nesqp, NULL, nesqp->pau_rcv_nxt, &ack, &wnd, &fin_rcvd, &rst_rcvd); - if (!skb) { - spin_unlock_irqrestore(&nesqp->pau_lock, flags); + if (!skb) goto out; - } + cb = (struct nes_rskb_cb *)&skb->cb[0]; if (skb->len) { fpdu_len = be16_to_cpu(*(__be16 *) skb->data) + MPA_FRAMING; @@ -292,10 +289,9 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, skb = nes_get_next_skb(nesdev, nesqp, skb, nesqp->pau_rcv_nxt + frag_tot, &ack, &wnd, &fin_rcvd, &rst_rcvd); - if (!skb) { - spin_unlock_irqrestore(&nesqp->pau_lock, flags); + if (!skb) goto out; - } else if (rst_rcvd) { + if (rst_rcvd) { /* rst received in the middle of fpdu */ for (; i >= 0; i--) { skb_unlink(frags[i].skb, &nesqp->pau_list); @@ -322,8 +318,6 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, frag_cnt = 1; } - spin_unlock_irqrestore(&nesqp->pau_lock, flags); - /* Found one */ fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC); if (fpdu_info == NULL) { @@ -385,10 +379,8 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, if (frags[i].skb->len == 0) { /* Pull skb off the list - it will be freed in the callback */ - spin_lock_irqsave(&nesqp->pau_lock, flags); if (!skb_queue_empty(&nesqp->pau_list)) skb_unlink(frags[i].skb, &nesqp->pau_list); - spin_unlock_irqrestore(&nesqp->pau_lock, flags); } else { /* Last skb still has data so update the seq */ iph = (struct iphdr *)(cb->data_start + ETH_HLEN); @@ -417,14 +409,18 @@ static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp) struct pau_fpdu_info *fpdu_info; struct nes_hw_cqp_wqe *cqp_wqe; struct nes_cqp_request *cqp_request; + unsigned long flags; u64 u64tmp; u32 u32tmp; int rc; while (1) { + spin_lock_irqsave(&nesqp->pau_lock, flags); rc = get_fpdu_info(nesdev, nesqp, &fpdu_info); - if (fpdu_info == NULL) + if (rc || (fpdu_info == NULL)) { + spin_unlock_irqrestore(&nesqp->pau_lock, flags); return rc; + } cqp_request = fpdu_info->cqp_request; cqp_wqe = &cqp_request->cqp_wqe; @@ -478,6 +474,7 @@ static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp) atomic_set(&cqp_request->refcount, 1); nes_post_cqp_request(nesdev, cqp_request); + spin_unlock_irqrestore(&nesqp->pau_lock, flags); } return 0; -- cgit v0.10.2 From cecdcd5f24be8c532ad8dcbbd93c7b477cfd3413 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Tue, 13 Nov 2012 22:20:41 +0000 Subject: RDMA/nes: Fix for incorrect multicast address in the perfect filter table Signed-off-by: Tatyana Nikolova Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 0564be7..9542e16 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -944,12 +944,13 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) addr, perfect_filter_register_address+(mc_index * 8), mc_nic_index); - macaddr_high = ((u16) addr[0]) << 8; - macaddr_high += (u16) addr[1]; - macaddr_low = ((u32) addr[2]) << 24; - macaddr_low += ((u32) addr[3]) << 16; - macaddr_low += ((u32) addr[4]) << 8; - macaddr_low += (u32) addr[5]; + macaddr_high = ((u8) addr[0]) << 8; + macaddr_high += (u8) addr[1]; + macaddr_low = ((u8) addr[2]) << 24; + macaddr_low += ((u8) addr[3]) << 16; + macaddr_low += ((u8) addr[4]) << 8; + macaddr_low += (u8) addr[5]; + nes_write_indexed(nesdev, perfect_filter_register_address+(mc_index * 8), macaddr_low); -- cgit v0.10.2 From 079abea6a37fd3b4f7e1b7cf9e4d055463988753 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 3 Nov 2012 10:58:37 +0000 Subject: RDMA/nes: Use WARN() Use WARN() rather than printk() followed by WARN_ON(1), for conciseness. A simplified version of the semantic patch that makes this transformation is as follows: (http://coccinelle.lip6.fr/) // @@ expression list es; @@ -printk( +WARN(1, es); -WARN_ON(1); // Signed-off-by: Julia Lawall [ Remove extra KERN_ERR from WARN() format. - Roland ] Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index cfaacaf..feb41e7 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -629,11 +629,9 @@ static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_a case SEND_RDMA_READ_ZERO: default: - if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO) { - printk(KERN_ERR "%s[%u]: Unsupported RDMA0 len operation=%u\n", - __func__, __LINE__, cm_node->send_rdma0_op); - WARN_ON(1); - } + if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO) + WARN(1, "Unsupported RDMA0 len operation=%u\n", + cm_node->send_rdma0_op); nes_debug(NES_DBG_CM, "Sending first rdma operation.\n"); wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(NES_IWARP_SQ_OP_RDMAR); diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index 4d6d77f..4166452 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -649,11 +649,9 @@ static void nes_chg_qh_handler(struct nes_device *nesdev, struct nes_cqp_request nesqp = qh_chg->nesqp; /* Should we handle the bad completion */ - if (cqp_request->major_code) { - printk(KERN_ERR PFX "Invalid cqp_request major_code=0x%x\n", + if (cqp_request->major_code) + WARN(1, PFX "Invalid cqp_request major_code=0x%x\n", cqp_request->major_code); - WARN_ON(1); - } switch (nesqp->pau_state) { case PAU_DEL_QH: -- cgit v0.10.2 From 5390f86796a1f444ca1a7ba7315951e26acd958d Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 25 Oct 2012 14:27:07 +0000 Subject: IB/ipath: Remove unreachable code Signed-off-by: Alan Cox Acked-by: Mike Marciniszyn Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 49b09c6..be2a60e 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -719,16 +719,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) goto done; /* - * we ignore most issues after reporting them, but have to specially - * handle hardware-disabled chips. - */ - if (ret == 2) { - /* unique error, known to ipath_init_one */ - ret = -EPERM; - goto done; - } - - /* * We could bump this to allow for full rcvegrcnt + rcvtidcnt, * but then it no longer nicely fits power of two, and since * we now use routines that backend onto __get_free_pages, the -- cgit v0.10.2 From c9795bd708e37ed4154e838a1f4576192eeeacca Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 25 Oct 2012 14:36:51 +0000 Subject: RDMA/amsol1100: Fix missing break Signed-off-by: Alan Cox Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c index 32d34e8..706cf97 100644 --- a/drivers/infiniband/hw/amso1100/c2_ae.c +++ b/drivers/infiniband/hw/amso1100/c2_ae.c @@ -311,6 +311,7 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index) if (cq->ibcq.event_handler) cq->ibcq.event_handler(&ib_event, cq->ibcq.cq_context); + break; } default: -- cgit v0.10.2 From 08ff32352d6ff7083533dc1c25618d42f92ec28e Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 21 Oct 2012 14:59:24 +0000 Subject: mlx4: 64-byte CQE/EQE support ConnectX-3 devices can use either 64- or 32-byte completion queue entries (CQEs) and event queue entries (EQEs). Using 64-byte EQEs/CQEs performs better because each entry is aligned to a complete cacheline. This patch queries the HCA's capabilities, and if it supports 64-byte CQEs and EQES the driver will configure the HW to work in 64-byte mode. The 32-byte vs 64-byte mode is global per HCA and not per CQ or EQ. Since this mode is global, userspace (libmlx4) must be updated to work with the configured CQE size, and guests using SR-IOV virtual functions need to know both EQE and CQE size. In case one of the 64-byte CQE/EQE capabilities is activated, the patch makes sure that older guest drivers that use the QUERY_DEV_FUNC command (e.g as done in mlx4_core of Linux 3.3..3.6) will notice that they need an update to be able to work with the PPF. This is done by changing the returned pf_context_behaviour not to be zero any more. In case none of these capabilities is activated that value remains zero and older guest drivers can run OK. The SRIOV related flow is as follows 1. the PPF does the detection of the new capabilities using QUERY_DEV_CAP command. 2. the PPF activates the new capabilities using INIT_HCA. 3. the VF detects if the PPF activated the capabilities using QUERY_HCA, and if this is the case activates them for itself too. Note that the VF detects that it must be aware to the new PF behaviour using QUERY_FUNC_CAP. Steps 1 and 2 apply also for native mode. User space notification is done through a new field introduced in struct mlx4_ib_ucontext which holds device capabilities for which user space must take action. This changes the binary interface so the ABI towards libmlx4 exposed through uverbs is bumped from 3 to 4 but only when **needed** i.e. only when the driver does use 64-byte CQEs or future device capabilities which must be in sync by user space. This practice allows to work with unmodified libmlx4 on older devices (e.g A0, B0) which don't support 64-byte CQEs. In order to keep existing systems functional when they update to a newer kernel that contains these changes in VF and userspace ABI, a module parameter enable_64b_cqe_eqe must be set to enable 64-byte mode; the default is currently false. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index c9eb6a6..ae67df3 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -66,7 +66,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type) static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n) { - return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe)); + return mlx4_buf_offset(&buf->buf, n * buf->entry_size); } static void *get_cqe(struct mlx4_ib_cq *cq, int n) @@ -77,8 +77,9 @@ static void *get_cqe(struct mlx4_ib_cq *cq, int n) static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n) { struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe); + struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe); - return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ + return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe; } @@ -99,12 +100,13 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * { int err; - err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe), + err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size, PAGE_SIZE * 2, &buf->buf); if (err) goto out; + buf->entry_size = dev->dev->caps.cqe_size; err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift, &buf->mtt); if (err) @@ -120,8 +122,7 @@ err_mtt: mlx4_mtt_cleanup(dev->dev, &buf->mtt); err_buf: - mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe), - &buf->buf); + mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf); out: return err; @@ -129,7 +130,7 @@ out: static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe) { - mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf); + mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf); } static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context, @@ -137,8 +138,9 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont u64 buf_addr, int cqe) { int err; + int cqe_size = dev->dev->caps.cqe_size; - *umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe), + *umem = ib_umem_get(context, buf_addr, cqe * cqe_size, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(*umem)) return PTR_ERR(*umem); @@ -331,16 +333,23 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq) { struct mlx4_cqe *cqe, *new_cqe; int i; + int cqe_size = cq->buf.entry_size; + int cqe_inc = cqe_size == 64 ? 1 : 0; i = cq->mcq.cons_index; cqe = get_cqe(cq, i & cq->ibcq.cqe); + cqe += cqe_inc; + while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) { new_cqe = get_cqe_from_buf(&cq->resize_buf->buf, (i + 1) & cq->resize_buf->cqe); - memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe)); + memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size); + new_cqe += cqe_inc; + new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) | (((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0); cqe = get_cqe(cq, ++i & cq->ibcq.cqe); + cqe += cqe_inc; } ++cq->mcq.cons_index; } @@ -438,6 +447,7 @@ err_buf: out: mutex_unlock(&cq->resize_mutex); + return err; } @@ -586,6 +596,9 @@ repoll: if (!cqe) return -EAGAIN; + if (cq->buf.entry_size == 64) + cqe++; + ++cq->mcq.cons_index; /* @@ -807,6 +820,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) int nfreed = 0; struct mlx4_cqe *cqe, *dest; u8 owner_bit; + int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0; /* * First we need to find the current producer index, so we @@ -825,12 +839,16 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) */ while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); + cqe += cqe_inc; + if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) { if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index)); ++nfreed; } else if (nfreed) { dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe); + dest += cqe_inc; + owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK; memcpy(dest, cqe, sizeof *cqe); dest->owner_sr_opcode = owner_bit | diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 718ec6b..e7d81c0 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -563,15 +563,24 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_ucontext *context; + struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3; struct mlx4_ib_alloc_ucontext_resp resp; int err; if (!dev->ib_active) return ERR_PTR(-EAGAIN); - resp.qp_tab_size = dev->dev->caps.num_qps; - resp.bf_reg_size = dev->dev->caps.bf_reg_size; - resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; + if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) { + resp_v3.qp_tab_size = dev->dev->caps.num_qps; + resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size; + resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; + } else { + resp.dev_caps = dev->dev->caps.userspace_caps; + resp.qp_tab_size = dev->dev->caps.num_qps; + resp.bf_reg_size = dev->dev->caps.bf_reg_size; + resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; + resp.cqe_size = dev->dev->caps.cqe_size; + } context = kmalloc(sizeof *context, GFP_KERNEL); if (!context) @@ -586,7 +595,11 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); - err = ib_copy_to_udata(udata, &resp, sizeof resp); + if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) + err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3)); + else + err = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (err) { mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar); kfree(context); @@ -1342,7 +1355,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dma_device = &dev->pdev->dev; - ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; + if (dev->caps.userspace_caps) + ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; + else + ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION; + ibdev->ib_dev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index e04cbc9..dcd845b 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -90,6 +90,7 @@ struct mlx4_ib_xrcd { struct mlx4_ib_cq_buf { struct mlx4_buf buf; struct mlx4_mtt mtt; + int entry_size; }; struct mlx4_ib_cq_resize { diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h index 13beede..07e6769 100644 --- a/drivers/infiniband/hw/mlx4/user.h +++ b/drivers/infiniband/hw/mlx4/user.h @@ -40,7 +40,9 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define MLX4_IB_UVERBS_ABI_VERSION 3 + +#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION 3 +#define MLX4_IB_UVERBS_ABI_VERSION 4 /* * Make sure that all structs defined in this file remain laid out so @@ -50,10 +52,18 @@ * instead. */ +struct mlx4_ib_alloc_ucontext_resp_v3 { + __u32 qp_tab_size; + __u16 bf_reg_size; + __u16 bf_regs_per_page; +}; + struct mlx4_ib_alloc_ucontext_resp { + __u32 dev_caps; __u32 qp_tab_size; __u16 bf_reg_size; __u16 bf_regs_per_page; + __u32 cqe_size; }; struct mlx4_ib_alloc_pd_resp { diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 3d1899f..e791e70 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1755,7 +1755,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) spin_lock_init(&s_state->lock); } - memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe)); + memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size); priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD; INIT_WORK(&priv->mfunc.master.comm_work, mlx4_master_comm_channel); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index aa9c2f6..b8d0854 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -51,7 +51,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, int err; cq->size = entries; - cq->buf_size = cq->size * sizeof(struct mlx4_cqe); + cq->buf_size = cq->size * mdev->dev->caps.cqe_size; cq->ring = ring; cq->is_tx = mode; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index edd9cb8..93a3256 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1600,6 +1600,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, goto out; } priv->rx_ring_num = prof->rx_ring_num; + priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0; priv->mac_index = -1; priv->msg_enable = MLX4_EN_MSG_LEVEL; spin_lock_init(&priv->stats_lock); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 5aba5ec..6fa106f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -566,6 +566,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud struct ethhdr *ethh; dma_addr_t dma; u64 s_mac; + int factor = priv->cqe_factor; if (!priv->port_up) return 0; @@ -574,7 +575,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud * descriptor offset can be deduced from the CQE index instead of * reading 'cqe->index' */ index = cq->mcq.cons_index & ring->size_mask; - cqe = &cq->buf[index]; + cqe = &cq->buf[(index << factor) + factor]; /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, @@ -709,7 +710,7 @@ next: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; - cqe = &cq->buf[index]; + cqe = &cq->buf[(index << factor) + factor]; if (++polled == budget) { /* We are here because we reached the NAPI budget - * flush only pending LRO sessions */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index b35094c..25c157a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -315,12 +315,13 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) struct mlx4_cqe *buf = cq->buf; u32 packets = 0; u32 bytes = 0; + int factor = priv->cqe_factor; if (!priv->port_up) return; index = cons_index & size_mask; - cqe = &buf[index]; + cqe = &buf[(index << factor) + factor]; ring_index = ring->cons & size_mask; /* Process all completed CQEs */ @@ -349,7 +350,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) ++cons_index; index = cons_index & size_mask; - cqe = &buf[index]; + cqe = &buf[(index << factor) + factor]; } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index b84a88b..c509a86 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -101,15 +101,21 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not) mb(); } -static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry) +static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor) { - unsigned long off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE; - return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE; + /* (entry & (eq->nent - 1)) gives us a cyclic array */ + unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor); + /* CX3 is capable of extending the EQE from 32 to 64 bytes. + * When this feature is enabled, the first (in the lower addresses) + * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes + * contain the legacy EQE information. + */ + return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE; } -static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq) +static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor) { - struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index); + struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor); return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe; } @@ -177,7 +183,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe) return; } - memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1); + memcpy(s_eqe, eqe, dev->caps.eqe_size - 1); s_eqe->slave_id = slave; /* ensure all information is written before setting the ownersip bit */ wmb(); @@ -441,7 +447,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) int i; enum slave_port_gen_event gen_event; - while ((eqe = next_eqe_sw(eq))) { + while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) { /* * Make sure we read EQ entry contents after we've * checked the ownership bit. @@ -864,7 +870,8 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent, eq->dev = dev; eq->nent = roundup_pow_of_two(max(nent, 2)); - npages = PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE; + /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */ + npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE; eq->page_list = kmalloc(npages * sizeof *eq->page_list, GFP_KERNEL); @@ -966,8 +973,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev, struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; int err; - int npages = PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE; int i; + /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */ + int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 4f30b99..9a9de51 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -110,6 +110,8 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags) [42] = "Multicast VEP steering support", [48] = "Counters support", [59] = "Port management change event support", + [61] = "64 byte EQE support", + [62] = "64 byte CQE support", }; int i; @@ -235,7 +237,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, field = dev->caps.num_ports; MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); - size = 0; /* no PF behaviour is set for now */ + size = dev->caps.function_caps; /* set PF behaviours */ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET); field = 0; /* protected FMR support not available as yet */ @@ -1237,6 +1239,24 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4); + /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */ + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) { + *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29); + dev->caps.eqe_size = 64; + dev->caps.eqe_factor = 1; + } else { + dev->caps.eqe_size = 32; + dev->caps.eqe_factor = 0; + } + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) { + *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30); + dev->caps.cqe_size = 64; + dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; + } else { + dev->caps.cqe_size = 32; + } + /* QPC/EEC/CQC/EQC/RDMARC attributes */ MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET); @@ -1319,6 +1339,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox; __be32 *outbox; int err; + u8 byte_field; #define QUERY_HCA_GLOBAL_CAPS_OFFSET 0x04 @@ -1370,6 +1391,13 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); } + /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */ + MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS); + if (byte_field & 0x20) /* 64-bytes eqe enabled */ + param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED; + if (byte_field & 0x40) /* 64-bytes cqe enabled */ + param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED; + /* TPT attributes */ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 85abe9c..2c2e7ad 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -172,6 +172,7 @@ struct mlx4_init_hca_param { u8 log_uar_sz; u8 uar_page_sz; /* log pg sz in 4k chunks */ u8 fs_hash_enable_bits; + u64 dev_cap_enabled; }; struct mlx4_init_ib_param { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 2aa80af..4337f68 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -95,8 +95,14 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" " Not in use with device managed" " flow steering"); +static bool enable_64b_cqe_eqe; +module_param(enable_64b_cqe_eqe, bool, 0444); +MODULE_PARM_DESC(enable_64b_cqe_eqe, + "Enable 64 byte CQEs/EQEs when the the FW supports this"); + #define HCA_GLOBAL_CAP_MASK 0 -#define PF_CONTEXT_BEHAVIOUR_MASK 0 + +#define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE static char mlx4_version[] __devinitdata = DRV_NAME ": Mellanox ConnectX core driver v" @@ -386,6 +392,21 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0; + + if (!enable_64b_cqe_eqe) { + if (dev_cap->flags & + (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) { + mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n"); + dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE; + dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE; + } + } + + if ((dev_cap->flags & + (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) && + mlx4_is_master(dev)) + dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; + return 0; } /*The function checks if there are live vf, return the num of them*/ @@ -599,6 +620,21 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) goto err_mem; } + if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { + dev->caps.eqe_size = 64; + dev->caps.eqe_factor = 1; + } else { + dev->caps.eqe_size = 32; + dev->caps.eqe_factor = 0; + } + + if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { + dev->caps.cqe_size = 64; + dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; + } else { + dev->caps.cqe_size = 32; + } + return 0; err_mem: diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 9d27e42..73b5c2a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -487,6 +487,7 @@ struct mlx4_en_priv { int mac_index; unsigned max_mtu; int base_qpn; + int cqe_factor; struct mlx4_en_rss_map rss_map; __be32 ctrl_flags; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6d1acb0..21821da 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -142,6 +142,8 @@ enum { MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48, MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55, MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59, + MLX4_DEV_CAP_FLAG_64B_EQE = 1LL << 61, + MLX4_DEV_CAP_FLAG_64B_CQE = 1LL << 62 }; enum { @@ -151,6 +153,20 @@ enum { MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3 }; +enum { + MLX4_DEV_CAP_64B_EQE_ENABLED = 1LL << 0, + MLX4_DEV_CAP_64B_CQE_ENABLED = 1LL << 1 +}; + +enum { + MLX4_USER_DEV_CAP_64B_CQE = 1L << 0 +}; + +enum { + MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0 +}; + + #define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) enum { @@ -419,6 +435,11 @@ struct mlx4_caps { u32 max_counters; u8 port_ib_mtu[MLX4_MAX_PORTS + 1]; u16 sqp_demux; + u32 eqe_size; + u32 cqe_size; + u8 eqe_factor; + u32 userspace_caps; /* userspace must be aware of these */ + u32 function_caps; /* VFs must be aware of these */ }; struct mlx4_buf_list { -- cgit v0.10.2 From 76f267b7ad978fec755f74ada4020edcb7493657 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 3 Nov 2012 10:58:27 +0000 Subject: RDMA/cxgb4: use WARN Use WARN rather than printk followed by WARN_ON(1), for conciseness. A simplified version of the semantic patch that makes this transformation is as follows: (http://coccinelle.lip6.fr/) // @@ expression list es; @@ -printk( +WARN(1, es); -WARN_ON(1); // Signed-off-by: Julia Lawall Acked-by: Steve Wise Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 6cfd4d8..5de8696 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -151,9 +151,8 @@ static void stop_ep_timer(struct c4iw_ep *ep) { PDBG("%s ep %p\n", __func__, ep); if (!timer_pending(&ep->timer)) { - printk(KERN_ERR "%s timer stopped when its not running! " + WARN(1, "%s timer stopped when its not running! " "ep %p state %u\n", __func__, ep, ep->com.state); - WARN_ON(1); return; } del_timer_sync(&ep->timer); @@ -2551,9 +2550,8 @@ static void process_timeout(struct c4iw_ep *ep) __state_set(&ep->com, ABORTING); break; default: - printk(KERN_ERR "%s unexpected state ep %p tid %u state %u\n", + WARN(1, "%s unexpected state ep %p tid %u state %u\n", __func__, ep, ep->hwtid, ep->com.state); - WARN_ON(1); abort = 0; } mutex_unlock(&ep->com.mutex); -- cgit v0.10.2 From 5107c2a3d117de8219a53e622c0f1f1bc3f7d1ae Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 3 Nov 2012 10:58:29 +0000 Subject: RDMA/cxgb3: use WARN Use WARN rather than printk followed by WARN_ON(1), for conciseness. A simplified version of the semantic patch that makes this transformation is as follows: (http://coccinelle.lip6.fr/) // @@ expression list es; @@ -printk( +WARN(1, es); -WARN_ON(1); // Signed-off-by: Julia Lawall Acked-by: Steve Wise Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index aaf88ef9..3e094cd 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -128,9 +128,8 @@ static void stop_ep_timer(struct iwch_ep *ep) { PDBG("%s ep %p\n", __func__, ep); if (!timer_pending(&ep->timer)) { - printk(KERN_ERR "%s timer stopped when its not running! ep %p state %u\n", + WARN(1, "%s timer stopped when its not running! ep %p state %u\n", __func__, ep, ep->com.state); - WARN_ON(1); return; } del_timer_sync(&ep->timer); @@ -1756,9 +1755,8 @@ static void ep_timeout(unsigned long arg) __state_set(&ep->com, ABORTING); break; default: - printk(KERN_ERR "%s unexpected state ep %p state %u\n", + WARN(1, "%s unexpected state ep %p state %u\n", __func__, ep, ep->com.state); - WARN_ON(1); abort = 0; } spin_unlock_irqrestore(&ep->com.lock, flags); -- cgit v0.10.2 From ceb7decb366591e9b67d70832e07f5d240572a3d Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 27 Nov 2012 16:24:29 +0000 Subject: IB/mlx4: Fix spinlock order to avoid lockdep warnings lockdep warns about taking a hard-irq-unsafe lock (sriov->id_map_lock) inside a hard-irq-safe lock (sriov->going_down_lock). Since id_map_lock is never taken in the interrupt context, we can simply reverse the order of taking the two spinlocks, thus avoiding the warning and the depencency. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Cc: Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index 80079e5..dbc99d4 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -268,15 +268,15 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id) struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; unsigned long flags; - spin_lock_irqsave(&sriov->going_down_lock, flags); spin_lock(&sriov->id_map_lock); + spin_lock_irqsave(&sriov->going_down_lock, flags); /*make sure that there is no schedule inside the scheduled work.*/ if (!sriov->is_going_down) { id->scheduled_delete = 1; schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT); } - spin_unlock(&sriov->id_map_lock); spin_unlock_irqrestore(&sriov->going_down_lock, flags); + spin_unlock(&sriov->id_map_lock); } int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id, -- cgit v0.10.2 From 311f813a2daefcba03f706a692fe0c67888d7622 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 27 Nov 2012 16:24:30 +0000 Subject: mlx4_core: Fix potential deadlock in mlx4_eq_int() The slave_state_lock spinlock is used in both interrupt context and process context, hence irq locking must be used. Found by lockdep. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Cc: Signed-off-by: Roland Dreier diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index e791e70..fdc5f23 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1498,6 +1498,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, u32 reply; u8 is_going_down = 0; int i; + unsigned long flags; slave_state[slave].comm_toggle ^= 1; reply = (u32) slave_state[slave].comm_toggle << 31; @@ -1576,12 +1577,12 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave); goto reset_slave; } - spin_lock(&priv->mfunc.master.slave_state_lock); + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (!slave_state[slave].is_slave_going_down) slave_state[slave].last_cmd = cmd; else is_going_down = 1; - spin_unlock(&priv->mfunc.master.slave_state_lock); + spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); if (is_going_down) { mlx4_warn(dev, "Slave is going down aborting command(%d)" " executing from slave:%d\n", @@ -1597,10 +1598,10 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, reset_slave: /* cleanup any slave resources */ mlx4_delete_all_resources_for_slave(dev, slave); - spin_lock(&priv->mfunc.master.slave_state_lock); + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (!slave_state[slave].is_slave_going_down) slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET; - spin_unlock(&priv->mfunc.master.slave_state_lock); + spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); /*with slave in the middle of flr, no need to clean resources again.*/ inform_slave_state: memset(&slave_state[slave].event_eq, 0, diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index c509a86..3cb7727 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -407,6 +407,7 @@ void mlx4_master_handle_slave_flr(struct work_struct *work) struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; int i; int err; + unsigned long flags; mlx4_dbg(dev, "mlx4_handle_slave_flr\n"); @@ -418,10 +419,10 @@ void mlx4_master_handle_slave_flr(struct work_struct *work) mlx4_delete_all_resources_for_slave(dev, i); /*return the slave to running mode*/ - spin_lock(&priv->mfunc.master.slave_state_lock); + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); slave_state[i].last_cmd = MLX4_COMM_CMD_RESET; slave_state[i].is_slave_going_down = 0; - spin_unlock(&priv->mfunc.master.slave_state_lock); + spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); /*notify the FW:*/ err = mlx4_cmd(dev, 0, i, 0, MLX4_CMD_INFORM_FLR_DONE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); @@ -446,6 +447,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) u8 update_slave_state; int i; enum slave_port_gen_event gen_event; + unsigned long flags; while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) { /* @@ -653,13 +655,13 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) } else update_slave_state = 1; - spin_lock(&priv->mfunc.master.slave_state_lock); + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (update_slave_state) { priv->mfunc.master.slave_state[flr_slave].active = false; priv->mfunc.master.slave_state[flr_slave].last_cmd = MLX4_COMM_CMD_FLR; priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1; } - spin_unlock(&priv->mfunc.master.slave_state_lock); + spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); queue_work(priv->mfunc.master.comm_wq, &priv->mfunc.master.slave_flr_event_work); break; -- cgit v0.10.2 From 63f05be2c075022d1b3227037f82ad75c4d5ab1d Mon Sep 17 00:00:00 2001 From: shefty Date: Wed, 28 Nov 2012 20:39:52 +0000 Subject: RDMA/cm: Change return value from find_gid_port() Problem reported by Dan Carpenter : The patch 3c86aa70bf67: "RDMA/cm: Add RDMA CM support for IBoE devices" from Oct 13, 2010, leads to the following warning: net/sunrpc/xprtrdma/svc_rdma_transport.c:722 svc_rdma_create() error: passing non neg 1 to ERR_PTR This bug would result in a NULL dereference. svc_rdma_create() is supposed to return ERR_PTRs or valid pointers, but instead it returns ERR_PTRs, valid pointers and 1. The call tree is: svc_rdma_create() => rdma_bind_addr() => cma_acquire_dev() => find_gid_port() rdma_bind_addr() should return a valid errno. Fix this by having find_gid_port() also return a valid errno. If we can't find the specified GID on a given port, return -EADDRNOTAVAIL, rather than -EAGAIN, to better indicate the error. We also drop using the special return value of '1' and instead pass through the error returned by the underlying verbs call. On such errors, rather than aborting the search, we simply continue to check the next device/port. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index a7568c3..d789eea 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -345,17 +345,17 @@ static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_nu err = ib_query_port(device, port_num, &props); if (err) - return 1; + return err; for (i = 0; i < props.gid_tbl_len; ++i) { err = ib_query_gid(device, port_num, i, &tmp); if (err) - return 1; + return err; if (!memcmp(&tmp, gid, sizeof tmp)) return 0; } - return -EAGAIN; + return -EADDRNOTAVAIL; } static int cma_acquire_dev(struct rdma_id_private *id_priv) @@ -388,8 +388,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) if (!ret) { id_priv->id.port_num = port; goto out; - } else if (ret == 1) - break; + } } } } -- cgit v0.10.2 From c9b03c1ae55decc721310b79d8f50d44fbb37dc7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 3 Sep 2011 09:34:48 +0200 Subject: IB/srp: Increase block layer timeout Increase the block layer timeout for disks so that it is above the InfiniBand transport layer timeout. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 922d845..5aa70e9 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1419,6 +1419,33 @@ err: return -ENOMEM; } +static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask) +{ + uint64_t T_tr_ns, max_compl_time_ms; + uint32_t rq_tmo_jiffies; + + /* + * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair, + * table 91), both the QP timeout and the retry count have to be set + * for RC QP's during the RTR to RTS transition. + */ + WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) != + (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)); + + /* + * Set target->rq_tmo_jiffies to one second more than the largest time + * it can take before an error completion is generated. See also + * C9-140..142 in the IBTA spec for more information about how to + * convert the QP Local ACK Timeout value to nanoseconds. + */ + T_tr_ns = 4096 * (1ULL << qp_attr->timeout); + max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns; + do_div(max_compl_time_ms, NSEC_PER_MSEC); + rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000); + + return rq_tmo_jiffies; +} + static void srp_cm_rep_handler(struct ib_cm_id *cm_id, struct srp_login_rsp *lrsp, struct srp_target_port *target) @@ -1478,6 +1505,8 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id, if (ret) goto error_free; + target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask); + ret = ib_modify_qp(target->qp, qp_attr, attr_mask); if (ret) goto error_free; @@ -1729,6 +1758,21 @@ static int srp_reset_host(struct scsi_cmnd *scmnd) return ret; } +static int srp_slave_configure(struct scsi_device *sdev) +{ + struct Scsi_Host *shost = sdev->host; + struct srp_target_port *target = host_to_target(shost); + struct request_queue *q = sdev->request_queue; + unsigned long timeout; + + if (sdev->type == TYPE_DISK) { + timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies); + blk_queue_rq_timeout(q, timeout); + } + + return 0; +} + static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1861,6 +1905,7 @@ static struct scsi_host_template srp_template = { .module = THIS_MODULE, .name = "InfiniBand SRP initiator", .proc_name = DRV_NAME, + .slave_configure = srp_slave_configure, .info = srp_target_info, .queuecommand = srp_queuecommand, .eh_abort_handler = srp_abort, diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 020caf0..e3a6304 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -163,6 +163,8 @@ struct srp_target_port { struct ib_sa_query *path_query; int path_query_id; + u32 rq_tmo_jiffies; + struct ib_cm_id *cm_id; int max_ti_iu_len; -- cgit v0.10.2 From 09be70a238005cc33f2a52b0aeae52f117e81582 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 17 Mar 2012 17:18:54 +0000 Subject: IB/srp: Eliminate state SRP_TARGET_CONNECTING Block the SCSI host while reconnecting instead of representing the reconnection activity as a distinct SRP target state. This allows us to eliminate the target state SRP_TARGET_CONNECTING. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 5aa70e9..a226199 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -646,13 +646,16 @@ static void srp_reset_req(struct srp_target_port *target, struct srp_request *re static int srp_reconnect_target(struct srp_target_port *target) { + struct Scsi_Host *shost = target->scsi_host; struct ib_qp_attr qp_attr; struct ib_wc wc; int i, ret; - if (!srp_change_state(target, SRP_TARGET_LIVE, SRP_TARGET_CONNECTING)) + if (target->state != SRP_TARGET_LIVE) return -EAGAIN; + scsi_target_block(&shost->shost_gendev); + srp_disconnect_target(target); /* * Now get a new local CM ID so that we avoid confusing the @@ -660,16 +663,16 @@ static int srp_reconnect_target(struct srp_target_port *target) */ ret = srp_new_cm_id(target); if (ret) - goto err; + goto unblock; qp_attr.qp_state = IB_QPS_RESET; ret = ib_modify_qp(target->qp, &qp_attr, IB_QP_STATE); if (ret) - goto err; + goto unblock; ret = srp_init_qp(target, target->qp); if (ret) - goto err; + goto unblock; while (ib_poll_cq(target->recv_cq, 1, &wc) > 0) ; /* nothing */ @@ -688,11 +691,15 @@ static int srp_reconnect_target(struct srp_target_port *target) target->qp_in_error = 0; ret = srp_connect_target(target); + +unblock: + scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING : + SDEV_TRANSPORT_OFFLINE); + if (ret) goto err; - if (!srp_change_state(target, SRP_TARGET_CONNECTING, SRP_TARGET_LIVE)) - ret = -EAGAIN; + shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n"); return ret; @@ -710,7 +717,7 @@ err: * the flush_scheduled_work() in srp_remove_one(). */ spin_lock_irq(&target->lock); - if (target->state == SRP_TARGET_CONNECTING) { + if (target->state == SRP_TARGET_LIVE) { target->state = SRP_TARGET_DEAD; INIT_WORK(&target->work, srp_remove_work); queue_work(ib_wq, &target->work); @@ -1311,9 +1318,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) unsigned long flags; int len; - if (target->state == SRP_TARGET_CONNECTING) - goto err; - if (target->state == SRP_TARGET_DEAD || target->state == SRP_TARGET_REMOVED) { scmnd->result = DID_BAD_TARGET << 16; @@ -1377,7 +1381,6 @@ err_iu: err_unlock: spin_unlock_irqrestore(&target->lock, flags); -err: return SCSI_MLQUEUE_HOST_BUSY; } diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index e3a6304..8b436ce 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -80,7 +80,6 @@ enum { enum srp_target_state { SRP_TARGET_LIVE, - SRP_TARGET_CONNECTING, SRP_TARGET_DEAD, SRP_TARGET_REMOVED }; -- cgit v0.10.2 From f3718231203a165dbfd5c5bd0d9a6db522bb73e3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 19 Apr 2012 14:42:54 +0000 Subject: IB/srp: Keep processing commands during host removal Some SCSI upper layer drivers, e.g. sd, issue SCSI commands from inside scsi_remove_host() (see the sd_shutdown() call in sd_remove()). Make sure that these commands have a chance to reach the SCSI device. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index a226199..9371d58 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1318,13 +1318,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) unsigned long flags; int len; - if (target->state == SRP_TARGET_DEAD || - target->state == SRP_TARGET_REMOVED) { - scmnd->result = DID_BAD_TARGET << 16; - scmnd->scsi_done(scmnd); - return 0; - } - spin_lock_irqsave(&target->lock, flags); iu = __srp_get_tx_iu(target, SRP_IU_CMD); if (!iu) -- cgit v0.10.2 From 224db157431299582a65fb9d4c9228363146f8a9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 24 Oct 2012 15:05:37 +0200 Subject: IB/srp: Simplify SCSI error handling Since scsi_remove_host() has been modified so that SCSI error handling functions will no longer be invoked after scsi_remove_host() returns, the test at the start of srp_send_tsk_mgmt() is now superfluous. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 9371d58..baafee0 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1661,10 +1661,6 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target, struct srp_iu *iu; struct srp_tsk_mgmt *tsk_mgmt; - if (target->state == SRP_TARGET_DEAD || - target->state == SRP_TARGET_REMOVED) - return -1; - init_completion(&target->tsk_mgmt_done); spin_lock_irq(&target->lock); -- cgit v0.10.2 From 948d1e889e5bd6ceffcc31db2beba39331087df3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 3 Sep 2011 09:25:42 +0200 Subject: IB/srp: Introduce srp_handle_qp_err() Introduce the function srp_handle_qp_err(), change the type of qp_in_error from int into bool and move the initialization of that variable from srp_reconnect_target() to srp_connect_target(). Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index baafee0..c288330 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -515,6 +515,8 @@ static int srp_connect_target(struct srp_target_port *target) int retries = 3; int ret; + target->qp_in_error = false; + ret = srp_lookup_path(target); if (ret) return ret; @@ -689,7 +691,6 @@ static int srp_reconnect_target(struct srp_target_port *target) for (i = 0; i < SRP_SQ_SIZE; ++i) list_add(&target->tx_ring[i]->list, &target->free_tx); - target->qp_in_error = 0; ret = srp_connect_target(target); unblock: @@ -1269,6 +1270,15 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) PFX "Recv failed with error code %d\n", res); } +static void srp_handle_qp_err(enum ib_wc_status wc_status, + enum ib_wc_opcode wc_opcode, + struct srp_target_port *target) +{ + shost_printk(KERN_ERR, target->scsi_host, PFX "failed %s status %d\n", + wc_opcode & IB_WC_RECV ? "receive" : "send", wc_status); + target->qp_in_error = true; +} + static void srp_recv_completion(struct ib_cq *cq, void *target_ptr) { struct srp_target_port *target = target_ptr; @@ -1276,15 +1286,12 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr) ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); while (ib_poll_cq(cq, 1, &wc) > 0) { - if (wc.status) { - shost_printk(KERN_ERR, target->scsi_host, - PFX "failed receive status %d\n", - wc.status); - target->qp_in_error = 1; + if (likely(wc.status == IB_WC_SUCCESS)) { + srp_handle_recv(target, &wc); + } else { + srp_handle_qp_err(wc.status, wc.opcode, target); break; } - - srp_handle_recv(target, &wc); } } @@ -1295,16 +1302,13 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr) struct srp_iu *iu; while (ib_poll_cq(cq, 1, &wc) > 0) { - if (wc.status) { - shost_printk(KERN_ERR, target->scsi_host, - PFX "failed send status %d\n", - wc.status); - target->qp_in_error = 1; + if (likely(wc.status == IB_WC_SUCCESS)) { + iu = (struct srp_iu *) (uintptr_t) wc.wr_id; + list_add(&iu->list, &target->free_tx); + } else { + srp_handle_qp_err(wc.status, wc.opcode, target); break; } - - iu = (struct srp_iu *) (uintptr_t) wc.wr_id; - list_add(&iu->list, &target->free_tx); } } @@ -2269,7 +2273,6 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err_free_ib; - target->qp_in_error = 0; ret = srp_connect_target(target); if (ret) { shost_printk(KERN_ERR, target->scsi_host, diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 8b436ce..02dc3ac 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -179,7 +179,7 @@ struct srp_target_port { struct list_head list; struct completion done; int status; - int qp_in_error; + bool qp_in_error; struct completion tsk_mgmt_done; u8 tsk_mgmt_status; -- cgit v0.10.2 From 4f0af69799b0e7b9805c4d174169f5ed7bf315c1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 26 Nov 2012 11:16:40 +0100 Subject: IB/srp: Process all error completions If the RDMA RC connection is closed, tell the SCSI mid-layer to terminate all pending commands instead of only the first. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index c288330..f0843ef 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1274,8 +1274,12 @@ static void srp_handle_qp_err(enum ib_wc_status wc_status, enum ib_wc_opcode wc_opcode, struct srp_target_port *target) { - shost_printk(KERN_ERR, target->scsi_host, PFX "failed %s status %d\n", - wc_opcode & IB_WC_RECV ? "receive" : "send", wc_status); + if (!target->qp_in_error) { + shost_printk(KERN_ERR, target->scsi_host, + PFX "failed %s status %d\n", + wc_opcode & IB_WC_RECV ? "receive" : "send", + wc_status); + } target->qp_in_error = true; } @@ -1290,7 +1294,6 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr) srp_handle_recv(target, &wc); } else { srp_handle_qp_err(wc.status, wc.opcode, target); - break; } } } @@ -1307,7 +1310,6 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr) list_add(&iu->list, &target->free_tx); } else { srp_handle_qp_err(wc.status, wc.opcode, target); - break; } } } -- cgit v0.10.2 From 294c875a65269361defd7aeb97804ba99eb57cbf Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 25 Dec 2011 12:18:12 +0000 Subject: IB/srp: Suppress superfluous error messages Keep track of the connection state. Only report QP errors while connected. Only invoke ib_send_cm_dreq() when connected so that invoking srp_disconnect_target() after having received a DREQ does not cause an error message to be printed. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index f0843ef..3c64bf4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -428,17 +428,34 @@ static int srp_send_req(struct srp_target_port *target) return status; } +static bool srp_change_conn_state(struct srp_target_port *target, + bool connected) +{ + bool changed = false; + + spin_lock_irq(&target->lock); + if (target->connected != connected) { + target->connected = connected; + changed = true; + } + spin_unlock_irq(&target->lock); + + return changed; +} + static void srp_disconnect_target(struct srp_target_port *target) { - /* XXX should send SRP_I_LOGOUT request */ + if (srp_change_conn_state(target, false)) { + /* XXX should send SRP_I_LOGOUT request */ - init_completion(&target->done); - if (ib_send_cm_dreq(target->cm_id, NULL, 0)) { - shost_printk(KERN_DEBUG, target->scsi_host, - PFX "Sending CM DREQ failed\n"); - return; + init_completion(&target->done); + if (ib_send_cm_dreq(target->cm_id, NULL, 0)) { + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Sending CM DREQ failed\n"); + } else { + wait_for_completion(&target->done); + } } - wait_for_completion(&target->done); } static bool srp_change_state(struct srp_target_port *target, @@ -515,6 +532,8 @@ static int srp_connect_target(struct srp_target_port *target) int retries = 3; int ret; + WARN_ON_ONCE(target->connected); + target->qp_in_error = false; ret = srp_lookup_path(target); @@ -536,6 +555,7 @@ static int srp_connect_target(struct srp_target_port *target) */ switch (target->status) { case 0: + srp_change_conn_state(target, true); return 0; case SRP_PORT_REDIRECT: @@ -1274,7 +1294,7 @@ static void srp_handle_qp_err(enum ib_wc_status wc_status, enum ib_wc_opcode wc_opcode, struct srp_target_port *target) { - if (!target->qp_in_error) { + if (target->connected && !target->qp_in_error) { shost_printk(KERN_ERR, target->scsi_host, PFX "failed %s status %d\n", wc_opcode & IB_WC_RECV ? "receive" : "send", @@ -1630,6 +1650,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) case IB_CM_DREQ_RECEIVED: shost_printk(KERN_WARNING, target->scsi_host, PFX "DREQ received - connection closed\n"); + srp_change_conn_state(target, false); if (ib_send_cm_drep(cm_id, NULL, 0)) shost_printk(KERN_ERR, target->scsi_host, PFX "Sending CM DREP failed\n"); @@ -1942,6 +1963,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) spin_unlock(&host->target_lock); target->state = SRP_TARGET_LIVE; + target->connected = false; scsi_scan_target(&target->scsi_host->shost_gendev, 0, target->scsi_id, SCAN_WILD_CARD, 0); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 02dc3ac..ef95fa4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -163,6 +163,7 @@ struct srp_target_port { int path_query_id; u32 rq_tmo_jiffies; + bool connected; struct ib_cm_id *cm_id; -- cgit v0.10.2 From ee12d6a80cfcd08b862ed3c8e109442e466b0302 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 25 Dec 2011 19:41:07 +0000 Subject: IB/srp: Introduce the helper function srp_remove_target() Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 3c64bf4..beb6878 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -506,6 +506,17 @@ static void srp_del_scsi_host_attr(struct Scsi_Host *shost) device_remove_file(&shost->shost_dev, *attr); } +static void srp_remove_target(struct srp_target_port *target) +{ + srp_del_scsi_host_attr(target->scsi_host); + srp_remove_host(target->scsi_host); + scsi_remove_host(target->scsi_host); + ib_destroy_cm_id(target->cm_id); + srp_free_target_ib(target); + srp_free_req_data(target); + scsi_host_put(target->scsi_host); +} + static void srp_remove_work(struct work_struct *work) { struct srp_target_port *target = @@ -518,13 +529,7 @@ static void srp_remove_work(struct work_struct *work) list_del(&target->list); spin_unlock(&target->srp_host->target_lock); - srp_del_scsi_host_attr(target->scsi_host); - srp_remove_host(target->scsi_host); - scsi_remove_host(target->scsi_host); - ib_destroy_cm_id(target->cm_id); - srp_free_target_ib(target); - srp_free_req_data(target); - scsi_host_put(target->scsi_host); + srp_remove_target(target); } static int srp_connect_target(struct srp_target_port *target) -- cgit v0.10.2 From ef6c49d87c3418c442a22e55e3ce2f91b163d69e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 26 Dec 2011 16:49:18 +0000 Subject: IB/srp: Eliminate state SRP_TARGET_DEAD Only queue removal work after having changed the target state into SRP_TARGET_REMOVED and not if that state was already equal to SRP_TARGET_REMOVED. That allows us to remove the state SRP_TARGET_DEAD. Add a call to srp_disconnect_target() in srp_remove_target() -- due to previous changes it is now safe to invoke that function even if the IB connection has already been disconnected. This change allows us to replace the target removal code in srp_remove_one() by an (indirect) call to srp_remove_target(). Rename srp_target_port.work into srp_target_port.remove_work to reflect its usage. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index beb6878..95590a3 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -428,6 +428,23 @@ static int srp_send_req(struct srp_target_port *target) return status; } +static bool srp_queue_remove_work(struct srp_target_port *target) +{ + bool changed = false; + + spin_lock_irq(&target->lock); + if (target->state != SRP_TARGET_REMOVED) { + target->state = SRP_TARGET_REMOVED; + changed = true; + } + spin_unlock_irq(&target->lock); + + if (changed) + queue_work(system_long_wq, &target->remove_work); + + return changed; +} + static bool srp_change_conn_state(struct srp_target_port *target, bool connected) { @@ -458,21 +475,6 @@ static void srp_disconnect_target(struct srp_target_port *target) } } -static bool srp_change_state(struct srp_target_port *target, - enum srp_target_state old, - enum srp_target_state new) -{ - bool changed = false; - - spin_lock_irq(&target->lock); - if (target->state == old) { - target->state = new; - changed = true; - } - spin_unlock_irq(&target->lock); - return changed; -} - static void srp_free_req_data(struct srp_target_port *target) { struct ib_device *ibdev = target->srp_host->srp_dev->dev; @@ -508,9 +510,12 @@ static void srp_del_scsi_host_attr(struct Scsi_Host *shost) static void srp_remove_target(struct srp_target_port *target) { + WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); + srp_del_scsi_host_attr(target->scsi_host); srp_remove_host(target->scsi_host); scsi_remove_host(target->scsi_host); + srp_disconnect_target(target); ib_destroy_cm_id(target->cm_id); srp_free_target_ib(target); srp_free_req_data(target); @@ -520,10 +525,9 @@ static void srp_remove_target(struct srp_target_port *target) static void srp_remove_work(struct work_struct *work) { struct srp_target_port *target = - container_of(work, struct srp_target_port, work); + container_of(work, struct srp_target_port, remove_work); - if (!srp_change_state(target, SRP_TARGET_DEAD, SRP_TARGET_REMOVED)) - return; + WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); spin_lock(&target->srp_host->target_lock); list_del(&target->list); @@ -738,17 +742,8 @@ err: * However, we have to defer the real removal because we * are in the context of the SCSI error handler now, which * will deadlock if we call scsi_remove_host(). - * - * Schedule our work inside the lock to avoid a race with - * the flush_scheduled_work() in srp_remove_one(). */ - spin_lock_irq(&target->lock); - if (target->state == SRP_TARGET_LIVE) { - target->state = SRP_TARGET_DEAD; - INIT_WORK(&target->work, srp_remove_work); - queue_work(ib_wq, &target->work); - } - spin_unlock_irq(&target->lock); + srp_queue_remove_work(target); return ret; } @@ -2258,6 +2253,7 @@ static ssize_t srp_create_target(struct device *dev, sizeof (struct srp_indirect_buf) + target->cmd_sg_cnt * sizeof (struct srp_direct_buf); + INIT_WORK(&target->remove_work, srp_remove_work); spin_lock_init(&target->lock); INIT_LIST_HEAD(&target->free_tx); INIT_LIST_HEAD(&target->free_reqs); @@ -2491,8 +2487,7 @@ static void srp_remove_one(struct ib_device *device) { struct srp_device *srp_dev; struct srp_host *host, *tmp_host; - LIST_HEAD(target_list); - struct srp_target_port *target, *tmp_target; + struct srp_target_port *target; srp_dev = ib_get_client_data(device, &srp_client); @@ -2505,35 +2500,17 @@ static void srp_remove_one(struct ib_device *device) wait_for_completion(&host->released); /* - * Mark all target ports as removed, so we stop queueing - * commands and don't try to reconnect. + * Remove all target ports. */ spin_lock(&host->target_lock); - list_for_each_entry(target, &host->target_list, list) { - spin_lock_irq(&target->lock); - target->state = SRP_TARGET_REMOVED; - spin_unlock_irq(&target->lock); - } + list_for_each_entry(target, &host->target_list, list) + srp_queue_remove_work(target); spin_unlock(&host->target_lock); /* - * Wait for any reconnection tasks that may have - * started before we marked our target ports as - * removed, and any target port removal tasks. + * Wait for target port removal tasks. */ - flush_workqueue(ib_wq); - - list_for_each_entry_safe(target, tmp_target, - &host->target_list, list) { - srp_del_scsi_host_attr(target->scsi_host); - srp_remove_host(target->scsi_host); - scsi_remove_host(target->scsi_host); - srp_disconnect_target(target); - ib_destroy_cm_id(target->cm_id); - srp_free_target_ib(target); - srp_free_req_data(target); - scsi_host_put(target->scsi_host); - } + flush_workqueue(system_long_wq); kfree(host); } diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index ef95fa4..de2d0b3 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -80,8 +80,7 @@ enum { enum srp_target_state { SRP_TARGET_LIVE, - SRP_TARGET_DEAD, - SRP_TARGET_REMOVED + SRP_TARGET_REMOVED, }; enum srp_iu_type { @@ -175,7 +174,7 @@ struct srp_target_port { struct srp_iu *rx_ring[SRP_RQ_SIZE]; struct srp_request req_ring[SRP_CMD_SQ_SIZE]; - struct work_struct work; + struct work_struct remove_work; struct list_head list; struct completion done; -- cgit v0.10.2 From 73aa89ed9e2bebf0c3fff4504e6dff1421b5c819 Mon Sep 17 00:00:00 2001 From: Ishai Rabinovitz Date: Mon, 26 Nov 2012 11:44:53 +0100 Subject: IB/srp: destroy and recreate QP and CQs when reconnecting HW QP FATAL errors persist over a reset operation, but we can recover from that by recreating the QP and associated CQs for each connection. Creating a new QP/CQ also completely forecloses any possibility of getting stale completions or packets on the new connection. Signed-off-by: Ishai Rabinovitz Signed-off-by: Michael S. Tsirkin [ updated to current code from OFED, cleaned up commit message ] Signed-off-by: David Dillow Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 95590a3..85771eb 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -222,27 +222,29 @@ static int srp_new_cm_id(struct srp_target_port *target) static int srp_create_target_ib(struct srp_target_port *target) { struct ib_qp_init_attr *init_attr; + struct ib_cq *recv_cq, *send_cq; + struct ib_qp *qp; int ret; init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); if (!init_attr) return -ENOMEM; - target->recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, - srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0); - if (IS_ERR(target->recv_cq)) { - ret = PTR_ERR(target->recv_cq); + recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, + srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0); + if (IS_ERR(recv_cq)) { + ret = PTR_ERR(recv_cq); goto err; } - target->send_cq = ib_create_cq(target->srp_host->srp_dev->dev, - srp_send_completion, NULL, target, SRP_SQ_SIZE, 0); - if (IS_ERR(target->send_cq)) { - ret = PTR_ERR(target->send_cq); + send_cq = ib_create_cq(target->srp_host->srp_dev->dev, + srp_send_completion, NULL, target, SRP_SQ_SIZE, 0); + if (IS_ERR(send_cq)) { + ret = PTR_ERR(send_cq); goto err_recv_cq; } - ib_req_notify_cq(target->recv_cq, IB_CQ_NEXT_COMP); + ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP); init_attr->event_handler = srp_qp_event; init_attr->cap.max_send_wr = SRP_SQ_SIZE; @@ -251,30 +253,41 @@ static int srp_create_target_ib(struct srp_target_port *target) init_attr->cap.max_send_sge = 1; init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; init_attr->qp_type = IB_QPT_RC; - init_attr->send_cq = target->send_cq; - init_attr->recv_cq = target->recv_cq; + init_attr->send_cq = send_cq; + init_attr->recv_cq = recv_cq; - target->qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr); - if (IS_ERR(target->qp)) { - ret = PTR_ERR(target->qp); + qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); goto err_send_cq; } - ret = srp_init_qp(target, target->qp); + ret = srp_init_qp(target, qp); if (ret) goto err_qp; + if (target->qp) + ib_destroy_qp(target->qp); + if (target->recv_cq) + ib_destroy_cq(target->recv_cq); + if (target->send_cq) + ib_destroy_cq(target->send_cq); + + target->qp = qp; + target->recv_cq = recv_cq; + target->send_cq = send_cq; + kfree(init_attr); return 0; err_qp: - ib_destroy_qp(target->qp); + ib_destroy_qp(qp); err_send_cq: - ib_destroy_cq(target->send_cq); + ib_destroy_cq(send_cq); err_recv_cq: - ib_destroy_cq(target->recv_cq); + ib_destroy_cq(recv_cq); err: kfree(init_attr); @@ -289,6 +302,9 @@ static void srp_free_target_ib(struct srp_target_port *target) ib_destroy_cq(target->send_cq); ib_destroy_cq(target->recv_cq); + target->qp = NULL; + target->send_cq = target->recv_cq = NULL; + for (i = 0; i < SRP_RQ_SIZE; ++i) srp_free_iu(target->srp_host, target->rx_ring[i]); for (i = 0; i < SRP_SQ_SIZE; ++i) @@ -678,8 +694,6 @@ static void srp_reset_req(struct srp_target_port *target, struct srp_request *re static int srp_reconnect_target(struct srp_target_port *target) { struct Scsi_Host *shost = target->scsi_host; - struct ib_qp_attr qp_attr; - struct ib_wc wc; int i, ret; if (target->state != SRP_TARGET_LIVE) @@ -696,20 +710,10 @@ static int srp_reconnect_target(struct srp_target_port *target) if (ret) goto unblock; - qp_attr.qp_state = IB_QPS_RESET; - ret = ib_modify_qp(target->qp, &qp_attr, IB_QP_STATE); - if (ret) - goto unblock; - - ret = srp_init_qp(target, target->qp); + ret = srp_create_target_ib(target); if (ret) goto unblock; - while (ib_poll_cq(target->recv_cq, 1, &wc) > 0) - ; /* nothing */ - while (ib_poll_cq(target->send_cq, 1, &wc) > 0) - ; /* nothing */ - for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { struct srp_request *req = &target->req_ring[i]; if (req->scmnd) -- cgit v0.10.2 From 55d93898a14528a5d4199ba572bea1f0bec5870e Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Mon, 26 Nov 2012 11:57:47 +0100 Subject: IB/srp: send disconnect request without waiting for CM timewait exit Now that SRP recreates the CM ID, QP, and CQ for each connection, there is no need to wait for the timewait state to complete. Signed-off-by: Vu Pham Signed-off-by: David Dillow Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 85771eb..84d9298 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -481,12 +481,9 @@ static void srp_disconnect_target(struct srp_target_port *target) if (srp_change_conn_state(target, false)) { /* XXX should send SRP_I_LOGOUT request */ - init_completion(&target->done); if (ib_send_cm_dreq(target->cm_id, NULL, 0)) { shost_printk(KERN_DEBUG, target->scsi_host, PFX "Sending CM DREQ failed\n"); - } else { - wait_for_completion(&target->done); } } } @@ -1664,7 +1661,6 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) shost_printk(KERN_ERR, target->scsi_host, PFX "connection closed\n"); - comp = 1; target->status = 0; break; -- cgit v0.10.2 From a4605a93696ee0768e55e4bce1ff7f0ee39bcf79 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 9 Oct 2011 14:09:07 +0200 Subject: IB/srp: Document sysfs attributes Document the sysfs attributes of the SRP initiator (ib_srp) according to the rules specified in Documentation/ABI/README. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier diff --git a/Documentation/ABI/stable/sysfs-driver-ib_srp b/Documentation/ABI/stable/sysfs-driver-ib_srp new file mode 100644 index 0000000..481aae9 --- /dev/null +++ b/Documentation/ABI/stable/sysfs-driver-ib_srp @@ -0,0 +1,156 @@ +What: /sys/class/infiniband_srp/srp--/add_target +Date: January 2, 2006 +KernelVersion: 2.6.15 +Contact: linux-rdma@vger.kernel.org +Description: Interface for making ib_srp connect to a new target. + One can request ib_srp to connect to a new target by writing + a comma-separated list of login parameters to this sysfs + attribute. The supported parameters are: + * id_ext, a 16-digit hexadecimal number specifying the eight + byte identifier extension in the 16-byte SRP target port + identifier. The target port identifier is sent by ib_srp + to the target in the SRP_LOGIN_REQ request. + * ioc_guid, a 16-digit hexadecimal number specifying the eight + byte I/O controller GUID portion of the 16-byte target port + identifier. + * dgid, a 32-digit hexadecimal number specifying the + destination GID. + * pkey, a four-digit hexadecimal number specifying the + InfiniBand partition key. + * service_id, a 16-digit hexadecimal number specifying the + InfiniBand service ID used to establish communication with + the SRP target. How to find out the value of the service ID + is specified in the documentation of the SRP target. + * max_sect, a decimal number specifying the maximum number of + 512-byte sectors to be transferred via a single SCSI command. + * max_cmd_per_lun, a decimal number specifying the maximum + number of outstanding commands for a single LUN. + * io_class, a hexadecimal number specifying the SRP I/O class. + Must be either 0xff00 (rev 10) or 0x0100 (rev 16a). The I/O + class defines the format of the SRP initiator and target + port identifiers. + * initiator_ext, a 16-digit hexadecimal number specifying the + identifier extension portion of the SRP initiator port + identifier. This data is sent by the initiator to the target + in the SRP_LOGIN_REQ request. + * cmd_sg_entries, a number in the range 1..255 that specifies + the maximum number of data buffer descriptors stored in the + SRP_CMD information unit itself. With allow_ext_sg=0 the + parameter cmd_sg_entries defines the maximum S/G list length + for a single SRP_CMD, and commands whose S/G list length + exceeds this limit after S/G list collapsing will fail. + * allow_ext_sg, whether ib_srp is allowed to include a partial + memory descriptor list in an SRP_CMD instead of the entire + list. If a partial memory descriptor list has been included + in an SRP_CMD the remaining memory descriptors are + communicated from initiator to target via an additional RDMA + transfer. Setting allow_ext_sg to 1 increases the maximum + amount of data that can be transferred between initiator and + target via a single SCSI command. Since not all SRP target + implementations support partial memory descriptor lists the + default value for this option is 0. + * sg_tablesize, a number in the range 1..2048 specifying the + maximum S/G list length the SCSI layer is allowed to pass to + ib_srp. Specifying a value that exceeds cmd_sg_entries is + only safe with partial memory descriptor list support enabled + (allow_ext_sg=1). + +What: /sys/class/infiniband_srp/srp--/ibdev +Date: January 2, 2006 +KernelVersion: 2.6.15 +Contact: linux-rdma@vger.kernel.org +Description: HCA name (). + +What: /sys/class/infiniband_srp/srp--/port +Date: January 2, 2006 +KernelVersion: 2.6.15 +Contact: linux-rdma@vger.kernel.org +Description: HCA port number (). + +What: /sys/class/scsi_host/host/allow_ext_sg +Date: May 19, 2011 +KernelVersion: 2.6.39 +Contact: linux-rdma@vger.kernel.org +Description: Whether ib_srp is allowed to include a partial memory + descriptor list in an SRP_CMD when communicating with an SRP + target. + +What: /sys/class/scsi_host/host/cmd_sg_entries +Date: May 19, 2011 +KernelVersion: 2.6.39 +Contact: linux-rdma@vger.kernel.org +Description: Maximum number of data buffer descriptors that may be sent to + the target in a single SRP_CMD request. + +What: /sys/class/scsi_host/host/dgid +Date: June 17, 2006 +KernelVersion: 2.6.17 +Contact: linux-rdma@vger.kernel.org +Description: InfiniBand destination GID used for communication with the SRP + target. Differs from orig_dgid if port redirection has happened. + +What: /sys/class/scsi_host/host/id_ext +Date: June 17, 2006 +KernelVersion: 2.6.17 +Contact: linux-rdma@vger.kernel.org +Description: Eight-byte identifier extension portion of the 16-byte target + port identifier. + +What: /sys/class/scsi_host/host/ioc_guid +Date: June 17, 2006 +KernelVersion: 2.6.17 +Contact: linux-rdma@vger.kernel.org +Description: Eight-byte I/O controller GUID portion of the 16-byte target + port identifier. + +What: /sys/class/scsi_host/host/local_ib_device +Date: November 29, 2006 +KernelVersion: 2.6.19 +Contact: linux-rdma@vger.kernel.org +Description: Name of the InfiniBand HCA used for communicating with the + SRP target. + +What: /sys/class/scsi_host/host/local_ib_port +Date: November 29, 2006 +KernelVersion: 2.6.19 +Contact: linux-rdma@vger.kernel.org +Description: Number of the HCA port used for communicating with the + SRP target. + +What: /sys/class/scsi_host/host/orig_dgid +Date: June 17, 2006 +KernelVersion: 2.6.17 +Contact: linux-rdma@vger.kernel.org +Description: InfiniBand destination GID specified in the parameters + written to the add_target sysfs attribute. + +What: /sys/class/scsi_host/host/pkey +Date: June 17, 2006 +KernelVersion: 2.6.17 +Contact: linux-rdma@vger.kernel.org +Description: A 16-bit number representing the InfiniBand partition key used + for communication with the SRP target. + +What: /sys/class/scsi_host/host/req_lim +Date: October 20, 2010 +KernelVersion: 2.6.36 +Contact: linux-rdma@vger.kernel.org +Description: Number of requests ib_srp can send to the target before it has + to wait for more credits. For more information see also the + SRP credit algorithm in the SRP specification. + +What: /sys/class/scsi_host/host/service_id +Date: June 17, 2006 +KernelVersion: 2.6.17 +Contact: linux-rdma@vger.kernel.org +Description: InfiniBand service ID used for establishing communication with + the SRP target. + +What: /sys/class/scsi_host/host/zero_req_lim +Date: September 20, 2006 +KernelVersion: 2.6.18 +Contact: linux-rdma@vger.kernel.org +Description: Number of times the initiator had to wait before sending a + request to the target because it ran out of credits. For more + information see also the SRP credit algorithm in the SRP + specification. -- cgit v0.10.2 From ac9be30e91cffe07f762bd91ca40de065bf257d7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 21 Oct 2011 19:31:07 +0200 Subject: srp_transport: Fix attribute registration Register transport attributes after the attribute array has been set up instead of before. The current code can trigger a race condition because the code reading the attribute array can run on another thread than the code that initialized that array. Make sure that any code reading the attribute array will see all values written into that array. Signed-off-by: Bart Van Assche Cc: FUJITA Tomonori Cc: Robert Jennings Acked-by: David Dillow Signed-off-by: Roland Dreier diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index 21a045e..07c4394 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -324,13 +324,14 @@ srp_attach_transport(struct srp_function_template *ft) i->rport_attr_cont.ac.attrs = &i->rport_attrs[0]; i->rport_attr_cont.ac.class = &srp_rport_class.class; i->rport_attr_cont.ac.match = srp_rport_match; - transport_container_register(&i->rport_attr_cont); count = 0; SETUP_RPORT_ATTRIBUTE_RD(port_id); SETUP_RPORT_ATTRIBUTE_RD(roles); i->rport_attrs[count] = NULL; + transport_container_register(&i->rport_attr_cont); + i->f = ft; return &i->t; -- cgit v0.10.2 From 9134a855178f141b9be48aa19b49ed92d82665ce Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 10 Sep 2011 16:31:57 +0200 Subject: srp_transport: Simplify attribute initialization code Eliminate the private_rport_attrs[] array and the SETUP_*() macros used to set up that array since the information in that array duplicates the information in the static device attributes. Also, verify whether SRP_RPORT_ATTRS is large enough since it is easy to forget to update that macro when adding new attributes. Signed-off-by: Bart Van Assche Cc: FUJITA Tomonori Cc: Robert Jennings Acked-by: David Dillow Signed-off-by: Roland Dreier diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index 07c4394..0d85f79 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -47,7 +47,6 @@ struct srp_internal { struct device_attribute *host_attrs[SRP_HOST_ATTRS + 1]; struct device_attribute *rport_attrs[SRP_RPORT_ATTRS + 1]; - struct device_attribute private_rport_attrs[SRP_RPORT_ATTRS]; struct transport_container rport_attr_cont; }; @@ -72,24 +71,6 @@ static DECLARE_TRANSPORT_CLASS(srp_host_class, "srp_host", srp_host_setup, static DECLARE_TRANSPORT_CLASS(srp_rport_class, "srp_remote_ports", NULL, NULL, NULL); -#define SETUP_TEMPLATE(attrb, field, perm, test, ro_test, ro_perm) \ - i->private_##attrb[count] = dev_attr_##field; \ - i->private_##attrb[count].attr.mode = perm; \ - if (ro_test) { \ - i->private_##attrb[count].attr.mode = ro_perm; \ - i->private_##attrb[count].store = NULL; \ - } \ - i->attrb[count] = &i->private_##attrb[count]; \ - if (test) \ - count++ - -#define SETUP_RPORT_ATTRIBUTE_RD(field) \ - SETUP_TEMPLATE(rport_attrs, field, S_IRUGO, 1, 0, 0) - -#define SETUP_RPORT_ATTRIBUTE_RW(field) \ - SETUP_TEMPLATE(rport_attrs, field, S_IRUGO | S_IWUSR, \ - 1, 1, S_IRUGO) - #define SRP_PID(p) \ (p)->port_id[0], (p)->port_id[1], (p)->port_id[2], (p)->port_id[3], \ (p)->port_id[4], (p)->port_id[5], (p)->port_id[6], (p)->port_id[7], \ @@ -326,9 +307,10 @@ srp_attach_transport(struct srp_function_template *ft) i->rport_attr_cont.ac.match = srp_rport_match; count = 0; - SETUP_RPORT_ATTRIBUTE_RD(port_id); - SETUP_RPORT_ATTRIBUTE_RD(roles); - i->rport_attrs[count] = NULL; + i->rport_attrs[count++] = &dev_attr_port_id; + i->rport_attrs[count++] = &dev_attr_roles; + i->rport_attrs[count++] = NULL; + BUG_ON(count > ARRAY_SIZE(i->rport_attrs)); transport_container_register(&i->rport_attr_cont); -- cgit v0.10.2 From 7e1265bfe75f0ef1a9f5cfde202df68b7e35a53f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 9 Oct 2011 14:35:48 +0200 Subject: srp_transport: Document sysfs attributes Signed-off-by: Bart Van Assche Cc: FUJITA Tomonori Cc: Robert Jennings Acked-by: David Dillow Signed-off-by: Roland Dreier diff --git a/Documentation/ABI/stable/sysfs-transport-srp b/Documentation/ABI/stable/sysfs-transport-srp new file mode 100644 index 0000000..7b0d4a5 --- /dev/null +++ b/Documentation/ABI/stable/sysfs-transport-srp @@ -0,0 +1,12 @@ +What: /sys/class/srp_remote_ports/port-:/port_id +Date: June 27, 2007 +KernelVersion: 2.6.24 +Contact: linux-scsi@vger.kernel.org +Description: 16-byte local SRP port identifier in hexadecimal format. An + example: 4c:49:4e:55:58:20:56:49:4f:00:00:00:00:00:00:00. + +What: /sys/class/srp_remote_ports/port-:/roles +Date: June 27, 2007 +KernelVersion: 2.6.24 +Contact: linux-scsi@vger.kernel.org +Description: Role of the remote port. Either "SRP Initiator" or "SRP Target". -- cgit v0.10.2 From dc1bdbd9b8a077018d82230bc378f1bcfd8adba8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 16 Sep 2011 20:41:13 +0200 Subject: IB/srp: Allow SRP disconnect through sysfs Make it possible to disconnect the IB RC connection used by the SRP protocol to communicate with a target. Have the SRP transport layer create a sysfs "delete" attribute for initiator drivers that support this functionality. Signed-off-by: Bart Van Assche Acked-by: David Dillow Cc: FUJITA Tomonori Cc: Robert Jennings Signed-off-by: Roland Dreier diff --git a/Documentation/ABI/stable/sysfs-transport-srp b/Documentation/ABI/stable/sysfs-transport-srp index 7b0d4a5..b36fb0d 100644 --- a/Documentation/ABI/stable/sysfs-transport-srp +++ b/Documentation/ABI/stable/sysfs-transport-srp @@ -1,3 +1,10 @@ +What: /sys/class/srp_remote_ports/port-:/delete +Date: June 1, 2012 +KernelVersion: 3.7 +Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org +Description: Instructs an SRP initiator to disconnect from a target and to + remove all LUNs imported from that target. + What: /sys/class/srp_remote_ports/port-:/port_id Date: June 27, 2007 KernelVersion: 2.6.24 diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 84d9298..d5088ce 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -549,6 +549,13 @@ static void srp_remove_work(struct work_struct *work) srp_remove_target(target); } +static void srp_rport_delete(struct srp_rport *rport) +{ + struct srp_target_port *target = rport->lld_data; + + srp_queue_remove_work(target); +} + static int srp_connect_target(struct srp_target_port *target) { int retries = 3; @@ -1958,6 +1965,8 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) return PTR_ERR(rport); } + rport->lld_data = target; + spin_lock(&host->target_lock); list_add_tail(&target->list, &host->target_list); spin_unlock(&host->target_lock); @@ -2524,6 +2533,7 @@ static void srp_remove_one(struct ib_device *device) } static struct srp_function_template ib_srp_transport_functions = { + .rport_delete = srp_rport_delete, }; static int __init srp_init_module(void) diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index 0d85f79..f379c7f 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -38,7 +38,7 @@ struct srp_host_attrs { #define to_srp_host_attrs(host) ((struct srp_host_attrs *)(host)->shost_data) #define SRP_HOST_ATTRS 0 -#define SRP_RPORT_ATTRS 2 +#define SRP_RPORT_ATTRS 3 struct srp_internal { struct scsi_transport_template t; @@ -116,6 +116,24 @@ show_srp_rport_roles(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(roles, S_IRUGO, show_srp_rport_roles, NULL); +static ssize_t store_srp_rport_delete(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct srp_rport *rport = transport_class_to_srp_rport(dev); + struct Scsi_Host *shost = dev_to_shost(dev); + struct srp_internal *i = to_srp_internal(shost->transportt); + + if (i->f->rport_delete) { + i->f->rport_delete(rport); + return count; + } else { + return -ENOSYS; + } +} + +static DEVICE_ATTR(delete, S_IWUSR, NULL, store_srp_rport_delete); + static void srp_rport_release(struct device *dev) { struct srp_rport *rport = dev_to_rport(dev); @@ -309,6 +327,8 @@ srp_attach_transport(struct srp_function_template *ft) count = 0; i->rport_attrs[count++] = &dev_attr_port_id; i->rport_attrs[count++] = &dev_attr_roles; + if (ft->rport_delete) + i->rport_attrs[count++] = &dev_attr_delete; i->rport_attrs[count++] = NULL; BUG_ON(count > ARRAY_SIZE(i->rport_attrs)); diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h index 9c60ca1..ff0f04a 100644 --- a/include/scsi/scsi_transport_srp.h +++ b/include/scsi/scsi_transport_srp.h @@ -14,13 +14,21 @@ struct srp_rport_identifiers { }; struct srp_rport { + /* for initiator and target drivers */ + struct device dev; u8 port_id[16]; u8 roles; + + /* for initiator drivers */ + + void *lld_data; /* LLD private data */ }; struct srp_function_template { + /* for initiator drivers */ + void (*rport_delete)(struct srp_rport *rport); /* for target drivers */ int (* tsk_mgmt_response)(struct Scsi_Host *, u64, u64, int); int (* it_nexus_response)(struct Scsi_Host *, u64, int); -- cgit v0.10.2 From 00ad255d17c2d12a035370836cb93630711d48ca Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Thu, 6 Dec 2012 19:56:31 +0000 Subject: RDMA/nes: Fix for BUG_ON due to adding already-pending timer To avoid nes tcp_timer crash for SMP architectures, add_timer is replaced with mod_timer. Signed-off-by: Tatyana Nikolova Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index feb41e7..22ea67e 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -669,7 +669,6 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, struct nes_cm_core *cm_core = cm_node->cm_core; struct nes_timer_entry *new_send; int ret = 0; - u32 was_timer_set; new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); if (!new_send) @@ -721,12 +720,8 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, } } - was_timer_set = timer_pending(&cm_core->tcp_timer); - - if (!was_timer_set) { - cm_core->tcp_timer.expires = new_send->timetosend; - add_timer(&cm_core->tcp_timer); - } + if (!timer_pending(&cm_core->tcp_timer)) + mod_timer(&cm_core->tcp_timer, new_send->timetosend); return ret; } @@ -944,10 +939,8 @@ static void nes_cm_timer_tick(unsigned long pass) } if (settimer) { - if (!timer_pending(&cm_core->tcp_timer)) { - cm_core->tcp_timer.expires = nexttimeout; - add_timer(&cm_core->tcp_timer); - } + if (!timer_pending(&cm_core->tcp_timer)) + mod_timer(&cm_core->tcp_timer, nexttimeout); } } @@ -1312,8 +1305,6 @@ static int mini_cm_del_listen(struct nes_cm_core *cm_core, static inline int mini_cm_accelerated(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) { - u32 was_timer_set; - cm_node->accelerated = 1; if (cm_node->accept_pend) { @@ -1323,11 +1314,8 @@ static inline int mini_cm_accelerated(struct nes_cm_core *cm_core, BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); } - was_timer_set = timer_pending(&cm_core->tcp_timer); - if (!was_timer_set) { - cm_core->tcp_timer.expires = jiffies + NES_SHORT_TIME; - add_timer(&cm_core->tcp_timer); - } + if (!timer_pending(&cm_core->tcp_timer)) + mod_timer(&cm_core->tcp_timer, (jiffies + NES_SHORT_TIME)); return 0; } -- cgit v0.10.2 From 7bfcfa51c35cdd2d37e0d70fc11790642dd11fb3 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Thu, 6 Dec 2012 19:58:27 +0000 Subject: RDMA/nes: Fix for terminate timer crash The terminate timer needs to be initialized just once. Signed-off-by: Tatyana Nikolova Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 5cac29e..33cc589 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -532,6 +532,7 @@ void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *); int nes_destroy_cqp(struct nes_device *); int nes_nic_cm_xmit(struct sk_buff *, struct net_device *); void nes_recheck_link_status(struct work_struct *work); +void nes_terminate_timeout(unsigned long context); /* nes_nic.c */ struct net_device *nes_netdev_init(struct nes_device *, void __iomem *); diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index fe7965e..67647e2 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -75,7 +75,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, static void process_critical_error(struct nes_device *nesdev); static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number); static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode); -static void nes_terminate_timeout(unsigned long context); static void nes_terminate_start_timer(struct nes_qp *nesqp); #ifdef CONFIG_INFINIBAND_NES_DEBUG @@ -3520,7 +3519,7 @@ static void nes_terminate_received(struct nes_device *nesdev, } /* Timeout routine in case terminate fails to complete */ -static void nes_terminate_timeout(unsigned long context) +void nes_terminate_timeout(unsigned long context) { struct nes_qp *nesqp = (struct nes_qp *)(unsigned long)context; @@ -3530,11 +3529,7 @@ static void nes_terminate_timeout(unsigned long context) /* Set a timer in case hw cannot complete the terminate sequence */ static void nes_terminate_start_timer(struct nes_qp *nesqp) { - init_timer(&nesqp->terminate_timer); - nesqp->terminate_timer.function = nes_terminate_timeout; - nesqp->terminate_timer.expires = jiffies + HZ; - nesqp->terminate_timer.data = (unsigned long)nesqp; - add_timer(&nesqp->terminate_timer); + mod_timer(&nesqp->terminate_timer, (jiffies + HZ)); } /** diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index cd0ecb2..c47ec25 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1404,6 +1404,9 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, } nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR); + init_timer(&nesqp->terminate_timer); + nesqp->terminate_timer.function = nes_terminate_timeout; + nesqp->terminate_timer.data = (unsigned long)nesqp; /* update the QP table */ nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp; @@ -1413,7 +1416,6 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, return &nesqp->ibqp; } - /** * nes_clean_cq */ -- cgit v0.10.2 From 7d9c199a55200c9b9fcad08e150470d02fb385be Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Thu, 6 Dec 2012 20:05:02 +0000 Subject: RDMA/nes: Fix for crash when registering zero length MR for CQ Signed-off-by: Tatyana Nikolova Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index c47ec25..07e4fba 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -2561,6 +2561,11 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ibmr; case IWNES_MEMREG_TYPE_QP: case IWNES_MEMREG_TYPE_CQ: + if (!region->length) { + nes_debug(NES_DBG_MR, "Unable to register zero length region for CQ\n"); + ib_umem_release(region); + return ERR_PTR(-EINVAL); + } nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL); if (!nespbl) { nes_debug(NES_DBG_MR, "Unable to allocate PBL\n"); -- cgit v0.10.2