From 06c56e44f3e32a859420ecac97996cc6f12827bb Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Sep 2005 09:19:02 -0700 Subject: [PATCH] IPoIB: fix memory leak Fix IPoIB memory leak on device removal. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 0e8ac13..49d120d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1062,6 +1062,8 @@ static void ipoib_remove_one(struct ib_device *device) ipoib_dev_cleanup(priv->dev); free_netdev(priv->dev); } + + kfree(dev_list); } static int __init ipoib_init_module(void) -- cgit v0.10.2 From 1d6801f9dd3ebb054ae685153a01b1a4ec817f46 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Sep 2005 09:19:44 -0700 Subject: [PATCH] IB/sa_query: avoid unnecessary list scan Using ib_get_client_data in SA event handler performs a list scan. It's better to use container_of to get the sa device directly. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 126ac80..9191793 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -431,8 +431,8 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event event->event == IB_EVENT_LID_CHANGE || event->event == IB_EVENT_PKEY_CHANGE || event->event == IB_EVENT_SM_CHANGE) { - struct ib_sa_device *sa_dev = - ib_get_client_data(event->device, &sa_client); + struct ib_sa_device *sa_dev; + sa_dev = container_of(handler, typeof(*sa_dev), event_handler); schedule_work(&sa_dev->port[event->element.port_num - sa_dev->start_port].update_task); -- cgit v0.10.2 From 0b2b35f68140ceeb1b78ef85680198e63ebc8649 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 1 Sep 2005 09:28:03 -0700 Subject: [PATCH] IB: Add user-supplied context to userspace CM ABI - Add user specified context to all uCM events. Users will not retrieve any events associated with the context after destroying the corresponding cm_id. - Provide the ib_cm_init_qp_attr() call to userspace clients of the CM. This call may be used to set QP attributes properly before modifying the QP. - Fixes some error handling synchonization and cleanup issues. - Performs some minor code cleanup. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 7959582..d0f0b0a 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -72,7 +72,6 @@ enum { static struct semaphore ctx_id_mutex; static struct idr ctx_id_table; -static int ctx_id_rover = 0; static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) { @@ -97,33 +96,16 @@ static void ib_ucm_ctx_put(struct ib_ucm_context *ctx) wake_up(&ctx->wait); } -static ssize_t ib_ucm_destroy_ctx(struct ib_ucm_file *file, int id) +static inline int ib_ucm_new_cm_id(int event) { - struct ib_ucm_context *ctx; - struct ib_ucm_event *uevent; - - down(&ctx_id_mutex); - ctx = idr_find(&ctx_id_table, id); - if (!ctx) - ctx = ERR_PTR(-ENOENT); - else if (ctx->file != file) - ctx = ERR_PTR(-EINVAL); - else - idr_remove(&ctx_id_table, ctx->id); - up(&ctx_id_mutex); - - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - atomic_dec(&ctx->ref); - wait_event(ctx->wait, !atomic_read(&ctx->ref)); + return event == IB_CM_REQ_RECEIVED || event == IB_CM_SIDR_REQ_RECEIVED; +} - /* No new events will be generated after destroying the cm_id. */ - if (!IS_ERR(ctx->cm_id)) - ib_destroy_cm_id(ctx->cm_id); +static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx) +{ + struct ib_ucm_event *uevent; - /* Cleanup events not yet reported to the user. */ - down(&file->mutex); + down(&ctx->file->mutex); list_del(&ctx->file_list); while (!list_empty(&ctx->events)) { @@ -133,15 +115,12 @@ static ssize_t ib_ucm_destroy_ctx(struct ib_ucm_file *file, int id) list_del(&uevent->ctx_list); /* clear incoming connections. */ - if (uevent->cm_id) + if (ib_ucm_new_cm_id(uevent->resp.event)) ib_destroy_cm_id(uevent->cm_id); kfree(uevent); } - up(&file->mutex); - - kfree(ctx); - return 0; + up(&ctx->file->mutex); } static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) @@ -153,36 +132,31 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) if (!ctx) return NULL; + memset(ctx, 0, sizeof *ctx); atomic_set(&ctx->ref, 1); init_waitqueue_head(&ctx->wait); ctx->file = file; - INIT_LIST_HEAD(&ctx->events); - list_add_tail(&ctx->file_list, &file->ctxs); - - ctx_id_rover = (ctx_id_rover + 1) & INT_MAX; -retry: - result = idr_pre_get(&ctx_id_table, GFP_KERNEL); - if (!result) - goto error; + do { + result = idr_pre_get(&ctx_id_table, GFP_KERNEL); + if (!result) + goto error; - down(&ctx_id_mutex); - result = idr_get_new_above(&ctx_id_table, ctx, ctx_id_rover, &ctx->id); - up(&ctx_id_mutex); + down(&ctx_id_mutex); + result = idr_get_new(&ctx_id_table, ctx, &ctx->id); + up(&ctx_id_mutex); + } while (result == -EAGAIN); - if (result == -EAGAIN) - goto retry; if (result) goto error; + list_add_tail(&ctx->file_list, &file->ctxs); ucm_dbg("Allocated CM ID <%d>\n", ctx->id); - return ctx; + error: - list_del(&ctx->file_list); kfree(ctx); - return NULL; } /* @@ -219,12 +193,9 @@ static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath, kpath->packet_life_time_selector; } -static void ib_ucm_event_req_get(struct ib_ucm_context *ctx, - struct ib_ucm_req_event_resp *ureq, +static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, struct ib_cm_req_event_param *kreq) { - ureq->listen_id = ctx->id; - ureq->remote_ca_guid = kreq->remote_ca_guid; ureq->remote_qkey = kreq->remote_qkey; ureq->remote_qpn = kreq->remote_qpn; @@ -259,14 +230,6 @@ static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, urep->srq = krep->srq; } -static void ib_ucm_event_sidr_req_get(struct ib_ucm_context *ctx, - struct ib_ucm_sidr_req_event_resp *ureq, - struct ib_cm_sidr_req_event_param *kreq) -{ - ureq->listen_id = ctx->id; - ureq->pkey = kreq->pkey; -} - static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, struct ib_cm_sidr_rep_event_param *krep) { @@ -275,15 +238,14 @@ static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, urep->qpn = krep->qpn; }; -static int ib_ucm_event_process(struct ib_ucm_context *ctx, - struct ib_cm_event *evt, +static int ib_ucm_event_process(struct ib_cm_event *evt, struct ib_ucm_event *uvt) { void *info = NULL; switch (evt->event) { case IB_CM_REQ_RECEIVED: - ib_ucm_event_req_get(ctx, &uvt->resp.u.req_resp, + ib_ucm_event_req_get(&uvt->resp.u.req_resp, &evt->param.req_rcvd); uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE; uvt->resp.present = IB_UCM_PRES_PRIMARY; @@ -331,8 +293,8 @@ static int ib_ucm_event_process(struct ib_ucm_context *ctx, info = evt->param.apr_rcvd.apr_info; break; case IB_CM_SIDR_REQ_RECEIVED: - ib_ucm_event_sidr_req_get(ctx, &uvt->resp.u.sidr_req_resp, - &evt->param.sidr_req_rcvd); + uvt->resp.u.sidr_req_resp.pkey = + evt->param.sidr_req_rcvd.pkey; uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; break; case IB_CM_SIDR_REP_RECEIVED: @@ -378,31 +340,24 @@ static int ib_ucm_event_handler(struct ib_cm_id *cm_id, struct ib_ucm_event *uevent; struct ib_ucm_context *ctx; int result = 0; - int id; ctx = cm_id->context; - if (event->event == IB_CM_REQ_RECEIVED || - event->event == IB_CM_SIDR_REQ_RECEIVED) - id = IB_UCM_CM_ID_INVALID; - else - id = ctx->id; - uevent = kmalloc(sizeof(*uevent), GFP_KERNEL); if (!uevent) goto err1; memset(uevent, 0, sizeof(*uevent)); - uevent->resp.id = id; + uevent->ctx = ctx; + uevent->cm_id = cm_id; + uevent->resp.uid = ctx->uid; + uevent->resp.id = ctx->id; uevent->resp.event = event->event; - result = ib_ucm_event_process(ctx, event, uevent); + result = ib_ucm_event_process(event, uevent); if (result) goto err2; - uevent->ctx = ctx; - uevent->cm_id = (id == IB_UCM_CM_ID_INVALID) ? cm_id : NULL; - down(&ctx->file->mutex); list_add_tail(&uevent->file_list, &ctx->file->events); list_add_tail(&uevent->ctx_list, &ctx->events); @@ -414,7 +369,7 @@ err2: kfree(uevent); err1: /* Destroy new cm_id's */ - return (id == IB_UCM_CM_ID_INVALID); + return ib_ucm_new_cm_id(event->event); } static ssize_t ib_ucm_event(struct ib_ucm_file *file, @@ -423,7 +378,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, { struct ib_ucm_context *ctx; struct ib_ucm_event_get cmd; - struct ib_ucm_event *uevent = NULL; + struct ib_ucm_event *uevent; int result = 0; DEFINE_WAIT(wait); @@ -436,7 +391,6 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, * wait */ down(&file->mutex); - while (list_empty(&file->events)) { if (file->filp->f_flags & O_NONBLOCK) { @@ -463,21 +417,18 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, uevent = list_entry(file->events.next, struct ib_ucm_event, file_list); - if (!uevent->cm_id) - goto user; + if (ib_ucm_new_cm_id(uevent->resp.event)) { + ctx = ib_ucm_ctx_alloc(file); + if (!ctx) { + result = -ENOMEM; + goto done; + } - ctx = ib_ucm_ctx_alloc(file); - if (!ctx) { - result = -ENOMEM; - goto done; + ctx->cm_id = uevent->cm_id; + ctx->cm_id->context = ctx; + uevent->resp.id = ctx->id; } - ctx->cm_id = uevent->cm_id; - ctx->cm_id->context = ctx; - - uevent->resp.id = ctx->id; - -user: if (copy_to_user((void __user *)(unsigned long)cmd.response, &uevent->resp, sizeof(uevent->resp))) { result = -EFAULT; @@ -485,12 +436,10 @@ user: } if (uevent->data) { - if (cmd.data_len < uevent->data_len) { result = -ENOMEM; goto done; } - if (copy_to_user((void __user *)(unsigned long)cmd.data, uevent->data, uevent->data_len)) { result = -EFAULT; @@ -499,12 +448,10 @@ user: } if (uevent->info) { - if (cmd.info_len < uevent->info_len) { result = -ENOMEM; goto done; } - if (copy_to_user((void __user *)(unsigned long)cmd.info, uevent->info, uevent->info_len)) { result = -EFAULT; @@ -514,6 +461,7 @@ user: list_del(&uevent->file_list); list_del(&uevent->ctx_list); + uevent->ctx->events_reported++; kfree(uevent->data); kfree(uevent->info); @@ -545,6 +493,7 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, if (!ctx) return -ENOMEM; + ctx->uid = cmd.uid; ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, ctx); if (IS_ERR(ctx->cm_id)) { result = PTR_ERR(ctx->cm_id); @@ -561,7 +510,14 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, return 0; err: - ib_ucm_destroy_ctx(file, ctx->id); + down(&ctx_id_mutex); + idr_remove(&ctx_id_table, ctx->id); + up(&ctx_id_mutex); + + if (!IS_ERR(ctx->cm_id)) + ib_destroy_cm_id(ctx->cm_id); + + kfree(ctx); return result; } @@ -570,11 +526,44 @@ static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file, int in_len, int out_len) { struct ib_ucm_destroy_id cmd; + struct ib_ucm_destroy_id_resp resp; + struct ib_ucm_context *ctx; + int result = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - return ib_ucm_destroy_ctx(file, cmd.id); + down(&ctx_id_mutex); + ctx = idr_find(&ctx_id_table, cmd.id); + if (!ctx) + ctx = ERR_PTR(-ENOENT); + else if (ctx->file != file) + ctx = ERR_PTR(-EINVAL); + else + idr_remove(&ctx_id_table, ctx->id); + up(&ctx_id_mutex); + + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + atomic_dec(&ctx->ref); + wait_event(ctx->wait, !atomic_read(&ctx->ref)); + + /* No new events will be generated after destroying the cm_id. */ + ib_destroy_cm_id(ctx->cm_id); + /* Cleanup events not yet reported to the user. */ + ib_ucm_cleanup_events(ctx); + + resp.events_reported = ctx->events_reported; + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + result = -EFAULT; + + kfree(ctx); + return result; } static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, @@ -609,6 +598,98 @@ static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, return result; } +static void ib_ucm_copy_ah_attr(struct ib_ucm_ah_attr *dest_attr, + struct ib_ah_attr *src_attr) +{ + memcpy(dest_attr->grh_dgid, src_attr->grh.dgid.raw, + sizeof src_attr->grh.dgid); + dest_attr->grh_flow_label = src_attr->grh.flow_label; + dest_attr->grh_sgid_index = src_attr->grh.sgid_index; + dest_attr->grh_hop_limit = src_attr->grh.hop_limit; + dest_attr->grh_traffic_class = src_attr->grh.traffic_class; + + dest_attr->dlid = src_attr->dlid; + dest_attr->sl = src_attr->sl; + dest_attr->src_path_bits = src_attr->src_path_bits; + dest_attr->static_rate = src_attr->static_rate; + dest_attr->is_global = (src_attr->ah_flags & IB_AH_GRH); + dest_attr->port_num = src_attr->port_num; +} + +static void ib_ucm_copy_qp_attr(struct ib_ucm_init_qp_attr_resp *dest_attr, + struct ib_qp_attr *src_attr) +{ + dest_attr->cur_qp_state = src_attr->cur_qp_state; + dest_attr->path_mtu = src_attr->path_mtu; + dest_attr->path_mig_state = src_attr->path_mig_state; + dest_attr->qkey = src_attr->qkey; + dest_attr->rq_psn = src_attr->rq_psn; + dest_attr->sq_psn = src_attr->sq_psn; + dest_attr->dest_qp_num = src_attr->dest_qp_num; + dest_attr->qp_access_flags = src_attr->qp_access_flags; + + dest_attr->max_send_wr = src_attr->cap.max_send_wr; + dest_attr->max_recv_wr = src_attr->cap.max_recv_wr; + dest_attr->max_send_sge = src_attr->cap.max_send_sge; + dest_attr->max_recv_sge = src_attr->cap.max_recv_sge; + dest_attr->max_inline_data = src_attr->cap.max_inline_data; + + ib_ucm_copy_ah_attr(&dest_attr->ah_attr, &src_attr->ah_attr); + ib_ucm_copy_ah_attr(&dest_attr->alt_ah_attr, &src_attr->alt_ah_attr); + + dest_attr->pkey_index = src_attr->pkey_index; + dest_attr->alt_pkey_index = src_attr->alt_pkey_index; + dest_attr->en_sqd_async_notify = src_attr->en_sqd_async_notify; + dest_attr->sq_draining = src_attr->sq_draining; + dest_attr->max_rd_atomic = src_attr->max_rd_atomic; + dest_attr->max_dest_rd_atomic = src_attr->max_dest_rd_atomic; + dest_attr->min_rnr_timer = src_attr->min_rnr_timer; + dest_attr->port_num = src_attr->port_num; + dest_attr->timeout = src_attr->timeout; + dest_attr->retry_cnt = src_attr->retry_cnt; + dest_attr->rnr_retry = src_attr->rnr_retry; + dest_attr->alt_port_num = src_attr->alt_port_num; + dest_attr->alt_timeout = src_attr->alt_timeout; +} + +static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_init_qp_attr_resp resp; + struct ib_ucm_init_qp_attr cmd; + struct ib_ucm_context *ctx; + struct ib_qp_attr qp_attr; + int result = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + resp.qp_attr_mask = 0; + memset(&qp_attr, 0, sizeof qp_attr); + qp_attr.qp_state = cmd.qp_state; + result = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); + if (result) + goto out; + + ib_ucm_copy_qp_attr(&resp, &qp_attr); + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + result = -EFAULT; + +out: + ib_ucm_ctx_put(ctx); + return result; +} + static ssize_t ib_ucm_listen(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) @@ -808,6 +889,7 @@ static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file, ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { + ctx->uid = cmd.uid; result = ib_send_cm_rep(ctx->cm_id, ¶m); ib_ucm_ctx_put(ctx); } else @@ -1086,6 +1168,7 @@ static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file, [IB_USER_CM_CMD_SEND_SIDR_REQ] = ib_ucm_send_sidr_req, [IB_USER_CM_CMD_SEND_SIDR_REP] = ib_ucm_send_sidr_rep, [IB_USER_CM_CMD_EVENT] = ib_ucm_event, + [IB_USER_CM_CMD_INIT_QP_ATTR] = ib_ucm_init_qp_attr, }; static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, @@ -1161,12 +1244,18 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) down(&file->mutex); while (!list_empty(&file->ctxs)) { - ctx = list_entry(file->ctxs.next, struct ib_ucm_context, file_list); - up(&file->mutex); - ib_ucm_destroy_ctx(file, ctx->id); + + down(&ctx_id_mutex); + idr_remove(&ctx_id_table, ctx->id); + up(&ctx_id_mutex); + + ib_destroy_cm_id(ctx->cm_id); + ib_ucm_cleanup_events(ctx); + kfree(ctx); + down(&file->mutex); } up(&file->mutex); diff --git a/drivers/infiniband/core/ucm.h b/drivers/infiniband/core/ucm.h index c8819b9..f46f37b 100644 --- a/drivers/infiniband/core/ucm.h +++ b/drivers/infiniband/core/ucm.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -43,8 +44,6 @@ #include #include -#define IB_UCM_CM_ID_INVALID 0xffffffff - struct ib_ucm_file { struct semaphore mutex; struct file *filp; @@ -58,9 +57,11 @@ struct ib_ucm_context { int id; wait_queue_head_t wait; atomic_t ref; + int events_reported; struct ib_ucm_file *file; struct ib_cm_id *cm_id; + __u64 uid; struct list_head events; /* list of pending events. */ struct list_head file_list; /* member in file ctx list */ @@ -71,16 +72,12 @@ struct ib_ucm_event { struct list_head file_list; /* member in file event list */ struct list_head ctx_list; /* member in ctx event list */ + struct ib_cm_id *cm_id; struct ib_ucm_event_resp resp; void *data; void *info; int data_len; int info_len; - /* - * new connection identifiers needs to be saved until - * userspace can get a handle on them. - */ - struct ib_cm_id *cm_id; }; #endif /* UCM_H */ diff --git a/include/rdma/ib_user_cm.h b/include/rdma/ib_user_cm.h index 72182d1..e4d1654 100644 --- a/include/rdma/ib_user_cm.h +++ b/include/rdma/ib_user_cm.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -37,7 +38,7 @@ #include -#define IB_USER_CM_ABI_VERSION 1 +#define IB_USER_CM_ABI_VERSION 2 enum { IB_USER_CM_CMD_CREATE_ID, @@ -60,6 +61,7 @@ enum { IB_USER_CM_CMD_SEND_SIDR_REP, IB_USER_CM_CMD_EVENT, + IB_USER_CM_CMD_INIT_QP_ATTR, }; /* * command ABI structures. @@ -71,6 +73,7 @@ struct ib_ucm_cmd_hdr { }; struct ib_ucm_create_id { + __u64 uid; __u64 response; }; @@ -79,9 +82,14 @@ struct ib_ucm_create_id_resp { }; struct ib_ucm_destroy_id { + __u64 response; __u32 id; }; +struct ib_ucm_destroy_id_resp { + __u32 events_reported; +}; + struct ib_ucm_attr_id { __u64 response; __u32 id; @@ -94,6 +102,64 @@ struct ib_ucm_attr_id_resp { __be32 remote_id; }; +struct ib_ucm_init_qp_attr { + __u64 response; + __u32 id; + __u32 qp_state; +}; + +struct ib_ucm_ah_attr { + __u8 grh_dgid[16]; + __u32 grh_flow_label; + __u16 dlid; + __u16 reserved; + __u8 grh_sgid_index; + __u8 grh_hop_limit; + __u8 grh_traffic_class; + __u8 sl; + __u8 src_path_bits; + __u8 static_rate; + __u8 is_global; + __u8 port_num; +}; + +struct ib_ucm_init_qp_attr_resp { + __u32 qp_attr_mask; + __u32 qp_state; + __u32 cur_qp_state; + __u32 path_mtu; + __u32 path_mig_state; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + + struct ib_ucm_ah_attr ah_attr; + struct ib_ucm_ah_attr alt_ah_attr; + + /* ib_qp_cap */ + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 en_sqd_async_notify; + __u8 sq_draining; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; +}; + struct ib_ucm_listen { __be64 service_id; __be64 service_mask; @@ -157,6 +223,7 @@ struct ib_ucm_req { }; struct ib_ucm_rep { + __u64 uid; __u64 data; __u32 id; __u32 qpn; @@ -232,7 +299,6 @@ struct ib_ucm_event_get { }; struct ib_ucm_req_event_resp { - __u32 listen_id; /* device */ /* port */ struct ib_ucm_path_rec primary_path; @@ -287,7 +353,6 @@ struct ib_ucm_apr_event_resp { }; struct ib_ucm_sidr_req_event_resp { - __u32 listen_id; /* device */ /* port */ __u16 pkey; @@ -307,6 +372,7 @@ struct ib_ucm_sidr_rep_event_resp { #define IB_UCM_PRES_ALTERNATE 0x08 struct ib_ucm_event_resp { + __u64 uid; __u32 id; __u32 event; __u32 present; -- cgit v0.10.2 From c9fe2b3287498b80781284306064104ef9c8a31a Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 7 Sep 2005 09:43:23 -0700 Subject: [PATCH] IB: really reset QPs When we modify a QP to the RESET state, completely clean up the QP so that it is really and truly reset. Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 0164b84..c753f73 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -220,6 +220,16 @@ static void *get_send_wqe(struct mthca_qp *qp, int n) (PAGE_SIZE - 1)); } +static void mthca_wq_init(struct mthca_wq *wq) +{ + spin_lock_init(&wq->lock); + wq->next_ind = 0; + wq->last_comp = wq->max - 1; + wq->head = 0; + wq->tail = 0; + wq->last = NULL; +} + void mthca_qp_event(struct mthca_dev *dev, u32 qpn, enum ib_event_type event_type) { @@ -833,8 +843,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) store_attrs(to_msqp(qp), attr, attr_mask); /* - * If we are moving QP0 to RTR, bring the IB link up; if we - * are moving QP0 to RESET or ERROR, bring the link back down. + * If we moved QP0 to RTR, bring the IB link up; if we moved + * QP0 to RESET or ERROR, bring the link back down. */ if (is_qp0(dev, qp)) { if (cur_state != IB_QPS_RTR && @@ -848,6 +858,26 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status); } + /* + * If we moved a kernel QP to RESET, clean up all old CQ + * entries and reinitialize the QP. + */ + if (!err && new_state == IB_QPS_RESET && !qp->ibqp.uobject) { + mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + if (qp->ibqp.send_cq != qp->ibqp.recv_cq) + mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + + mthca_wq_init(&qp->sq); + mthca_wq_init(&qp->rq); + + if (mthca_is_memfree(dev)) { + *qp->sq.db = 0; + *qp->rq.db = 0; + } + } + return err; } @@ -1003,16 +1033,6 @@ static void mthca_free_memfree(struct mthca_dev *dev, } } -static void mthca_wq_init(struct mthca_wq* wq) -{ - spin_lock_init(&wq->lock); - wq->next_ind = 0; - wq->last_comp = wq->max - 1; - wq->head = 0; - wq->tail = 0; - wq->last = NULL; -} - static int mthca_alloc_qp_common(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, -- cgit v0.10.2 From 30a7e8ef13b2ff0db7b15af9afdd12b93783f01e Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 7 Sep 2005 09:45:00 -0700 Subject: [PATCH] IB: Initialize qp->wait Add missing call to init_waitqueue_head(). Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index c753f73..bcef06b 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1044,6 +1044,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, int i; atomic_set(&qp->refcount, 1); + init_waitqueue_head(&qp->wait); qp->state = IB_QPS_RESET; qp->atomic_rd_en = 0; qp->resp_depth = 0; -- cgit v0.10.2 From b5dcbf47e10e568273213a4410daa27c11cdba3a Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 7 Sep 2005 11:03:41 -0700 Subject: [PATCH] IB: RMPP fixes - Fix payload length of middle RMPP sent segments. Middle payload lengths should be 0 on the send side. (This is perhaps a compliance and should not be an interop issue as middle payload lengths are supposed to be ignored on receive). - Fix length in first segment of multipacket sends (This is a compliance issue but does not affect at least OpenIB to OpenIB RMPP transfers). Signed-off-by: Hal Rosenstock Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 43fd805..2bd8b1c 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -593,7 +593,8 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(mad_send_wr->total_seg * (sizeof(struct ib_rmpp_mad) - - offsetof(struct ib_rmpp_mad, data))); + offsetof(struct ib_rmpp_mad, data)) - + mad_send_wr->pad); mad_send_wr->sg_list[0].length = sizeof(struct ib_rmpp_mad); } else { mad_send_wr->send_wr.num_sge = 2; @@ -602,6 +603,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->sg_list[1].length = sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset; mad_send_wr->sg_list[1].lkey = mad_send_wr->sg_list[0].lkey; + rmpp_mad->rmpp_hdr.paylen_newwin = 0; } if (mad_send_wr->seg_num == mad_send_wr->total_seg) { -- cgit v0.10.2 From 17781cd6186cb3472ff34b2d9a15e647bd311e8b Mon Sep 17 00:00:00 2001 From: James Lentini Date: Wed, 7 Sep 2005 12:43:08 -0700 Subject: [PATCH] IB: clean up user access config options Add a new config option INFINIBAND_USER_MAD to control whether we build ib_umad. Change INFINIBAND_USER_VERBS to INFINIBAND_USER_ACCESS, and have it control ib_ucm and ib_uat as well as ib_uverbs. Signed-off-by: James Lentini Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 32cdfb3..325d502 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -8,15 +8,26 @@ config INFINIBAND any protocols you wish to use as well as drivers for your InfiniBand hardware. -config INFINIBAND_USER_VERBS - tristate "InfiniBand userspace verbs support" +config INFINIBAND_USER_MAD + tristate "InfiniBand userspace MAD support" depends on INFINIBAND ---help--- - Userspace InfiniBand verbs support. This is the kernel side - of userspace verbs, which allows userspace processes to - directly access InfiniBand hardware for fast-path - operations. You will also need libibverbs and a hardware - driver library from . + Userspace InfiniBand Management Datagram (MAD) support. This + is the kernel side of the userspace MAD support, which allows + userspace processes to send and receive MADs. You will also + need libibumad from . + +config INFINIBAND_USER_ACCESS + tristate "InfiniBand userspace access (verbs and CM)" + depends on INFINIBAND + ---help--- + Userspace InfiniBand access support. This enables the + kernel side of userspace verbs and the userspace + communication manager (CM). This allows userspace processes + to set up connections and directly access InfiniBand + hardware for fast-path operations. You will also need + libibverbs, libibcm and a hardware driver library from + . source "drivers/infiniband/hw/mthca/Kconfig" diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 678a7e0..ec3353f 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ - ib_cm.o ib_umad.o ib_ucm.o -obj-$(CONFIG_INFINIBAND_USER_VERBS) += ib_uverbs.o + ib_cm.o +obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o +obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ device.o fmr_pool.o cache.o -- cgit v0.10.2 From 1325cc79163058739b70bed9860fccbecac6236b Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Fri, 9 Sep 2005 13:45:51 -0700 Subject: [PATCH] IB: Define more SA methods ib_sa.h: Define more SA methods (initially for madeye decode) Signed-off-by: Hal Rosenstock Signed-off-by: Roland Dreier diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index c022edf..0f4f5ec 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -46,7 +46,11 @@ enum { IB_SA_METHOD_GET_TABLE = 0x12, IB_SA_METHOD_GET_TABLE_RESP = 0x92, - IB_SA_METHOD_DELETE = 0x15 + IB_SA_METHOD_DELETE = 0x15, + IB_SA_METHOD_DELETE_RESP = 0x95, + IB_SA_METHOD_GET_MULTI = 0x14, + IB_SA_METHOD_GET_MULTI_RESP = 0x94, + IB_SA_METHOD_GET_TRACE_TBL = 0x13 }; enum ib_sa_selector { -- cgit v0.10.2 From fbed8eee70cf7e11fbf231afafc0ccb313acc62e Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Fri, 9 Sep 2005 15:24:04 -0700 Subject: [PATCH] IB: Move SA attributes to ib_sa.h SA: Move SA attributes to ib_sa.h so are accessible to more than sa_query.c. Also, remove deprecated attributes and add one missing one. Signed-off-by: Hal Rosenstock Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 9191793..78de2dd 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -113,32 +113,6 @@ static DEFINE_IDR(query_idr); static spinlock_t tid_lock; static u32 tid; -enum { - IB_SA_ATTR_CLASS_PORTINFO = 0x01, - IB_SA_ATTR_NOTICE = 0x02, - IB_SA_ATTR_INFORM_INFO = 0x03, - IB_SA_ATTR_NODE_REC = 0x11, - IB_SA_ATTR_PORT_INFO_REC = 0x12, - IB_SA_ATTR_SL2VL_REC = 0x13, - IB_SA_ATTR_SWITCH_REC = 0x14, - IB_SA_ATTR_LINEAR_FDB_REC = 0x15, - IB_SA_ATTR_RANDOM_FDB_REC = 0x16, - IB_SA_ATTR_MCAST_FDB_REC = 0x17, - IB_SA_ATTR_SM_INFO_REC = 0x18, - IB_SA_ATTR_LINK_REC = 0x20, - IB_SA_ATTR_GUID_INFO_REC = 0x30, - IB_SA_ATTR_SERVICE_REC = 0x31, - IB_SA_ATTR_PARTITION_REC = 0x33, - IB_SA_ATTR_RANGE_REC = 0x34, - IB_SA_ATTR_PATH_REC = 0x35, - IB_SA_ATTR_VL_ARB_REC = 0x36, - IB_SA_ATTR_MC_GROUP_REC = 0x37, - IB_SA_ATTR_MC_MEMBER_REC = 0x38, - IB_SA_ATTR_TRACE_REC = 0x39, - IB_SA_ATTR_MULTI_PATH_REC = 0x3a, - IB_SA_ATTR_SERVICE_ASSOC_REC = 0x3b -}; - #define PATH_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \ .struct_size_bytes = sizeof ((struct ib_sa_path_rec *) 0)->field, \ diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 0f4f5ec..a7555c8 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -53,6 +53,31 @@ enum { IB_SA_METHOD_GET_TRACE_TBL = 0x13 }; +enum { + IB_SA_ATTR_CLASS_PORTINFO = 0x01, + IB_SA_ATTR_NOTICE = 0x02, + IB_SA_ATTR_INFORM_INFO = 0x03, + IB_SA_ATTR_NODE_REC = 0x11, + IB_SA_ATTR_PORT_INFO_REC = 0x12, + IB_SA_ATTR_SL2VL_REC = 0x13, + IB_SA_ATTR_SWITCH_REC = 0x14, + IB_SA_ATTR_LINEAR_FDB_REC = 0x15, + IB_SA_ATTR_RANDOM_FDB_REC = 0x16, + IB_SA_ATTR_MCAST_FDB_REC = 0x17, + IB_SA_ATTR_SM_INFO_REC = 0x18, + IB_SA_ATTR_LINK_REC = 0x20, + IB_SA_ATTR_GUID_INFO_REC = 0x30, + IB_SA_ATTR_SERVICE_REC = 0x31, + IB_SA_ATTR_PARTITION_REC = 0x33, + IB_SA_ATTR_PATH_REC = 0x35, + IB_SA_ATTR_VL_ARB_REC = 0x36, + IB_SA_ATTR_MC_MEMBER_REC = 0x38, + IB_SA_ATTR_TRACE_REC = 0x39, + IB_SA_ATTR_MULTI_PATH_REC = 0x3a, + IB_SA_ATTR_SERVICE_ASSOC_REC = 0x3b, + IB_SA_ATTR_INFORM_INFO_REC = 0xf3 +}; + enum ib_sa_selector { IB_SA_GTE = 0, IB_SA_LTE = 1, -- cgit v0.10.2 From 2e9f7cb7869059e55cd91f5e23c6380f3763db56 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 9 Sep 2005 15:45:57 -0700 Subject: [PATCH] IB: Add struct for ClassPortInfo Add structure definition for ClassPortInfo format. This is needed for (at least) handling CM redirects. Signed-off-by: Roland Dreier diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index fc6b1c1..53184a3 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -173,6 +173,27 @@ struct ib_vendor_mad { u8 data[216]; }; +struct ib_class_port_info +{ + u8 base_version; + u8 class_version; + __be16 capability_mask; + u8 reserved[3]; + u8 resp_time_value; + u8 redirect_gid[16]; + __be32 redirect_tcslfl; + __be16 redirect_lid; + __be16 redirect_pkey; + __be32 redirect_qp; + __be32 redirect_qkey; + u8 trap_gid[16]; + __be32 trap_tcslfl; + __be16 trap_lid; + __be16 trap_pkey; + __be32 trap_hlqp; + __be32 trap_qkey; +}; + /** * ib_mad_send_buf - MAD data buffer and work request for sends. * @mad: References an allocated MAD data buffer. The size of the data -- cgit v0.10.2 From 63aaf647529e8a56bdf31fd8f2979d4371c6a332 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 9 Sep 2005 15:55:08 -0700 Subject: Make sure that userspace does not retrieve stale asynchronous or completion events after destroying a CQ, QP or SRQ. We do this by sweeping the event lists before returning from a destroy calls, and then return the number of events already reported before the destroy call. This allows userspace wait until it has processed all events for an object returned from the kernel before it frees its context for the object. The ABI of the destroy CQ, destroy QP and destroy SRQ commands has to change to return the event count, so bump the ABI version from 1 to 2. The userspace libibverbs library has already been updated to handle both the old and new ABI versions. Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 180b3d4..b1897be 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -76,20 +76,28 @@ struct ib_uverbs_file { struct ib_uverbs_event_file comp_file[1]; }; -struct ib_uverbs_async_event { - struct ib_uverbs_async_event_desc desc; +struct ib_uverbs_event { + union { + struct ib_uverbs_async_event_desc async; + struct ib_uverbs_comp_event_desc comp; + } desc; struct list_head list; + struct list_head obj_list; + u32 *counter; }; -struct ib_uverbs_comp_event { - struct ib_uverbs_comp_event_desc desc; - struct list_head list; +struct ib_uevent_object { + struct ib_uobject uobject; + struct list_head event_list; + u32 events_reported; }; -struct ib_uobject_mr { - struct ib_uobject uobj; - struct page *page_list; - struct scatterlist *sg_list; +struct ib_ucq_object { + struct ib_uobject uobject; + struct list_head comp_list; + struct list_head async_list; + u32 comp_events_reported; + u32 async_events_reported; }; extern struct semaphore ib_uverbs_idr_mutex; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index ebccf9f..e91ebde 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -590,7 +590,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, struct ib_uverbs_create_cq cmd; struct ib_uverbs_create_cq_resp resp; struct ib_udata udata; - struct ib_uobject *uobj; + struct ib_ucq_object *uobj; struct ib_cq *cq; int ret; @@ -611,8 +611,12 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - uobj->user_handle = cmd.user_handle; - uobj->context = file->ucontext; + uobj->uobject.user_handle = cmd.user_handle; + uobj->uobject.context = file->ucontext; + uobj->comp_events_reported = 0; + uobj->async_events_reported = 0; + INIT_LIST_HEAD(&uobj->comp_list); + INIT_LIST_HEAD(&uobj->async_list); cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe, file->ucontext, &udata); @@ -622,7 +626,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, } cq->device = file->device->ib_dev; - cq->uobject = uobj; + cq->uobject = &uobj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = file; @@ -635,7 +639,7 @@ retry: } down(&ib_uverbs_idr_mutex); - ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->id); + ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id); up(&ib_uverbs_idr_mutex); if (ret == -EAGAIN) @@ -644,11 +648,11 @@ retry: goto err_cq; spin_lock_irq(&file->ucontext->lock); - list_add_tail(&uobj->list, &file->ucontext->cq_list); + list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); spin_unlock_irq(&file->ucontext->lock); memset(&resp, 0, sizeof resp); - resp.cq_handle = uobj->id; + resp.cq_handle = uobj->uobject.id; resp.cqe = cq->cqe; if (copy_to_user((void __user *) (unsigned long) cmd.response, @@ -661,11 +665,11 @@ retry: err_list: spin_lock_irq(&file->ucontext->lock); - list_del(&uobj->list); + list_del(&uobj->uobject.list); spin_unlock_irq(&file->ucontext->lock); down(&ib_uverbs_idr_mutex); - idr_remove(&ib_uverbs_cq_idr, uobj->id); + idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id); up(&ib_uverbs_idr_mutex); err_cq: @@ -680,21 +684,27 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { - struct ib_uverbs_destroy_cq cmd; - struct ib_cq *cq; - struct ib_uobject *uobj; - int ret = -EINVAL; + struct ib_uverbs_destroy_cq cmd; + struct ib_uverbs_destroy_cq_resp resp; + struct ib_cq *cq; + struct ib_ucq_object *uobj; + struct ib_uverbs_event *evt, *tmp; + u64 user_handle; + int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + memset(&resp, 0, sizeof resp); + down(&ib_uverbs_idr_mutex); cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); if (!cq || cq->uobject->context != file->ucontext) goto out; - uobj = cq->uobject; + user_handle = cq->uobject->user_handle; + uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); ret = ib_destroy_cq(cq); if (ret) @@ -703,11 +713,32 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle); spin_lock_irq(&file->ucontext->lock); - list_del(&uobj->list); + list_del(&uobj->uobject.list); spin_unlock_irq(&file->ucontext->lock); + spin_lock_irq(&file->comp_file[0].lock); + list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&file->comp_file[0].lock); + + spin_lock_irq(&file->async_file.lock); + list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&file->async_file.lock); + + resp.comp_events_reported = uobj->comp_events_reported; + resp.async_events_reported = uobj->async_events_reported; + kfree(uobj); + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + out: up(&ib_uverbs_idr_mutex); @@ -721,7 +752,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, struct ib_uverbs_create_qp cmd; struct ib_uverbs_create_qp_resp resp; struct ib_udata udata; - struct ib_uobject *uobj; + struct ib_uevent_object *uobj; struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; @@ -772,8 +803,10 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, attr.cap.max_recv_sge = cmd.max_recv_sge; attr.cap.max_inline_data = cmd.max_inline_data; - uobj->user_handle = cmd.user_handle; - uobj->context = file->ucontext; + uobj->uobject.user_handle = cmd.user_handle; + uobj->uobject.context = file->ucontext; + uobj->events_reported = 0; + INIT_LIST_HEAD(&uobj->event_list); qp = pd->device->create_qp(pd, &attr, &udata); if (IS_ERR(qp)) { @@ -786,7 +819,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, qp->send_cq = attr.send_cq; qp->recv_cq = attr.recv_cq; qp->srq = attr.srq; - qp->uobject = uobj; + qp->uobject = &uobj->uobject; qp->event_handler = attr.event_handler; qp->qp_context = attr.qp_context; qp->qp_type = attr.qp_type; @@ -805,17 +838,17 @@ retry: goto err_destroy; } - ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->id); + ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->uobject.id); if (ret == -EAGAIN) goto retry; if (ret) goto err_destroy; - resp.qp_handle = uobj->id; + resp.qp_handle = uobj->uobject.id; spin_lock_irq(&file->ucontext->lock); - list_add_tail(&uobj->list, &file->ucontext->qp_list); + list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list); spin_unlock_irq(&file->ucontext->lock); if (copy_to_user((void __user *) (unsigned long) cmd.response, @@ -830,7 +863,7 @@ retry: err_list: spin_lock_irq(&file->ucontext->lock); - list_del(&uobj->list); + list_del(&uobj->uobject.list); spin_unlock_irq(&file->ucontext->lock); err_destroy: @@ -930,21 +963,25 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { - struct ib_uverbs_destroy_qp cmd; - struct ib_qp *qp; - struct ib_uobject *uobj; - int ret = -EINVAL; + struct ib_uverbs_destroy_qp cmd; + struct ib_uverbs_destroy_qp_resp resp; + struct ib_qp *qp; + struct ib_uevent_object *uobj; + struct ib_uverbs_event *evt, *tmp; + int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + memset(&resp, 0, sizeof resp); + down(&ib_uverbs_idr_mutex); qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); if (!qp || qp->uobject->context != file->ucontext) goto out; - uobj = qp->uobject; + uobj = container_of(qp->uobject, struct ib_uevent_object, uobject); ret = ib_destroy_qp(qp); if (ret) @@ -953,11 +990,24 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle); spin_lock_irq(&file->ucontext->lock); - list_del(&uobj->list); + list_del(&uobj->uobject.list); spin_unlock_irq(&file->ucontext->lock); + spin_lock_irq(&file->async_file.lock); + list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&file->async_file.lock); + + resp.events_reported = uobj->events_reported; + kfree(uobj); + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + out: up(&ib_uverbs_idr_mutex); @@ -1015,7 +1065,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, struct ib_uverbs_create_srq cmd; struct ib_uverbs_create_srq_resp resp; struct ib_udata udata; - struct ib_uobject *uobj; + struct ib_uevent_object *uobj; struct ib_pd *pd; struct ib_srq *srq; struct ib_srq_init_attr attr; @@ -1050,8 +1100,10 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, attr.attr.max_sge = cmd.max_sge; attr.attr.srq_limit = cmd.srq_limit; - uobj->user_handle = cmd.user_handle; - uobj->context = file->ucontext; + uobj->uobject.user_handle = cmd.user_handle; + uobj->uobject.context = file->ucontext; + uobj->events_reported = 0; + INIT_LIST_HEAD(&uobj->event_list); srq = pd->device->create_srq(pd, &attr, &udata); if (IS_ERR(srq)) { @@ -1061,7 +1113,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, srq->device = pd->device; srq->pd = pd; - srq->uobject = uobj; + srq->uobject = &uobj->uobject; srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; atomic_inc(&pd->usecnt); @@ -1075,17 +1127,17 @@ retry: goto err_destroy; } - ret = idr_get_new(&ib_uverbs_srq_idr, srq, &uobj->id); + ret = idr_get_new(&ib_uverbs_srq_idr, srq, &uobj->uobject.id); if (ret == -EAGAIN) goto retry; if (ret) goto err_destroy; - resp.srq_handle = uobj->id; + resp.srq_handle = uobj->uobject.id; spin_lock_irq(&file->ucontext->lock); - list_add_tail(&uobj->list, &file->ucontext->srq_list); + list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); spin_unlock_irq(&file->ucontext->lock); if (copy_to_user((void __user *) (unsigned long) cmd.response, @@ -1100,7 +1152,7 @@ retry: err_list: spin_lock_irq(&file->ucontext->lock); - list_del(&uobj->list); + list_del(&uobj->uobject.list); spin_unlock_irq(&file->ucontext->lock); err_destroy: @@ -1149,21 +1201,25 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { - struct ib_uverbs_destroy_srq cmd; - struct ib_srq *srq; - struct ib_uobject *uobj; - int ret = -EINVAL; + struct ib_uverbs_destroy_srq cmd; + struct ib_uverbs_destroy_srq_resp resp; + struct ib_srq *srq; + struct ib_uevent_object *uobj; + struct ib_uverbs_event *evt, *tmp; + int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; down(&ib_uverbs_idr_mutex); + memset(&resp, 0, sizeof resp); + srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); if (!srq || srq->uobject->context != file->ucontext) goto out; - uobj = srq->uobject; + uobj = container_of(srq->uobject, struct ib_uevent_object, uobject); ret = ib_destroy_srq(srq); if (ret) @@ -1172,11 +1228,24 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_srq_idr, cmd.srq_handle); spin_lock_irq(&file->ucontext->lock); - list_del(&uobj->list); + list_del(&uobj->uobject.list); spin_unlock_irq(&file->ucontext->lock); + spin_lock_irq(&file->async_file.lock); + list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&file->async_file.lock); + + resp.events_reported = uobj->events_reported; + kfree(uobj); + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + out: up(&ib_uverbs_idr_mutex); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 09caf5b..ce5bdb7 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -120,7 +120,7 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context) idr_remove(&ib_uverbs_qp_idr, uobj->id); ib_destroy_qp(qp); list_del(&uobj->list); - kfree(uobj); + kfree(container_of(uobj, struct ib_uevent_object, uobject)); } list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { @@ -128,7 +128,7 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context) idr_remove(&ib_uverbs_cq_idr, uobj->id); ib_destroy_cq(cq); list_del(&uobj->list); - kfree(uobj); + kfree(container_of(uobj, struct ib_ucq_object, uobject)); } list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { @@ -136,7 +136,7 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context) idr_remove(&ib_uverbs_srq_idr, uobj->id); ib_destroy_srq(srq); list_del(&uobj->list); - kfree(uobj); + kfree(container_of(uobj, struct ib_uevent_object, uobject)); } /* XXX Free MWs */ @@ -182,7 +182,7 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_event_file *file = filp->private_data; - void *event; + struct ib_uverbs_event *event; int eventsz; int ret = 0; @@ -207,21 +207,23 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, return -ENODEV; } - if (file->is_async) { - event = list_entry(file->event_list.next, - struct ib_uverbs_async_event, list); + event = list_entry(file->event_list.next, struct ib_uverbs_event, list); + + if (file->is_async) eventsz = sizeof (struct ib_uverbs_async_event_desc); - } else { - event = list_entry(file->event_list.next, - struct ib_uverbs_comp_event, list); + else eventsz = sizeof (struct ib_uverbs_comp_event_desc); - } if (eventsz > count) { ret = -EINVAL; event = NULL; - } else + } else { list_del(file->event_list.next); + if (event->counter) { + ++(*event->counter); + list_del(&event->obj_list); + } + } spin_unlock_irq(&file->lock); @@ -257,16 +259,13 @@ static unsigned int ib_uverbs_event_poll(struct file *filp, static void ib_uverbs_event_release(struct ib_uverbs_event_file *file) { - struct list_head *entry, *tmp; + struct ib_uverbs_event *entry, *tmp; spin_lock_irq(&file->lock); if (file->fd != -1) { file->fd = -1; - list_for_each_safe(entry, tmp, &file->event_list) - if (file->is_async) - kfree(list_entry(entry, struct ib_uverbs_async_event, list)); - else - kfree(list_entry(entry, struct ib_uverbs_comp_event, list)); + list_for_each_entry_safe(entry, tmp, &file->event_list, list) + kfree(entry); } spin_unlock_irq(&file->lock); } @@ -304,18 +303,23 @@ static struct file_operations uverbs_event_fops = { void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { - struct ib_uverbs_file *file = cq_context; - struct ib_uverbs_comp_event *entry; - unsigned long flags; + struct ib_uverbs_file *file = cq_context; + struct ib_ucq_object *uobj; + struct ib_uverbs_event *entry; + unsigned long flags; entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) return; - entry->desc.cq_handle = cq->uobject->user_handle; + uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); + + entry->desc.comp.cq_handle = cq->uobject->user_handle; + entry->counter = &uobj->comp_events_reported; spin_lock_irqsave(&file->comp_file[0].lock, flags); list_add_tail(&entry->list, &file->comp_file[0].event_list); + list_add_tail(&entry->obj_list, &uobj->comp_list); spin_unlock_irqrestore(&file->comp_file[0].lock, flags); wake_up_interruptible(&file->comp_file[0].poll_wait); @@ -323,20 +327,25 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) } static void ib_uverbs_async_handler(struct ib_uverbs_file *file, - __u64 element, __u64 event) + __u64 element, __u64 event, + struct list_head *obj_list, + u32 *counter) { - struct ib_uverbs_async_event *entry; + struct ib_uverbs_event *entry; unsigned long flags; entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) return; - entry->desc.element = element; - entry->desc.event_type = event; + entry->desc.async.element = element; + entry->desc.async.event_type = event; + entry->counter = counter; spin_lock_irqsave(&file->async_file.lock, flags); list_add_tail(&entry->list, &file->async_file.event_list); + if (obj_list) + list_add_tail(&entry->obj_list, obj_list); spin_unlock_irqrestore(&file->async_file.lock, flags); wake_up_interruptible(&file->async_file.poll_wait); @@ -345,23 +354,39 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) { - ib_uverbs_async_handler(context_ptr, - event->element.cq->uobject->user_handle, - event->event); + struct ib_ucq_object *uobj; + + uobj = container_of(event->element.cq->uobject, + struct ib_ucq_object, uobject); + + ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, + event->event, &uobj->async_list, + &uobj->async_events_reported); + } void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) { - ib_uverbs_async_handler(context_ptr, - event->element.qp->uobject->user_handle, - event->event); + struct ib_uevent_object *uobj; + + uobj = container_of(event->element.qp->uobject, + struct ib_uevent_object, uobject); + + ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, + event->event, &uobj->event_list, + &uobj->events_reported); } void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) { - ib_uverbs_async_handler(context_ptr, - event->element.srq->uobject->user_handle, - event->event); + struct ib_uevent_object *uobj; + + uobj = container_of(event->element.srq->uobject, + struct ib_uevent_object, uobject); + + ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, + event->event, &uobj->event_list, + &uobj->events_reported); } static void ib_uverbs_event_handler(struct ib_event_handler *handler, @@ -370,7 +395,8 @@ static void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_uverbs_file *file = container_of(handler, struct ib_uverbs_file, event_handler); - ib_uverbs_async_handler(file, event->element.port_num, event->event); + ib_uverbs_async_handler(file, event->element.port_num, event->event, + NULL, NULL); } static int ib_uverbs_event_init(struct ib_uverbs_event_file *file, diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index 7ebb01c..fd85725 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -42,7 +42,7 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define IB_USER_VERBS_ABI_VERSION 1 +#define IB_USER_VERBS_ABI_VERSION 2 enum { IB_USER_VERBS_CMD_QUERY_PARAMS, @@ -292,7 +292,14 @@ struct ib_uverbs_create_cq_resp { }; struct ib_uverbs_destroy_cq { + __u64 response; __u32 cq_handle; + __u32 reserved; +}; + +struct ib_uverbs_destroy_cq_resp { + __u32 comp_events_reported; + __u32 async_events_reported; }; struct ib_uverbs_create_qp { @@ -372,7 +379,13 @@ struct ib_uverbs_modify_qp_resp { }; struct ib_uverbs_destroy_qp { + __u64 response; __u32 qp_handle; + __u32 reserved; +}; + +struct ib_uverbs_destroy_qp_resp { + __u32 events_reported; }; struct ib_uverbs_attach_mcast { @@ -416,7 +429,13 @@ struct ib_uverbs_modify_srq { }; struct ib_uverbs_destroy_srq { + __u64 response; __u32 srq_handle; + __u32 reserved; +}; + +struct ib_uverbs_destroy_srq_resp { + __u32 events_reported; }; #endif /* IB_USER_VERBS_H */ -- cgit v0.10.2 From 354ba39cf96e439149541acf3c6c7c0df0a3ef25 Mon Sep 17 00:00:00 2001 From: John Kingman Date: Fri, 9 Sep 2005 18:23:32 -0700 Subject: [PATCH] IB CM: support CM redir Changes to CM to support CM and port redirection (REJ reason 24). Signed-off-by: John Kingman storagegear.com> Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 4de93ba..9613654 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -173,7 +173,8 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, if (IS_ERR(ah)) return PTR_ERR(ah); - m = ib_create_send_mad(mad_agent, 1, cm_id_priv->av.pkey_index, + m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, + cm_id_priv->av.pkey_index, ah, 0, sizeof(struct ib_mad_hdr), sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr), GFP_ATOMIC); @@ -536,6 +537,7 @@ struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, cm_id_priv->id.state = IB_CM_IDLE; cm_id_priv->id.cm_handler = cm_handler; cm_id_priv->id.context = context; + cm_id_priv->id.remote_cm_qpn = 1; ret = cm_alloc_id(cm_id_priv); if (ret) goto error; diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 77fe903..5308683 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -290,6 +290,7 @@ struct ib_cm_id { enum ib_cm_lap_state lap_state; /* internal CM/debug use */ __be32 local_id; __be32 remote_id; + u32 remote_cm_qpn; /* 1 unless redirected */ }; /** -- cgit v0.10.2 From 1b205c2d2464bfecbba80227e74b412596dc5521 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 9 Sep 2005 20:52:00 -0700 Subject: [PATCH] IB: fix CM use-after-free If the CM REQ handling function gets to error2, then it frees cm_id_priv->timewait_info. But the next line goes through ib_destroy_cm_id() -> ib_send_cm_rej() -> cm_reset_to_idle(), which ends up calling cm_cleanup_timewait(), which dereferences the pointer we just freed. Make sure we clear cm_id_priv->timewait_info after freeing it, so that doesn't happen. Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 9613654..54db6d4 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1315,6 +1315,7 @@ error3: atomic_dec(&cm_id_priv->refcount); cm_deref_id(listen_cm_id_priv); cm_cleanup_timewait(cm_id_priv->timewait_info); error2: kfree(cm_id_priv->timewait_info); + cm_id_priv->timewait_info = NULL; error1: ib_destroy_cm_id(&cm_id_priv->id); return ret; } -- cgit v0.10.2