diff options
23 files changed, 470 insertions, 165 deletions
@@ -347,7 +347,7 @@ AFLAGS_KERNEL = # Needed to be compatible with the O= option LINUXINCLUDE := -Iinclude \ $(if $(KBUILD_SRC),-Iinclude2 -I$(srctree)/include) \ - -imacros include/linux/autoconf.h + -include include/linux/autoconf.h CPPFLAGS := -D__KERNEL__ $(LINUXINCLUDE) @@ -407,7 +407,7 @@ outputmakefile: # of make so .config is not included in this case either (for *config). no-dot-config-targets := clean mrproper distclean \ - cscope TAGS tags help %docs check% + cscope TAGS tags help %docs check% kernelrelease config-targets := 0 mixed-targets := 0 diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index aed5ca2..5ea741f 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -31,7 +31,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: user_mad.c 2814 2005-07-06 19:14:09Z halr $ + * $Id: user_mad.c 4010 2005-11-09 23:11:56Z roland $ */ #include <linux/module.h> @@ -110,13 +110,13 @@ struct ib_umad_device { }; struct ib_umad_file { - struct ib_umad_port *port; - struct list_head recv_list; - struct list_head port_list; - spinlock_t recv_lock; - wait_queue_head_t recv_wait; - struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; - struct ib_mr *mr[IB_UMAD_MAX_AGENTS]; + struct ib_umad_port *port; + struct list_head recv_list; + struct list_head port_list; + spinlock_t recv_lock; + wait_queue_head_t recv_wait; + struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; + int agents_dead; }; struct ib_umad_packet { @@ -145,6 +145,12 @@ static void ib_umad_release_dev(struct kref *ref) kfree(dev); } +/* caller must hold port->mutex at least for reading */ +static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) +{ + return file->agents_dead ? NULL : file->agent[id]; +} + static int queue_packet(struct ib_umad_file *file, struct ib_mad_agent *agent, struct ib_umad_packet *packet) @@ -152,10 +158,11 @@ static int queue_packet(struct ib_umad_file *file, int ret = 1; down_read(&file->port->mutex); + for (packet->mad.hdr.id = 0; packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; packet->mad.hdr.id++) - if (agent == file->agent[packet->mad.hdr.id]) { + if (agent == __get_agent(file, packet->mad.hdr.id)) { spin_lock_irq(&file->recv_lock); list_add_tail(&packet->list, &file->recv_list); spin_unlock_irq(&file->recv_lock); @@ -327,7 +334,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, down_read(&file->port->mutex); - agent = file->agent[packet->mad.hdr.id]; + agent = __get_agent(file, packet->mad.hdr.id); if (!agent) { ret = -EINVAL; goto err_up; @@ -481,7 +488,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg) } for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id) - if (!file->agent[agent_id]) + if (!__get_agent(file, agent_id)) goto found; ret = -ENOMEM; @@ -505,29 +512,15 @@ found: goto out; } - file->agent[agent_id] = agent; - - file->mr[agent_id] = ib_get_dma_mr(agent->qp->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(file->mr[agent_id])) { - ret = -ENOMEM; - goto err; - } - if (put_user(agent_id, (u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) { ret = -EFAULT; - goto err_mr; + ib_unregister_mad_agent(agent); + goto out; } + file->agent[agent_id] = agent; ret = 0; - goto out; - -err_mr: - ib_dereg_mr(file->mr[agent_id]); - -err: - file->agent[agent_id] = NULL; - ib_unregister_mad_agent(agent); out: up_write(&file->port->mutex); @@ -536,27 +529,29 @@ out: static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg) { + struct ib_mad_agent *agent = NULL; u32 id; int ret = 0; - down_write(&file->port->mutex); + if (get_user(id, (u32 __user *) arg)) + return -EFAULT; - if (get_user(id, (u32 __user *) arg)) { - ret = -EFAULT; - goto out; - } + down_write(&file->port->mutex); - if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !file->agent[id]) { + if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { ret = -EINVAL; goto out; } - ib_dereg_mr(file->mr[id]); - ib_unregister_mad_agent(file->agent[id]); + agent = file->agent[id]; file->agent[id] = NULL; out: up_write(&file->port->mutex); + + if (agent) + ib_unregister_mad_agent(agent); + return ret; } @@ -621,23 +616,29 @@ static int ib_umad_close(struct inode *inode, struct file *filp) struct ib_umad_file *file = filp->private_data; struct ib_umad_device *dev = file->port->umad_dev; struct ib_umad_packet *packet, *tmp; + int already_dead; int i; down_write(&file->port->mutex); - for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i) - if (file->agent[i]) { - ib_dereg_mr(file->mr[i]); - ib_unregister_mad_agent(file->agent[i]); - } + + already_dead = file->agents_dead; + file->agents_dead = 1; list_for_each_entry_safe(packet, tmp, &file->recv_list, list) kfree(packet); list_del(&file->port_list); - up_write(&file->port->mutex); - kfree(file); + downgrade_write(&file->port->mutex); + + if (!already_dead) + for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i) + if (file->agent[i]) + ib_unregister_mad_agent(file->agent[i]); + + up_read(&file->port->mutex); + kfree(file); kref_put(&dev->ref, ib_umad_release_dev); return 0; @@ -801,7 +802,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, goto err_class; port->sm_dev->owner = THIS_MODULE; port->sm_dev->ops = &umad_sm_fops; - kobject_set_name(&port->dev->kobj, "issm%d", port->dev_num); + kobject_set_name(&port->sm_dev->kobj, "issm%d", port->dev_num); if (cdev_add(port->sm_dev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1)) goto err_sm_cdev; @@ -863,14 +864,36 @@ static void ib_umad_kill_port(struct ib_umad_port *port) port->ib_dev = NULL; - list_for_each_entry(file, &port->file_list, port_list) - for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) { - if (!file->agent[id]) - continue; - ib_dereg_mr(file->mr[id]); - ib_unregister_mad_agent(file->agent[id]); - file->agent[id] = NULL; - } + /* + * Now go through the list of files attached to this port and + * unregister all of their MAD agents. We need to hold + * port->mutex while doing this to avoid racing with + * ib_umad_close(), but we can't hold the mutex for writing + * while calling ib_unregister_mad_agent(), since that might + * deadlock by calling back into queue_packet(). So we + * downgrade our lock to a read lock, and then drop and + * reacquire the write lock for the next iteration. + * + * We do list_del_init() on the file's list_head so that the + * list_del in ib_umad_close() is still OK, even after the + * file is removed from the list. + */ + while (!list_empty(&port->file_list)) { + file = list_entry(port->file_list.next, struct ib_umad_file, + port_list); + + file->agents_dead = 1; + list_del_init(&file->port_list); + + downgrade_write(&port->mutex); + + for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) + if (file->agent[id]) + ib_unregister_mad_agent(file->agent[id]); + + up_read(&port->mutex); + down_write(&port->mutex); + } up_write(&port->mutex); @@ -913,7 +936,7 @@ static void ib_umad_add_one(struct ib_device *device) err: while (--i >= s) - ib_umad_kill_port(&umad_dev->port[i]); + ib_umad_kill_port(&umad_dev->port[i - s]); kref_put(&umad_dev->ref, ib_umad_release_dev); } diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 63a7415..ed45da8 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -708,7 +708,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, resp->wc[i].opcode = wc[i].opcode; resp->wc[i].vendor_err = wc[i].vendor_err; resp->wc[i].byte_len = wc[i].byte_len; - resp->wc[i].imm_data = wc[i].imm_data; + resp->wc[i].imm_data = (__u32 __force) wc[i].imm_data; resp->wc[i].qp_num = wc[i].qp_num; resp->wc[i].src_qp = wc[i].src_qp; resp->wc[i].wc_flags = wc[i].wc_flags; @@ -908,7 +908,12 @@ retry: if (ret) goto err_destroy; - resp.qp_handle = uobj->uobject.id; + resp.qp_handle = uobj->uobject.id; + resp.max_recv_sge = attr.cap.max_recv_sge; + resp.max_send_sge = attr.cap.max_send_sge; + resp.max_recv_wr = attr.cap.max_recv_wr; + resp.max_send_wr = attr.cap.max_send_wr; + resp.max_inline_data = attr.cap.max_inline_data; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { @@ -1135,7 +1140,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, next->num_sge = user_wr->num_sge; next->opcode = user_wr->opcode; next->send_flags = user_wr->send_flags; - next->imm_data = user_wr->imm_data; + next->imm_data = (__be32 __force) user_wr->imm_data; if (qp->qp_type == IB_QPT_UD) { next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr, @@ -1701,7 +1706,6 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, } attr.max_wr = cmd.max_wr; - attr.max_sge = cmd.max_sge; attr.srq_limit = cmd.srq_limit; ret = ib_modify_srq(srq, &attr, cmd.attr_mask); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 4186cc8..4c15e11 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -325,16 +325,8 @@ EXPORT_SYMBOL(ib_destroy_cq); int ib_resize_cq(struct ib_cq *cq, int cqe) { - int ret; - - if (!cq->device->resize_cq) - return -ENOSYS; - - ret = cq->device->resize_cq(cq, &cqe); - if (!ret) - cq->cqe = cqe; - - return ret; + return cq->device->resize_cq ? + cq->device->resize_cq(cq, cqe) : -ENOSYS; } EXPORT_SYMBOL(ib_resize_cq); diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index 25ebab6..c3bec74 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -97,7 +97,7 @@ static void poll_catas(unsigned long dev_ptr) } spin_lock_irqsave(&catas_lock, flags); - if (dev->catas_err.stop) + if (!dev->catas_err.stop) mod_timer(&dev->catas_err.timer, jiffies + MTHCA_CATAS_POLL_INTERVAL); spin_unlock_irqrestore(&catas_lock, flags); diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 49f211d..9ed3458 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1060,6 +1060,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, dev_lim->hca.arbel.resize_srq = field & 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET); dev_lim->max_sg = min_t(int, field, dev_lim->max_sg); + MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET); + dev_lim->max_desc_sz = min_t(int, size, dev_lim->max_desc_sz); MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET); dev_lim->mpt_entry_sz = size; MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET); diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index f98e235..4a8adce 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -258,7 +258,7 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, { struct mthca_cq *cq; struct mthca_cqe *cqe; - int prod_index; + u32 prod_index; int nfreed = 0; spin_lock_irq(&dev->cq_table.lock); @@ -293,19 +293,15 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, * Now sweep backwards through the CQ, removing CQ entries * that match our QP by copying older entries on top of them. */ - while (prod_index > cq->cons_index) { - cqe = get_cqe(cq, (prod_index - 1) & cq->ibcq.cqe); + while ((int) --prod_index - (int) cq->cons_index >= 0) { + cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); if (cqe->my_qpn == cpu_to_be32(qpn)) { if (srq) mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe)); ++nfreed; - } - else if (nfreed) - memcpy(get_cqe(cq, (prod_index - 1 + nfreed) & - cq->ibcq.cqe), - cqe, - MTHCA_CQ_ENTRY_SIZE); - --prod_index; + } else if (nfreed) + memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe), + cqe, MTHCA_CQ_ENTRY_SIZE); } if (nfreed) { diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index e7e5d3b..497ff79 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -131,6 +131,7 @@ struct mthca_limits { int max_sg; int num_qps; int max_wqes; + int max_desc_sz; int max_qp_init_rdma; int reserved_qps; int num_srqs; @@ -154,6 +155,7 @@ struct mthca_limits { int reserved_mcgs; int num_pds; int reserved_pds; + u32 page_size_cap; u32 flags; u8 port_width_cap; }; diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 45c6328..147f248 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -168,6 +168,7 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim mdev->limits.max_srq_wqes = dev_lim->max_srq_sz; mdev->limits.reserved_srqs = dev_lim->reserved_srqs; mdev->limits.reserved_eecs = dev_lim->reserved_eecs; + mdev->limits.max_desc_sz = dev_lim->max_desc_sz; /* * Subtract 1 from the limit because we need to allocate a * spare CQE so the HCA HW can tell the difference between an @@ -181,6 +182,7 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim mdev->limits.reserved_uars = dev_lim->reserved_uars; mdev->limits.reserved_pds = dev_lim->reserved_pds; mdev->limits.port_width_cap = dev_lim->max_port_width; + mdev->limits.page_size_cap = ~(u32) (dev_lim->min_page_sz - 1); mdev->limits.flags = dev_lim->flags; /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 6b01666..4cc7e28 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -90,6 +90,7 @@ static int mthca_query_device(struct ib_device *ibdev, memcpy(&props->node_guid, out_mad->data + 12, 8); props->max_mr_size = ~0ull; + props->page_size_cap = mdev->limits.page_size_cap; props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; props->max_qp_wr = mdev->limits.max_wqes; props->max_sge = mdev->limits.max_sg; @@ -615,11 +616,11 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, return ERR_PTR(err); } - init_attr->cap.max_inline_data = 0; init_attr->cap.max_send_wr = qp->sq.max; init_attr->cap.max_recv_wr = qp->rq.max; init_attr->cap.max_send_sge = qp->sq.max_gs; init_attr->cap.max_recv_sge = qp->rq.max_gs; + init_attr->cap.max_inline_data = qp->max_inline_data; return &qp->ibqp; } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index bcd4b01..1e73947 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -251,6 +251,7 @@ struct mthca_qp { struct mthca_wq sq; enum ib_sig_type sq_policy; int send_wqe_offset; + int max_inline_data; u64 *wrid; union mthca_buf queue; diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 8852ea4..760c418d 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -885,6 +885,48 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) return err; } +static void mthca_adjust_qp_caps(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_qp *qp) +{ + int max_data_size; + + /* + * Calculate the maximum size of WQE s/g segments, excluding + * the next segment and other non-data segments. + */ + max_data_size = min(dev->limits.max_desc_sz, 1 << qp->sq.wqe_shift) - + sizeof (struct mthca_next_seg); + + switch (qp->transport) { + case MLX: + max_data_size -= 2 * sizeof (struct mthca_data_seg); + break; + + case UD: + if (mthca_is_memfree(dev)) + max_data_size -= sizeof (struct mthca_arbel_ud_seg); + else + max_data_size -= sizeof (struct mthca_tavor_ud_seg); + break; + + default: + max_data_size -= sizeof (struct mthca_raddr_seg); + break; + } + + /* We don't support inline data for kernel QPs (yet). */ + if (!pd->ibpd.uobject) + qp->max_inline_data = 0; + else + qp->max_inline_data = max_data_size - MTHCA_INLINE_HEADER_SIZE; + + qp->sq.max_gs = max_data_size / sizeof (struct mthca_data_seg); + qp->rq.max_gs = (min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) - + sizeof (struct mthca_next_seg)) / + sizeof (struct mthca_data_seg); +} + /* * Allocate and register buffer for WQEs. qp->rq.max, sq.max, * rq.max_gs and sq.max_gs must all be assigned. @@ -902,27 +944,53 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, size = sizeof (struct mthca_next_seg) + qp->rq.max_gs * sizeof (struct mthca_data_seg); + if (size > dev->limits.max_desc_sz) + return -EINVAL; + for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size; qp->rq.wqe_shift++) ; /* nothing */ - size = sizeof (struct mthca_next_seg) + - qp->sq.max_gs * sizeof (struct mthca_data_seg); + size = qp->sq.max_gs * sizeof (struct mthca_data_seg); switch (qp->transport) { case MLX: size += 2 * sizeof (struct mthca_data_seg); break; + case UD: - if (mthca_is_memfree(dev)) - size += sizeof (struct mthca_arbel_ud_seg); - else - size += sizeof (struct mthca_tavor_ud_seg); + size += mthca_is_memfree(dev) ? + sizeof (struct mthca_arbel_ud_seg) : + sizeof (struct mthca_tavor_ud_seg); break; + + case UC: + size += sizeof (struct mthca_raddr_seg); + break; + + case RC: + size += sizeof (struct mthca_raddr_seg); + /* + * An atomic op will require an atomic segment, a + * remote address segment and one scatter entry. + */ + size = max_t(int, size, + sizeof (struct mthca_atomic_seg) + + sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_data_seg)); + break; + default: - /* bind seg is as big as atomic + raddr segs */ - size += sizeof (struct mthca_bind_seg); + break; } + /* Make sure that we have enough space for a bind request */ + size = max_t(int, size, sizeof (struct mthca_bind_seg)); + + size += sizeof (struct mthca_next_seg); + + if (size > dev->limits.max_desc_sz) + return -EINVAL; + for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size; qp->sq.wqe_shift++) ; /* nothing */ @@ -1066,6 +1134,8 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, return ret; } + mthca_adjust_qp_caps(dev, pd, qp); + /* * If this is a userspace QP, we're done now. The doorbells * will be allocated and buffers will be initialized in @@ -1486,8 +1556,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } wqe += sizeof (struct mthca_atomic_seg); - size += sizeof (struct mthca_raddr_seg) / 16 + - sizeof (struct mthca_atomic_seg); + size += (sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_atomic_seg)) / 16; break; case IB_WR_RDMA_WRITE: @@ -1637,6 +1707,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); + __be32 doorbell[2]; unsigned long flags; int err = 0; int nreq; @@ -1654,6 +1725,22 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, ind = qp->rq.next_ind; for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { + nreq = 0; + + doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); + doorbell[1] = cpu_to_be32(qp->qpn << 8); + + wmb(); + + mthca_write64(doorbell, + dev->kar + MTHCA_RECEIVE_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + + qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB; + size0 = 0; + } + if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { mthca_err(dev, "RQ %06x full (%u head, %u tail," " %d max, %d nreq)\n", qp->qpn, @@ -1711,8 +1798,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, out: if (likely(nreq)) { - __be32 doorbell[2]; - doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq); @@ -1806,8 +1891,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } wqe += sizeof (struct mthca_atomic_seg); - size += sizeof (struct mthca_raddr_seg) / 16 + - sizeof (struct mthca_atomic_seg); + size += (sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_atomic_seg)) / 16; break; case IB_WR_RDMA_READ: diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 26d5161..f7d2342 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -417,6 +417,7 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, { struct mthca_dev *dev = to_mdev(ibsrq->device); struct mthca_srq *srq = to_msrq(ibsrq); + __be32 doorbell[2]; unsigned long flags; int err = 0; int first_ind; @@ -432,6 +433,25 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, first_ind = srq->first_free; for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { + nreq = 0; + + doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift); + doorbell[1] = cpu_to_be32(srq->srqn << 8); + + /* + * Make sure that descriptors are written + * before doorbell is rung. + */ + wmb(); + + mthca_write64(doorbell, + dev->kar + MTHCA_RECEIVE_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + + first_ind = srq->first_free; + } + ind = srq->first_free; if (ind < 0) { @@ -494,8 +514,6 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, } if (likely(nreq)) { - __be32 doorbell[2]; - doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift); doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq); diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h index 1f4c0ff..73f1c0b 100644 --- a/drivers/infiniband/hw/mthca/mthca_wqe.h +++ b/drivers/infiniband/hw/mthca/mthca_wqe.h @@ -49,7 +49,8 @@ enum { }; enum { - MTHCA_INVAL_LKEY = 0x100 + MTHCA_INVAL_LKEY = 0x100, + MTHCA_TAVOR_MAX_WQES_PER_RECV_DB = 256 }; struct mthca_next_seg { diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 0095acc..9923a15 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -179,6 +179,7 @@ struct ipoib_dev_priv { #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG struct list_head fs_list; struct dentry *mcg_dentry; + struct dentry *path_dentry; #endif }; @@ -270,7 +271,6 @@ void ipoib_mcast_dev_flush(struct net_device *dev); #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev); -void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter); int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter); void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, union ib_gid *gid, @@ -278,6 +278,11 @@ void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, unsigned int *queuelen, unsigned int *complete, unsigned int *send_only); + +struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev); +int ipoib_path_iter_next(struct ipoib_path_iter *iter); +void ipoib_path_iter_read(struct ipoib_path_iter *iter, + struct ipoib_path *path); #endif int ipoib_mcast_attach(struct net_device *dev, u16 mlid, @@ -299,13 +304,13 @@ void ipoib_pkey_poll(void *dev); int ipoib_pkey_dev_delay_open(struct net_device *dev); #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG -int ipoib_create_debug_file(struct net_device *dev); -void ipoib_delete_debug_file(struct net_device *dev); +void ipoib_create_debug_files(struct net_device *dev); +void ipoib_delete_debug_files(struct net_device *dev); int ipoib_register_debugfs(void); void ipoib_unregister_debugfs(void); #else -static inline int ipoib_create_debug_file(struct net_device *dev) { return 0; } -static inline void ipoib_delete_debug_file(struct net_device *dev) { } +static inline void ipoib_create_debug_files(struct net_device *dev) { } +static inline void ipoib_delete_debug_files(struct net_device *dev) { } static inline int ipoib_register_debugfs(void) { return 0; } static inline void ipoib_unregister_debugfs(void) { } #endif diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 38b150f..685258e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -43,6 +43,18 @@ struct file_operations; static struct dentry *ipoib_root; +static void format_gid(union ib_gid *gid, char *buf) +{ + int i, n; + + for (n = 0, i = 0; i < 8; ++i) { + n += sprintf(buf + n, "%x", + be16_to_cpu(((__be16 *) gid->raw)[i])); + if (i < 7) + buf[n++] = ':'; + } +} + static void *ipoib_mcg_seq_start(struct seq_file *file, loff_t *pos) { struct ipoib_mcast_iter *iter; @@ -54,7 +66,7 @@ static void *ipoib_mcg_seq_start(struct seq_file *file, loff_t *pos) while (n--) { if (ipoib_mcast_iter_next(iter)) { - ipoib_mcast_iter_free(iter); + kfree(iter); return NULL; } } @@ -70,7 +82,7 @@ static void *ipoib_mcg_seq_next(struct seq_file *file, void *iter_ptr, (*pos)++; if (ipoib_mcast_iter_next(iter)) { - ipoib_mcast_iter_free(iter); + kfree(iter); return NULL; } @@ -87,32 +99,32 @@ static int ipoib_mcg_seq_show(struct seq_file *file, void *iter_ptr) struct ipoib_mcast_iter *iter = iter_ptr; char gid_buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"]; union ib_gid mgid; - int i, n; unsigned long created; unsigned int queuelen, complete, send_only; - if (iter) { - ipoib_mcast_iter_read(iter, &mgid, &created, &queuelen, - &complete, &send_only); + if (!iter) + return 0; - for (n = 0, i = 0; i < sizeof mgid / 2; ++i) { - n += sprintf(gid_buf + n, "%x", - be16_to_cpu(((__be16 *) mgid.raw)[i])); - if (i < sizeof mgid / 2 - 1) - gid_buf[n++] = ':'; - } - } + ipoib_mcast_iter_read(iter, &mgid, &created, &queuelen, + &complete, &send_only); - seq_printf(file, "GID: %*s", -(1 + (int) sizeof gid_buf), gid_buf); + format_gid(&mgid, gid_buf); seq_printf(file, - " created: %10ld queuelen: %4d complete: %d send_only: %d\n", - created, queuelen, complete, send_only); + "GID: %s\n" + " created: %10ld\n" + " queuelen: %9d\n" + " complete: %9s\n" + " send_only: %8s\n" + "\n", + gid_buf, created, queuelen, + complete ? "yes" : "no", + send_only ? "yes" : "no"); return 0; } -static struct seq_operations ipoib_seq_ops = { +static struct seq_operations ipoib_mcg_seq_ops = { .start = ipoib_mcg_seq_start, .next = ipoib_mcg_seq_next, .stop = ipoib_mcg_seq_stop, @@ -124,7 +136,7 @@ static int ipoib_mcg_open(struct inode *inode, struct file *file) struct seq_file *seq; int ret; - ret = seq_open(file, &ipoib_seq_ops); + ret = seq_open(file, &ipoib_mcg_seq_ops); if (ret) return ret; @@ -134,7 +146,7 @@ static int ipoib_mcg_open(struct inode *inode, struct file *file) return 0; } -static struct file_operations ipoib_fops = { +static struct file_operations ipoib_mcg_fops = { .owner = THIS_MODULE, .open = ipoib_mcg_open, .read = seq_read, @@ -142,25 +154,138 @@ static struct file_operations ipoib_fops = { .release = seq_release }; -int ipoib_create_debug_file(struct net_device *dev) +static void *ipoib_path_seq_start(struct seq_file *file, loff_t *pos) +{ + struct ipoib_path_iter *iter; + loff_t n = *pos; + + iter = ipoib_path_iter_init(file->private); + if (!iter) + return NULL; + + while (n--) { + if (ipoib_path_iter_next(iter)) { + kfree(iter); + return NULL; + } + } + + return iter; +} + +static void *ipoib_path_seq_next(struct seq_file *file, void *iter_ptr, + loff_t *pos) +{ + struct ipoib_path_iter *iter = iter_ptr; + + (*pos)++; + + if (ipoib_path_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +static void ipoib_path_seq_stop(struct seq_file *file, void *iter_ptr) +{ + /* nothing for now */ +} + +static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr) +{ + struct ipoib_path_iter *iter = iter_ptr; + char gid_buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"]; + struct ipoib_path path; + int rate; + + if (!iter) + return 0; + + ipoib_path_iter_read(iter, &path); + + format_gid(&path.pathrec.dgid, gid_buf); + + seq_printf(file, + "GID: %s\n" + " complete: %6s\n", + gid_buf, path.pathrec.dlid ? "yes" : "no"); + + if (path.pathrec.dlid) { + rate = ib_sa_rate_enum_to_int(path.pathrec.rate) * 25; + + seq_printf(file, + " DLID: 0x%04x\n" + " SL: %12d\n" + " rate: %*d%s Gb/sec\n", + be16_to_cpu(path.pathrec.dlid), + path.pathrec.sl, + 10 - ((rate % 10) ? 2 : 0), + rate / 10, rate % 10 ? ".5" : ""); + } + + seq_putc(file, '\n'); + + return 0; +} + +static struct seq_operations ipoib_path_seq_ops = { + .start = ipoib_path_seq_start, + .next = ipoib_path_seq_next, + .stop = ipoib_path_seq_stop, + .show = ipoib_path_seq_show, +}; + +static int ipoib_path_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int ret; + + ret = seq_open(file, &ipoib_path_seq_ops); + if (ret) + return ret; + + seq = file->private_data; + seq->private = inode->u.generic_ip; + + return 0; +} + +static struct file_operations ipoib_path_fops = { + .owner = THIS_MODULE, + .open = ipoib_path_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +void ipoib_create_debug_files(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - char name[IFNAMSIZ + sizeof "_mcg"]; + char name[IFNAMSIZ + sizeof "_path"]; snprintf(name, sizeof name, "%s_mcg", dev->name); - priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO, - ipoib_root, dev, &ipoib_fops); - - return priv->mcg_dentry ? 0 : -ENOMEM; + ipoib_root, dev, &ipoib_mcg_fops); + if (!priv->mcg_dentry) + ipoib_warn(priv, "failed to create mcg debug file\n"); + + snprintf(name, sizeof name, "%s_path", dev->name); + priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO, + ipoib_root, dev, &ipoib_path_fops); + if (!priv->path_dentry) + ipoib_warn(priv, "failed to create path debug file\n"); } -void ipoib_delete_debug_file(struct net_device *dev) +void ipoib_delete_debug_files(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); if (priv->mcg_dentry) debugfs_remove(priv->mcg_dentry); + if (priv->path_dentry) + debugfs_remove(priv->path_dentry); } int ipoib_register_debugfs(void) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index ce02962..2fa3075 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -58,6 +58,11 @@ module_param_named(debug_level, ipoib_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); #endif +struct ipoib_path_iter { + struct net_device *dev; + struct ipoib_path path; +}; + static const u8 ipv4_bcast_addr[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, @@ -250,6 +255,64 @@ static void path_free(struct net_device *dev, struct ipoib_path *path) kfree(path); } +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG + +struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev) +{ + struct ipoib_path_iter *iter; + + iter = kmalloc(sizeof *iter, GFP_KERNEL); + if (!iter) + return NULL; + + iter->dev = dev; + memset(iter->path.pathrec.dgid.raw, 0, 16); + + if (ipoib_path_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +int ipoib_path_iter_next(struct ipoib_path_iter *iter) +{ + struct ipoib_dev_priv *priv = netdev_priv(iter->dev); + struct rb_node *n; + struct ipoib_path *path; + int ret = 1; + + spin_lock_irq(&priv->lock); + + n = rb_first(&priv->path_tree); + + while (n) { + path = rb_entry(n, struct ipoib_path, rb_node); + + if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw, + sizeof (union ib_gid)) < 0) { + iter->path = *path; + ret = 0; + break; + } + + n = rb_next(n); + } + + spin_unlock_irq(&priv->lock); + + return ret; +} + +void ipoib_path_iter_read(struct ipoib_path_iter *iter, + struct ipoib_path *path) +{ + *path = iter->path; +} + +#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ + void ipoib_flush_paths(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -763,7 +826,7 @@ void ipoib_dev_cleanup(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv; - ipoib_delete_debug_file(dev); + ipoib_delete_debug_files(dev); /* Delete any child interfaces first */ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { @@ -972,8 +1035,7 @@ static struct net_device *ipoib_add_port(const char *format, goto register_failed; } - if (ipoib_create_debug_file(priv->dev)) - goto debug_failed; + ipoib_create_debug_files(priv->dev); if (ipoib_add_pkey_attr(priv->dev)) goto sysfs_failed; @@ -987,9 +1049,7 @@ static struct net_device *ipoib_add_port(const char *format, return priv->dev; sysfs_failed: - ipoib_delete_debug_file(priv->dev); - -debug_failed: + ipoib_delete_debug_files(priv->dev); unregister_netdev(priv->dev); register_failed: diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 3ecf78a..c33ed87 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -120,12 +120,8 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) if (mcast->ah) ipoib_put_ah(mcast->ah); - while (!skb_queue_empty(&mcast->pkt_queue)) { - struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); - - skb->dev = dev; - dev_kfree_skb_any(skb); - } + while (!skb_queue_empty(&mcast->pkt_queue)) + dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); kfree(mcast); } @@ -317,13 +313,8 @@ ipoib_mcast_sendonly_join_complete(int status, IPOIB_GID_ARG(mcast->mcmember.mgid), status); /* Flush out any queued packets */ - while (!skb_queue_empty(&mcast->pkt_queue)) { - struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); - - skb->dev = dev; - - dev_kfree_skb_any(skb); - } + while (!skb_queue_empty(&mcast->pkt_queue)) + dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); /* Clear the busy flag so we try again */ clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); @@ -928,21 +919,16 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) return NULL; iter->dev = dev; - memset(iter->mgid.raw, 0, sizeof iter->mgid); + memset(iter->mgid.raw, 0, 16); if (ipoib_mcast_iter_next(iter)) { - ipoib_mcast_iter_free(iter); + kfree(iter); return NULL; } return iter; } -void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter) -{ - kfree(iter); -} - int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter) { struct ipoib_dev_priv *priv = netdev_priv(iter->dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 332d730..d280b34 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -113,8 +113,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) priv->parent = ppriv->dev; - if (ipoib_create_debug_file(priv->dev)) - goto debug_failed; + ipoib_create_debug_files(priv->dev); if (ipoib_add_pkey_attr(priv->dev)) goto sysfs_failed; @@ -130,9 +129,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) return 0; sysfs_failed: - ipoib_delete_debug_file(priv->dev); - -debug_failed: + ipoib_delete_debug_files(priv->dev); unregister_netdev(priv->dev); register_failed: diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index 072f3a2..5ff1490 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -43,7 +43,7 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define IB_USER_VERBS_ABI_VERSION 3 +#define IB_USER_VERBS_ABI_VERSION 4 enum { IB_USER_VERBS_CMD_GET_CONTEXT, @@ -333,6 +333,11 @@ struct ib_uverbs_create_qp { struct ib_uverbs_create_qp_resp { __u32 qp_handle; __u32 qpn; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; }; /* @@ -552,9 +557,7 @@ struct ib_uverbs_modify_srq { __u32 srq_handle; __u32 attr_mask; __u32 max_wr; - __u32 max_sge; __u32 srq_limit; - __u32 reserved; __u64 driver_data[0]; }; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index f72d46d..a7f4c35 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -881,7 +881,7 @@ struct ib_device { struct ib_ucontext *context, struct ib_udata *udata); int (*destroy_cq)(struct ib_cq *cq); - int (*resize_cq)(struct ib_cq *cq, int *cqe); + int (*resize_cq)(struct ib_cq *cq, int cqe); int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int (*peek_cq)(struct ib_cq *cq, int wc_cnt); diff --git a/kernel/sys.c b/kernel/sys.c index c43b3e2..bce933e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1497,6 +1497,8 @@ EXPORT_SYMBOL(in_egroup_p); DECLARE_RWSEM(uts_sem); +EXPORT_SYMBOL(uts_sem); + asmlinkage long sys_newuname(struct new_utsname __user * name) { int errno = 0; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ff81b5c..987225b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1330,7 +1330,7 @@ void show_free_areas(void) } else printk("\n"); - for_each_cpu(cpu) { + for_each_online_cpu(cpu) { struct per_cpu_pageset *pageset; pageset = zone_pcp(zone, cpu); |