From 8a095030f7551d07860fd54890d1bcdc77630c29 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Wed, 17 Oct 2012 03:23:55 +0000 Subject: IB/mlx4: Fix build error on platforms where UL is not 64 bits Line 110 uses UL as a compiler cast for the 0x constant, but it's not large enough to hold a 64-bit value on a 32-bit arch. Signed-off-by: Doug Ledford [ Use "-1" instead of "FFFFFFFFFFFFFFFFULL". - Roland ] Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index d2fb38d..2f215b9 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -107,7 +107,7 @@ static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index { if (index >= NUM_ALIAS_GUID_PER_PORT) { pr_err("%s: ERROR: asked for index:%d\n", __func__, index); - return (__force __be64) ((u64) 0xFFFFFFFFFFFFFFFFUL); + return (__force __be64) -1; } return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index]; } -- cgit v0.10.2 From 2c75d2ccb6e5ffb96ce8624ef4c1f7ba5bd96499 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 17 Oct 2012 16:42:58 +0000 Subject: IB/mlx4: Fix QP1 P_Key processing in the Primary Physical Function (PPF) In the MAD paravirtualization code, one of the checks performed when forwarding QP1 (GSI) packets from wire to slave was a P_Key check: the P_Key received in the MAD must be present in the guest's paravirtualized P_Key table, and at least one of the (packet P_Key, guest P_Key) must be a full-membership P_Key. However, if everyone involved has only limited membership in the default P_Key, then packets sent by full-member remote hosts arrive at the PPF but are not passed on to the VFs with the current P_Key1 check. Fix this as follows: 1. Don't care if P_Key received over wire is full or not. If it successfully passed HW checks on the real QP1, then simply pass it to guest regardless of whether the guest has full or limited membership in its P_Key table. 2. If the guest (including paravirtualized master) has both full and limited P_Key forms in its table, preferentially pass the paravirtualized P_Key index of the full P_Key form in the tunnel header. 3. In the multicast join flow (mlx4/mcg.c), use the index for the default P_Key (wherever it is located) in replies generated from within the mcg module (previously, P_Key index 0 was used in all cases). Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 21a7941..0a903c1 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -409,38 +409,45 @@ int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid) } -static int get_pkey_phys_indices(struct mlx4_ib_dev *ibdev, u8 port, u8 ph_pkey_ix, - u8 *full_pk_ix, u8 *partial_pk_ix, - int *is_full_member) +static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave, + u8 port, u16 pkey, u16 *ix) { - u16 search_pkey; - int fm; - int err = 0; - u16 pk; + int i, ret; + u8 unassigned_pkey_ix, pkey_ix, partial_ix = 0xFF; + u16 slot_pkey; - err = ib_get_cached_pkey(&ibdev->ib_dev, port, ph_pkey_ix, &search_pkey); - if (err) - return err; + if (slave == mlx4_master_func_num(dev->dev)) + return ib_find_cached_pkey(&dev->ib_dev, port, pkey, ix); - fm = (search_pkey & 0x8000) ? 1 : 0; - if (fm) { - *full_pk_ix = ph_pkey_ix; - search_pkey &= 0x7FFF; - } else { - *partial_pk_ix = ph_pkey_ix; - search_pkey |= 0x8000; - } + unassigned_pkey_ix = dev->dev->phys_caps.pkey_phys_table_len[port] - 1; - if (ib_find_exact_cached_pkey(&ibdev->ib_dev, port, search_pkey, &pk)) - pk = 0xFFFF; + for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) { + if (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == unassigned_pkey_ix) + continue; - if (fm) - *partial_pk_ix = (pk & 0xFF); - else - *full_pk_ix = (pk & 0xFF); + pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][i]; - *is_full_member = fm; - return err; + ret = ib_get_cached_pkey(&dev->ib_dev, port, pkey_ix, &slot_pkey); + if (ret) + continue; + if ((slot_pkey & 0x7FFF) == (pkey & 0x7FFF)) { + if (slot_pkey & 0x8000) { + *ix = (u16) pkey_ix; + return 0; + } else { + /* take first partial pkey index found */ + if (partial_ix == 0xFF) + partial_ix = pkey_ix; + } + } + } + + if (partial_ix < 0xFF) { + *ix = (u16) partial_ix; + return 0; + } + + return -EINVAL; } int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, @@ -458,10 +465,8 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, unsigned tun_tx_ix = 0; int dqpn; int ret = 0; - int i; - int is_full_member = 0; u16 tun_pkey_ix; - u8 ph_pkey_ix, full_pk_ix = 0, partial_pk_ix = 0; + u16 cached_pkey; if (dest_qpt > IB_QPT_GSI) return -EINVAL; @@ -481,27 +486,17 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, else tun_qp = &tun_ctx->qp[1]; - /* compute pkey index for slave */ - /* get physical pkey -- virtualized Dom0 pkey to phys*/ + /* compute P_Key index to put in tunnel header for slave */ if (dest_qpt) { - ph_pkey_ix = - dev->pkeys.virt2phys_pkey[mlx4_master_func_num(dev->dev)][port - 1][wc->pkey_index]; - - /* now, translate this to the slave pkey index */ - ret = get_pkey_phys_indices(dev, port, ph_pkey_ix, &full_pk_ix, - &partial_pk_ix, &is_full_member); + u16 pkey_ix; + ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey); if (ret) return -EINVAL; - for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) { - if ((dev->pkeys.virt2phys_pkey[slave][port - 1][i] == full_pk_ix) || - (is_full_member && - (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == partial_pk_ix))) - break; - } - if (i == dev->dev->caps.pkey_table_len[port]) + ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix); + if (ret) return -EINVAL; - tun_pkey_ix = i; + tun_pkey_ix = pkey_ix; } else tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0]; diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 3c3b54c..44ff480 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -233,7 +233,8 @@ static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx, ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr); - wc.pkey_index = 0; + if (ib_find_cached_pkey(&dev->ib_dev, ctx->port, IB_DEFAULT_PKEY_FULL, &wc.pkey_index)) + return -EINVAL; wc.sl = 0; wc.dlid_path_bits = 0; wc.port_num = ctx->port; -- cgit v0.10.2 From bef83ed92caca45a394344a14b0234aea977da6e Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 17 Oct 2012 16:42:59 +0000 Subject: IB/mlx4: Synchronize cleanup of MCGs in MCG paravirtualization A client re-register event invokes cleanup of all MCGs. This is required to protect against misbehaved guests leading to corruption of join/leave database. However, since cleaning up the MCGs is a heavy operation, it is pushed to a work queue for further processing. Client re-register is also propagated to ULPs (e.g IPoIB). However, since the cleanup is performed in a workqueue, the ULP could leave and re-join groups before the cleanup occurs. In this case, when the cleanup takes place, it prunes the (newly-joined) MCGs and the ULP is left without actual MCGs while believing it joined them. Fix this by setting the flushing flag before invoking the cleanup task and clearing it after flushing is complete. Signed-off-by: Eli Cohen Reviewed-by: Jack Morgenstein Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 44ff480..25b2cdf 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -1075,10 +1075,6 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy unsigned long end; int count; - if (ctx->flushing) - return; - - ctx->flushing = 1; for (i = 0; i < MAX_VFS; ++i) clean_vf_mcast(ctx, i); @@ -1108,9 +1104,6 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy force_clean_group(group); } mutex_unlock(&ctx->mcg_table_lock); - - if (!destroy_wq) - ctx->flushing = 0; } struct clean_work { @@ -1124,6 +1117,7 @@ static void mcg_clean_task(struct work_struct *work) struct clean_work *cw = container_of(work, struct clean_work, work); _mlx4_ib_mcg_port_cleanup(cw->ctx, cw->destroy_wq); + cw->ctx->flushing = 0; kfree(cw); } @@ -1131,13 +1125,20 @@ void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq) { struct clean_work *work; + if (ctx->flushing) + return; + + ctx->flushing = 1; + if (destroy_wq) { _mlx4_ib_mcg_port_cleanup(ctx, destroy_wq); + ctx->flushing = 0; return; } work = kmalloc(sizeof *work, GFP_KERNEL); if (!work) { + ctx->flushing = 0; mcg_warn("failed allocating work for cleanup\n"); return; } -- cgit v0.10.2 From 32c631f9f2566d09d32abc2f4c7aa24c6d8b5641 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 12 Oct 2012 21:10:11 +0000 Subject: RDMA/cxgb4: Don't free chunk that we have failed to allocate In the error path of registering memory when there's a failure to allocate a chunk from the memory pool, we try to free the same chunk we just failed to allocate, which will BUG(). Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 57e07c6..afd8179 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -468,7 +468,7 @@ struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd, ret = alloc_pbl(mhp, npages); if (ret) { kfree(page_list); - goto err_pbl; + goto err; } ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr, -- cgit v0.10.2 From 3cf164c8dee834cef9dac4e7325f89a207339ac8 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 21 Oct 2012 14:59:22 +0000 Subject: mlx4_core: Remove annoying debug messages from SR-IOV flow These debug prints left behind by commits c82e9aa0a8bc ("mlx4_core: resource tracking for HCA resources used by guests"), 54679e148287 ("mlx4: Implement QP paravirtualization and maintain phys_pkey_cache for smp_snoop") and 993c401e2079 ("mlx4_core: Add IB port-state machine and port mgmt event propagation") make it pretty hard to actually use the mlx4_core debug messages when running in SRIOV/IB mode -- for example, the module load sequence of a device with one VF yielded 631 debug prints, with 408 of them being from this set. Let's just remove them. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 51c7649..194221b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -329,9 +329,6 @@ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, ctx = &priv->mfunc.master.slave_state[slave]; spin_lock_irqsave(&ctx->lock, flags); - mlx4_dbg(dev, "%s: slave: %d, current state: %d new event :%d\n", - __func__, slave, cur_state, event); - switch (cur_state) { case SLAVE_PORT_DOWN: if (MLX4_PORT_STATE_DEV_EVENT_PORT_UP == event) @@ -366,9 +363,6 @@ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, goto out; } ret = mlx4_get_slave_port_state(dev, slave, port); - mlx4_dbg(dev, "%s: slave: %d, current state: %d new event" - " :%d gen_event: %d\n", - __func__, slave, cur_state, event, *gen_event); out: spin_unlock_irqrestore(&ctx->lock, flags); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 926c911..b05705f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -330,9 +330,6 @@ static void update_pkey_index(struct mlx4_dev *dev, int slave, new_index = priv->virt2phys_pkey[slave][port - 1][orig_index]; *(u8 *)(inbox->buf + 35) = new_index; - - mlx4_dbg(dev, "port = %d, orig pkey index = %d, " - "new pkey index = %d\n", port, orig_index, new_index); } static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, @@ -351,9 +348,6 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) qp_ctx->alt_path.mgid_index = slave & 0x7F; } - - mlx4_dbg(dev, "slave %d, new gid index: 0x%x ", - slave, qp_ctx->pri_path.mgid_index); } static int mpt_mask(struct mlx4_dev *dev) -- cgit v0.10.2 From 41929ed2656bab8af1734bf5d0088385e72e294f Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Sun, 21 Oct 2012 14:59:23 +0000 Subject: mlx4_core: Perform correct resource cleanup if mlx4_QUERY_ADAPTER() fails Fixed the resource cleanup to act correctly and prevent a kernel oops when mlx4_QUERY_ADAPTER() fails. Signed-off-by: Dotan Barak Reviewed-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 80df2ab..2aa80af 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1405,7 +1405,10 @@ unmap_bf: unmap_bf_area(dev); err_close: - mlx4_close_hca(dev); + if (mlx4_is_slave(dev)) + mlx4_slave_exit(dev); + else + mlx4_CLOSE_HCA(dev, 0); err_free_icm: if (!mlx4_is_slave(dev)) -- cgit v0.10.2