From e4221314a593b00e035f70efbef52021f9a3a5fc Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 3 Nov 2011 17:48:25 -0700 Subject: IB/mthca: Fix buddy->num_free allocation size The num_free field of mthca_buddy has a type of array of unsigned int while it was allocated as an array of pointers. On 64-bit platforms this allocates twice more than required. Fix this by allocating the correct size for the type. This is the same bug just fixed in mlx4 by Eli Cohen . Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index ab876f9..ed9a989 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -146,7 +146,7 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), GFP_KERNEL); - buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *), + buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free, GFP_KERNEL); if (!buddy->bits || !buddy->num_free) goto err_out; -- cgit v0.10.2 From 30ab7e230b996f750d4fc24b6bf8214e83effa12 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 4 Nov 2011 08:26:52 -0400 Subject: IB/qib: Fix panic in RC error flushing logic The following panic can occur when flushing a QP: RIP: 0010:[] [] qib_send_complete+0x3b/0x190 [ib_qib] RSP: 0018:ffff8803cdc6fc90 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff8803d84ba000 RCX: 0000000000000000 RDX: 0000000000000005 RSI: ffffc90015a53430 RDI: ffff8803d84ba000 RBP: ffff8803cdc6fce0 R08: ffff8803cdc6fc90 R09: 0000000000000001 R10: 00000000ffffffff R11: 0000000000000000 R12: ffff8803d84ba0c0 R13: ffff8803d84ba5cc R14: 0000000000000800 R15: 0000000000000246 FS: 0000000000000000(0000) GS:ffff880036600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 0000000000000034 CR3: 00000003e44f9000 CR4: 00000000000406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process qib/0 (pid: 1350, threadinfo ffff8803cdc6e000, task ffff88042728a100) Stack: 53544c5553455201 0000000100000005 0000000000000000 ffff8803d84ba000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000001 ffff8803cdc6fd30 ffffffffa0165d7a Call Trace: [] qib_make_rc_req+0x36a/0xe80 [ib_qib] [] ? qib_make_rc_req+0x0/0xe80 [ib_qib] [] qib_do_send+0xf3/0xb60 [ib_qib] [] ? thread_return+0x4e/0x777 [] ? qib_do_send+0x0/0xb60 [ib_qib] [] worker_thread+0x170/0x2a0 [] ? autoremove_wake_function+0x0/0x40 [] ? worker_thread+0x0/0x2a0 [] kthread+0x96/0xa0 [] child_rip+0xa/0x20 [] ? kthread+0x0/0xa0 [] ? child_rip+0x0/0x20 RIP [] qib_send_complete+0x3b/0x190 [ib_qib] The RC error state flush logic in qib_make_rc_req() could return all of the acked wqes and potentially have emptied the queue. It would then unconditionally try return a flush completion via qib_send_complete() for an invalid wqe, or worse a valid one that is not queued. The panic results when the completion code tries to maintain an MR reference count for a NULL MR. This fix modifies logic to only send one completion per qib_make_rc_req() call and changing the completion status from IB_WC_SUCCESS to IB_WC_WR_FLUSH_ERR as the completions progress. The outer loop will call as many times as necessary to flush the queue. Reviewed-by: Ram Vepa Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index afaf4ac..894afac 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -271,13 +271,9 @@ int qib_make_rc_req(struct qib_qp *qp) goto bail; } wqe = get_swqe_ptr(qp, qp->s_last); - while (qp->s_last != qp->s_acked) { - qib_send_complete(qp, wqe, IB_WC_SUCCESS); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - wqe = get_swqe_ptr(qp, qp->s_last); - } - qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ? + IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); + /* will get called again */ goto done; } -- cgit v0.10.2