From 1c94283ddbe8a9945c4aaac8b0be90d47f97f2df Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 7 May 2012 14:02:31 -0400 Subject: IB/qib: Add cache line awareness to qib_qp and qib_devdata structures This patch reorganizes the QP and devdata files to be more cache line aware. qib_qp fields in particular are split into read-mostly, send, and receive fields. qib_devdata fields are split into read-mostly and read/write fields Testing has show that bidirectional tests improve by as much as 100% with this patch. Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 2d63887..7e62f41 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -530,8 +530,6 @@ struct qib_pportdata { /* qib_lflags driver is waiting for */ u32 state_wanted; spinlock_t lflags_lock; - /* number of (port-specific) interrupts for this port -- saturates... */ - u32 int_counter; /* ref count for each pkey */ atomic_t pkeyrefs[4]; @@ -543,24 +541,26 @@ struct qib_pportdata { u64 *statusp; /* SendDMA related entries */ - spinlock_t sdma_lock; - struct qib_sdma_state sdma_state; - unsigned long sdma_buf_jiffies; + + /* read mostly */ struct qib_sdma_desc *sdma_descq; + struct qib_sdma_state sdma_state; + dma_addr_t sdma_descq_phys; + volatile __le64 *sdma_head_dma; /* DMA'ed by chip */ + dma_addr_t sdma_head_phys; + u16 sdma_descq_cnt; + + /* read/write using lock */ + spinlock_t sdma_lock ____cacheline_aligned_in_smp; + struct list_head sdma_activelist; u64 sdma_descq_added; u64 sdma_descq_removed; - u16 sdma_descq_cnt; u16 sdma_descq_tail; u16 sdma_descq_head; - u16 sdma_next_intr; - u16 sdma_reset_wait; u8 sdma_generation; - struct tasklet_struct sdma_sw_clean_up_task; - struct list_head sdma_activelist; - dma_addr_t sdma_descq_phys; - volatile __le64 *sdma_head_dma; /* DMA'ed by chip */ - dma_addr_t sdma_head_phys; + struct tasklet_struct sdma_sw_clean_up_task + ____cacheline_aligned_in_smp; wait_queue_head_t state_wait; /* for state_wanted */ diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 7e7e16f..1ce56b5 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -1038,6 +1038,11 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, goto bail_swq; } RCU_INIT_POINTER(qp->next, NULL); + qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL); + if (!qp->s_hdr) { + ret = ERR_PTR(-ENOMEM); + goto bail_qp; + } qp->timeout_jiffies = usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL); @@ -1159,6 +1164,7 @@ bail_ip: vfree(qp->r_rq.wq); free_qpn(&dev->qpn_table, qp->ibqp.qp_num); bail_qp: + kfree(qp->s_hdr); kfree(qp); bail_swq: vfree(swq); @@ -1214,6 +1220,7 @@ int qib_destroy_qp(struct ib_qp *ibqp) else vfree(qp->r_rq.wq); vfree(qp->s_wq); + kfree(qp->s_hdr); kfree(qp); return 0; } diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 765b4cb..b641416 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -244,9 +244,9 @@ int qib_make_rc_req(struct qib_qp *qp) int ret = 0; int delta; - ohdr = &qp->s_hdr.u.oth; + ohdr = &qp->s_hdr->u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr.u.l.oth; + ohdr = &qp->s_hdr->u.l.oth; /* * The lock is needed to synchronize between the sending tasklet, diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index b4b37e4..c0ee7e0 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -688,17 +688,17 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, nwords = (qp->s_cur_size + extra_bytes) >> 2; lrh0 = QIB_LRH_BTH; if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { - qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh, + qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh, &qp->remote_ah_attr.grh, qp->s_hdrwords, nwords); lrh0 = QIB_LRH_GRH; } lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 | qp->remote_ah_attr.sl << 4; - qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); - qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - qp->s_hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | + qp->s_hdr->lrh[0] = cpu_to_be16(lrh0); + qp->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); + qp->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | qp->remote_ah_attr.src_path_bits); bth0 |= qib_get_pkey(ibp, qp->s_pkey_index); bth0 |= extra_bytes << 20; @@ -758,7 +758,7 @@ void qib_do_send(struct work_struct *work) * If the packet cannot be sent now, return and * the send tasklet will be woken up later. */ - if (qib_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, + if (qib_verbs_send(qp, qp->s_hdr, qp->s_hdrwords, qp->s_cur_sge, qp->s_cur_size)) break; /* Record that s_hdr is empty. */ diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 7ce2ac2..ce7387f 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -72,9 +72,9 @@ int qib_make_uc_req(struct qib_qp *qp) goto done; } - ohdr = &qp->s_hdr.u.oth; + ohdr = &qp->s_hdr->u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr.u.l.oth; + ohdr = &qp->s_hdr->u.l.oth; /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 828609f..a468bf2 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -321,11 +321,11 @@ int qib_make_ud_req(struct qib_qp *qp) if (ah_attr->ah_flags & IB_AH_GRH) { /* Header size in 32-bit words. */ - qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh, + qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh, &ah_attr->grh, qp->s_hdrwords, nwords); lrh0 = QIB_LRH_GRH; - ohdr = &qp->s_hdr.u.l.oth; + ohdr = &qp->s_hdr->u.l.oth; /* * Don't worry about sending to locally attached multicast * QPs. It is unspecified by the spec. what happens. @@ -333,7 +333,7 @@ int qib_make_ud_req(struct qib_qp *qp) } else { /* Header size in 32-bit words. */ lrh0 = QIB_LRH_BTH; - ohdr = &qp->s_hdr.u.oth; + ohdr = &qp->s_hdr->u.oth; } if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { qp->s_hdrwords++; @@ -346,15 +346,15 @@ int qib_make_ud_req(struct qib_qp *qp) lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */ else lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12; - qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ - qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); + qp->s_hdr->lrh[0] = cpu_to_be16(lrh0); + qp->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ + qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); lid = ppd->lid; if (lid) { lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1); - qp->s_hdr.lrh[3] = cpu_to_be16(lid); + qp->s_hdr->lrh[3] = cpu_to_be16(lid); } else - qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE; + qp->s_hdr->lrh[3] = IB_LID_PERMISSIVE; if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= IB_BTH_SOLICITED; bth0 |= extra_bytes << 20; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 0c19ef0..4876060 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -367,9 +367,10 @@ struct qib_rwq { struct qib_rq { struct qib_rwq *wq; - spinlock_t lock; /* protect changes in this struct */ u32 size; /* size of RWQE array */ u8 max_sge; + spinlock_t lock /* protect changes in this struct */ + ____cacheline_aligned_in_smp; }; struct qib_srq { @@ -412,31 +413,75 @@ struct qib_ack_entry { */ struct qib_qp { struct ib_qp ibqp; - struct qib_qp *next; /* link list for QPN hash table */ - struct qib_qp *timer_next; /* link list for qib_ib_timer() */ - struct list_head iowait; /* link for wait PIO buf */ - struct list_head rspwait; /* link for waititing to respond */ + /* read mostly fields above and below */ struct ib_ah_attr remote_ah_attr; struct ib_ah_attr alt_ah_attr; - struct qib_ib_header s_hdr; /* next packet header to send */ - atomic_t refcount; - wait_queue_head_t wait; - wait_queue_head_t wait_dma; - struct timer_list s_timer; - struct work_struct s_work; + struct qib_qp *next; /* link list for QPN hash table */ + struct qib_swqe *s_wq; /* send work queue */ struct qib_mmap_info *ip; + struct qib_ib_header *s_hdr; /* next packet header to send */ + unsigned long timeout_jiffies; /* computed from timeout */ + + enum ib_mtu path_mtu; + u32 remote_qpn; + u32 pmtu; /* decoded from path_mtu */ + u32 qkey; /* QKEY for this QP (for UD or RD) */ + u32 s_size; /* send work queue size */ + u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ + + u8 state; /* QP state */ + u8 qp_access_flags; + u8 alt_timeout; /* Alternate path timeout for this QP */ + u8 timeout; /* Timeout for this QP */ + u8 s_srate; + u8 s_mig_state; + u8 port_num; + u8 s_pkey_index; /* PKEY index to use */ + u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_retry_cnt; /* number of times to retry */ + u8 s_rnr_retry_cnt; + u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ + u8 s_max_sge; /* size of s_wq->sg_list */ + u8 s_draining; + + /* start of read/write fields */ + + atomic_t refcount ____cacheline_aligned_in_smp; + wait_queue_head_t wait; + + + struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1] + ____cacheline_aligned_in_smp; + struct qib_sge_state s_rdma_read_sge; + + spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ + unsigned long r_aflags; + u64 r_wr_id; /* ID for current receive WQE */ + u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ + u32 r_len; /* total length of r_sge */ + u32 r_rcv_len; /* receive data len processed */ + u32 r_psn; /* expected rcv packet sequence number */ + u32 r_msn; /* message sequence number */ + + u8 r_state; /* opcode of last packet received */ + u8 r_flags; + u8 r_head_ack_queue; /* index into s_ack_queue[] */ + + struct list_head rspwait; /* link for waititing to respond */ + + struct qib_sge_state r_sge; /* current receive data */ + struct qib_rq r_rq; /* receive work queue */ + + spinlock_t s_lock ____cacheline_aligned_in_smp; struct qib_sge_state *s_cur_sge; + u32 s_flags; struct qib_verbs_txreq *s_tx; - struct qib_mregion *s_rdma_mr; + struct qib_swqe *s_wqe; struct qib_sge_state s_sge; /* current send request data */ - struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1]; - struct qib_sge_state s_ack_rdma_sge; - struct qib_sge_state s_rdma_read_sge; - struct qib_sge_state r_sge; /* current receive data */ - spinlock_t r_lock; /* used for APM */ - spinlock_t s_lock; + struct qib_mregion *s_rdma_mr; atomic_t s_dma_busy; - u32 s_flags; u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ @@ -447,60 +492,34 @@ struct qib_qp { u32 s_psn; /* current packet sequence number */ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ - u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ - u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ - u64 r_wr_id; /* ID for current receive WQE */ - unsigned long r_aflags; - u32 r_len; /* total length of r_sge */ - u32 r_rcv_len; /* receive data len processed */ - u32 r_psn; /* expected rcv packet sequence number */ - u32 r_msn; /* message sequence number */ + u32 s_head; /* new entries added here */ + u32 s_tail; /* next entry to process */ + u32 s_cur; /* current work queue entry */ + u32 s_acked; /* last un-ACK'ed entry */ + u32 s_last; /* last completed entry */ + u32 s_ssn; /* SSN of tail entry */ + u32 s_lsn; /* limit sequence number (credit) */ u16 s_hdrwords; /* size of s_hdr in 32 bit words */ u16 s_rdma_ack_cnt; - u8 state; /* QP state */ u8 s_state; /* opcode of last packet sent */ u8 s_ack_state; /* opcode of packet to ACK */ u8 s_nak_state; /* non-zero if NAK is pending */ - u8 r_state; /* opcode of last packet received */ u8 r_nak_state; /* non-zero if NAK is pending */ - u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ - u8 r_flags; - u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ - u8 r_head_ack_queue; /* index into s_ack_queue[] */ - u8 qp_access_flags; - u8 s_max_sge; /* size of s_wq->sg_list */ - u8 s_retry_cnt; /* number of times to retry */ - u8 s_rnr_retry_cnt; u8 s_retry; /* requester retry counter */ u8 s_rnr_retry; /* requester RNR retry counter */ - u8 s_pkey_index; /* PKEY index to use */ - u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ - u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ u8 s_tail_ack_queue; /* index into s_ack_queue[] */ - u8 s_srate; - u8 s_draining; - u8 s_mig_state; - u8 timeout; /* Timeout for this QP */ - u8 alt_timeout; /* Alternate path timeout for this QP */ - u8 port_num; - enum ib_mtu path_mtu; - u32 pmtu; /* decoded from path_mtu */ - u32 remote_qpn; - u32 qkey; /* QKEY for this QP (for UD or RD) */ - u32 s_size; /* send work queue size */ - u32 s_head; /* new entries added here */ - u32 s_tail; /* next entry to process */ - u32 s_cur; /* current work queue entry */ - u32 s_acked; /* last un-ACK'ed entry */ - u32 s_last; /* last completed entry */ - u32 s_ssn; /* SSN of tail entry */ - u32 s_lsn; /* limit sequence number (credit) */ - unsigned long timeout_jiffies; /* computed from timeout */ - struct qib_swqe *s_wq; /* send work queue */ - struct qib_swqe *s_wqe; - struct qib_rq r_rq; /* receive work queue */ - struct qib_sge r_sg_list[0]; /* verified SGEs */ + + struct qib_sge_state s_ack_rdma_sge; + struct timer_list s_timer; + struct list_head iowait; /* link for wait PIO buf */ + + struct work_struct s_work; + + wait_queue_head_t wait_dma; + + struct qib_sge r_sg_list[0] /* verified SGEs */ + ____cacheline_aligned_in_smp; }; /* -- cgit v0.10.2