author     Linus Torvalds <torvalds@linux-foundation.org>  2012-12-22 00:40:26 (GMT)
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-12-22 00:40:26 (GMT)
commit     184e2516614f7055d4c3a2e63fd8a3eb95fff6d6
tree       9822dd3cc97f8cfed3cbda6167818b60355cc7ec
parent     0264405b84505f60ae00625f261e75a32c7ddf56
parent     d72623b665d84b1e07fe43854e83387fce8dd134
Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
Pull more infiniband changes from Roland Dreier:
"Second batch of InfiniBand/RDMA changes for 3.8:
- cxgb4 changes to fix lookup engine hash collisions
- mlx4 changes to make flow steering usable
- fix to IPoIB to avoid pinning dst reference for too long"
* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
RDMA/cxgb4: Fix bug for active and passive LE hash collision path
RDMA/cxgb4: Fix LE hash collision bug for passive open connection
RDMA/cxgb4: Fix LE hash collision bug for active open connection
mlx4_core: Allow choosing flow steering mode
mlx4_core: Adjustments to Flow Steering activation logic for SR-IOV
mlx4_core: Fix error flow in the flow steering wrapper
mlx4_core: Add QPN enforcement for flow steering rules set by VFs
cxgb4: Add LE hash collision bug fix path in LLD driver
cxgb4: Add T4 filter support
IPoIB: Call skb_dst_drop() once skb is enqueued for sending
22 files changed, 2234 insertions, 214 deletions
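For context on the IPoIB item in the pull request above ("avoid pinning dst reference for too long"): the hunks at the end of this diff add skb_orphan()/skb_dst_drop() calls once a packet has been handed to the send ring, so a slow TX completion no longer keeps the dst entry alive. The sketch below only illustrates that pattern under stated assumptions; the function name and the surrounding bookkeeping are hypothetical, not the actual ipoib_send()/ipoib_cm_send() code shown in the diff.

	#include <linux/skbuff.h>
	#include <net/dst.h>

	/*
	 * Illustrative sketch (not the driver code itself): once the skb has
	 * been posted to the hardware send queue, release the references the
	 * skb still holds so they are not pinned for as long as the skb sits
	 * in the TX ring waiting for its completion.
	 */
	static void example_enqueue_for_send(struct sk_buff *skb)	/* hypothetical */
	{
		/* ... skb has just been posted to the send queue here ... */

		skb_orphan(skb);	/* drop the owning socket's reference */
		skb_dst_drop(skb);	/* drop the dst entry so it can be freed */
	}

The design point, as the patch titles state, is simply to call skb_dst_drop() as soon as the skb is enqueued for sending rather than leaving the release to kfree_skb() at completion time.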
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 5de8696..c13745c 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -38,10 +38,12 @@ #include <linux/inetdevice.h> #include <linux/ip.h> #include <linux/tcp.h> +#include <linux/if_vlan.h> #include <net/neighbour.h> #include <net/netevent.h> #include <net/route.h> +#include <net/tcp.h> #include "iw_cxgb4.h" @@ -61,6 +63,14 @@ static char *states[] = { NULL, }; +static int nocong; +module_param(nocong, int, 0644); +MODULE_PARM_DESC(nocong, "Turn of congestion control (default=0)"); + +static int enable_ecn; +module_param(enable_ecn, int, 0644); +MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)"); + static int dack_mode = 1; module_param(dack_mode, int, 0644); MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)"); @@ -265,6 +275,7 @@ void _c4iw_free_ep(struct kref *kref) cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); + remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); } kfree(ep); } @@ -441,6 +452,50 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } +#define VLAN_NONE 0xfff +#define FILTER_SEL_VLAN_NONE 0xffff +#define FILTER_SEL_WIDTH_P_FC (3+1) /* port uses 3 bits, FCoE one bit */ +#define FILTER_SEL_WIDTH_VIN_P_FC \ + (6 + 7 + FILTER_SEL_WIDTH_P_FC) /* 6 bits are unused, VF uses 7 bits*/ +#define FILTER_SEL_WIDTH_TAG_P_FC \ + (3 + FILTER_SEL_WIDTH_VIN_P_FC) /* PF uses 3 bits */ +#define FILTER_SEL_WIDTH_VLD_TAG_P_FC (1 + FILTER_SEL_WIDTH_TAG_P_FC) + +static unsigned int select_ntuple(struct c4iw_dev *dev, struct dst_entry *dst, + struct l2t_entry *l2t) +{ + unsigned int ntuple = 0; + u32 viid; + + switch (dev->rdev.lldi.filt_mode) { + + /* default filter mode */ + case HW_TPL_FR_MT_PR_IV_P_FC: + if (l2t->vlan == VLAN_NONE) + ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC; + else { + ntuple |= l2t->vlan << FILTER_SEL_WIDTH_P_FC; + ntuple |= 1 << FILTER_SEL_WIDTH_VLD_TAG_P_FC; + } + ntuple |= l2t->lport << S_PORT | IPPROTO_TCP << + FILTER_SEL_WIDTH_VLD_TAG_P_FC; + break; + case HW_TPL_FR_MT_PR_OV_P_FC: { + viid = cxgb4_port_viid(l2t->neigh->dev); + + ntuple |= FW_VIID_VIN_GET(viid) << FILTER_SEL_WIDTH_P_FC; + ntuple |= FW_VIID_PFN_GET(viid) << FILTER_SEL_WIDTH_VIN_P_FC; + ntuple |= FW_VIID_VIVLD_GET(viid) << FILTER_SEL_WIDTH_TAG_P_FC; + ntuple |= l2t->lport << S_PORT | IPPROTO_TCP << + FILTER_SEL_WIDTH_VLD_TAG_P_FC; + break; + } + default: + break; + } + return ntuple; +} + static int send_connect(struct c4iw_ep *ep) { struct cpl_act_open_req *req; @@ -463,7 +518,8 @@ static int send_connect(struct c4iw_ep *ep) cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); wscale = compute_wscale(rcv_win); - opt0 = KEEP_ALIVE(1) | + opt0 = (nocong ? 
NO_CONG(1) : 0) | + KEEP_ALIVE(1) | DELACK(1) | WND_SCALE(wscale) | MSS_IDX(mtu_idx) | @@ -474,6 +530,7 @@ static int send_connect(struct c4iw_ep *ep) ULP_MODE(ULP_MODE_TCPDDP) | RCV_BUFSIZ(rcv_win>>10); opt2 = RX_CHANNEL(0) | + CCTRL_ECN(enable_ecn) | RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); if (enable_tcp_timestamps) opt2 |= TSTAMPS_EN(1); @@ -492,8 +549,9 @@ static int send_connect(struct c4iw_ep *ep) req->local_ip = ep->com.local_addr.sin_addr.s_addr; req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; req->opt0 = cpu_to_be64(opt0); - req->params = 0; + req->params = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, ep->l2t)); req->opt2 = cpu_to_be32(opt2); + set_bit(ACT_OPEN_REQ, &ep->com.history); return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -770,6 +828,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) /* setup the hwtid for this connection */ ep->hwtid = tid; cxgb4_insert_tid(t, ep, tid); + insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid); ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); @@ -777,7 +836,9 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) set_emss(ep, ntohs(req->tcp_opt)); /* dealloc the atid */ + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); cxgb4_free_atid(t, atid); + set_bit(ACT_ESTAB, &ep->com.history); /* start MPA negotiation */ send_flowc(ep, NULL); @@ -803,6 +864,7 @@ static void close_complete_upcall(struct c4iw_ep *ep) ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; ep->com.qp = NULL; + set_bit(CLOSE_UPCALL, &ep->com.history); } } @@ -811,6 +873,7 @@ static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); close_complete_upcall(ep); state_set(&ep->com, ABORTING); + set_bit(ABORT_CONN, &ep->com.history); return send_abort(ep, skb, gfp); } @@ -825,6 +888,7 @@ static void peer_close_upcall(struct c4iw_ep *ep) PDBG("peer close delivered ep %p cm_id %p tid %u\n", ep, ep->com.cm_id, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); + set_bit(DISCONN_UPCALL, &ep->com.history); } } @@ -843,6 +907,7 @@ static void peer_abort_upcall(struct c4iw_ep *ep) ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; ep->com.qp = NULL; + set_bit(ABORT_UPCALL, &ep->com.history); } } @@ -875,6 +940,7 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status); + set_bit(CONN_RPL_UPCALL, &ep->com.history); ep->com.cm_id->event_handler(ep->com.cm_id, &event); if (status < 0) { @@ -915,6 +981,7 @@ static void connect_request_upcall(struct c4iw_ep *ep) ep->parent_ep->com.cm_id, &event); } + set_bit(CONNREQ_UPCALL, &ep->com.history); c4iw_put_ep(&ep->parent_ep->com); ep->parent_ep = NULL; } @@ -931,6 +998,7 @@ static void established_upcall(struct c4iw_ep *ep) if (ep->com.cm_id) { PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); + set_bit(ESTAB_UPCALL, &ep->com.history); } } @@ -1316,6 +1384,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int dlen = ntohs(hdr->len); unsigned int tid = GET_TID(hdr); struct tid_info *t = dev->rdev.lldi.tids; + __u8 status = hdr->status; ep = lookup_tid(t, tid); PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); @@ -1338,9 +1407,9 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) case MPA_REP_SENT: break; default: - printk(KERN_ERR MOD "%s Unexpected 
streaming data." - " ep %p state %d tid %u\n", - __func__, ep, state_read(&ep->com), ep->hwtid); + pr_err("%s Unexpected streaming data." \ + " ep %p state %d tid %u status %d\n", + __func__, ep, state_read(&ep->com), ep->hwtid, status); /* * The ep will timeout and inform the ULP of the failure. @@ -1383,6 +1452,63 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } +static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) +{ + struct sk_buff *skb; + struct fw_ofld_connection_wr *req; + unsigned int mtu_idx; + int wscale; + + skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR)); + req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); + req->le.filter = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, + ep->l2t)); + req->le.lport = ep->com.local_addr.sin_port; + req->le.pport = ep->com.remote_addr.sin_port; + req->le.u.ipv4.lip = ep->com.local_addr.sin_addr.s_addr; + req->le.u.ipv4.pip = ep->com.remote_addr.sin_addr.s_addr; + req->tcb.t_state_to_astid = + htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_SENT) | + V_FW_OFLD_CONNECTION_WR_ASTID(atid)); + req->tcb.cplrxdataack_cplpassacceptrpl = + htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK); + req->tcb.tx_max = jiffies; + req->tcb.rcv_adv = htons(1); + cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); + wscale = compute_wscale(rcv_win); + req->tcb.opt0 = TCAM_BYPASS(1) | + (nocong ? NO_CONG(1) : 0) | + KEEP_ALIVE(1) | + DELACK(1) | + WND_SCALE(wscale) | + MSS_IDX(mtu_idx) | + L2T_IDX(ep->l2t->idx) | + TX_CHAN(ep->tx_chan) | + SMAC_SEL(ep->smac_idx) | + DSCP(ep->tos) | + ULP_MODE(ULP_MODE_TCPDDP) | + RCV_BUFSIZ(rcv_win >> 10); + req->tcb.opt2 = PACE(1) | + TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) | + RX_CHANNEL(0) | + CCTRL_ECN(enable_ecn) | + RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); + if (enable_tcp_timestamps) + req->tcb.opt2 |= TSTAMPS_EN(1); + if (enable_tcp_sack) + req->tcb.opt2 |= SACK_EN(1); + if (wscale && enable_tcp_window_scaling) + req->tcb.opt2 |= WND_SCALE_EN(1); + req->tcb.opt0 = cpu_to_be64(req->tcb.opt0); + req->tcb.opt2 = cpu_to_be32(req->tcb.opt2); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); + set_bit(ACT_OFLD_CONN, &ep->com.history); + c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + /* * Return whether a failed active open has allocated a TID */ @@ -1392,6 +1518,111 @@ static inline int act_open_has_tid(int status) status != CPL_ERR_ARP_MISS; } +#define ACT_OPEN_RETRY_COUNT 2 + +static int c4iw_reconnect(struct c4iw_ep *ep) +{ + int err = 0; + struct rtable *rt; + struct port_info *pi; + struct net_device *pdev; + int step; + struct neighbour *neigh; + + PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); + init_timer(&ep->timer); + + /* + * Allocate an active TID to initiate a TCP connection. 
+ */ + ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); + if (ep->atid == -1) { + pr_err("%s - cannot alloc atid.\n", __func__); + err = -ENOMEM; + goto fail2; + } + insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid); + + /* find a route */ + rt = find_route(ep->com.dev, + ep->com.cm_id->local_addr.sin_addr.s_addr, + ep->com.cm_id->remote_addr.sin_addr.s_addr, + ep->com.cm_id->local_addr.sin_port, + ep->com.cm_id->remote_addr.sin_port, 0); + if (!rt) { + pr_err("%s - cannot find route.\n", __func__); + err = -EHOSTUNREACH; + goto fail3; + } + ep->dst = &rt->dst; + + neigh = dst_neigh_lookup(ep->dst, + &ep->com.cm_id->remote_addr.sin_addr.s_addr); + /* get a l2t entry */ + if (neigh->dev->flags & IFF_LOOPBACK) { + PDBG("%s LOOPBACK\n", __func__); + pdev = ip_dev_find(&init_net, + ep->com.cm_id->remote_addr.sin_addr.s_addr); + ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, + neigh, pdev, 0); + pi = (struct port_info *)netdev_priv(pdev); + ep->mtu = pdev->mtu; + ep->tx_chan = cxgb4_port_chan(pdev); + ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + dev_put(pdev); + } else { + ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, + neigh, neigh->dev, 0); + pi = (struct port_info *)netdev_priv(neigh->dev); + ep->mtu = dst_mtu(ep->dst); + ep->tx_chan = cxgb4_port_chan(neigh->dev); + ep->smac_idx = (cxgb4_port_viid(neigh->dev) & + 0x7F) << 1; + } + + step = ep->com.dev->rdev.lldi.ntxq / ep->com.dev->rdev.lldi.nchan; + ep->txq_idx = pi->port_id * step; + ep->ctrlq_idx = pi->port_id; + step = ep->com.dev->rdev.lldi.nrxq / ep->com.dev->rdev.lldi.nchan; + ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[pi->port_id * step]; + + if (!ep->l2t) { + pr_err("%s - cannot alloc l2e.\n", __func__); + err = -ENOMEM; + goto fail4; + } + + PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", + __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, + ep->l2t->idx); + + state_set(&ep->com, CONNECTING); + ep->tos = 0; + + /* send connect request to rnic */ + err = send_connect(ep); + if (!err) + goto out; + + cxgb4_l2t_release(ep->l2t); +fail4: + dst_release(ep->dst); +fail3: + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); +fail2: + /* + * remember to send notification to upper layer. + * We are in here so the upper layer is not aware that this is + * re-connect attempt and so, upper layer is still waiting for + * response of 1st connect request. + */ + connect_reply_upcall(ep, -ECONNRESET); + c4iw_put_ep(&ep->com); +out: + return err; +} + static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *ep; @@ -1412,6 +1643,8 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } + set_bit(ACT_OPEN_RPL, &ep->com.history); + /* * Log interesting failures. 
*/ @@ -1419,6 +1652,29 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) case CPL_ERR_CONN_RESET: case CPL_ERR_CONN_TIMEDOUT: break; + case CPL_ERR_TCAM_FULL: + if (dev->rdev.lldi.enable_fw_ofld_conn) { + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.tcam_full++; + mutex_unlock(&dev->rdev.stats.lock); + send_fw_act_open_req(ep, + GET_TID_TID(GET_AOPEN_ATID( + ntohl(rpl->atid_status)))); + return 0; + } + break; + case CPL_ERR_CONN_EXIST: + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + set_bit(ACT_RETRY_INUSE, &ep->com.history); + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, + atid); + cxgb4_free_atid(t, atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_reconnect(ep); + return 0; + } + break; default: printk(KERN_INFO MOD "Active open failure - " "atid %u status %u errno %d %pI4:%u->%pI4:%u\n", @@ -1436,6 +1692,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) if (status && act_open_has_tid(status)) cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl)); + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); cxgb4_free_atid(t, atid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); @@ -1452,13 +1709,14 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_listen_ep *ep = lookup_stid(t, stid); if (!ep) { - printk(KERN_ERR MOD "stid %d lookup failure!\n", stid); - return 0; + PDBG("%s stid %d lookup failure!\n", __func__, stid); + goto out; } PDBG("%s ep %p status %d error %d\n", __func__, ep, rpl->status, status2errno(rpl->status)); c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); +out: return 0; } @@ -1510,14 +1768,15 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, skb_get(skb); cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); wscale = compute_wscale(rcv_win); - opt0 = KEEP_ALIVE(1) | + opt0 = (nocong ? 
NO_CONG(1) : 0) | + KEEP_ALIVE(1) | DELACK(1) | WND_SCALE(wscale) | MSS_IDX(mtu_idx) | L2T_IDX(ep->l2t->idx) | TX_CHAN(ep->tx_chan) | SMAC_SEL(ep->smac_idx) | - DSCP(ep->tos) | + DSCP(ep->tos >> 2) | ULP_MODE(ULP_MODE_TCPDDP) | RCV_BUFSIZ(rcv_win>>10); opt2 = RX_CHANNEL(0) | @@ -1529,6 +1788,15 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, opt2 |= SACK_EN(1); if (wscale && enable_tcp_window_scaling) opt2 |= WND_SCALE_EN(1); + if (enable_ecn) { + const struct tcphdr *tcph; + u32 hlen = ntohl(req->hdr_len); + + tcph = (const void *)(req + 1) + G_ETH_HDR_LEN(hlen) + + G_IP_HDR_LEN(hlen); + if (tcph->ece && tcph->cwr) + opt2 |= CCTRL_ECN(1); + } rpl = cplhdr(skb); INIT_TP_WR(rpl, ep->hwtid); @@ -1645,22 +1913,30 @@ out: static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) { - struct c4iw_ep *child_ep, *parent_ep; + struct c4iw_ep *child_ep = NULL, *parent_ep; struct cpl_pass_accept_req *req = cplhdr(skb); unsigned int stid = GET_POPEN_TID(ntohl(req->tos_stid)); struct tid_info *t = dev->rdev.lldi.tids; unsigned int hwtid = GET_TID(req); struct dst_entry *dst; struct rtable *rt; - __be32 local_ip, peer_ip; + __be32 local_ip, peer_ip = 0; __be16 local_port, peer_port; int err; + u16 peer_mss = ntohs(req->tcpopt.mss); parent_ep = lookup_stid(t, stid); - PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid); - + if (!parent_ep) { + PDBG("%s connect request on invalid stid %d\n", __func__, stid); + goto reject; + } get_4tuple(req, &local_ip, &peer_ip, &local_port, &peer_port); + PDBG("%s parent ep %p hwtid %u laddr 0x%x raddr 0x%x lport %d " \ + "rport %d peer_mss %d\n", __func__, parent_ep, hwtid, + ntohl(local_ip), ntohl(peer_ip), ntohs(local_port), + ntohs(peer_port), peer_mss); + if (state_read(&parent_ep->com) != LISTEN) { printk(KERN_ERR "%s - listening ep not in LISTEN\n", __func__); @@ -1694,6 +1970,9 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } + if (peer_mss && child_ep->mtu > (peer_mss + 40)) + child_ep->mtu = peer_mss + 40; + state_set(&child_ep->com, CONNECTING); child_ep->com.dev = dev; child_ep->com.cm_id = NULL; @@ -1715,6 +1994,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) init_timer(&child_ep->timer); cxgb4_insert_tid(t, child_ep, hwtid); accept_cr(child_ep, peer_ip, skb, req); + set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); goto out; reject: reject_cr(dev, hwtid, peer_ip, skb); @@ -1734,12 +2014,17 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); + PDBG("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid, + ntohs(req->tcp_opt)); + set_emss(ep, ntohs(req->tcp_opt)); + insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid); dst_confirm(ep->dst); state_set(&ep->com, MPA_REQ_WAIT); start_ep_timer(ep); send_flowc(ep, skb); + set_bit(PASS_ESTAB, &ep->com.history); return 0; } @@ -1759,6 +2044,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); dst_confirm(ep->dst); + set_bit(PEER_CLOSE, &ep->com.history); mutex_lock(&ep->com.mutex); switch (ep->com.state) { case MPA_REQ_WAIT: @@ -1838,74 +2124,6 @@ static int is_neg_adv_abort(unsigned int status) status == CPL_ERR_PERSIST_NEG_ADVICE; } -static int c4iw_reconnect(struct c4iw_ep *ep) -{ - struct rtable *rt; - int err = 0; - - PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); - init_timer(&ep->timer); - - /* - * 
Allocate an active TID to initiate a TCP connection. - */ - ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); - if (ep->atid == -1) { - printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); - err = -ENOMEM; - goto fail2; - } - - /* find a route */ - rt = find_route(ep->com.dev, - ep->com.cm_id->local_addr.sin_addr.s_addr, - ep->com.cm_id->remote_addr.sin_addr.s_addr, - ep->com.cm_id->local_addr.sin_port, - ep->com.cm_id->remote_addr.sin_port, 0); - if (!rt) { - printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); - err = -EHOSTUNREACH; - goto fail3; - } - ep->dst = &rt->dst; - - err = import_ep(ep, ep->com.cm_id->remote_addr.sin_addr.s_addr, - ep->dst, ep->com.dev, false); - if (err) { - printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); - goto fail4; - } - - PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", - __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, - ep->l2t->idx); - - state_set(&ep->com, CONNECTING); - ep->tos = 0; - - /* send connect request to rnic */ - err = send_connect(ep); - if (!err) - goto out; - - cxgb4_l2t_release(ep->l2t); -fail4: - dst_release(ep->dst); -fail3: - cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); -fail2: - /* - * remember to send notification to upper layer. - * We are in here so the upper layer is not aware that this is - * re-connect attempt and so, upper layer is still waiting for - * response of 1st connect request. - */ - connect_reply_upcall(ep, -ECONNRESET); - c4iw_put_ep(&ep->com); -out: - return err; -} - static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_abort_req_rss *req = cplhdr(skb); @@ -1926,6 +2144,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) } PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, ep->com.state); + set_bit(PEER_ABORT, &ep->com.history); /* * Wake up any threads in rdma_init() or rdma_fini(). @@ -2140,6 +2359,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) c4iw_put_ep(&ep->com); return -ECONNRESET; } + set_bit(ULP_REJECT, &ep->com.history); BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); if (mpa_rev == 0) abort_connection(ep, NULL, GFP_KERNEL); @@ -2169,6 +2389,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); BUG_ON(!qp); + set_bit(ULP_ACCEPT, &ep->com.history); if ((conn_param->ord > c4iw_max_read_depth) || (conn_param->ird > c4iw_max_read_depth)) { abort_connection(ep, NULL, GFP_KERNEL); @@ -2292,6 +2513,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = -ENOMEM; goto fail2; } + insert_handle(dev, &dev->atid_idr, ep, ep->atid); PDBG("%s saddr 0x%x sport 0x%x raddr 0x%x rport 0x%x\n", __func__, ntohl(cm_id->local_addr.sin_addr.s_addr), @@ -2337,6 +2559,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) fail4: dst_release(ep->dst); fail3: + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); fail2: cm_id->rem_ref(cm_id); @@ -2351,7 +2574,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_listen_ep *ep; - might_sleep(); ep = alloc_ep(sizeof(*ep), GFP_KERNEL); @@ -2370,30 +2592,54 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) /* * Allocate a server TID. 
*/ - ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep); + if (dev->rdev.lldi.enable_fw_ofld_conn) + ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, PF_INET, ep); + else + ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep); + if (ep->stid == -1) { printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__); err = -ENOMEM; goto fail2; } - + insert_handle(dev, &dev->stid_idr, ep, ep->stid); state_set(&ep->com, LISTEN); - c4iw_init_wr_wait(&ep->com.wr_wait); - err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], ep->stid, - ep->com.local_addr.sin_addr.s_addr, - ep->com.local_addr.sin_port, - ep->com.dev->rdev.lldi.rxq_ids[0]); - if (err) - goto fail3; - - /* wait for pass_open_rpl */ - err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, - __func__); + if (dev->rdev.lldi.enable_fw_ofld_conn) { + do { + err = cxgb4_create_server_filter( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + ep->com.local_addr.sin_addr.s_addr, + ep->com.local_addr.sin_port, + 0, + ep->com.dev->rdev.lldi.rxq_ids[0], + 0, + 0); + if (err == -EBUSY) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(100)); + } + } while (err == -EBUSY); + } else { + c4iw_init_wr_wait(&ep->com.wr_wait); + err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], + ep->stid, ep->com.local_addr.sin_addr.s_addr, + ep->com.local_addr.sin_port, + 0, + ep->com.dev->rdev.lldi.rxq_ids[0]); + if (!err) + err = c4iw_wait_for_reply(&ep->com.dev->rdev, + &ep->com.wr_wait, + 0, 0, __func__); + } if (!err) { cm_id->provider_data = ep; goto out; } -fail3: + pr_err("%s cxgb4_create_server/filter failed err %d " \ + "stid %d laddr %08x lport %d\n", \ + __func__, err, ep->stid, + ntohl(ep->com.local_addr.sin_addr.s_addr), + ntohs(ep->com.local_addr.sin_port)); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); fail2: cm_id->rem_ref(cm_id); @@ -2412,12 +2658,19 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) might_sleep(); state_set(&ep->com, DEAD); - c4iw_init_wr_wait(&ep->com.wr_wait); - err = listen_stop(ep); - if (err) - goto done; - err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, - __func__); + if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn) { + err = cxgb4_remove_server_filter( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + ep->com.dev->rdev.lldi.rxq_ids[0], 0); + } else { + c4iw_init_wr_wait(&ep->com.wr_wait); + err = listen_stop(ep); + if (err) + goto done; + err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, + 0, 0, __func__); + } + remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); done: cm_id->rem_ref(cm_id); @@ -2481,10 +2734,13 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (close) { if (abrupt) { + set_bit(EP_DISC_ABORT, &ep->com.history); close_complete_upcall(ep); ret = send_abort(ep, NULL, gfp); - } else + } else { + set_bit(EP_DISC_CLOSE, &ep->com.history); ret = send_halfclose(ep, gfp); + } if (ret) fatal = 1; } @@ -2494,10 +2750,323 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) return ret; } -static int async_event(struct c4iw_dev *dev, struct sk_buff *skb) +static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, + struct cpl_fw6_msg_ofld_connection_wr_rpl *req) +{ + struct c4iw_ep *ep; + int atid = be32_to_cpu(req->tid); + + ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids, req->tid); + if (!ep) + return; + + switch (req->retval) { + case FW_ENOMEM: + 
set_bit(ACT_RETRY_NOMEM, &ep->com.history); + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + send_fw_act_open_req(ep, atid); + return; + } + case FW_EADDRINUSE: + set_bit(ACT_RETRY_INUSE, &ep->com.history); + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + send_fw_act_open_req(ep, atid); + return; + } + break; + default: + pr_info("%s unexpected ofld conn wr retval %d\n", + __func__, req->retval); + break; + } + pr_err("active ofld_connect_wr failure %d atid %d\n", + req->retval, atid); + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.act_ofld_conn_fails++; + mutex_unlock(&dev->rdev.stats.lock); + connect_reply_upcall(ep, status2errno(req->retval)); + state_set(&ep->com, DEAD); + remove_handle(dev, &dev->atid_idr, atid); + cxgb4_free_atid(dev->rdev.lldi.tids, atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_put_ep(&ep->com); +} + +static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, + struct cpl_fw6_msg_ofld_connection_wr_rpl *req) +{ + struct sk_buff *rpl_skb; + struct cpl_pass_accept_req *cpl; + int ret; + + rpl_skb = (struct sk_buff *)cpu_to_be64(req->cookie); + BUG_ON(!rpl_skb); + if (req->retval) { + PDBG("%s passive open failure %d\n", __func__, req->retval); + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.pas_ofld_conn_fails++; + mutex_unlock(&dev->rdev.stats.lock); + kfree_skb(rpl_skb); + } else { + cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb); + OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, + htonl(req->tid))); + ret = pass_accept_req(dev, rpl_skb); + if (!ret) + kfree_skb(rpl_skb); + } + return; +} + +static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_fw6_msg *rpl = cplhdr(skb); - c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); + struct cpl_fw6_msg_ofld_connection_wr_rpl *req; + + switch (rpl->type) { + case FW6_TYPE_CQE: + c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); + break; + case FW6_TYPE_OFLD_CONNECTION_WR_RPL: + req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data; + switch (req->t_state) { + case TCP_SYN_SENT: + active_ofld_conn_reply(dev, skb, req); + break; + case TCP_SYN_RECV: + passive_ofld_conn_reply(dev, skb, req); + break; + default: + pr_err("%s unexpected ofld conn wr state %d\n", + __func__, req->t_state); + break; + } + break; + } + return 0; +} + +static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) +{ + u32 l2info; + u16 vlantag, len, hdr_len; + u8 intf; + struct cpl_rx_pkt *cpl = cplhdr(skb); + struct cpl_pass_accept_req *req; + struct tcp_options_received tmp_opt; + + /* Store values from cpl_rx_pkt in temporary location. */ + vlantag = cpl->vlan; + len = cpl->len; + l2info = cpl->l2info; + hdr_len = cpl->hdr_len; + intf = cpl->iff; + + __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header)); + + /* + * We need to parse the TCP options from SYN packet. + * to generate cpl_pass_accept_req. 
+ */ + memset(&tmp_opt, 0, sizeof(tmp_opt)); + tcp_clear_options(&tmp_opt); + tcp_parse_options(skb, &tmp_opt, 0, 0, NULL); + + req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->l2info = cpu_to_be16(V_SYN_INTF(intf) | + V_SYN_MAC_IDX(G_RX_MACIDX(htonl(l2info))) | + F_SYN_XACT_MATCH); + req->hdr_len = cpu_to_be32(V_SYN_RX_CHAN(G_RX_CHAN(htonl(l2info))) | + V_TCP_HDR_LEN(G_RX_TCPHDR_LEN(htons(hdr_len))) | + V_IP_HDR_LEN(G_RX_IPHDR_LEN(htons(hdr_len))) | + V_ETH_HDR_LEN(G_RX_ETHHDR_LEN(htonl(l2info)))); + req->vlan = vlantag; + req->len = len; + req->tos_stid = cpu_to_be32(PASS_OPEN_TID(stid) | + PASS_OPEN_TOS(tos)); + req->tcpopt.mss = htons(tmp_opt.mss_clamp); + if (tmp_opt.wscale_ok) + req->tcpopt.wsf = tmp_opt.snd_wscale; + req->tcpopt.tstamp = tmp_opt.saw_tstamp; + if (tmp_opt.sack_ok) + req->tcpopt.sack = 1; + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0)); + return; +} + +static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, + __be32 laddr, __be16 lport, + __be32 raddr, __be16 rport, + u32 rcv_isn, u32 filter, u16 window, + u32 rss_qid, u8 port_id) +{ + struct sk_buff *req_skb; + struct fw_ofld_connection_wr *req; + struct cpl_pass_accept_req *cpl = cplhdr(skb); + + req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL); + req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL(1)); + req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); + req->le.version_cpl = htonl(F_FW_OFLD_CONNECTION_WR_CPL); + req->le.filter = filter; + req->le.lport = lport; + req->le.pport = rport; + req->le.u.ipv4.lip = laddr; + req->le.u.ipv4.pip = raddr; + req->tcb.rcv_nxt = htonl(rcv_isn + 1); + req->tcb.rcv_adv = htons(window); + req->tcb.t_state_to_astid = + htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_RECV) | + V_FW_OFLD_CONNECTION_WR_RCV_SCALE(cpl->tcpopt.wsf) | + V_FW_OFLD_CONNECTION_WR_ASTID( + GET_PASS_OPEN_TID(ntohl(cpl->tos_stid)))); + + /* + * We store the qid in opt2 which will be used by the firmware + * to send us the wr response. + */ + req->tcb.opt2 = htonl(V_RSS_QUEUE(rss_qid)); + + /* + * We initialize the MSS index in TCB to 0xF. + * So that when driver sends cpl_pass_accept_rpl + * TCB picks up the correct value. If this was 0 + * TP will ignore any value > 0 for MSS index. + */ + req->tcb.opt0 = cpu_to_be64(V_MSS_IDX(0xF)); + req->cookie = cpu_to_be64((u64)skb); + + set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); + cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); +} + +/* + * Handler for CPL_RX_PKT message. Need to handle cpl_rx_pkt + * messages when a filter is being used instead of server to + * redirect a syn packet. When packets hit filter they are redirected + * to the offload queue and driver tries to establish the connection + * using firmware work request. 
+ */ +static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) +{ + int stid; + unsigned int filter; + struct ethhdr *eh = NULL; + struct vlan_ethhdr *vlan_eh = NULL; + struct iphdr *iph; + struct tcphdr *tcph; + struct rss_header *rss = (void *)skb->data; + struct cpl_rx_pkt *cpl = (void *)skb->data; + struct cpl_pass_accept_req *req = (void *)(rss + 1); + struct l2t_entry *e; + struct dst_entry *dst; + struct rtable *rt; + struct c4iw_ep *lep; + u16 window; + struct port_info *pi; + struct net_device *pdev; + u16 rss_qid; + int step; + u32 tx_chan; + struct neighbour *neigh; + + /* Drop all non-SYN packets */ + if (!(cpl->l2info & cpu_to_be32(F_RXF_SYN))) + goto reject; + + /* + * Drop all packets which did not hit the filter. + * Unlikely to happen. + */ + if (!(rss->filter_hit && rss->filter_tid)) + goto reject; + + /* + * Calculate the server tid from filter hit index from cpl_rx_pkt. + */ + stid = cpu_to_be32(rss->hash_val) - dev->rdev.lldi.tids->sftid_base + + dev->rdev.lldi.tids->nstids; + + lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid); + if (!lep) { + PDBG("%s connect request on invalid stid %d\n", __func__, stid); + goto reject; + } + + if (G_RX_ETHHDR_LEN(ntohl(cpl->l2info)) == ETH_HLEN) { + eh = (struct ethhdr *)(req + 1); + iph = (struct iphdr *)(eh + 1); + } else { + vlan_eh = (struct vlan_ethhdr *)(req + 1); + iph = (struct iphdr *)(vlan_eh + 1); + skb->vlan_tci = ntohs(cpl->vlan); + } + + if (iph->version != 0x4) + goto reject; + + tcph = (struct tcphdr *)(iph + 1); + skb_set_network_header(skb, (void *)iph - (void *)rss); + skb_set_transport_header(skb, (void *)tcph - (void *)rss); + skb_get(skb); + + PDBG("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__, + ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr), + ntohs(tcph->source), iph->tos); + + rt = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source, + iph->tos); + if (!rt) { + pr_err("%s - failed to find dst entry!\n", + __func__); + goto reject; + } + dst = &rt->dst; + neigh = dst_neigh_lookup_skb(dst, skb); + + if (neigh->dev->flags & IFF_LOOPBACK) { + pdev = ip_dev_find(&init_net, iph->daddr); + e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, + pdev, 0); + pi = (struct port_info *)netdev_priv(pdev); + tx_chan = cxgb4_port_chan(pdev); + dev_put(pdev); + } else { + e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, + neigh->dev, 0); + pi = (struct port_info *)netdev_priv(neigh->dev); + tx_chan = cxgb4_port_chan(neigh->dev); + } + if (!e) { + pr_err("%s - failed to allocate l2t entry!\n", + __func__); + goto free_dst; + } + + step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; + rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step]; + window = htons(tcph->window); + + /* Calcuate filter portion for LE region. */ + filter = cpu_to_be32(select_ntuple(dev, dst, e)); + + /* + * Synthesize the cpl_pass_accept_req. We have everything except the + * TID. Once firmware sends a reply with TID we update the TID field + * in cpl and pass it through the regular cpl_pass_accept_req path. 
+ */ + build_cpl_pass_accept_req(skb, stid, iph->tos); + send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr, + tcph->source, ntohl(tcph->seq), filter, window, + rss_qid, pi->port_id); + cxgb4_l2t_release(e); +free_dst: + dst_release(dst); +reject: return 0; } @@ -2520,7 +3089,8 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = { [CPL_CLOSE_CON_RPL] = close_con_rpl, [CPL_RDMA_TERMINATE] = terminate, [CPL_FW4_ACK] = fw4_ack, - [CPL_FW6_MSG] = async_event + [CPL_FW6_MSG] = deferred_fw6_msg, + [CPL_RX_PKT] = rx_pkt }; static void process_timeout(struct c4iw_ep *ep) @@ -2531,6 +3101,7 @@ static void process_timeout(struct c4iw_ep *ep) mutex_lock(&ep->com.mutex); PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, ep->com.state); + set_bit(TIMEDOUT, &ep->com.history); switch (ep->com.state) { case MPA_REQ_SENT: __state_set(&ep->com, ABORTING); @@ -2651,7 +3222,7 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s type %u\n", __func__, rpl->type); switch (rpl->type) { - case 1: + case FW6_TYPE_WR_RPL: ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff); wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1]; PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret); @@ -2659,7 +3230,8 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) c4iw_wake_up(wr_waitp, ret ? -ret : 0); kfree_skb(skb); break; - case 2: + case FW6_TYPE_CQE: + case FW6_TYPE_OFLD_CONNECTION_WR_RPL: sched(dev, skb); break; default: @@ -2722,7 +3294,8 @@ c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = { [CPL_RDMA_TERMINATE] = sched, [CPL_FW4_ACK] = sched, [CPL_SET_TCB_RPL] = set_tcb_rpl, - [CPL_FW6_MSG] = fw6_msg + [CPL_FW6_MSG] = fw6_msg, + [CPL_RX_PKT] = sched }; int __init c4iw_cm_init(void) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index cb4ecd7..ba11c76 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -279,6 +279,11 @@ static int stats_show(struct seq_file *seq, void *v) seq_printf(seq, " DB State: %s Transitions %llu\n", db_state_str[dev->db_state], dev->rdev.stats.db_state_transitions); + seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full); + seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n", + dev->rdev.stats.act_ofld_conn_fails); + seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n", + dev->rdev.stats.pas_ofld_conn_fails); return 0; } @@ -309,6 +314,9 @@ static ssize_t stats_clear(struct file *file, const char __user *buf, dev->rdev.stats.db_empty = 0; dev->rdev.stats.db_drop = 0; dev->rdev.stats.db_state_transitions = 0; + dev->rdev.stats.tcam_full = 0; + dev->rdev.stats.act_ofld_conn_fails = 0; + dev->rdev.stats.pas_ofld_conn_fails = 0; mutex_unlock(&dev->rdev.stats.lock); return count; } @@ -322,6 +330,113 @@ static const struct file_operations stats_debugfs_fops = { .write = stats_clear, }; +static int dump_ep(int id, void *p, void *data) +{ + struct c4iw_ep *ep = p; + struct c4iw_debugfs_data *epd = data; + int space; + int cc; + + space = epd->bufsize - epd->pos - 1; + if (space == 0) + return 1; + + cc = snprintf(epd->buf + epd->pos, space, + "ep %p cm_id %p qp %p state %d flags 0x%lx history 0x%lx " + "hwtid %d atid %d %pI4:%d <-> %pI4:%d\n", + ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state, + ep->com.flags, ep->com.history, ep->hwtid, ep->atid, + &ep->com.local_addr.sin_addr.s_addr, + ntohs(ep->com.local_addr.sin_port), + &ep->com.remote_addr.sin_addr.s_addr, + ntohs(ep->com.remote_addr.sin_port)); + if (cc < space) + epd->pos 
+= cc; + return 0; +} + +static int dump_listen_ep(int id, void *p, void *data) +{ + struct c4iw_listen_ep *ep = p; + struct c4iw_debugfs_data *epd = data; + int space; + int cc; + + space = epd->bufsize - epd->pos - 1; + if (space == 0) + return 1; + + cc = snprintf(epd->buf + epd->pos, space, + "ep %p cm_id %p state %d flags 0x%lx stid %d backlog %d " + "%pI4:%d\n", ep, ep->com.cm_id, (int)ep->com.state, + ep->com.flags, ep->stid, ep->backlog, + &ep->com.local_addr.sin_addr.s_addr, + ntohs(ep->com.local_addr.sin_port)); + if (cc < space) + epd->pos += cc; + return 0; +} + +static int ep_release(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *epd = file->private_data; + if (!epd) { + pr_info("%s null qpd?\n", __func__); + return 0; + } + vfree(epd->buf); + kfree(epd); + return 0; +} + +static int ep_open(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *epd; + int ret = 0; + int count = 1; + + epd = kmalloc(sizeof(*epd), GFP_KERNEL); + if (!epd) { + ret = -ENOMEM; + goto out; + } + epd->devp = inode->i_private; + epd->pos = 0; + + spin_lock_irq(&epd->devp->lock); + idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count); + idr_for_each(&epd->devp->atid_idr, count_idrs, &count); + idr_for_each(&epd->devp->stid_idr, count_idrs, &count); + spin_unlock_irq(&epd->devp->lock); + + epd->bufsize = count * 160; + epd->buf = vmalloc(epd->bufsize); + if (!epd->buf) { + ret = -ENOMEM; + goto err1; + } + + spin_lock_irq(&epd->devp->lock); + idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd); + idr_for_each(&epd->devp->atid_idr, dump_ep, epd); + idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd); + spin_unlock_irq(&epd->devp->lock); + + file->private_data = epd; + goto out; +err1: + kfree(epd); +out: + return ret; +} + +static const struct file_operations ep_debugfs_fops = { + .owner = THIS_MODULE, + .open = ep_open, + .release = ep_release, + .read = debugfs_read, +}; + static int setup_debugfs(struct c4iw_dev *devp) { struct dentry *de; @@ -344,6 +459,11 @@ static int setup_debugfs(struct c4iw_dev *devp) if (de && de->d_inode) de->d_inode->i_size = 4096; + de = debugfs_create_file("eps", S_IWUSR, devp->debugfs_root, + (void *)devp, &ep_debugfs_fops); + if (de && de->d_inode) + de->d_inode->i_size = 4096; + return 0; } @@ -475,6 +595,9 @@ static void c4iw_dealloc(struct uld_ctx *ctx) idr_destroy(&ctx->dev->cqidr); idr_destroy(&ctx->dev->qpidr); idr_destroy(&ctx->dev->mmidr); + idr_destroy(&ctx->dev->hwtid_idr); + idr_destroy(&ctx->dev->stid_idr); + idr_destroy(&ctx->dev->atid_idr); iounmap(ctx->dev->rdev.oc_mw_kva); ib_dealloc_device(&ctx->dev->ibdev); ctx->dev = NULL; @@ -532,6 +655,9 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) idr_init(&devp->cqidr); idr_init(&devp->qpidr); idr_init(&devp->mmidr); + idr_init(&devp->hwtid_idr); + idr_init(&devp->stid_idr); + idr_init(&devp->atid_idr); spin_lock_init(&devp->lock); mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); @@ -577,14 +703,76 @@ out: return ctx; } +static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl, + const __be64 *rsp, + u32 pktshift) +{ + struct sk_buff *skb; + + /* + * Allocate space for cpl_pass_accept_req which will be synthesized by + * driver. Once the driver synthesizes the request the skb will go + * through the regular cpl_pass_accept_req processing. + * The math here assumes sizeof cpl_pass_accept_req >= sizeof + * cpl_rx_pkt. 
+ */ + skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header) - pktshift, GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + + __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header) - pktshift); + + /* + * This skb will contain: + * rss_header from the rspq descriptor (1 flit) + * cpl_rx_pkt struct from the rspq descriptor (2 flits) + * space for the difference between the size of an + * rx_pkt and pass_accept_req cpl (1 flit) + * the packet data from the gl + */ + skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header)); + skb_copy_to_linear_data_offset(skb, sizeof(struct rss_header) + + sizeof(struct cpl_pass_accept_req), + gl->va + pktshift, + gl->tot_len - pktshift); + return skb; +} + +static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl, + const __be64 *rsp) +{ + unsigned int opcode = *(u8 *)rsp; + struct sk_buff *skb; + + if (opcode != CPL_RX_PKT) + goto out; + + skb = copy_gl_to_skb_pkt(gl , rsp, dev->rdev.lldi.sge_pktshift); + if (skb == NULL) + goto out; + + if (c4iw_handlers[opcode] == NULL) { + pr_info("%s no handler opcode 0x%x...\n", __func__, + opcode); + kfree_skb(skb); + goto out; + } + c4iw_handlers[opcode](dev, skb); + return 1; +out: + return 0; +} + static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, const struct pkt_gl *gl) { struct uld_ctx *ctx = handle; struct c4iw_dev *dev = ctx->dev; struct sk_buff *skb; - const struct cpl_act_establish *rpl; - unsigned int opcode; + u8 opcode; if (gl == NULL) { /* omit RSS and rsp_ctrl at end of descriptor */ @@ -601,19 +789,29 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, u32 qid = be32_to_cpu(rc->pldbuflen_qid); c4iw_ev_handler(dev, qid); return 0; + } else if (unlikely(*(u8 *)rsp != *(u8 *)gl->va)) { + if (recv_rx_pkt(dev, gl, rsp)) + return 0; + + pr_info("%s: unexpected FL contents at %p, " \ + "RSS %#llx, FL %#llx, len %u\n", + pci_name(ctx->lldi.pdev), gl->va, + (unsigned long long)be64_to_cpu(*rsp), + (unsigned long long)be64_to_cpu(*(u64 *)gl->va), + gl->tot_len); + + return 0; } else { skb = cxgb4_pktgl_to_skb(gl, 128, 128); if (unlikely(!skb)) goto nomem; } - rpl = cplhdr(skb); - opcode = rpl->ot.opcode; - + opcode = *(u8 *)rsp; if (c4iw_handlers[opcode]) c4iw_handlers[opcode](dev, skb); else - printk(KERN_INFO "%s no handler opcode 0x%x...\n", __func__, + pr_info("%s no handler opcode 0x%x...\n", __func__, opcode); return 0; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 9beb3a9..9c1644f 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -130,6 +130,9 @@ struct c4iw_stats { u64 db_empty; u64 db_drop; u64 db_state_transitions; + u64 tcam_full; + u64 act_ofld_conn_fails; + u64 pas_ofld_conn_fails; }; struct c4iw_rdev { @@ -223,6 +226,9 @@ struct c4iw_dev { struct dentry *debugfs_root; enum db_state db_state; int qpcnt; + struct idr hwtid_idr; + struct idr atid_idr; + struct idr stid_idr; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) @@ -712,6 +718,31 @@ enum c4iw_ep_flags { CLOSE_SENT = 3, }; +enum c4iw_ep_history { + ACT_OPEN_REQ = 0, + ACT_OFLD_CONN = 1, + ACT_OPEN_RPL = 2, + ACT_ESTAB = 3, + PASS_ACCEPT_REQ = 4, + PASS_ESTAB = 5, + ABORT_UPCALL = 6, + ESTAB_UPCALL = 7, + CLOSE_UPCALL = 8, + ULP_ACCEPT = 9, + ULP_REJECT = 10, + TIMEDOUT = 11, + PEER_ABORT = 12, + PEER_CLOSE = 13, + CONNREQ_UPCALL = 14, + ABORT_CONN = 
15, + DISCONN_UPCALL = 16, + EP_DISC_CLOSE = 17, + EP_DISC_ABORT = 18, + CONN_RPL_UPCALL = 19, + ACT_RETRY_NOMEM = 20, + ACT_RETRY_INUSE = 21 +}; + struct c4iw_ep_common { struct iw_cm_id *cm_id; struct c4iw_qp *qp; @@ -723,6 +754,7 @@ struct c4iw_ep_common { struct sockaddr_in remote_addr; struct c4iw_wr_wait wr_wait; unsigned long flags; + unsigned long history; }; struct c4iw_listen_ep { @@ -760,6 +792,7 @@ struct c4iw_ep { u8 tos; u8 retry_with_mpa_v1; u8 tried_with_mpa_v1; + unsigned int retry_count; }; static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 72ae63f..03103d2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -752,6 +752,9 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ dev->trans_start = jiffies; ++tx->tx_head; + skb_orphan(skb); + skb_dst_drop(skb); + if (++priv->tx_outstanding == ipoib_sendq_size) { ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", tx->qp->qp_num); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f10221f..a1bca70 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -615,8 +615,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, address->last_send = priv->tx_head; ++priv->tx_head; - skb_orphan(skb); + skb_orphan(skb); + skb_dst_drop(skb); } if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 378988b..6db997c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -35,6 +35,8 @@ #ifndef __CXGB4_H__ #define __CXGB4_H__ +#include "t4_hw.h" + #include <linux/bitops.h> #include <linux/cache.h> #include <linux/interrupt.h> @@ -212,6 +214,8 @@ struct tp_err_stats { struct tp_params { unsigned int ntxchan; /* # of Tx channels */ unsigned int tre; /* log2 of core clocks per TP tick */ + unsigned short tx_modq_map; /* TX modulation scheduler queue to */ + /* channel map */ uint32_t dack_re; /* DACK timer resolution */ unsigned short tx_modq[NCHAN]; /* channel to modulation queue map */ @@ -526,6 +530,7 @@ struct adapter { struct net_device *port[MAX_NPORTS]; u8 chan_map[NCHAN]; /* channel -> port map */ + u32 filter_mode; unsigned int l2t_start; unsigned int l2t_end; struct l2t_data *l2t; @@ -545,6 +550,129 @@ struct adapter { spinlock_t stats_lock; }; +/* Defined bit width of user definable filter tuples + */ +#define ETHTYPE_BITWIDTH 16 +#define FRAG_BITWIDTH 1 +#define MACIDX_BITWIDTH 9 +#define FCOE_BITWIDTH 1 +#define IPORT_BITWIDTH 3 +#define MATCHTYPE_BITWIDTH 3 +#define PROTO_BITWIDTH 8 +#define TOS_BITWIDTH 8 +#define PF_BITWIDTH 8 +#define VF_BITWIDTH 8 +#define IVLAN_BITWIDTH 16 +#define OVLAN_BITWIDTH 16 + +/* Filter matching rules. These consist of a set of ingress packet field + * (value, mask) tuples. The associated ingress packet field matches the + * tuple when ((field & mask) == value). (Thus a wildcard "don't care" field + * rule can be constructed by specifying a tuple of (0, 0).) A filter rule + * matches an ingress packet when all of the individual individual field + * matching rules are true. + * + * Partial field masks are always valid, however, while it may be easy to + * understand their meanings for some fields (e.g. 
IP address to match a + * subnet), for others making sensible partial masks is less intuitive (e.g. + * MPS match type) ... + * + * Most of the following data structures are modeled on T4 capabilities. + * Drivers for earlier chips use the subsets which make sense for those chips. + * We really need to come up with a hardware-independent mechanism to + * represent hardware filter capabilities ... + */ +struct ch_filter_tuple { + /* Compressed header matching field rules. The TP_VLAN_PRI_MAP + * register selects which of these fields will participate in the + * filter match rules -- up to a maximum of 36 bits. Because + * TP_VLAN_PRI_MAP is a global register, all filters must use the same + * set of fields. + */ + uint32_t ethtype:ETHTYPE_BITWIDTH; /* Ethernet type */ + uint32_t frag:FRAG_BITWIDTH; /* IP fragmentation header */ + uint32_t ivlan_vld:1; /* inner VLAN valid */ + uint32_t ovlan_vld:1; /* outer VLAN valid */ + uint32_t pfvf_vld:1; /* PF/VF valid */ + uint32_t macidx:MACIDX_BITWIDTH; /* exact match MAC index */ + uint32_t fcoe:FCOE_BITWIDTH; /* FCoE packet */ + uint32_t iport:IPORT_BITWIDTH; /* ingress port */ + uint32_t matchtype:MATCHTYPE_BITWIDTH; /* MPS match type */ + uint32_t proto:PROTO_BITWIDTH; /* protocol type */ + uint32_t tos:TOS_BITWIDTH; /* TOS/Traffic Type */ + uint32_t pf:PF_BITWIDTH; /* PCI-E PF ID */ + uint32_t vf:VF_BITWIDTH; /* PCI-E VF ID */ + uint32_t ivlan:IVLAN_BITWIDTH; /* inner VLAN */ + uint32_t ovlan:OVLAN_BITWIDTH; /* outer VLAN */ + + /* Uncompressed header matching field rules. These are always + * available for field rules. + */ + uint8_t lip[16]; /* local IP address (IPv4 in [3:0]) */ + uint8_t fip[16]; /* foreign IP address (IPv4 in [3:0]) */ + uint16_t lport; /* local port */ + uint16_t fport; /* foreign port */ +}; + +/* A filter ioctl command. + */ +struct ch_filter_specification { + /* Administrative fields for filter. + */ + uint32_t hitcnts:1; /* count filter hits in TCB */ + uint32_t prio:1; /* filter has priority over active/server */ + + /* Fundamental filter typing. This is the one element of filter + * matching that doesn't exist as a (value, mask) tuple. + */ + uint32_t type:1; /* 0 => IPv4, 1 => IPv6 */ + + /* Packet dispatch information. Ingress packets which match the + * filter rules will be dropped, passed to the host or switched back + * out as egress packets. + */ + uint32_t action:2; /* drop, pass, switch */ + + uint32_t rpttid:1; /* report TID in RSS hash field */ + + uint32_t dirsteer:1; /* 0 => RSS, 1 => steer to iq */ + uint32_t iq:10; /* ingress queue */ + + uint32_t maskhash:1; /* dirsteer=0: store RSS hash in TCB */ + uint32_t dirsteerhash:1;/* dirsteer=1: 0 => TCB contains RSS hash */ + /* 1 => TCB contains IQ ID */ + + /* Switch proxy/rewrite fields. An ingress packet which matches a + * filter with "switch" set will be looped back out as an egress + * packet -- potentially with some Ethernet header rewriting. + */ + uint32_t eport:2; /* egress port to switch packet out */ + uint32_t newdmac:1; /* rewrite destination MAC address */ + uint32_t newsmac:1; /* rewrite source MAC address */ + uint32_t newvlan:2; /* rewrite VLAN Tag */ + uint8_t dmac[ETH_ALEN]; /* new destination MAC address */ + uint8_t smac[ETH_ALEN]; /* new source MAC address */ + uint16_t vlan; /* VLAN Tag to insert */ + + /* Filter rule value/mask pairs. 
+ */ + struct ch_filter_tuple val; + struct ch_filter_tuple mask; +}; + +enum { + FILTER_PASS = 0, /* default */ + FILTER_DROP, + FILTER_SWITCH +}; + +enum { + VLAN_NOCHANGE = 0, /* default */ + VLAN_REMOVE, + VLAN_INSERT, + VLAN_REWRITE +}; + static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr) { return readl(adap->regs + reg_addr); @@ -701,6 +829,12 @@ static inline int t4_wr_mbox_ns(struct adapter *adap, int mbox, const void *cmd, void t4_write_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, const u32 *vals, unsigned int nregs, unsigned int start_idx); +void t4_read_indirect(struct adapter *adap, unsigned int addr_reg, + unsigned int data_reg, u32 *vals, unsigned int nregs, + unsigned int start_idx); + +struct fw_filter_wr; + void t4_intr_enable(struct adapter *adapter); void t4_intr_disable(struct adapter *adapter); int t4_slow_intr_handler(struct adapter *adapter); @@ -737,6 +871,8 @@ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4, void t4_load_mtus(struct adapter *adap, const unsigned short *mtus, const unsigned short *alpha, const unsigned short *beta); +void t4_mk_filtdelwr(unsigned int ftid, struct fw_filter_wr *wr, int qid); + void t4_wol_magic_enable(struct adapter *adap, unsigned int port, const u8 *addr); int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index a27b4ae..f0718e1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -175,6 +175,30 @@ enum { MIN_FL_ENTRIES = 16 }; +/* Host shadow copy of ingress filter entry. This is in host native format + * and doesn't match the ordering or bit order, etc. of the hardware of the + * firmware command. The use of bit-field structure elements is purely to + * remind ourselves of the field size limitations and save memory in the case + * where the filter table is large. + */ +struct filter_entry { + /* Administrative fields for filter. + */ + u32 valid:1; /* filter allocated and valid */ + u32 locked:1; /* filter is administratively locked */ + + u32 pending:1; /* filter action is pending firmware reply */ + u32 smtidx:8; /* Source MAC Table index for smac */ + struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ + + /* The filter itself. Most of this is a straight copy of information + * provided by the extended ioctl(). Some fields are translated to + * internal forms -- for instance the Ingress Queue ID passed in from + * the ioctl() is translated into the Absolute Ingress Queue ID. + */ + struct ch_filter_specification fs; +}; + #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\ NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) @@ -325,6 +349,9 @@ enum { static unsigned int tp_vlan_pri_map = TP_VLAN_PRI_MAP_DEFAULT; +module_param(tp_vlan_pri_map, uint, 0644); +MODULE_PARM_DESC(tp_vlan_pri_map, "global compressed filter configuration"); + static struct dentry *cxgb4_debugfs_root; static LIST_HEAD(adapter_list); @@ -506,8 +533,67 @@ static int link_start(struct net_device *dev) return ret; } -/* - * Response queue handler for the FW event queue. +/* Clear a filter and release any of its resources that we own. This also + * clears the filter's "pending" status. 
+ */ +static void clear_filter(struct adapter *adap, struct filter_entry *f) +{ + /* If the new or old filter have loopback rewriteing rules then we'll + * need to free any existing Layer Two Table (L2T) entries of the old + * filter rule. The firmware will handle freeing up any Source MAC + * Table (SMT) entries used for rewriting Source MAC Addresses in + * loopback rules. + */ + if (f->l2t) + cxgb4_l2t_release(f->l2t); + + /* The zeroing of the filter rule below clears the filter valid, + * pending, locked flags, l2t pointer, etc. so it's all we need for + * this operation. + */ + memset(f, 0, sizeof(*f)); +} + +/* Handle a filter write/deletion reply. + */ +static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) +{ + unsigned int idx = GET_TID(rpl); + unsigned int nidx = idx - adap->tids.ftid_base; + unsigned int ret; + struct filter_entry *f; + + if (idx >= adap->tids.ftid_base && nidx < + (adap->tids.nftids + adap->tids.nsftids)) { + idx = nidx; + ret = GET_TCB_COOKIE(rpl->cookie); + f = &adap->tids.ftid_tab[idx]; + + if (ret == FW_FILTER_WR_FLT_DELETED) { + /* Clear the filter when we get confirmation from the + * hardware that the filter has been deleted. + */ + clear_filter(adap, f); + } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { + dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", + idx); + clear_filter(adap, f); + } else if (ret == FW_FILTER_WR_FLT_ADDED) { + f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; + f->pending = 0; /* asynchronous setup completed */ + f->valid = 1; + } else { + /* Something went wrong. Issue a warning about the + * problem and clear everything out. + */ + dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", + idx, ret); + clear_filter(adap, f); + } + } +} + +/* Response queue handler for the FW event queue. */ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, const struct pkt_gl *gl) @@ -542,6 +628,10 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, const struct cpl_l2t_write_rpl *p = (void *)rsp; do_l2t_write_rpl(q->adap, p); + } else if (opcode == CPL_SET_TCB_RPL) { + const struct cpl_set_tcb_rpl *p = (void *)rsp; + + filter_rpl(q->adap, p); } else dev_err(q->adap->pdev_dev, "unexpected CPL %#x on FW event queue\n", opcode); @@ -983,6 +1073,148 @@ static void t4_free_mem(void *addr) kfree(addr); } +/* Send a Work Request to write the filter at a specified index. We construct + * a Firmware Filter Work Request to have the work done and put the indicated + * filter into "pending" mode which will prevent any further actions against + * it till we get a reply from the firmware on the completion status of the + * request. + */ +static int set_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct sk_buff *skb; + struct fw_filter_wr *fwr; + unsigned int ftid; + + /* If the new filter requires loopback Destination MAC and/or VLAN + * rewriting then we need to allocate a Layer 2 Table (L2T) entry for + * the filter. 
+ */ + if (f->fs.newdmac || f->fs.newvlan) { + /* allocate L2T entry for new filter */ + f->l2t = t4_l2t_alloc_switching(adapter->l2t); + if (f->l2t == NULL) + return -EAGAIN; + if (t4_l2t_set_switching(adapter, f->l2t, f->fs.vlan, + f->fs.eport, f->fs.dmac)) { + cxgb4_l2t_release(f->l2t); + f->l2t = NULL; + return -ENOMEM; + } + } + + ftid = adapter->tids.ftid_base + fidx; + + skb = alloc_skb(sizeof(*fwr), GFP_KERNEL | __GFP_NOFAIL); + fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); + memset(fwr, 0, sizeof(*fwr)); + + /* It would be nice to put most of the following in t4_hw.c but most + * of the work is translating the cxgbtool ch_filter_specification + * into the Work Request and the definition of that structure is + * currently in cxgbtool.h which isn't appropriate to pull into the + * common code. We may eventually try to come up with a more neutral + * filter specification structure but for now it's easiest to simply + * put this fairly direct code in line ... + */ + fwr->op_pkd = htonl(FW_WR_OP(FW_FILTER_WR)); + fwr->len16_pkd = htonl(FW_WR_LEN16(sizeof(*fwr)/16)); + fwr->tid_to_iq = + htonl(V_FW_FILTER_WR_TID(ftid) | + V_FW_FILTER_WR_RQTYPE(f->fs.type) | + V_FW_FILTER_WR_NOREPLY(0) | + V_FW_FILTER_WR_IQ(f->fs.iq)); + fwr->del_filter_to_l2tix = + htonl(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) | + V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) | + V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) | + V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) | + V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) | + V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) | + V_FW_FILTER_WR_DMAC(f->fs.newdmac) | + V_FW_FILTER_WR_SMAC(f->fs.newsmac) | + V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT || + f->fs.newvlan == VLAN_REWRITE) | + V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE || + f->fs.newvlan == VLAN_REWRITE) | + V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) | + V_FW_FILTER_WR_TXCHAN(f->fs.eport) | + V_FW_FILTER_WR_PRIO(f->fs.prio) | + V_FW_FILTER_WR_L2TIX(f->l2t ? 
f->l2t->idx : 0)); + fwr->ethtype = htons(f->fs.val.ethtype); + fwr->ethtypem = htons(f->fs.mask.ethtype); + fwr->frag_to_ovlan_vldm = + (V_FW_FILTER_WR_FRAG(f->fs.val.frag) | + V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) | + V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.ivlan_vld) | + V_FW_FILTER_WR_OVLAN_VLD(f->fs.val.ovlan_vld) | + V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.ivlan_vld) | + V_FW_FILTER_WR_OVLAN_VLDM(f->fs.mask.ovlan_vld)); + fwr->smac_sel = 0; + fwr->rx_chan_rx_rpl_iq = + htons(V_FW_FILTER_WR_RX_CHAN(0) | + V_FW_FILTER_WR_RX_RPL_IQ(adapter->sge.fw_evtq.abs_id)); + fwr->maci_to_matchtypem = + htonl(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | + V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | + V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) | + V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) | + V_FW_FILTER_WR_PORT(f->fs.val.iport) | + V_FW_FILTER_WR_PORTM(f->fs.mask.iport) | + V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) | + V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype)); + fwr->ptcl = f->fs.val.proto; + fwr->ptclm = f->fs.mask.proto; + fwr->ttyp = f->fs.val.tos; + fwr->ttypm = f->fs.mask.tos; + fwr->ivlan = htons(f->fs.val.ivlan); + fwr->ivlanm = htons(f->fs.mask.ivlan); + fwr->ovlan = htons(f->fs.val.ovlan); + fwr->ovlanm = htons(f->fs.mask.ovlan); + memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip)); + memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm)); + memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip)); + memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm)); + fwr->lp = htons(f->fs.val.lport); + fwr->lpm = htons(f->fs.mask.lport); + fwr->fp = htons(f->fs.val.fport); + fwr->fpm = htons(f->fs.mask.fport); + if (f->fs.newsmac) + memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); + t4_ofld_send(adapter, skb); + return 0; +} + +/* Delete the filter at a specified index. + */ +static int del_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct sk_buff *skb; + struct fw_filter_wr *fwr; + unsigned int len, ftid; + + len = sizeof(*fwr); + ftid = adapter->tids.ftid_base + fidx; + + skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL); + fwr = (struct fw_filter_wr *)__skb_put(skb, len); + t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + t4_mgmt_tx(adapter, skb); + return 0; +} + static inline int is_offload(const struct adapter *adap) { return adap->params.offload; @@ -2195,7 +2427,7 @@ int cxgb4_alloc_atid(struct tid_info *t, void *data) if (t->afree) { union aopen_entry *p = t->afree; - atid = p - t->atid_tab; + atid = (p - t->atid_tab) + t->atid_base; t->afree = p->next; p->data = data; t->atids_in_use++; @@ -2210,7 +2442,7 @@ EXPORT_SYMBOL(cxgb4_alloc_atid); */ void cxgb4_free_atid(struct tid_info *t, unsigned int atid) { - union aopen_entry *p = &t->atid_tab[atid]; + union aopen_entry *p = &t->atid_tab[atid - t->atid_base]; spin_lock_bh(&t->atid_lock); p->next = t->afree; @@ -2249,8 +2481,34 @@ int cxgb4_alloc_stid(struct tid_info *t, int family, void *data) } EXPORT_SYMBOL(cxgb4_alloc_stid); -/* - * Release a server TID. +/* Allocate a server filter TID and set it to the supplied value. 
+ */ +int cxgb4_alloc_sftid(struct tid_info *t, int family, void *data) +{ + int stid; + + spin_lock_bh(&t->stid_lock); + if (family == PF_INET) { + stid = find_next_zero_bit(t->stid_bmap, + t->nstids + t->nsftids, t->nstids); + if (stid < (t->nstids + t->nsftids)) + __set_bit(stid, t->stid_bmap); + else + stid = -1; + } else { + stid = -1; + } + if (stid >= 0) { + t->stid_tab[stid].data = data; + stid += t->stid_base; + t->stids_in_use++; + } + spin_unlock_bh(&t->stid_lock); + return stid; +} +EXPORT_SYMBOL(cxgb4_alloc_sftid); + +/* Release a server TID. */ void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family) { @@ -2362,18 +2620,26 @@ EXPORT_SYMBOL(cxgb4_remove_tid); static int tid_init(struct tid_info *t) { size_t size; + unsigned int stid_bmap_size; unsigned int natids = t->natids; - size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) + + stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); + size = t->ntids * sizeof(*t->tid_tab) + + natids * sizeof(*t->atid_tab) + t->nstids * sizeof(*t->stid_tab) + - BITS_TO_LONGS(t->nstids) * sizeof(long); + t->nsftids * sizeof(*t->stid_tab) + + stid_bmap_size * sizeof(long) + + t->nftids * sizeof(*t->ftid_tab) + + t->nsftids * sizeof(*t->ftid_tab); + t->tid_tab = t4_alloc_mem(size); if (!t->tid_tab) return -ENOMEM; t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids]; t->stid_tab = (struct serv_entry *)&t->atid_tab[natids]; - t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids]; + t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids]; + t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; spin_lock_init(&t->stid_lock); spin_lock_init(&t->atid_lock); @@ -2388,7 +2654,7 @@ static int tid_init(struct tid_info *t) t->atid_tab[natids - 1].next = &t->atid_tab[natids]; t->afree = t->atid_tab; } - bitmap_zero(t->stid_bmap, t->nstids); + bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); return 0; } @@ -2404,7 +2670,8 @@ static int tid_init(struct tid_info *t) * Returns <0 on error and one of the %NET_XMIT_* values on success. */ int cxgb4_create_server(const struct net_device *dev, unsigned int stid, - __be32 sip, __be16 sport, unsigned int queue) + __be32 sip, __be16 sport, __be16 vlan, + unsigned int queue) { unsigned int chan; struct sk_buff *skb; @@ -2750,6 +3017,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) { void *handle; struct cxgb4_lld_info lli; + unsigned short i; lli.pdev = adap->pdev; lli.l2t = adap->l2t; @@ -2776,10 +3044,16 @@ static void uld_attach(struct adapter *adap, unsigned int uld) lli.ucq_density = 1 << QUEUESPERPAGEPF0_GET( t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF) >> (adap->fn * 4)); + lli.filt_mode = adap->filter_mode; + /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */ + for (i = 0; i < NCHAN; i++) + lli.tx_modq[i] = i; lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS); lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL); lli.fw_vers = adap->params.fw_vers; lli.dbfifo_int_thresh = dbfifo_int_thresh; + lli.sge_pktshift = adap->sge.pktshift; + lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN; handle = ulds[uld].add(&lli); if (IS_ERR(handle)) { @@ -2999,6 +3273,126 @@ static int cxgb_close(struct net_device *dev) return t4_enable_vi(adapter, adapter->fn, pi->viid, false, false); } +/* Return an error number if the indicated filter isn't writable ... 
+ */ +static int writable_filter(struct filter_entry *f) +{ + if (f->locked) + return -EPERM; + if (f->pending) + return -EBUSY; + + return 0; +} + +/* Delete the filter at the specified index (if valid). The checks for all + * the common problems with doing this like the filter being locked, currently + * pending in another operation, etc. + */ +static int delete_filter(struct adapter *adapter, unsigned int fidx) +{ + struct filter_entry *f; + int ret; + + if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) + return -EINVAL; + + f = &adapter->tids.ftid_tab[fidx]; + ret = writable_filter(f); + if (ret) + return ret; + if (f->valid) + return del_filter_wr(adapter, fidx); + + return 0; +} + +int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, + __be32 sip, __be16 sport, __be16 vlan, + unsigned int queue, unsigned char port, unsigned char mask) +{ + int ret; + struct filter_entry *f; + struct adapter *adap; + int i; + u8 *val; + + adap = netdev2adap(dev); + + /* Adjust stid to correct filter index */ + stid -= adap->tids.nstids; + stid += adap->tids.nftids; + + /* Check to make sure the filter requested is writable ... + */ + f = &adap->tids.ftid_tab[stid]; + ret = writable_filter(f); + if (ret) + return ret; + + /* Clear out any old resources being used by the filter before + * we start constructing the new filter. + */ + if (f->valid) + clear_filter(adap, f); + + /* Clear out filter specifications */ + memset(&f->fs, 0, sizeof(struct ch_filter_specification)); + f->fs.val.lport = cpu_to_be16(sport); + f->fs.mask.lport = ~0; + val = (u8 *)&sip; + if ((val[0] | val[1] | val[2] | val[3]) != 0) { + for (i = 0; i < 4; i++) { + f->fs.val.lip[i] = val[i]; + f->fs.mask.lip[i] = ~0; + } + if (adap->filter_mode & F_PORT) { + f->fs.val.iport = port; + f->fs.mask.iport = mask; + } + } + + f->fs.dirsteer = 1; + f->fs.iq = queue; + /* Mark filter as locked */ + f->locked = 1; + f->fs.rpttid = 1; + + ret = set_filter_wr(adap, stid); + if (ret) { + clear_filter(adap, f); + return ret; + } + + return 0; +} +EXPORT_SYMBOL(cxgb4_create_server_filter); + +int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, + unsigned int queue, bool ipv6) +{ + int ret; + struct filter_entry *f; + struct adapter *adap; + + adap = netdev2adap(dev); + + /* Adjust stid to correct filter index */ + stid -= adap->tids.nstids; + stid += adap->tids.nftids; + + f = &adap->tids.ftid_tab[stid]; + /* Unlock the filter */ + f->locked = 0; + + ret = delete_filter(adap, stid); + if (ret) + return ret; + + return 0; +} +EXPORT_SYMBOL(cxgb4_remove_server_filter); + static struct rtnl_link_stats64 *cxgb_get_stats(struct net_device *dev, struct rtnl_link_stats64 *ns) { @@ -3245,6 +3639,34 @@ static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c) v = t4_read_reg(adap, TP_PIO_DATA); t4_write_reg(adap, TP_PIO_DATA, v & ~CSUM_HAS_PSEUDO_HDR); + /* first 4 Tx modulation queues point to consecutive Tx channels */ + adap->params.tp.tx_modq_map = 0xE4; + t4_write_reg(adap, A_TP_TX_MOD_QUEUE_REQ_MAP, + V_TX_MOD_QUEUE_REQ_MAP(adap->params.tp.tx_modq_map)); + + /* associate each Tx modulation queue with consecutive Tx channels */ + v = 0x84218421; + t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &v, 1, A_TP_TX_SCHED_HDR); + t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &v, 1, A_TP_TX_SCHED_FIFO); + t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &v, 1, A_TP_TX_SCHED_PCMD); + +#define T4_TX_MODQ_10G_WEIGHT_DEFAULT 16 /* in KB units */ + if (is_offload(adap)) { + 
t4_write_reg(adap, A_TP_TX_MOD_QUEUE_WEIGHT0, + V_TX_MODQ_WEIGHT0(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT1(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT2(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT3(T4_TX_MODQ_10G_WEIGHT_DEFAULT)); + t4_write_reg(adap, A_TP_TX_MOD_CHANNEL_WEIGHT, + V_TX_MODQ_WEIGHT0(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT1(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT2(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT3(T4_TX_MODQ_10G_WEIGHT_DEFAULT)); + } + /* get basic stuff going */ return t4_early_init(adap, adap->fn); } @@ -4035,6 +4457,10 @@ static int adap_init0(struct adapter *adap) for (j = 0; j < NCHAN; j++) adap->params.tp.tx_modq[j] = j; + t4_read_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &adap->filter_mode, 1, + TP_VLAN_PRI_MAP); + adap->flags |= FW_OK; return 0; @@ -4661,6 +5087,17 @@ static void remove_one(struct pci_dev *pdev) if (adapter->debugfs_root) debugfs_remove_recursive(adapter->debugfs_root); + /* If we allocated filters, free up state associated with any + * valid filters ... + */ + if (adapter->tids.ftid_tab) { + struct filter_entry *f = &adapter->tids.ftid_tab[0]; + for (i = 0; i < (adapter->tids.nftids + + adapter->tids.nsftids); i++, f++) + if (f->valid) + clear_filter(adapter, f); + } + if (adapter->flags & FULL_INIT_DONE) cxgb_down(adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 39bec73..e2bbc7f3e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -38,6 +38,7 @@ #include <linux/cache.h> #include <linux/spinlock.h> #include <linux/skbuff.h> +#include <linux/inetdevice.h> #include <linux/atomic.h> /* CPL message priority levels */ @@ -97,7 +98,9 @@ struct tid_info { union aopen_entry *atid_tab; unsigned int natids; + unsigned int atid_base; + struct filter_entry *ftid_tab; unsigned int nftids; unsigned int ftid_base; unsigned int aftid_base; @@ -129,7 +132,7 @@ static inline void *lookup_atid(const struct tid_info *t, unsigned int atid) static inline void *lookup_stid(const struct tid_info *t, unsigned int stid) { stid -= t->stid_base; - return stid < t->nstids ? t->stid_tab[stid].data : NULL; + return stid < (t->nstids + t->nsftids) ? 
t->stid_tab[stid].data : NULL; } static inline void cxgb4_insert_tid(struct tid_info *t, void *data, @@ -141,6 +144,7 @@ static inline void cxgb4_insert_tid(struct tid_info *t, void *data, int cxgb4_alloc_atid(struct tid_info *t, void *data); int cxgb4_alloc_stid(struct tid_info *t, int family, void *data); +int cxgb4_alloc_sftid(struct tid_info *t, int family, void *data); void cxgb4_free_atid(struct tid_info *t, unsigned int atid); void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family); void cxgb4_remove_tid(struct tid_info *t, unsigned int qid, unsigned int tid); @@ -148,8 +152,14 @@ void cxgb4_remove_tid(struct tid_info *t, unsigned int qid, unsigned int tid); struct in6_addr; int cxgb4_create_server(const struct net_device *dev, unsigned int stid, - __be32 sip, __be16 sport, unsigned int queue); - + __be32 sip, __be16 sport, __be16 vlan, + unsigned int queue); +int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, + __be32 sip, __be16 sport, __be16 vlan, + unsigned int queue, + unsigned char port, unsigned char mask); +int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, + unsigned int queue, bool ipv6); static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) { skb_set_queue_mapping(skb, (queue << 1) | prio); @@ -221,9 +231,16 @@ struct cxgb4_lld_info { unsigned int iscsi_iolen; /* iSCSI max I/O length */ unsigned short udb_density; /* # of user DB/page */ unsigned short ucq_density; /* # of user CQs/page */ + unsigned short filt_mode; /* filter optional components */ + unsigned short tx_modq[NCHAN]; /* maps each tx channel to a */ + /* scheduler queue */ void __iomem *gts_reg; /* address of GTS register */ void __iomem *db_reg; /* address of kernel doorbell */ int dbfifo_int_thresh; /* doorbell fifo int threshold */ + unsigned int sge_pktshift; /* Padding between CPL and */ + /* packet data */ + bool enable_fw_ofld_conn; /* Enable connection through fw */ + /* WR */ }; struct cxgb4_uld_info { diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index 6ac77a6..2987809 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -484,6 +484,38 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh) handle_failed_resolution(adap, arpq); } +/* Allocate an L2T entry for use by a switching rule. Such need to be + * explicitly freed and while busy they are not on any hash chain, so normal + * address resolution updates do not see them. + */ +struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *d) +{ + struct l2t_entry *e; + + write_lock_bh(&d->lock); + e = alloc_l2e(d); + if (e) { + spin_lock(&e->lock); /* avoid race with t4_l2t_free */ + e->state = L2T_STATE_SWITCHING; + atomic_set(&e->refcnt, 1); + spin_unlock(&e->lock); + } + write_unlock_bh(&d->lock); + return e; +} + +/* Sets/updates the contents of a switching L2T entry that has been allocated + * with an earlier call to @t4_l2t_alloc_switching. 
+ */ +int t4_l2t_set_switching(struct adapter *adap, struct l2t_entry *e, u16 vlan, + u8 port, u8 *eth_addr) +{ + e->vlan = vlan; + e->lport = port; + memcpy(e->dmac, eth_addr, ETH_ALEN); + return write_l2e(adap, e, 0); +} + struct l2t_data *t4_init_l2t(void) { int i; diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.h b/drivers/net/ethernet/chelsio/cxgb4/l2t.h index 02b31d0..108c0f1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.h +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.h @@ -100,6 +100,9 @@ struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh, unsigned int priority); void t4_l2t_update(struct adapter *adap, struct neighbour *neigh); +struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *d); +int t4_l2t_set_switching(struct adapter *adap, struct l2t_entry *e, u16 vlan, + u8 port, u8 *eth_addr); struct l2t_data *t4_init_l2t(void); void do_l2t_write_rpl(struct adapter *p, const struct cpl_l2t_write_rpl *rpl); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 8d9c754..22f3af5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -109,7 +109,7 @@ void t4_set_reg_field(struct adapter *adapter, unsigned int addr, u32 mask, * Reads registers that are accessed indirectly through an address/data * register pair. */ -static void t4_read_indirect(struct adapter *adap, unsigned int addr_reg, +void t4_read_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, u32 *vals, unsigned int nregs, unsigned int start_idx) { @@ -2268,6 +2268,26 @@ int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map, return 0; } +/* t4_mk_filtdelwr - create a delete filter WR + * @ftid: the filter ID + * @wr: the filter work request to populate + * @qid: ingress queue to receive the delete notification + * + * Creates a filter work request to delete the supplied filter. If @qid is + * negative the delete notification is suppressed. 
+ */ +void t4_mk_filtdelwr(unsigned int ftid, struct fw_filter_wr *wr, int qid) +{ + memset(wr, 0, sizeof(*wr)); + wr->op_pkd = htonl(FW_WR_OP(FW_FILTER_WR)); + wr->len16_pkd = htonl(FW_WR_LEN16(sizeof(*wr) / 16)); + wr->tid_to_iq = htonl(V_FW_FILTER_WR_TID(ftid) | + V_FW_FILTER_WR_NOREPLY(qid < 0)); + wr->del_filter_to_l2tix = htonl(F_FW_FILTER_WR_DEL_FILTER); + if (qid >= 0) + wr->rx_chan_rx_rpl_iq = htons(V_FW_FILTER_WR_RX_RPL_IQ(qid)); +} + #define INIT_CMD(var, cmd, rd_wr) do { \ (var).op_to_write = htonl(FW_CMD_OP(FW_##cmd##_CMD) | \ FW_CMD_REQUEST | FW_CMD_##rd_wr); \ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index b760808..261d177 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -193,8 +193,24 @@ struct work_request_hdr { __be64 wr_lo; }; +/* wr_hi fields */ +#define S_WR_OP 24 +#define V_WR_OP(x) ((__u64)(x) << S_WR_OP) + #define WR_HDR struct work_request_hdr wr +/* option 0 fields */ +#define S_MSS_IDX 60 +#define M_MSS_IDX 0xF +#define V_MSS_IDX(x) ((__u64)(x) << S_MSS_IDX) +#define G_MSS_IDX(x) (((x) >> S_MSS_IDX) & M_MSS_IDX) + +/* option 2 fields */ +#define S_RSS_QUEUE 0 +#define M_RSS_QUEUE 0x3FF +#define V_RSS_QUEUE(x) ((x) << S_RSS_QUEUE) +#define G_RSS_QUEUE(x) (((x) >> S_RSS_QUEUE) & M_RSS_QUEUE) + struct cpl_pass_open_req { WR_HDR; union opcode_tid ot; @@ -204,12 +220,14 @@ struct cpl_pass_open_req { __be32 peer_ip; __be64 opt0; #define TX_CHAN(x) ((x) << 2) +#define NO_CONG(x) ((x) << 4) #define DELACK(x) ((x) << 5) #define ULP_MODE(x) ((x) << 8) #define RCV_BUFSIZ(x) ((x) << 12) #define DSCP(x) ((x) << 22) #define SMAC_SEL(x) ((u64)(x) << 28) #define L2T_IDX(x) ((u64)(x) << 36) +#define TCAM_BYPASS(x) ((u64)(x) << 48) #define NAGLE(x) ((u64)(x) << 49) #define WND_SCALE(x) ((u64)(x) << 50) #define KEEP_ALIVE(x) ((u64)(x) << 54) @@ -247,8 +265,10 @@ struct cpl_pass_accept_rpl { #define RSS_QUEUE_VALID (1 << 10) #define RX_COALESCE_VALID(x) ((x) << 11) #define RX_COALESCE(x) ((x) << 12) +#define PACE(x) ((x) << 16) #define TX_QUEUE(x) ((x) << 23) #define RX_CHANNEL(x) ((x) << 26) +#define CCTRL_ECN(x) ((x) << 27) #define WND_SCALE_EN(x) ((x) << 28) #define TSTAMPS_EN(x) ((x) << 29) #define SACK_EN(x) ((x) << 30) @@ -292,6 +312,9 @@ struct cpl_pass_establish { union opcode_tid ot; __be32 rsvd; __be32 tos_stid; +#define PASS_OPEN_TID(x) ((x) << 0) +#define PASS_OPEN_TOS(x) ((x) << 24) +#define GET_PASS_OPEN_TID(x) (((x) >> 0) & 0xFFFFFF) #define GET_POPEN_TID(x) ((x) & 0xffffff) #define GET_POPEN_TOS(x) (((x) >> 24) & 0xff) __be16 mac_idx; @@ -332,6 +355,7 @@ struct cpl_set_tcb_field { __be16 word_cookie; #define TCB_WORD(x) ((x) << 0) #define TCB_COOKIE(x) ((x) << 5) +#define GET_TCB_COOKIE(x) (((x) >> 5) & 7) __be64 mask; __be64 val; }; @@ -536,6 +560,37 @@ struct cpl_rx_pkt { __be16 err_vec; }; +/* rx_pkt.l2info fields */ +#define S_RX_ETHHDR_LEN 0 +#define M_RX_ETHHDR_LEN 0x1F +#define V_RX_ETHHDR_LEN(x) ((x) << S_RX_ETHHDR_LEN) +#define G_RX_ETHHDR_LEN(x) (((x) >> S_RX_ETHHDR_LEN) & M_RX_ETHHDR_LEN) + +#define S_RX_MACIDX 8 +#define M_RX_MACIDX 0x1FF +#define V_RX_MACIDX(x) ((x) << S_RX_MACIDX) +#define G_RX_MACIDX(x) (((x) >> S_RX_MACIDX) & M_RX_MACIDX) + +#define S_RXF_SYN 21 +#define V_RXF_SYN(x) ((x) << S_RXF_SYN) +#define F_RXF_SYN V_RXF_SYN(1U) + +#define S_RX_CHAN 28 +#define M_RX_CHAN 0xF +#define V_RX_CHAN(x) ((x) << S_RX_CHAN) +#define G_RX_CHAN(x) (((x) >> S_RX_CHAN) & M_RX_CHAN) + +/* rx_pkt.hdr_len fields */ +#define S_RX_TCPHDR_LEN 
0 +#define M_RX_TCPHDR_LEN 0x3F +#define V_RX_TCPHDR_LEN(x) ((x) << S_RX_TCPHDR_LEN) +#define G_RX_TCPHDR_LEN(x) (((x) >> S_RX_TCPHDR_LEN) & M_RX_TCPHDR_LEN) + +#define S_RX_IPHDR_LEN 6 +#define M_RX_IPHDR_LEN 0x3FF +#define V_RX_IPHDR_LEN(x) ((x) << S_RX_IPHDR_LEN) +#define G_RX_IPHDR_LEN(x) (((x) >> S_RX_IPHDR_LEN) & M_RX_IPHDR_LEN) + struct cpl_trace_pkt { u8 opcode; u8 intf; @@ -634,6 +689,17 @@ struct cpl_fw6_msg { /* cpl_fw6_msg.type values */ enum { FW6_TYPE_CMD_RPL = 0, + FW6_TYPE_WR_RPL = 1, + FW6_TYPE_CQE = 2, + FW6_TYPE_OFLD_CONNECTION_WR_RPL = 3, +}; + +struct cpl_fw6_msg_ofld_connection_wr_rpl { + __u64 cookie; + __be32 tid; /* or atid in case of active failure */ + __u8 t_state; + __u8 retval; + __u8 rsvd[2]; }; enum { diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 75393f5..83ec5f7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1064,4 +1064,41 @@ #define ADDRESS(x) ((x) << ADDRESS_SHIFT) #define XGMAC_PORT_INT_CAUSE 0x10dc + +#define A_TP_TX_MOD_QUEUE_REQ_MAP 0x7e28 + +#define A_TP_TX_MOD_CHANNEL_WEIGHT 0x7e34 + +#define S_TX_MOD_QUEUE_REQ_MAP 0 +#define M_TX_MOD_QUEUE_REQ_MAP 0xffffU +#define V_TX_MOD_QUEUE_REQ_MAP(x) ((x) << S_TX_MOD_QUEUE_REQ_MAP) + +#define A_TP_TX_MOD_QUEUE_WEIGHT0 0x7e30 + +#define S_TX_MODQ_WEIGHT3 24 +#define M_TX_MODQ_WEIGHT3 0xffU +#define V_TX_MODQ_WEIGHT3(x) ((x) << S_TX_MODQ_WEIGHT3) + +#define S_TX_MODQ_WEIGHT2 16 +#define M_TX_MODQ_WEIGHT2 0xffU +#define V_TX_MODQ_WEIGHT2(x) ((x) << S_TX_MODQ_WEIGHT2) + +#define S_TX_MODQ_WEIGHT1 8 +#define M_TX_MODQ_WEIGHT1 0xffU +#define V_TX_MODQ_WEIGHT1(x) ((x) << S_TX_MODQ_WEIGHT1) + +#define S_TX_MODQ_WEIGHT0 0 +#define M_TX_MODQ_WEIGHT0 0xffU +#define V_TX_MODQ_WEIGHT0(x) ((x) << S_TX_MODQ_WEIGHT0) + +#define A_TP_TX_SCHED_HDR 0x23 + +#define A_TP_TX_SCHED_FIFO 0x24 + +#define A_TP_TX_SCHED_PCMD 0x25 + +#define S_PORT 1 +#define V_PORT(x) ((x) << S_PORT) +#define F_PORT V_PORT(1U) + #endif /* __T4_REGS_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 0abc864..a0dcccd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -35,6 +35,45 @@ #ifndef _T4FW_INTERFACE_H_ #define _T4FW_INTERFACE_H_ +enum fw_retval { + FW_SUCCESS = 0, /* completed sucessfully */ + FW_EPERM = 1, /* operation not permitted */ + FW_ENOENT = 2, /* no such file or directory */ + FW_EIO = 5, /* input/output error; hw bad */ + FW_ENOEXEC = 8, /* exec format error; inv microcode */ + FW_EAGAIN = 11, /* try again */ + FW_ENOMEM = 12, /* out of memory */ + FW_EFAULT = 14, /* bad address; fw bad */ + FW_EBUSY = 16, /* resource busy */ + FW_EEXIST = 17, /* file exists */ + FW_EINVAL = 22, /* invalid argument */ + FW_ENOSPC = 28, /* no space left on device */ + FW_ENOSYS = 38, /* functionality not implemented */ + FW_EPROTO = 71, /* protocol error */ + FW_EADDRINUSE = 98, /* address already in use */ + FW_EADDRNOTAVAIL = 99, /* cannot assigned requested address */ + FW_ENETDOWN = 100, /* network is down */ + FW_ENETUNREACH = 101, /* network is unreachable */ + FW_ENOBUFS = 105, /* no buffer space available */ + FW_ETIMEDOUT = 110, /* timeout */ + FW_EINPROGRESS = 115, /* fw internal */ + FW_SCSI_ABORT_REQUESTED = 128, /* */ + FW_SCSI_ABORT_TIMEDOUT = 129, /* */ + FW_SCSI_ABORTED = 130, /* */ + FW_SCSI_CLOSE_REQUESTED = 131, /* */ + FW_ERR_LINK_DOWN = 132, /* */ + FW_RDEV_NOT_READY = 133, /* 
*/ + FW_ERR_RDEV_LOST = 134, /* */ + FW_ERR_RDEV_LOGO = 135, /* */ + FW_FCOE_NO_XCHG = 136, /* */ + FW_SCSI_RSP_ERR = 137, /* */ + FW_ERR_RDEV_IMPL_LOGO = 138, /* */ + FW_SCSI_UNDER_FLOW_ERR = 139, /* */ + FW_SCSI_OVER_FLOW_ERR = 140, /* */ + FW_SCSI_DDP_ERR = 141, /* DDP error*/ + FW_SCSI_TASK_ERR = 142, /* No SCSI tasks available */ +}; + #define FW_T4VF_SGE_BASE_ADDR 0x0000 #define FW_T4VF_MPS_BASE_ADDR 0x0100 #define FW_T4VF_PL_BASE_ADDR 0x0200 @@ -46,6 +85,7 @@ enum fw_wr_opcodes { FW_ULPTX_WR = 0x04, FW_TP_WR = 0x05, FW_ETH_TX_PKT_WR = 0x08, + FW_OFLD_CONNECTION_WR = 0x2f, FW_FLOWC_WR = 0x0a, FW_OFLD_TX_DATA_WR = 0x0b, FW_CMD_WR = 0x10, @@ -81,6 +121,282 @@ struct fw_wr_hdr { #define FW_WR_LEN16(x) ((x) << 0) #define HW_TPL_FR_MT_PR_IV_P_FC 0X32B +#define HW_TPL_FR_MT_PR_OV_P_FC 0X327 + +/* filter wr reply code in cookie in CPL_SET_TCB_RPL */ +enum fw_filter_wr_cookie { + FW_FILTER_WR_SUCCESS, + FW_FILTER_WR_FLT_ADDED, + FW_FILTER_WR_FLT_DELETED, + FW_FILTER_WR_SMT_TBL_FULL, + FW_FILTER_WR_EINVAL, +}; + +struct fw_filter_wr { + __be32 op_pkd; + __be32 len16_pkd; + __be64 r3; + __be32 tid_to_iq; + __be32 del_filter_to_l2tix; + __be16 ethtype; + __be16 ethtypem; + __u8 frag_to_ovlan_vldm; + __u8 smac_sel; + __be16 rx_chan_rx_rpl_iq; + __be32 maci_to_matchtypem; + __u8 ptcl; + __u8 ptclm; + __u8 ttyp; + __u8 ttypm; + __be16 ivlan; + __be16 ivlanm; + __be16 ovlan; + __be16 ovlanm; + __u8 lip[16]; + __u8 lipm[16]; + __u8 fip[16]; + __u8 fipm[16]; + __be16 lp; + __be16 lpm; + __be16 fp; + __be16 fpm; + __be16 r7; + __u8 sma[6]; +}; + +#define S_FW_FILTER_WR_TID 12 +#define M_FW_FILTER_WR_TID 0xfffff +#define V_FW_FILTER_WR_TID(x) ((x) << S_FW_FILTER_WR_TID) +#define G_FW_FILTER_WR_TID(x) \ + (((x) >> S_FW_FILTER_WR_TID) & M_FW_FILTER_WR_TID) + +#define S_FW_FILTER_WR_RQTYPE 11 +#define M_FW_FILTER_WR_RQTYPE 0x1 +#define V_FW_FILTER_WR_RQTYPE(x) ((x) << S_FW_FILTER_WR_RQTYPE) +#define G_FW_FILTER_WR_RQTYPE(x) \ + (((x) >> S_FW_FILTER_WR_RQTYPE) & M_FW_FILTER_WR_RQTYPE) +#define F_FW_FILTER_WR_RQTYPE V_FW_FILTER_WR_RQTYPE(1U) + +#define S_FW_FILTER_WR_NOREPLY 10 +#define M_FW_FILTER_WR_NOREPLY 0x1 +#define V_FW_FILTER_WR_NOREPLY(x) ((x) << S_FW_FILTER_WR_NOREPLY) +#define G_FW_FILTER_WR_NOREPLY(x) \ + (((x) >> S_FW_FILTER_WR_NOREPLY) & M_FW_FILTER_WR_NOREPLY) +#define F_FW_FILTER_WR_NOREPLY V_FW_FILTER_WR_NOREPLY(1U) + +#define S_FW_FILTER_WR_IQ 0 +#define M_FW_FILTER_WR_IQ 0x3ff +#define V_FW_FILTER_WR_IQ(x) ((x) << S_FW_FILTER_WR_IQ) +#define G_FW_FILTER_WR_IQ(x) \ + (((x) >> S_FW_FILTER_WR_IQ) & M_FW_FILTER_WR_IQ) + +#define S_FW_FILTER_WR_DEL_FILTER 31 +#define M_FW_FILTER_WR_DEL_FILTER 0x1 +#define V_FW_FILTER_WR_DEL_FILTER(x) ((x) << S_FW_FILTER_WR_DEL_FILTER) +#define G_FW_FILTER_WR_DEL_FILTER(x) \ + (((x) >> S_FW_FILTER_WR_DEL_FILTER) & M_FW_FILTER_WR_DEL_FILTER) +#define F_FW_FILTER_WR_DEL_FILTER V_FW_FILTER_WR_DEL_FILTER(1U) + +#define S_FW_FILTER_WR_RPTTID 25 +#define M_FW_FILTER_WR_RPTTID 0x1 +#define V_FW_FILTER_WR_RPTTID(x) ((x) << S_FW_FILTER_WR_RPTTID) +#define G_FW_FILTER_WR_RPTTID(x) \ + (((x) >> S_FW_FILTER_WR_RPTTID) & M_FW_FILTER_WR_RPTTID) +#define F_FW_FILTER_WR_RPTTID V_FW_FILTER_WR_RPTTID(1U) + +#define S_FW_FILTER_WR_DROP 24 +#define M_FW_FILTER_WR_DROP 0x1 +#define V_FW_FILTER_WR_DROP(x) ((x) << S_FW_FILTER_WR_DROP) +#define G_FW_FILTER_WR_DROP(x) \ + (((x) >> S_FW_FILTER_WR_DROP) & M_FW_FILTER_WR_DROP) +#define F_FW_FILTER_WR_DROP V_FW_FILTER_WR_DROP(1U) + +#define S_FW_FILTER_WR_DIRSTEER 23 +#define M_FW_FILTER_WR_DIRSTEER 0x1 +#define V_FW_FILTER_WR_DIRSTEER(x) 
((x) << S_FW_FILTER_WR_DIRSTEER) +#define G_FW_FILTER_WR_DIRSTEER(x) \ + (((x) >> S_FW_FILTER_WR_DIRSTEER) & M_FW_FILTER_WR_DIRSTEER) +#define F_FW_FILTER_WR_DIRSTEER V_FW_FILTER_WR_DIRSTEER(1U) + +#define S_FW_FILTER_WR_MASKHASH 22 +#define M_FW_FILTER_WR_MASKHASH 0x1 +#define V_FW_FILTER_WR_MASKHASH(x) ((x) << S_FW_FILTER_WR_MASKHASH) +#define G_FW_FILTER_WR_MASKHASH(x) \ + (((x) >> S_FW_FILTER_WR_MASKHASH) & M_FW_FILTER_WR_MASKHASH) +#define F_FW_FILTER_WR_MASKHASH V_FW_FILTER_WR_MASKHASH(1U) + +#define S_FW_FILTER_WR_DIRSTEERHASH 21 +#define M_FW_FILTER_WR_DIRSTEERHASH 0x1 +#define V_FW_FILTER_WR_DIRSTEERHASH(x) ((x) << S_FW_FILTER_WR_DIRSTEERHASH) +#define G_FW_FILTER_WR_DIRSTEERHASH(x) \ + (((x) >> S_FW_FILTER_WR_DIRSTEERHASH) & M_FW_FILTER_WR_DIRSTEERHASH) +#define F_FW_FILTER_WR_DIRSTEERHASH V_FW_FILTER_WR_DIRSTEERHASH(1U) + +#define S_FW_FILTER_WR_LPBK 20 +#define M_FW_FILTER_WR_LPBK 0x1 +#define V_FW_FILTER_WR_LPBK(x) ((x) << S_FW_FILTER_WR_LPBK) +#define G_FW_FILTER_WR_LPBK(x) \ + (((x) >> S_FW_FILTER_WR_LPBK) & M_FW_FILTER_WR_LPBK) +#define F_FW_FILTER_WR_LPBK V_FW_FILTER_WR_LPBK(1U) + +#define S_FW_FILTER_WR_DMAC 19 +#define M_FW_FILTER_WR_DMAC 0x1 +#define V_FW_FILTER_WR_DMAC(x) ((x) << S_FW_FILTER_WR_DMAC) +#define G_FW_FILTER_WR_DMAC(x) \ + (((x) >> S_FW_FILTER_WR_DMAC) & M_FW_FILTER_WR_DMAC) +#define F_FW_FILTER_WR_DMAC V_FW_FILTER_WR_DMAC(1U) + +#define S_FW_FILTER_WR_SMAC 18 +#define M_FW_FILTER_WR_SMAC 0x1 +#define V_FW_FILTER_WR_SMAC(x) ((x) << S_FW_FILTER_WR_SMAC) +#define G_FW_FILTER_WR_SMAC(x) \ + (((x) >> S_FW_FILTER_WR_SMAC) & M_FW_FILTER_WR_SMAC) +#define F_FW_FILTER_WR_SMAC V_FW_FILTER_WR_SMAC(1U) + +#define S_FW_FILTER_WR_INSVLAN 17 +#define M_FW_FILTER_WR_INSVLAN 0x1 +#define V_FW_FILTER_WR_INSVLAN(x) ((x) << S_FW_FILTER_WR_INSVLAN) +#define G_FW_FILTER_WR_INSVLAN(x) \ + (((x) >> S_FW_FILTER_WR_INSVLAN) & M_FW_FILTER_WR_INSVLAN) +#define F_FW_FILTER_WR_INSVLAN V_FW_FILTER_WR_INSVLAN(1U) + +#define S_FW_FILTER_WR_RMVLAN 16 +#define M_FW_FILTER_WR_RMVLAN 0x1 +#define V_FW_FILTER_WR_RMVLAN(x) ((x) << S_FW_FILTER_WR_RMVLAN) +#define G_FW_FILTER_WR_RMVLAN(x) \ + (((x) >> S_FW_FILTER_WR_RMVLAN) & M_FW_FILTER_WR_RMVLAN) +#define F_FW_FILTER_WR_RMVLAN V_FW_FILTER_WR_RMVLAN(1U) + +#define S_FW_FILTER_WR_HITCNTS 15 +#define M_FW_FILTER_WR_HITCNTS 0x1 +#define V_FW_FILTER_WR_HITCNTS(x) ((x) << S_FW_FILTER_WR_HITCNTS) +#define G_FW_FILTER_WR_HITCNTS(x) \ + (((x) >> S_FW_FILTER_WR_HITCNTS) & M_FW_FILTER_WR_HITCNTS) +#define F_FW_FILTER_WR_HITCNTS V_FW_FILTER_WR_HITCNTS(1U) + +#define S_FW_FILTER_WR_TXCHAN 13 +#define M_FW_FILTER_WR_TXCHAN 0x3 +#define V_FW_FILTER_WR_TXCHAN(x) ((x) << S_FW_FILTER_WR_TXCHAN) +#define G_FW_FILTER_WR_TXCHAN(x) \ + (((x) >> S_FW_FILTER_WR_TXCHAN) & M_FW_FILTER_WR_TXCHAN) + +#define S_FW_FILTER_WR_PRIO 12 +#define M_FW_FILTER_WR_PRIO 0x1 +#define V_FW_FILTER_WR_PRIO(x) ((x) << S_FW_FILTER_WR_PRIO) +#define G_FW_FILTER_WR_PRIO(x) \ + (((x) >> S_FW_FILTER_WR_PRIO) & M_FW_FILTER_WR_PRIO) +#define F_FW_FILTER_WR_PRIO V_FW_FILTER_WR_PRIO(1U) + +#define S_FW_FILTER_WR_L2TIX 0 +#define M_FW_FILTER_WR_L2TIX 0xfff +#define V_FW_FILTER_WR_L2TIX(x) ((x) << S_FW_FILTER_WR_L2TIX) +#define G_FW_FILTER_WR_L2TIX(x) \ + (((x) >> S_FW_FILTER_WR_L2TIX) & M_FW_FILTER_WR_L2TIX) + +#define S_FW_FILTER_WR_FRAG 7 +#define M_FW_FILTER_WR_FRAG 0x1 +#define V_FW_FILTER_WR_FRAG(x) ((x) << S_FW_FILTER_WR_FRAG) +#define G_FW_FILTER_WR_FRAG(x) \ + (((x) >> S_FW_FILTER_WR_FRAG) & M_FW_FILTER_WR_FRAG) +#define F_FW_FILTER_WR_FRAG V_FW_FILTER_WR_FRAG(1U) + +#define 
S_FW_FILTER_WR_FRAGM 6 +#define M_FW_FILTER_WR_FRAGM 0x1 +#define V_FW_FILTER_WR_FRAGM(x) ((x) << S_FW_FILTER_WR_FRAGM) +#define G_FW_FILTER_WR_FRAGM(x) \ + (((x) >> S_FW_FILTER_WR_FRAGM) & M_FW_FILTER_WR_FRAGM) +#define F_FW_FILTER_WR_FRAGM V_FW_FILTER_WR_FRAGM(1U) + +#define S_FW_FILTER_WR_IVLAN_VLD 5 +#define M_FW_FILTER_WR_IVLAN_VLD 0x1 +#define V_FW_FILTER_WR_IVLAN_VLD(x) ((x) << S_FW_FILTER_WR_IVLAN_VLD) +#define G_FW_FILTER_WR_IVLAN_VLD(x) \ + (((x) >> S_FW_FILTER_WR_IVLAN_VLD) & M_FW_FILTER_WR_IVLAN_VLD) +#define F_FW_FILTER_WR_IVLAN_VLD V_FW_FILTER_WR_IVLAN_VLD(1U) + +#define S_FW_FILTER_WR_OVLAN_VLD 4 +#define M_FW_FILTER_WR_OVLAN_VLD 0x1 +#define V_FW_FILTER_WR_OVLAN_VLD(x) ((x) << S_FW_FILTER_WR_OVLAN_VLD) +#define G_FW_FILTER_WR_OVLAN_VLD(x) \ + (((x) >> S_FW_FILTER_WR_OVLAN_VLD) & M_FW_FILTER_WR_OVLAN_VLD) +#define F_FW_FILTER_WR_OVLAN_VLD V_FW_FILTER_WR_OVLAN_VLD(1U) + +#define S_FW_FILTER_WR_IVLAN_VLDM 3 +#define M_FW_FILTER_WR_IVLAN_VLDM 0x1 +#define V_FW_FILTER_WR_IVLAN_VLDM(x) ((x) << S_FW_FILTER_WR_IVLAN_VLDM) +#define G_FW_FILTER_WR_IVLAN_VLDM(x) \ + (((x) >> S_FW_FILTER_WR_IVLAN_VLDM) & M_FW_FILTER_WR_IVLAN_VLDM) +#define F_FW_FILTER_WR_IVLAN_VLDM V_FW_FILTER_WR_IVLAN_VLDM(1U) + +#define S_FW_FILTER_WR_OVLAN_VLDM 2 +#define M_FW_FILTER_WR_OVLAN_VLDM 0x1 +#define V_FW_FILTER_WR_OVLAN_VLDM(x) ((x) << S_FW_FILTER_WR_OVLAN_VLDM) +#define G_FW_FILTER_WR_OVLAN_VLDM(x) \ + (((x) >> S_FW_FILTER_WR_OVLAN_VLDM) & M_FW_FILTER_WR_OVLAN_VLDM) +#define F_FW_FILTER_WR_OVLAN_VLDM V_FW_FILTER_WR_OVLAN_VLDM(1U) + +#define S_FW_FILTER_WR_RX_CHAN 15 +#define M_FW_FILTER_WR_RX_CHAN 0x1 +#define V_FW_FILTER_WR_RX_CHAN(x) ((x) << S_FW_FILTER_WR_RX_CHAN) +#define G_FW_FILTER_WR_RX_CHAN(x) \ + (((x) >> S_FW_FILTER_WR_RX_CHAN) & M_FW_FILTER_WR_RX_CHAN) +#define F_FW_FILTER_WR_RX_CHAN V_FW_FILTER_WR_RX_CHAN(1U) + +#define S_FW_FILTER_WR_RX_RPL_IQ 0 +#define M_FW_FILTER_WR_RX_RPL_IQ 0x3ff +#define V_FW_FILTER_WR_RX_RPL_IQ(x) ((x) << S_FW_FILTER_WR_RX_RPL_IQ) +#define G_FW_FILTER_WR_RX_RPL_IQ(x) \ + (((x) >> S_FW_FILTER_WR_RX_RPL_IQ) & M_FW_FILTER_WR_RX_RPL_IQ) + +#define S_FW_FILTER_WR_MACI 23 +#define M_FW_FILTER_WR_MACI 0x1ff +#define V_FW_FILTER_WR_MACI(x) ((x) << S_FW_FILTER_WR_MACI) +#define G_FW_FILTER_WR_MACI(x) \ + (((x) >> S_FW_FILTER_WR_MACI) & M_FW_FILTER_WR_MACI) + +#define S_FW_FILTER_WR_MACIM 14 +#define M_FW_FILTER_WR_MACIM 0x1ff +#define V_FW_FILTER_WR_MACIM(x) ((x) << S_FW_FILTER_WR_MACIM) +#define G_FW_FILTER_WR_MACIM(x) \ + (((x) >> S_FW_FILTER_WR_MACIM) & M_FW_FILTER_WR_MACIM) + +#define S_FW_FILTER_WR_FCOE 13 +#define M_FW_FILTER_WR_FCOE 0x1 +#define V_FW_FILTER_WR_FCOE(x) ((x) << S_FW_FILTER_WR_FCOE) +#define G_FW_FILTER_WR_FCOE(x) \ + (((x) >> S_FW_FILTER_WR_FCOE) & M_FW_FILTER_WR_FCOE) +#define F_FW_FILTER_WR_FCOE V_FW_FILTER_WR_FCOE(1U) + +#define S_FW_FILTER_WR_FCOEM 12 +#define M_FW_FILTER_WR_FCOEM 0x1 +#define V_FW_FILTER_WR_FCOEM(x) ((x) << S_FW_FILTER_WR_FCOEM) +#define G_FW_FILTER_WR_FCOEM(x) \ + (((x) >> S_FW_FILTER_WR_FCOEM) & M_FW_FILTER_WR_FCOEM) +#define F_FW_FILTER_WR_FCOEM V_FW_FILTER_WR_FCOEM(1U) + +#define S_FW_FILTER_WR_PORT 9 +#define M_FW_FILTER_WR_PORT 0x7 +#define V_FW_FILTER_WR_PORT(x) ((x) << S_FW_FILTER_WR_PORT) +#define G_FW_FILTER_WR_PORT(x) \ + (((x) >> S_FW_FILTER_WR_PORT) & M_FW_FILTER_WR_PORT) + +#define S_FW_FILTER_WR_PORTM 6 +#define M_FW_FILTER_WR_PORTM 0x7 +#define V_FW_FILTER_WR_PORTM(x) ((x) << S_FW_FILTER_WR_PORTM) +#define G_FW_FILTER_WR_PORTM(x) \ + (((x) >> S_FW_FILTER_WR_PORTM) & M_FW_FILTER_WR_PORTM) + +#define 
S_FW_FILTER_WR_MATCHTYPE 3 +#define M_FW_FILTER_WR_MATCHTYPE 0x7 +#define V_FW_FILTER_WR_MATCHTYPE(x) ((x) << S_FW_FILTER_WR_MATCHTYPE) +#define G_FW_FILTER_WR_MATCHTYPE(x) \ + (((x) >> S_FW_FILTER_WR_MATCHTYPE) & M_FW_FILTER_WR_MATCHTYPE) + +#define S_FW_FILTER_WR_MATCHTYPEM 0 +#define M_FW_FILTER_WR_MATCHTYPEM 0x7 +#define V_FW_FILTER_WR_MATCHTYPEM(x) ((x) << S_FW_FILTER_WR_MATCHTYPEM) +#define G_FW_FILTER_WR_MATCHTYPEM(x) \ + (((x) >> S_FW_FILTER_WR_MATCHTYPEM) & M_FW_FILTER_WR_MATCHTYPEM) struct fw_ulptx_wr { __be32 op_to_compl; @@ -100,6 +416,108 @@ struct fw_eth_tx_pkt_wr { __be64 r3; }; +struct fw_ofld_connection_wr { + __be32 op_compl; + __be32 len16_pkd; + __u64 cookie; + __be64 r2; + __be64 r3; + struct fw_ofld_connection_le { + __be32 version_cpl; + __be32 filter; + __be32 r1; + __be16 lport; + __be16 pport; + union fw_ofld_connection_leip { + struct fw_ofld_connection_le_ipv4 { + __be32 pip; + __be32 lip; + __be64 r0; + __be64 r1; + __be64 r2; + } ipv4; + struct fw_ofld_connection_le_ipv6 { + __be64 pip_hi; + __be64 pip_lo; + __be64 lip_hi; + __be64 lip_lo; + } ipv6; + } u; + } le; + struct fw_ofld_connection_tcb { + __be32 t_state_to_astid; + __be16 cplrxdataack_cplpassacceptrpl; + __be16 rcv_adv; + __be32 rcv_nxt; + __be32 tx_max; + __be64 opt0; + __be32 opt2; + __be32 r1; + __be64 r2; + __be64 r3; + } tcb; +}; + +#define S_FW_OFLD_CONNECTION_WR_VERSION 31 +#define M_FW_OFLD_CONNECTION_WR_VERSION 0x1 +#define V_FW_OFLD_CONNECTION_WR_VERSION(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_VERSION) +#define G_FW_OFLD_CONNECTION_WR_VERSION(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_VERSION) & \ + M_FW_OFLD_CONNECTION_WR_VERSION) +#define F_FW_OFLD_CONNECTION_WR_VERSION \ + V_FW_OFLD_CONNECTION_WR_VERSION(1U) + +#define S_FW_OFLD_CONNECTION_WR_CPL 30 +#define M_FW_OFLD_CONNECTION_WR_CPL 0x1 +#define V_FW_OFLD_CONNECTION_WR_CPL(x) ((x) << S_FW_OFLD_CONNECTION_WR_CPL) +#define G_FW_OFLD_CONNECTION_WR_CPL(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_CPL) & M_FW_OFLD_CONNECTION_WR_CPL) +#define F_FW_OFLD_CONNECTION_WR_CPL V_FW_OFLD_CONNECTION_WR_CPL(1U) + +#define S_FW_OFLD_CONNECTION_WR_T_STATE 28 +#define M_FW_OFLD_CONNECTION_WR_T_STATE 0xf +#define V_FW_OFLD_CONNECTION_WR_T_STATE(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_T_STATE) +#define G_FW_OFLD_CONNECTION_WR_T_STATE(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_T_STATE) & \ + M_FW_OFLD_CONNECTION_WR_T_STATE) + +#define S_FW_OFLD_CONNECTION_WR_RCV_SCALE 24 +#define M_FW_OFLD_CONNECTION_WR_RCV_SCALE 0xf +#define V_FW_OFLD_CONNECTION_WR_RCV_SCALE(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_RCV_SCALE) +#define G_FW_OFLD_CONNECTION_WR_RCV_SCALE(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_RCV_SCALE) & \ + M_FW_OFLD_CONNECTION_WR_RCV_SCALE) + +#define S_FW_OFLD_CONNECTION_WR_ASTID 0 +#define M_FW_OFLD_CONNECTION_WR_ASTID 0xffffff +#define V_FW_OFLD_CONNECTION_WR_ASTID(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_ASTID) +#define G_FW_OFLD_CONNECTION_WR_ASTID(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_ASTID) & M_FW_OFLD_CONNECTION_WR_ASTID) + +#define S_FW_OFLD_CONNECTION_WR_CPLRXDATAACK 15 +#define M_FW_OFLD_CONNECTION_WR_CPLRXDATAACK 0x1 +#define V_FW_OFLD_CONNECTION_WR_CPLRXDATAACK(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_CPLRXDATAACK) +#define G_FW_OFLD_CONNECTION_WR_CPLRXDATAACK(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_CPLRXDATAACK) & \ + M_FW_OFLD_CONNECTION_WR_CPLRXDATAACK) +#define F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK \ + V_FW_OFLD_CONNECTION_WR_CPLRXDATAACK(1U) + +#define S_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL 14 +#define M_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL 0x1 
+#define V_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL) +#define G_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL) & \ + M_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL) +#define F_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL \ + V_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL(1U) + enum fw_flowc_mnem { FW_FLOWC_MNEM_PFNVFN, /* PFN [15:8] VFN [7:0] */ FW_FLOWC_MNEM_CH, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 9a9de51..8b3d051 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1338,6 +1338,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, { struct mlx4_cmd_mailbox *mailbox; __be32 *outbox; + u32 dword_field; int err; u8 byte_field; @@ -1372,10 +1373,18 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); + MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET); + if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) { + param->steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; + } else { + MLX4_GET(byte_field, outbox, INIT_HCA_UC_STEERING_OFFSET); + if (byte_field & 0x8) + param->steering_mode = MLX4_STEERING_MODE_B0; + else + param->steering_mode = MLX4_STEERING_MODE_A0; + } /* steering attributes */ - if (dev->caps.steering_mode == - MLX4_STEERING_MODE_DEVICE_MANAGED) { - + if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); MLX4_GET(param->log_mc_entry_sz, outbox, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 2c2e7ad..dbf2f69 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -172,6 +172,7 @@ struct mlx4_init_hca_param { u8 log_uar_sz; u8 uar_page_sz; /* log pg sz in 4k chunks */ u8 fs_hash_enable_bits; + u8 steering_mode; /* for QUERY_HCA */ u64 dev_cap_enabled; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index b2acbe7..e1bafff 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -85,15 +85,15 @@ static int probe_vf; module_param(probe_vf, int, 0644); MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)"); -int mlx4_log_num_mgm_entry_size = 10; +int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; module_param_named(log_num_mgm_entry_size, mlx4_log_num_mgm_entry_size, int, 0444); MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" " of qp per mcg, for example:" - " 10 gives 248.range: 9<=" + " 10 gives 248.range: 7 <=" " log_num_mgm_entry_size <= 12." 
- " Not in use with device managed" - " flow steering"); + " To activate device managed" + " flow steering when available, set to -1"); static bool enable_64b_cqe_eqe; module_param(enable_64b_cqe_eqe, bool, 0444); @@ -281,28 +281,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; - if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) { - dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; - dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; - dev->caps.fs_log_max_ucast_qp_range_size = - dev_cap->fs_log_max_ucast_qp_range_size; - } else { - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && - dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) { - dev->caps.steering_mode = MLX4_STEERING_MODE_B0; - } else { - dev->caps.steering_mode = MLX4_STEERING_MODE_A0; - - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || - dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) - mlx4_warn(dev, "Must have UC_STEER and MC_STEER flags " - "set to use B0 steering. Falling back to A0 steering mode.\n"); - } - dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); - } - mlx4_dbg(dev, "Steering mode is: %s\n", - mlx4_steering_mode_str(dev->caps.steering_mode)); - /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; @@ -493,6 +471,23 @@ int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) } EXPORT_SYMBOL(mlx4_is_slave_active); +static void slave_adjust_steering_mode(struct mlx4_dev *dev, + struct mlx4_dev_cap *dev_cap, + struct mlx4_init_hca_param *hca_param) +{ + dev->caps.steering_mode = hca_param->steering_mode; + if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { + dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; + dev->caps.fs_log_max_ucast_qp_range_size = + dev_cap->fs_log_max_ucast_qp_range_size; + } else + dev->caps.num_qp_per_mgm = + 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2); + + mlx4_dbg(dev, "Steering mode is: %s\n", + mlx4_steering_mode_str(dev->caps.steering_mode)); +} + static int mlx4_slave_cap(struct mlx4_dev *dev) { int err; @@ -635,6 +630,8 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.cqe_size = 32; } + slave_adjust_steering_mode(dev, &dev_cap, &hca_param); + return 0; err_mem: @@ -1321,6 +1318,59 @@ static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) } } +static int choose_log_fs_mgm_entry_size(int qp_per_entry) +{ + int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; + + for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; + i++) { + if (qp_per_entry <= 4 * ((1 << i) / 16 - 2)) + break; + } + + return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? 
i : -1; +} + +static void choose_steering_mode(struct mlx4_dev *dev, + struct mlx4_dev_cap *dev_cap) +{ + if (mlx4_log_num_mgm_entry_size == -1 && + dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && + (!mlx4_is_mfunc(dev) || + (dev_cap->fs_max_num_qp_per_entry >= (num_vfs + 1))) && + choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= + MLX4_MIN_MGM_LOG_ENTRY_SIZE) { + dev->oper_log_mgm_entry_size = + choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry); + dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; + dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; + dev->caps.fs_log_max_ucast_qp_range_size = + dev_cap->fs_log_max_ucast_qp_range_size; + } else { + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) + dev->caps.steering_mode = MLX4_STEERING_MODE_B0; + else { + dev->caps.steering_mode = MLX4_STEERING_MODE_A0; + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) + mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags " + "set to use B0 steering. Falling back to A0 steering mode.\n"); + } + dev->oper_log_mgm_entry_size = + mlx4_log_num_mgm_entry_size > 0 ? + mlx4_log_num_mgm_entry_size : + MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; + dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); + } + mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, " + "modparam log_num_mgm_entry_size = %d\n", + mlx4_steering_mode_str(dev->caps.steering_mode), + dev->oper_log_mgm_entry_size, + mlx4_log_num_mgm_entry_size); +} + static int mlx4_init_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1360,6 +1410,8 @@ static int mlx4_init_hca(struct mlx4_dev *dev) goto err_stop_fw; } + choose_steering_mode(dev, &dev_cap); + if (mlx4_is_master(dev)) mlx4_parav_master_pf_caps(dev); @@ -2452,6 +2504,17 @@ static int __init mlx4_verify_params(void) port_type_array[0] = true; } + if (mlx4_log_num_mgm_entry_size != -1 && + (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || + mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) { + pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not " + "in legal range (-1 or %d..%d)\n", + mlx4_log_num_mgm_entry_size, + MLX4_MIN_MGM_LOG_ENTRY_SIZE, + MLX4_MAX_MGM_LOG_ENTRY_SIZE); + return -1; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index e151c21..1ee4db3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -54,12 +54,7 @@ struct mlx4_mgm { int mlx4_get_mgm_entry_size(struct mlx4_dev *dev) { - if (dev->caps.steering_mode == - MLX4_STEERING_MODE_DEVICE_MANAGED) - return 1 << MLX4_FS_MGM_LOG_ENTRY_SIZE; - else - return min((1 << mlx4_log_num_mgm_entry_size), - MLX4_MAX_MGM_ENTRY_SIZE); + return 1 << dev->oper_log_mgm_entry_size; } int mlx4_get_qp_per_mgm(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 1cf4203..116c5c2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -94,8 +94,10 @@ enum { }; enum { - MLX4_MAX_MGM_ENTRY_SIZE = 0x1000, - MLX4_MAX_QP_PER_MGM = 4 * (MLX4_MAX_MGM_ENTRY_SIZE / 16 - 2), + MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE = 10, + MLX4_MIN_MGM_LOG_ENTRY_SIZE = 7, + MLX4_MAX_MGM_LOG_ENTRY_SIZE = 12, + MLX4_MAX_QP_PER_MGM = 4 * ((1 << MLX4_MAX_MGM_LOG_ENTRY_SIZE) / 16 - 2), MLX4_MTT_ENTRY_PER_SEG = 8, }; diff 
--git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index b05705f..561ed2a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -3071,6 +3071,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *rlist = &tracker->slave_list[slave].res_list[RES_MAC]; int err; + int qpn; struct mlx4_net_trans_rule_hw_ctrl *ctrl; struct _rule_hw *rule_header; int header_id; @@ -3080,13 +3081,21 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, return -EOPNOTSUPP; ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; + qpn = be32_to_cpu(ctrl->qpn) & 0xffffff; + err = get_res(dev, slave, qpn, RES_QP, NULL); + if (err) { + pr_err("Steering rule with qpn 0x%x rejected.\n", qpn); + return err; + } rule_header = (struct _rule_hw *)(ctrl + 1); header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id)); switch (header_id) { case MLX4_NET_TRANS_RULE_ID_ETH: - if (validate_eth_header_mac(slave, rule_header, rlist)) - return -EINVAL; + if (validate_eth_header_mac(slave, rule_header, rlist)) { + err = -EINVAL; + goto err_put; + } break; case MLX4_NET_TRANS_RULE_ID_IB: break; @@ -3094,14 +3103,17 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, case MLX4_NET_TRANS_RULE_ID_TCP: case MLX4_NET_TRANS_RULE_ID_UDP: pr_warn("Can't attach FS rule without L2 headers, adding L2 header.\n"); - if (add_eth_header(dev, slave, inbox, rlist, header_id)) - return -EINVAL; + if (add_eth_header(dev, slave, inbox, rlist, header_id)) { + err = -EINVAL; + goto err_put; + } vhcr->in_modifier += sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2; break; default: pr_err("Corrupted mailbox.\n"); - return -EINVAL; + err = -EINVAL; + goto err_put; } err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, @@ -3109,16 +3121,18 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) - return err; + goto err_put; err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, 0); if (err) { mlx4_err(dev, "Fail to add flow steering resources.\n "); /* detach rule*/ mlx4_cmd(dev, vhcr->out_param, 0, 0, - MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } +err_put: + put_res(dev, slave, qpn, RES_QP); return err; } diff --git a/drivers/scsi/csiostor/t4fw_api_stor.h b/drivers/scsi/csiostor/t4fw_api_stor.h index 1223e0d..097e52c 100644 --- a/drivers/scsi/csiostor/t4fw_api_stor.h +++ b/drivers/scsi/csiostor/t4fw_api_stor.h @@ -40,45 +40,6 @@ * R E T U R N V A L U E S ********************************/ -enum fw_retval { - FW_SUCCESS = 0, /* completed sucessfully */ - FW_EPERM = 1, /* operation not permitted */ - FW_ENOENT = 2, /* no such file or directory */ - FW_EIO = 5, /* input/output error; hw bad */ - FW_ENOEXEC = 8, /* exec format error; inv microcode */ - FW_EAGAIN = 11, /* try again */ - FW_ENOMEM = 12, /* out of memory */ - FW_EFAULT = 14, /* bad address; fw bad */ - FW_EBUSY = 16, /* resource busy */ - FW_EEXIST = 17, /* file exists */ - FW_EINVAL = 22, /* invalid argument */ - FW_ENOSPC = 28, /* no space left on device */ - FW_ENOSYS = 38, /* functionality not implemented */ - FW_EPROTO = 71, /* protocol error */ - FW_EADDRINUSE = 98, /* address already in use */ - 
FW_EADDRNOTAVAIL = 99, /* cannot assigned requested address */ - FW_ENETDOWN = 100, /* network is down */ - FW_ENETUNREACH = 101, /* network is unreachable */ - FW_ENOBUFS = 105, /* no buffer space available */ - FW_ETIMEDOUT = 110, /* timeout */ - FW_EINPROGRESS = 115, /* fw internal */ - FW_SCSI_ABORT_REQUESTED = 128, /* */ - FW_SCSI_ABORT_TIMEDOUT = 129, /* */ - FW_SCSI_ABORTED = 130, /* */ - FW_SCSI_CLOSE_REQUESTED = 131, /* */ - FW_ERR_LINK_DOWN = 132, /* */ - FW_RDEV_NOT_READY = 133, /* */ - FW_ERR_RDEV_LOST = 134, /* */ - FW_ERR_RDEV_LOGO = 135, /* */ - FW_FCOE_NO_XCHG = 136, /* */ - FW_SCSI_RSP_ERR = 137, /* */ - FW_ERR_RDEV_IMPL_LOGO = 138, /* */ - FW_SCSI_UNDER_FLOW_ERR = 139, /* */ - FW_SCSI_OVER_FLOW_ERR = 140, /* */ - FW_SCSI_DDP_ERR = 141, /* DDP error*/ - FW_SCSI_TASK_ERR = 142, /* No SCSI tasks available */ -}; - enum fw_fcoe_link_sub_op { FCOE_LINK_DOWN = 0x0, FCOE_LINK_UP = 0x1, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 21821da..20ea939 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -625,6 +625,7 @@ struct mlx4_dev { u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; int num_vfs; + int oper_log_mgm_entry_size; u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; };
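
Editor's note: the sketch below is not part of the merge above. It is a minimal illustration of how an upper-layer driver (ULD) might consume the server-filter helpers this series exports from cxgb4 -- cxgb4_alloc_sftid(), cxgb4_create_server_filter(), cxgb4_remove_server_filter() and cxgb4_free_stid(), whose prototypes appear in the cxgb4_uld.h hunk. The function names example_install_listen_filter()/example_remove_listen_filter(), the rxq parameter, and the assumption that the ULD already holds the adapter's struct tid_info pointer (in the real drivers it comes from the cxgb4_lld_info handed over at attach time) are illustrative, not taken from the patch.

/* Illustrative sketch only -- assumes the caller has the adapter's
 * struct tid_info (e.g. from cxgb4_lld_info at ULD attach time).
 */
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include "cxgb4_uld.h"		/* assumed include path for the ULD header */

static int example_install_listen_filter(struct net_device *dev,
					 struct tid_info *tids,
					 __be32 local_ip, __be16 local_port,
					 unsigned int rxq)
{
	int stid;
	int err;

	/* Reserve a server-filter TID from the region above the normal
	 * server TIDs (IPv4 only in this sketch).
	 */
	stid = cxgb4_alloc_sftid(tids, PF_INET, NULL);
	if (stid < 0)
		return stid;

	/* Install a filter steering SYNs for local_ip:local_port to ingress
	 * queue rxq; vlan 0 and port/mask 0 leave those matches wide open.
	 */
	err = cxgb4_create_server_filter(dev, stid, local_ip, local_port,
					 0, rxq, 0, 0);
	if (err) {
		cxgb4_free_stid(tids, stid, PF_INET);
		return err;
	}
	return stid;
}

static void example_remove_listen_filter(struct net_device *dev,
					 struct tid_info *tids,
					 unsigned int stid, unsigned int rxq)
{
	cxgb4_remove_server_filter(dev, stid, rxq, false);
	cxgb4_free_stid(tids, stid, PF_INET);
}

The pattern to note is that filter setup completes asynchronously: set_filter_wr() marks the entry pending, and the CPL_SET_TCB_RPL reply handled by filter_rpl() later marks it valid (or clears it on failure), so a zero return from cxgb4_create_server_filter() means "request accepted by the firmware", not "filter installed".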