diff options
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r-- | drivers/infiniband/core/agent.c | 29 | ||||
-rw-r--r-- | drivers/infiniband/core/cma.c | 313 | ||||
-rw-r--r-- | drivers/infiniband/core/iwcm.c | 4 | ||||
-rw-r--r-- | drivers/infiniband/core/mad.c | 27 | ||||
-rw-r--r-- | drivers/infiniband/core/multicast.c | 23 | ||||
-rw-r--r-- | drivers/infiniband/core/sa_query.c | 30 | ||||
-rw-r--r-- | drivers/infiniband/core/sysfs.c | 15 | ||||
-rw-r--r-- | drivers/infiniband/core/ucma.c | 92 | ||||
-rw-r--r-- | drivers/infiniband/core/ud_header.c | 140 | ||||
-rw-r--r-- | drivers/infiniband/core/user_mad.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_cmd.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/core/verbs.c | 16 |
12 files changed, 606 insertions, 87 deletions
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index ae7c288..91916a8 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -59,8 +59,8 @@ __ib_get_agent_port(struct ib_device *device, int port_num) struct ib_agent_port_private *entry; list_for_each_entry(entry, &ib_agent_port_list, port_list) { - if (entry->agent[0]->device == device && - entry->agent[0]->port_num == port_num) + if (entry->agent[1]->device == device && + entry->agent[1]->port_num == port_num) return entry; } return NULL; @@ -155,14 +155,16 @@ int ib_agent_port_open(struct ib_device *device, int port_num) goto error1; } - /* Obtain send only MAD agent for SMI QP */ - port_priv->agent[0] = ib_register_mad_agent(device, port_num, - IB_QPT_SMI, NULL, 0, - &agent_send_handler, - NULL, NULL); - if (IS_ERR(port_priv->agent[0])) { - ret = PTR_ERR(port_priv->agent[0]); - goto error2; + if (rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND) { + /* Obtain send only MAD agent for SMI QP */ + port_priv->agent[0] = ib_register_mad_agent(device, port_num, + IB_QPT_SMI, NULL, 0, + &agent_send_handler, + NULL, NULL); + if (IS_ERR(port_priv->agent[0])) { + ret = PTR_ERR(port_priv->agent[0]); + goto error2; + } } /* Obtain send only MAD agent for GSI QP */ @@ -182,7 +184,8 @@ int ib_agent_port_open(struct ib_device *device, int port_num) return 0; error3: - ib_unregister_mad_agent(port_priv->agent[0]); + if (port_priv->agent[0]) + ib_unregister_mad_agent(port_priv->agent[0]); error2: kfree(port_priv); error1: @@ -205,7 +208,9 @@ int ib_agent_port_close(struct ib_device *device, int port_num) spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); ib_unregister_mad_agent(port_priv->agent[1]); - ib_unregister_mad_agent(port_priv->agent[0]); + if (port_priv->agent[0]) + ib_unregister_mad_agent(port_priv->agent[0]); + kfree(port_priv); return 0; } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index b930b81..6884da2 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -59,6 +59,7 @@ MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) +#define CMA_IBOE_PACKET_LIFETIME 18 static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device); @@ -157,6 +158,7 @@ struct cma_multicast { struct list_head list; void *context; struct sockaddr_storage addr; + struct kref mcref; }; struct cma_work { @@ -173,6 +175,12 @@ struct cma_ndev_work { struct rdma_cm_event event; }; +struct iboe_mcast_work { + struct work_struct work; + struct rdma_id_private *id; + struct cma_multicast *mc; +}; + union cma_ip_addr { struct in6_addr ip6; struct { @@ -281,6 +289,8 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv, atomic_inc(&cma_dev->refcount); id_priv->cma_dev = cma_dev; id_priv->id.device = cma_dev->device; + id_priv->id.route.addr.dev_addr.transport = + rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); } @@ -290,6 +300,14 @@ static inline void cma_deref_dev(struct cma_device *cma_dev) complete(&cma_dev->comp); } +static inline void release_mc(struct kref *kref) +{ + struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref); + + kfree(mc->multicast.ib); + kfree(mc); +} + static void cma_detach_from_dev(struct rdma_id_private *id_priv) { list_del(&id_priv->list); @@ -323,22 +341,63 @@ static int cma_set_qkey(struct rdma_id_private *id_priv) return ret; } +static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num) +{ + int i; + int err; + struct ib_port_attr props; + union ib_gid tmp; + + err = ib_query_port(device, port_num, &props); + if (err) + return 1; + + for (i = 0; i < props.gid_tbl_len; ++i) { + err = ib_query_gid(device, port_num, i, &tmp); + if (err) + return 1; + if (!memcmp(&tmp, gid, sizeof tmp)) + return 0; + } + + return -EAGAIN; +} + static int cma_acquire_dev(struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct cma_device *cma_dev; - union ib_gid gid; + union ib_gid gid, iboe_gid; int ret = -ENODEV; + u8 port; + enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ? + IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; - rdma_addr_get_sgid(dev_addr, &gid); + iboe_addr_get_sgid(dev_addr, &iboe_gid); + memcpy(&gid, dev_addr->src_dev_addr + + rdma_addr_gid_offset(dev_addr), sizeof gid); list_for_each_entry(cma_dev, &dev_list, list) { - ret = ib_find_cached_gid(cma_dev->device, &gid, - &id_priv->id.port_num, NULL); - if (!ret) { - cma_attach_to_dev(id_priv, cma_dev); - break; + for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { + if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) { + if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && + rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) + ret = find_gid_port(cma_dev->device, &iboe_gid, port); + else + ret = find_gid_port(cma_dev->device, &gid, port); + + if (!ret) { + id_priv->id.port_num = port; + goto out; + } else if (ret == 1) + break; + } } } + +out: + if (!ret) + cma_attach_to_dev(id_priv, cma_dev); + return ret; } @@ -556,10 +615,16 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int ret; + u16 pkey; + + if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) == + IB_LINK_LAYER_INFINIBAND) + pkey = ib_addr_get_pkey(dev_addr); + else + pkey = 0xffff; ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, - ib_addr_get_pkey(dev_addr), - &qp_attr->pkey_index); + pkey, &qp_attr->pkey_index); if (ret) return ret; @@ -737,8 +802,8 @@ static inline int cma_user_data_offset(enum rdma_port_space ps) static void cma_cancel_route(struct rdma_id_private *id_priv) { - switch (rdma_node_get_transport(id_priv->id.device->node_type)) { - case RDMA_TRANSPORT_IB: + switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) { + case IB_LINK_LAYER_INFINIBAND: if (id_priv->query) ib_sa_cancel_query(id_priv->query_id, id_priv->query); break; @@ -816,8 +881,17 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) mc = container_of(id_priv->mc_list.next, struct cma_multicast, list); list_del(&mc->list); - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); + switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) { + case IB_LINK_LAYER_INFINIBAND: + ib_sa_free_multicast(mc->multicast.ib); + kfree(mc); + break; + case IB_LINK_LAYER_ETHERNET: + kref_put(&mc->mcref, release_mc); + break; + default: + break; + } } } @@ -833,7 +907,7 @@ void rdma_destroy_id(struct rdma_cm_id *id) mutex_lock(&lock); if (id_priv->cma_dev) { mutex_unlock(&lock); - switch (rdma_node_get_transport(id->device->node_type)) { + switch (rdma_node_get_transport(id_priv->id.device->node_type)) { case RDMA_TRANSPORT_IB: if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) ib_destroy_cm_id(id_priv->cm_id.ib); @@ -1708,6 +1782,81 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) return 0; } +static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) +{ + struct rdma_route *route = &id_priv->id.route; + struct rdma_addr *addr = &route->addr; + struct cma_work *work; + int ret; + struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr; + struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr; + struct net_device *ndev = NULL; + u16 vid; + + if (src_addr->sin_family != dst_addr->sin_family) + return -EINVAL; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler); + + route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); + if (!route->path_rec) { + ret = -ENOMEM; + goto err1; + } + + route->num_paths = 1; + + if (addr->dev_addr.bound_dev_if) + ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); + if (!ndev) { + ret = -ENODEV; + goto err2; + } + + vid = rdma_vlan_dev_vlan_id(ndev); + + iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid); + iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid); + + route->path_rec->hop_limit = 1; + route->path_rec->reversible = 1; + route->path_rec->pkey = cpu_to_be16(0xffff); + route->path_rec->mtu_selector = IB_SA_EQ; + route->path_rec->sl = id_priv->tos >> 5; + + route->path_rec->mtu = iboe_get_mtu(ndev->mtu); + route->path_rec->rate_selector = IB_SA_EQ; + route->path_rec->rate = iboe_get_rate(ndev); + dev_put(ndev); + route->path_rec->packet_life_time_selector = IB_SA_EQ; + route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; + if (!route->path_rec->mtu) { + ret = -EINVAL; + goto err2; + } + + work->old_state = CMA_ROUTE_QUERY; + work->new_state = CMA_ROUTE_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + work->event.status = 0; + + queue_work(cma_wq, &work->work); + + return 0; + +err2: + kfree(route->path_rec); + route->path_rec = NULL; +err1: + kfree(work); + return ret; +} + int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) { struct rdma_id_private *id_priv; @@ -1720,7 +1869,16 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) atomic_inc(&id_priv->refcount); switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - ret = cma_resolve_ib_route(id_priv, timeout_ms); + switch (rdma_port_get_link_layer(id->device, id->port_num)) { + case IB_LINK_LAYER_INFINIBAND: + ret = cma_resolve_ib_route(id_priv, timeout_ms); + break; + case IB_LINK_LAYER_ETHERNET: + ret = cma_resolve_iboe_route(id_priv); + break; + default: + ret = -ENOSYS; + } break; case RDMA_TRANSPORT_IWARP: ret = cma_resolve_iw_route(id_priv, timeout_ms); @@ -1773,7 +1931,7 @@ port_found: goto out; id_priv->id.route.addr.dev_addr.dev_type = - (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ? + (rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ? ARPHRD_INFINIBAND : ARPHRD_ETHER; rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); @@ -2758,6 +2916,102 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, return 0; } +static void iboe_mcast_work_handler(struct work_struct *work) +{ + struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work); + struct cma_multicast *mc = mw->mc; + struct ib_sa_multicast *m = mc->multicast.ib; + + mc->multicast.ib->context = mc; + cma_ib_mc_handler(0, m); + kref_put(&mc->mcref, release_mc); + kfree(mw); +} + +static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; + + if (cma_any_addr(addr)) { + memset(mgid, 0, sizeof *mgid); + } else if (addr->sa_family == AF_INET6) { + memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); + } else { + mgid->raw[0] = 0xff; + mgid->raw[1] = 0x0e; + mgid->raw[2] = 0; + mgid->raw[3] = 0; + mgid->raw[4] = 0; + mgid->raw[5] = 0; + mgid->raw[6] = 0; + mgid->raw[7] = 0; + mgid->raw[8] = 0; + mgid->raw[9] = 0; + mgid->raw[10] = 0xff; + mgid->raw[11] = 0xff; + *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr; + } +} + +static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, + struct cma_multicast *mc) +{ + struct iboe_mcast_work *work; + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + int err; + struct sockaddr *addr = (struct sockaddr *)&mc->addr; + struct net_device *ndev = NULL; + + if (cma_zero_addr((struct sockaddr *)&mc->addr)) + return -EINVAL; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL); + if (!mc->multicast.ib) { + err = -ENOMEM; + goto out1; + } + + cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid); + + mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff); + if (id_priv->id.ps == RDMA_PS_UDP) + mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); + + if (dev_addr->bound_dev_if) + ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + if (!ndev) { + err = -ENODEV; + goto out2; + } + mc->multicast.ib->rec.rate = iboe_get_rate(ndev); + mc->multicast.ib->rec.hop_limit = 1; + mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu); + dev_put(ndev); + if (!mc->multicast.ib->rec.mtu) { + err = -EINVAL; + goto out2; + } + iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid); + work->id = id_priv; + work->mc = mc; + INIT_WORK(&work->work, iboe_mcast_work_handler); + kref_get(&mc->mcref); + queue_work(cma_wq, &work->work); + + return 0; + +out2: + kfree(mc->multicast.ib); +out1: + kfree(work); + return err; +} + int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, void *context) { @@ -2784,7 +3038,17 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - ret = cma_join_ib_multicast(id_priv, mc); + switch (rdma_port_get_link_layer(id->device, id->port_num)) { + case IB_LINK_LAYER_INFINIBAND: + ret = cma_join_ib_multicast(id_priv, mc); + break; + case IB_LINK_LAYER_ETHERNET: + kref_init(&mc->mcref); + ret = cma_iboe_join_multicast(id_priv, mc); + break; + default: + ret = -EINVAL; + } break; default: ret = -ENOSYS; @@ -2817,8 +3081,19 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) ib_detach_mcast(id->qp, &mc->multicast.ib->rec.mgid, mc->multicast.ib->rec.mlid); - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); + if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) { + switch (rdma_port_get_link_layer(id->device, id->port_num)) { + case IB_LINK_LAYER_INFINIBAND: + ib_sa_free_multicast(mc->multicast.ib); + kfree(mc); + break; + case IB_LINK_LAYER_ETHERNET: + kref_put(&mc->mcref, release_mc); + break; + default: + break; + } + } return; } } diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index bfead5b..2a1e9ae 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -506,6 +506,8 @@ int iw_cm_accept(struct iw_cm_id *cm_id, qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); if (!qp) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } cm_id->device->iwcm->add_ref(qp); @@ -565,6 +567,8 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); if (!qp) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } cm_id->device->iwcm->add_ref(qp); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index ef1304f..822cfdc 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2598,6 +2598,9 @@ static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info) struct ib_mad_private *recv; struct ib_mad_list_head *mad_list; + if (!qp_info->qp) + return; + while (!list_empty(&qp_info->recv_queue.list)) { mad_list = list_entry(qp_info->recv_queue.list.next, @@ -2639,6 +2642,9 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) for (i = 0; i < IB_MAD_QPS_CORE; i++) { qp = port_priv->qp_info[i].qp; + if (!qp) + continue; + /* * PKey index for QP1 is irrelevant but * one is needed for the Reset to Init transition @@ -2680,6 +2686,9 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) } for (i = 0; i < IB_MAD_QPS_CORE; i++) { + if (!port_priv->qp_info[i].qp) + continue; + ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL); if (ret) { printk(KERN_ERR PFX "Couldn't post receive WRs\n"); @@ -2758,6 +2767,9 @@ error: static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) { + if (!qp_info->qp) + return; + ib_destroy_qp(qp_info->qp); kfree(qp_info->snoop_table); } @@ -2773,6 +2785,7 @@ static int ib_mad_port_open(struct ib_device *device, struct ib_mad_port_private *port_priv; unsigned long flags; char name[sizeof "ib_mad123"]; + int has_smi; /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); @@ -2788,7 +2801,11 @@ static int ib_mad_port_open(struct ib_device *device, init_mad_qp(port_priv, &port_priv->qp_info[0]); init_mad_qp(port_priv, &port_priv->qp_info[1]); - cq_size = (mad_sendq_size + mad_recvq_size) * 2; + cq_size = mad_sendq_size + mad_recvq_size; + has_smi = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND; + if (has_smi) + cq_size *= 2; + port_priv->cq = ib_create_cq(port_priv->device, ib_mad_thread_completion_handler, NULL, port_priv, cq_size, 0); @@ -2812,9 +2829,11 @@ static int ib_mad_port_open(struct ib_device *device, goto error5; } - ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI); - if (ret) - goto error6; + if (has_smi) { + ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI); + if (ret) + goto error6; + } ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI); if (ret) goto error7; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index a519801..68b4162 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -774,6 +774,10 @@ static void mcast_event_handler(struct ib_event_handler *handler, int index; dev = container_of(handler, struct mcast_device, event_handler); + if (rdma_port_get_link_layer(dev->device, event->element.port_num) != + IB_LINK_LAYER_INFINIBAND) + return; + index = event->element.port_num - dev->start_port; switch (event->event) { @@ -796,6 +800,7 @@ static void mcast_add_one(struct ib_device *device) struct mcast_device *dev; struct mcast_port *port; int i; + int count = 0; if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; @@ -813,6 +818,9 @@ static void mcast_add_one(struct ib_device *device) } for (i = 0; i <= dev->end_port - dev->start_port; i++) { + if (rdma_port_get_link_layer(device, dev->start_port + i) != + IB_LINK_LAYER_INFINIBAND) + continue; port = &dev->port[i]; port->dev = dev; port->port_num = dev->start_port + i; @@ -820,6 +828,12 @@ static void mcast_add_one(struct ib_device *device) port->table = RB_ROOT; init_completion(&port->comp); atomic_set(&port->refcount, 1); + ++count; + } + + if (!count) { + kfree(dev); + return; } dev->device = device; @@ -843,9 +857,12 @@ static void mcast_remove_one(struct ib_device *device) flush_workqueue(mcast_wq); for (i = 0; i <= dev->end_port - dev->start_port; i++) { - port = &dev->port[i]; - deref_port(port); - wait_for_completion(&port->comp); + if (rdma_port_get_link_layer(device, dev->start_port + i) == + IB_LINK_LAYER_INFINIBAND) { + port = &dev->port[i]; + deref_port(port); + wait_for_completion(&port->comp); + } } kfree(dev); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 7e1ffd8c..91a6603 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -416,6 +416,9 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event struct ib_sa_port *port = &sa_dev->port[event->element.port_num - sa_dev->start_port]; + if (rdma_port_get_link_layer(handler->device, port->port_num) != IB_LINK_LAYER_INFINIBAND) + return; + spin_lock_irqsave(&port->ah_lock, flags); if (port->sm_ah) kref_put(&port->sm_ah->ref, free_sm_ah); @@ -493,6 +496,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, { int ret; u16 gid_index; + int force_grh; memset(ah_attr, 0, sizeof *ah_attr); ah_attr->dlid = be16_to_cpu(rec->dlid); @@ -502,7 +506,9 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, ah_attr->port_num = port_num; ah_attr->static_rate = rec->rate; - if (rec->hop_limit > 1) { + force_grh = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_ETHERNET; + + if (rec->hop_limit > 1 || force_grh) { ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.dgid = rec->dgid; @@ -1007,7 +1013,7 @@ static void ib_sa_add_one(struct ib_device *device) e = device->phys_port_cnt; } - sa_dev = kmalloc(sizeof *sa_dev + + sa_dev = kzalloc(sizeof *sa_dev + (e - s + 1) * sizeof (struct ib_sa_port), GFP_KERNEL); if (!sa_dev) @@ -1017,9 +1023,12 @@ static void ib_sa_add_one(struct ib_device *device) sa_dev->end_port = e; for (i = 0; i <= e - s; ++i) { + spin_lock_init(&sa_dev->port[i].ah_lock); + if (rdma_port_get_link_layer(device, i + 1) != IB_LINK_LAYER_INFINIBAND) + continue; + sa_dev->port[i].sm_ah = NULL; sa_dev->port[i].port_num = i + s; - spin_lock_init(&sa_dev->port[i].ah_lock); sa_dev->port[i].agent = ib_register_mad_agent(device, i + s, IB_QPT_GSI, @@ -1045,13 +1054,15 @@ static void ib_sa_add_one(struct ib_device *device) goto err; for (i = 0; i <= e - s; ++i) - update_sm_ah(&sa_dev->port[i].update_task); + if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) + update_sm_ah(&sa_dev->port[i].update_task); return; err: while (--i >= 0) - ib_unregister_mad_agent(sa_dev->port[i].agent); + if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) + ib_unregister_mad_agent(sa_dev->port[i].agent); kfree(sa_dev); @@ -1071,9 +1082,12 @@ static void ib_sa_remove_one(struct ib_device *device) flush_scheduled_work(); for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { - ib_unregister_mad_agent(sa_dev->port[i].agent); - if (sa_dev->port[i].sm_ah) - kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); + if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) { + ib_unregister_mad_agent(sa_dev->port[i].agent); + if (sa_dev->port[i].sm_ah) + kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); + } + } kfree(sa_dev); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 3627300..9ab5df7 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -222,6 +222,19 @@ static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused, } } +static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused, + char *buf) +{ + switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) { + case IB_LINK_LAYER_INFINIBAND: + return sprintf(buf, "%s\n", "InfiniBand"); + case IB_LINK_LAYER_ETHERNET: + return sprintf(buf, "%s\n", "Ethernet"); + default: + return sprintf(buf, "%s\n", "Unknown"); + } +} + static PORT_ATTR_RO(state); static PORT_ATTR_RO(lid); static PORT_ATTR_RO(lid_mask_count); @@ -230,6 +243,7 @@ static PORT_ATTR_RO(sm_sl); static PORT_ATTR_RO(cap_mask); static PORT_ATTR_RO(rate); static PORT_ATTR_RO(phys_state); +static PORT_ATTR_RO(link_layer); static struct attribute *port_default_attrs[] = { &port_attr_state.attr, @@ -240,6 +254,7 @@ static struct attribute *port_default_attrs[] = { &port_attr_cap_mask.attr, &port_attr_rate.attr, &port_attr_phys_state.attr, + &port_attr_link_layer.attr, NULL }; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index ac7edc2..ca12acf 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -40,6 +40,7 @@ #include <linux/in6.h> #include <linux/miscdevice.h> #include <linux/slab.h> +#include <linux/sysctl.h> #include <rdma/rdma_user_cm.h> #include <rdma/ib_marshall.h> @@ -50,8 +51,24 @@ MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); MODULE_LICENSE("Dual BSD/GPL"); -enum { - UCMA_MAX_BACKLOG = 128 +static unsigned int max_backlog = 1024; + +static struct ctl_table_header *ucma_ctl_table_hdr; +static ctl_table ucma_ctl_table[] = { + { + .procname = "max_backlog", + .data = &max_backlog, + .maxlen = sizeof max_backlog, + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; + +static struct ctl_path ucma_ctl_path[] = { + { .procname = "net" }, + { .procname = "rdma_ucm" }, + { } }; struct ucma_file { @@ -583,6 +600,42 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, } } +static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, + struct rdma_route *route) +{ + struct rdma_dev_addr *dev_addr; + struct net_device *dev; + u16 vid = 0; + + resp->num_paths = route->num_paths; + switch (route->num_paths) { + case 0: + dev_addr = &route->addr.dev_addr; + dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + if (dev) { + vid = rdma_vlan_dev_vlan_id(dev); + dev_put(dev); + } + + iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid, + dev_addr->dst_dev_addr, vid); + iboe_addr_get_sgid(dev_addr, + (union ib_gid *) &resp->ib_route[0].sgid); + resp->ib_route[0].pkey = cpu_to_be16(0xffff); + break; + case 2: + ib_copy_path_rec_to_user(&resp->ib_route[1], + &route->path_rec[1]); + /* fall through */ + case 1: + ib_copy_path_rec_to_user(&resp->ib_route[0], + &route->path_rec[0]); + break; + default: + break; + } +} + static ssize_t ucma_query_route(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) @@ -617,12 +670,17 @@ static ssize_t ucma_query_route(struct ucma_file *file, resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; resp.port_num = ctx->cm_id->port_num; - switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) { - case RDMA_TRANSPORT_IB: - ucma_copy_ib_route(&resp, &ctx->cm_id->route); - break; - default: - break; + if (rdma_node_get_transport(ctx->cm_id->device->node_type) == RDMA_TRANSPORT_IB) { + switch (rdma_port_get_link_layer(ctx->cm_id->device, ctx->cm_id->port_num)) { + case IB_LINK_LAYER_INFINIBAND: + ucma_copy_ib_route(&resp, &ctx->cm_id->route); + break; + case IB_LINK_LAYER_ETHERNET: + ucma_copy_iboe_route(&resp, &ctx->cm_id->route); + break; + default: + break; + } } out: @@ -686,8 +744,8 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); - ctx->backlog = cmd.backlog > 0 && cmd.backlog < UCMA_MAX_BACKLOG ? - cmd.backlog : UCMA_MAX_BACKLOG; + ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ? + cmd.backlog : max_backlog; ret = rdma_listen(ctx->cm_id, ctx->backlog); ucma_put_ctx(ctx); return ret; @@ -1279,16 +1337,26 @@ static int __init ucma_init(void) ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); if (ret) { printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n"); - goto err; + goto err1; + } + + ucma_ctl_table_hdr = register_sysctl_paths(ucma_ctl_path, ucma_ctl_table); + if (!ucma_ctl_table_hdr) { + printk(KERN_ERR "rdma_ucm: couldn't register sysctl paths\n"); + ret = -ENOMEM; + goto err2; } return 0; -err: +err2: + device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); +err1: misc_deregister(&ucma_misc); return ret; } static void __exit ucma_cleanup(void) { + unregister_sysctl_table(ucma_ctl_table_hdr); device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); misc_deregister(&ucma_misc); idr_destroy(&ctx_idr); diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index 650b501..bb7e192 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -33,6 +33,7 @@ #include <linux/errno.h> #include <linux/string.h> +#include <linux/if_ether.h> #include <rdma/ib_pack.h> @@ -80,6 +81,40 @@ static const struct ib_field lrh_table[] = { .size_bits = 16 } }; +static const struct ib_field eth_table[] = { + { STRUCT_FIELD(eth, dmac_h), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 32 }, + { STRUCT_FIELD(eth, dmac_l), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 16 }, + { STRUCT_FIELD(eth, smac_h), + .offset_words = 1, + .offset_bits = 16, + .size_bits = 16 }, + { STRUCT_FIELD(eth, smac_l), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 32 }, + { STRUCT_FIELD(eth, type), + .offset_words = 3, + .offset_bits = 0, + .size_bits = 16 } +}; + +static const struct ib_field vlan_table[] = { + { STRUCT_FIELD(vlan, tag), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 16 }, + { STRUCT_FIELD(vlan, type), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 16 } +}; + static const struct ib_field grh_table[] = { { STRUCT_FIELD(grh, ip_version), .offset_words = 0, @@ -180,38 +215,43 @@ static const struct ib_field deth_table[] = { /** * ib_ud_header_init - Initialize UD header structure * @payload_bytes:Length of packet payload + * @lrh_present: specify if LRH is present + * @eth_present: specify if Eth header is present + * @vlan_present: packet is tagged vlan * @grh_present:GRH flag (if non-zero, GRH will be included) - * @immediate_present: specify if immediate data should be used + * @immediate_present: specify if immediate data is present * @header:Structure to initialize - * - * ib_ud_header_init() initializes the lrh.link_version, lrh.link_next_header, - * lrh.packet_length, grh.ip_version, grh.payload_length, - * grh.next_header, bth.opcode, bth.pad_count and - * bth.transport_header_version fields of a &struct ib_ud_header given - * the payload length and whether a GRH will be included. */ void ib_ud_header_init(int payload_bytes, + int lrh_present, + int eth_present, + int vlan_present, int grh_present, int immediate_present, struct ib_ud_header *header) { - u16 packet_length; - memset(header, 0, sizeof *header); - header->lrh.link_version = 0; - header->lrh.link_next_header = - grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL; - packet_length = (IB_LRH_BYTES + - IB_BTH_BYTES + - IB_DETH_BYTES + - payload_bytes + - 4 + /* ICRC */ - 3) / 4; /* round up */ - - header->grh_present = grh_present; + if (lrh_present) { + u16 packet_length; + + header->lrh.link_version = 0; + header->lrh.link_next_header = + grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL; + packet_length = (IB_LRH_BYTES + + IB_BTH_BYTES + + IB_DETH_BYTES + + (grh_present ? IB_GRH_BYTES : 0) + + payload_bytes + + 4 + /* ICRC */ + 3) / 4; /* round up */ + header->lrh.packet_length = cpu_to_be16(packet_length); + } + + if (vlan_present) + header->eth.type = cpu_to_be16(ETH_P_8021Q); + if (grh_present) { - packet_length += IB_GRH_BYTES / 4; header->grh.ip_version = 6; header->grh.payload_length = cpu_to_be16((IB_BTH_BYTES + @@ -222,19 +262,52 @@ void ib_ud_header_init(int payload_bytes, header->grh.next_header = 0x1b; } - header->lrh.packet_length = cpu_to_be16(packet_length); - - header->immediate_present = immediate_present; if (immediate_present) header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; else header->bth.opcode = IB_OPCODE_UD_SEND_ONLY; header->bth.pad_count = (4 - payload_bytes) & 3; header->bth.transport_header_version = 0; + + header->lrh_present = lrh_present; + header->eth_present = eth_present; + header->vlan_present = vlan_present; + header->grh_present = grh_present; + header->immediate_present = immediate_present; } EXPORT_SYMBOL(ib_ud_header_init); /** + * ib_lrh_header_pack - Pack LRH header struct into wire format + * @lrh:unpacked LRH header struct + * @buf:Buffer to pack into + * + * ib_lrh_header_pack() packs the LRH header structure @lrh into + * wire format in the buffer @buf. + */ +int ib_lrh_header_pack(struct ib_unpacked_lrh *lrh, void *buf) +{ + ib_pack(lrh_table, ARRAY_SIZE(lrh_table), lrh, buf); + return 0; +} +EXPORT_SYMBOL(ib_lrh_header_pack); + +/** + * ib_lrh_header_unpack - Unpack LRH structure from wire format + * @lrh:unpacked LRH header struct + * @buf:Buffer to pack into + * + * ib_lrh_header_unpack() unpacks the LRH header structure from + * wire format (in buf) into @lrh. + */ +int ib_lrh_header_unpack(void *buf, struct ib_unpacked_lrh *lrh) +{ + ib_unpack(lrh_table, ARRAY_SIZE(lrh_table), buf, lrh); + return 0; +} +EXPORT_SYMBOL(ib_lrh_header_unpack); + +/** * ib_ud_header_pack - Pack UD header struct into wire format * @header:UD header struct * @buf:Buffer to pack into @@ -247,10 +320,21 @@ int ib_ud_header_pack(struct ib_ud_header *header, { int len = 0; - ib_pack(lrh_table, ARRAY_SIZE(lrh_table), - &header->lrh, buf); - len += IB_LRH_BYTES; - + if (header->lrh_present) { + ib_pack(lrh_table, ARRAY_SIZE(lrh_table), + &header->lrh, buf + len); + len += IB_LRH_BYTES; + } + if (header->eth_present) { + ib_pack(eth_table, ARRAY_SIZE(eth_table), + &header->eth, buf + len); + len += IB_ETH_BYTES; + } + if (header->vlan_present) { + ib_pack(vlan_table, ARRAY_SIZE(vlan_table), + &header->vlan, buf + len); + len += IB_VLAN_BYTES; + } if (header->grh_present) { ib_pack(grh_table, ARRAY_SIZE(grh_table), &header->grh, buf + len); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 5fa8569..cd1996d 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1022,7 +1022,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, port->ib_dev = device; port->port_num = port_num; - init_MUTEX(&port->sm_sem); + sema_init(&port->sm_sem, 1); mutex_init(&port->file_mutex); INIT_LIST_HEAD(&port->file_list); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 6fcfbeb..b342248 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -460,6 +460,8 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.active_width = attr.active_width; resp.active_speed = attr.active_speed; resp.phys_state = attr.phys_state; + resp.link_layer = rdma_port_get_link_layer(file->device->ib_dev, + cmd.port_num); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index e0fa222..af7a8b0 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -94,6 +94,22 @@ rdma_node_get_transport(enum rdma_node_type node_type) } EXPORT_SYMBOL(rdma_node_get_transport); +enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num) +{ + if (device->get_link_layer) + return device->get_link_layer(device, port_num); + + switch (rdma_node_get_transport(device->node_type)) { + case RDMA_TRANSPORT_IB: + return IB_LINK_LAYER_INFINIBAND; + case RDMA_TRANSPORT_IWARP: + return IB_LINK_LAYER_ETHERNET; + default: + return IB_LINK_LAYER_UNSPECIFIED; + } +} +EXPORT_SYMBOL(rdma_port_get_link_layer); + /* Protection domains */ struct ib_pd *ib_alloc_pd(struct ib_device *device) |