summaryrefslogtreecommitdiff
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/agent.c29
-rw-r--r--drivers/infiniband/core/cma.c313
-rw-r--r--drivers/infiniband/core/iwcm.c4
-rw-r--r--drivers/infiniband/core/mad.c27
-rw-r--r--drivers/infiniband/core/multicast.c23
-rw-r--r--drivers/infiniband/core/sa_query.c30
-rw-r--r--drivers/infiniband/core/sysfs.c15
-rw-r--r--drivers/infiniband/core/ucma.c92
-rw-r--r--drivers/infiniband/core/ud_header.c140
-rw-r--r--drivers/infiniband/core/user_mad.c2
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c2
-rw-r--r--drivers/infiniband/core/verbs.c16
12 files changed, 606 insertions, 87 deletions
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index ae7c288..91916a8 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -59,8 +59,8 @@ __ib_get_agent_port(struct ib_device *device, int port_num)
struct ib_agent_port_private *entry;
list_for_each_entry(entry, &ib_agent_port_list, port_list) {
- if (entry->agent[0]->device == device &&
- entry->agent[0]->port_num == port_num)
+ if (entry->agent[1]->device == device &&
+ entry->agent[1]->port_num == port_num)
return entry;
}
return NULL;
@@ -155,14 +155,16 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
goto error1;
}
- /* Obtain send only MAD agent for SMI QP */
- port_priv->agent[0] = ib_register_mad_agent(device, port_num,
- IB_QPT_SMI, NULL, 0,
- &agent_send_handler,
- NULL, NULL);
- if (IS_ERR(port_priv->agent[0])) {
- ret = PTR_ERR(port_priv->agent[0]);
- goto error2;
+ if (rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND) {
+ /* Obtain send only MAD agent for SMI QP */
+ port_priv->agent[0] = ib_register_mad_agent(device, port_num,
+ IB_QPT_SMI, NULL, 0,
+ &agent_send_handler,
+ NULL, NULL);
+ if (IS_ERR(port_priv->agent[0])) {
+ ret = PTR_ERR(port_priv->agent[0]);
+ goto error2;
+ }
}
/* Obtain send only MAD agent for GSI QP */
@@ -182,7 +184,8 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
return 0;
error3:
- ib_unregister_mad_agent(port_priv->agent[0]);
+ if (port_priv->agent[0])
+ ib_unregister_mad_agent(port_priv->agent[0]);
error2:
kfree(port_priv);
error1:
@@ -205,7 +208,9 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
ib_unregister_mad_agent(port_priv->agent[1]);
- ib_unregister_mad_agent(port_priv->agent[0]);
+ if (port_priv->agent[0])
+ ib_unregister_mad_agent(port_priv->agent[0]);
+
kfree(port_priv);
return 0;
}
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index b930b81..6884da2 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -59,6 +59,7 @@ MODULE_LICENSE("Dual BSD/GPL");
#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
+#define CMA_IBOE_PACKET_LIFETIME 18
static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device);
@@ -157,6 +158,7 @@ struct cma_multicast {
struct list_head list;
void *context;
struct sockaddr_storage addr;
+ struct kref mcref;
};
struct cma_work {
@@ -173,6 +175,12 @@ struct cma_ndev_work {
struct rdma_cm_event event;
};
+struct iboe_mcast_work {
+ struct work_struct work;
+ struct rdma_id_private *id;
+ struct cma_multicast *mc;
+};
+
union cma_ip_addr {
struct in6_addr ip6;
struct {
@@ -281,6 +289,8 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv,
atomic_inc(&cma_dev->refcount);
id_priv->cma_dev = cma_dev;
id_priv->id.device = cma_dev->device;
+ id_priv->id.route.addr.dev_addr.transport =
+ rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
}
@@ -290,6 +300,14 @@ static inline void cma_deref_dev(struct cma_device *cma_dev)
complete(&cma_dev->comp);
}
+static inline void release_mc(struct kref *kref)
+{
+ struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
+
+ kfree(mc->multicast.ib);
+ kfree(mc);
+}
+
static void cma_detach_from_dev(struct rdma_id_private *id_priv)
{
list_del(&id_priv->list);
@@ -323,22 +341,63 @@ static int cma_set_qkey(struct rdma_id_private *id_priv)
return ret;
}
+static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
+{
+ int i;
+ int err;
+ struct ib_port_attr props;
+ union ib_gid tmp;
+
+ err = ib_query_port(device, port_num, &props);
+ if (err)
+ return 1;
+
+ for (i = 0; i < props.gid_tbl_len; ++i) {
+ err = ib_query_gid(device, port_num, i, &tmp);
+ if (err)
+ return 1;
+ if (!memcmp(&tmp, gid, sizeof tmp))
+ return 0;
+ }
+
+ return -EAGAIN;
+}
+
static int cma_acquire_dev(struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
struct cma_device *cma_dev;
- union ib_gid gid;
+ union ib_gid gid, iboe_gid;
int ret = -ENODEV;
+ u8 port;
+ enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
+ IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
- rdma_addr_get_sgid(dev_addr, &gid);
+ iboe_addr_get_sgid(dev_addr, &iboe_gid);
+ memcpy(&gid, dev_addr->src_dev_addr +
+ rdma_addr_gid_offset(dev_addr), sizeof gid);
list_for_each_entry(cma_dev, &dev_list, list) {
- ret = ib_find_cached_gid(cma_dev->device, &gid,
- &id_priv->id.port_num, NULL);
- if (!ret) {
- cma_attach_to_dev(id_priv, cma_dev);
- break;
+ for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
+ if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
+ if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
+ rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
+ ret = find_gid_port(cma_dev->device, &iboe_gid, port);
+ else
+ ret = find_gid_port(cma_dev->device, &gid, port);
+
+ if (!ret) {
+ id_priv->id.port_num = port;
+ goto out;
+ } else if (ret == 1)
+ break;
+ }
}
}
+
+out:
+ if (!ret)
+ cma_attach_to_dev(id_priv, cma_dev);
+
return ret;
}
@@ -556,10 +615,16 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
int ret;
+ u16 pkey;
+
+ if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) ==
+ IB_LINK_LAYER_INFINIBAND)
+ pkey = ib_addr_get_pkey(dev_addr);
+ else
+ pkey = 0xffff;
ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
- ib_addr_get_pkey(dev_addr),
- &qp_attr->pkey_index);
+ pkey, &qp_attr->pkey_index);
if (ret)
return ret;
@@ -737,8 +802,8 @@ static inline int cma_user_data_offset(enum rdma_port_space ps)
static void cma_cancel_route(struct rdma_id_private *id_priv)
{
- switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
- case RDMA_TRANSPORT_IB:
+ switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) {
+ case IB_LINK_LAYER_INFINIBAND:
if (id_priv->query)
ib_sa_cancel_query(id_priv->query_id, id_priv->query);
break;
@@ -816,8 +881,17 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
mc = container_of(id_priv->mc_list.next,
struct cma_multicast, list);
list_del(&mc->list);
- ib_sa_free_multicast(mc->multicast.ib);
- kfree(mc);
+ switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) {
+ case IB_LINK_LAYER_INFINIBAND:
+ ib_sa_free_multicast(mc->multicast.ib);
+ kfree(mc);
+ break;
+ case IB_LINK_LAYER_ETHERNET:
+ kref_put(&mc->mcref, release_mc);
+ break;
+ default:
+ break;
+ }
}
}
@@ -833,7 +907,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
mutex_lock(&lock);
if (id_priv->cma_dev) {
mutex_unlock(&lock);
- switch (rdma_node_get_transport(id->device->node_type)) {
+ switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
case RDMA_TRANSPORT_IB:
if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -1708,6 +1782,81 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
return 0;
}
+static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
+{
+ struct rdma_route *route = &id_priv->id.route;
+ struct rdma_addr *addr = &route->addr;
+ struct cma_work *work;
+ int ret;
+ struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr;
+ struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr;
+ struct net_device *ndev = NULL;
+ u16 vid;
+
+ if (src_addr->sin_family != dst_addr->sin_family)
+ return -EINVAL;
+
+ work = kzalloc(sizeof *work, GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ work->id = id_priv;
+ INIT_WORK(&work->work, cma_work_handler);
+
+ route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
+ if (!route->path_rec) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ route->num_paths = 1;
+
+ if (addr->dev_addr.bound_dev_if)
+ ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
+ if (!ndev) {
+ ret = -ENODEV;
+ goto err2;
+ }
+
+ vid = rdma_vlan_dev_vlan_id(ndev);
+
+ iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid);
+ iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid);
+
+ route->path_rec->hop_limit = 1;
+ route->path_rec->reversible = 1;
+ route->path_rec->pkey = cpu_to_be16(0xffff);
+ route->path_rec->mtu_selector = IB_SA_EQ;
+ route->path_rec->sl = id_priv->tos >> 5;
+
+ route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
+ route->path_rec->rate_selector = IB_SA_EQ;
+ route->path_rec->rate = iboe_get_rate(ndev);
+ dev_put(ndev);
+ route->path_rec->packet_life_time_selector = IB_SA_EQ;
+ route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
+ if (!route->path_rec->mtu) {
+ ret = -EINVAL;
+ goto err2;
+ }
+
+ work->old_state = CMA_ROUTE_QUERY;
+ work->new_state = CMA_ROUTE_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+ work->event.status = 0;
+
+ queue_work(cma_wq, &work->work);
+
+ return 0;
+
+err2:
+ kfree(route->path_rec);
+ route->path_rec = NULL;
+err1:
+ kfree(work);
+ return ret;
+}
+
int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
{
struct rdma_id_private *id_priv;
@@ -1720,7 +1869,16 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
atomic_inc(&id_priv->refcount);
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
- ret = cma_resolve_ib_route(id_priv, timeout_ms);
+ switch (rdma_port_get_link_layer(id->device, id->port_num)) {
+ case IB_LINK_LAYER_INFINIBAND:
+ ret = cma_resolve_ib_route(id_priv, timeout_ms);
+ break;
+ case IB_LINK_LAYER_ETHERNET:
+ ret = cma_resolve_iboe_route(id_priv);
+ break;
+ default:
+ ret = -ENOSYS;
+ }
break;
case RDMA_TRANSPORT_IWARP:
ret = cma_resolve_iw_route(id_priv, timeout_ms);
@@ -1773,7 +1931,7 @@ port_found:
goto out;
id_priv->id.route.addr.dev_addr.dev_type =
- (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ?
+ (rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ?
ARPHRD_INFINIBAND : ARPHRD_ETHER;
rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
@@ -2758,6 +2916,102 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
return 0;
}
+static void iboe_mcast_work_handler(struct work_struct *work)
+{
+ struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
+ struct cma_multicast *mc = mw->mc;
+ struct ib_sa_multicast *m = mc->multicast.ib;
+
+ mc->multicast.ib->context = mc;
+ cma_ib_mc_handler(0, m);
+ kref_put(&mc->mcref, release_mc);
+ kfree(mw);
+}
+
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
+{
+ struct sockaddr_in *sin = (struct sockaddr_in *)addr;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
+
+ if (cma_any_addr(addr)) {
+ memset(mgid, 0, sizeof *mgid);
+ } else if (addr->sa_family == AF_INET6) {
+ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+ } else {
+ mgid->raw[0] = 0xff;
+ mgid->raw[1] = 0x0e;
+ mgid->raw[2] = 0;
+ mgid->raw[3] = 0;
+ mgid->raw[4] = 0;
+ mgid->raw[5] = 0;
+ mgid->raw[6] = 0;
+ mgid->raw[7] = 0;
+ mgid->raw[8] = 0;
+ mgid->raw[9] = 0;
+ mgid->raw[10] = 0xff;
+ mgid->raw[11] = 0xff;
+ *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
+ }
+}
+
+static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
+ struct cma_multicast *mc)
+{
+ struct iboe_mcast_work *work;
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ int err;
+ struct sockaddr *addr = (struct sockaddr *)&mc->addr;
+ struct net_device *ndev = NULL;
+
+ if (cma_zero_addr((struct sockaddr *)&mc->addr))
+ return -EINVAL;
+
+ work = kzalloc(sizeof *work, GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
+ if (!mc->multicast.ib) {
+ err = -ENOMEM;
+ goto out1;
+ }
+
+ cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);
+
+ mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
+ if (id_priv->id.ps == RDMA_PS_UDP)
+ mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ if (!ndev) {
+ err = -ENODEV;
+ goto out2;
+ }
+ mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
+ mc->multicast.ib->rec.hop_limit = 1;
+ mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+ dev_put(ndev);
+ if (!mc->multicast.ib->rec.mtu) {
+ err = -EINVAL;
+ goto out2;
+ }
+ iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid);
+ work->id = id_priv;
+ work->mc = mc;
+ INIT_WORK(&work->work, iboe_mcast_work_handler);
+ kref_get(&mc->mcref);
+ queue_work(cma_wq, &work->work);
+
+ return 0;
+
+out2:
+ kfree(mc->multicast.ib);
+out1:
+ kfree(work);
+ return err;
+}
+
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
void *context)
{
@@ -2784,7 +3038,17 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
- ret = cma_join_ib_multicast(id_priv, mc);
+ switch (rdma_port_get_link_layer(id->device, id->port_num)) {
+ case IB_LINK_LAYER_INFINIBAND:
+ ret = cma_join_ib_multicast(id_priv, mc);
+ break;
+ case IB_LINK_LAYER_ETHERNET:
+ kref_init(&mc->mcref);
+ ret = cma_iboe_join_multicast(id_priv, mc);
+ break;
+ default:
+ ret = -EINVAL;
+ }
break;
default:
ret = -ENOSYS;
@@ -2817,8 +3081,19 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
ib_detach_mcast(id->qp,
&mc->multicast.ib->rec.mgid,
mc->multicast.ib->rec.mlid);
- ib_sa_free_multicast(mc->multicast.ib);
- kfree(mc);
+ if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) {
+ switch (rdma_port_get_link_layer(id->device, id->port_num)) {
+ case IB_LINK_LAYER_INFINIBAND:
+ ib_sa_free_multicast(mc->multicast.ib);
+ kfree(mc);
+ break;
+ case IB_LINK_LAYER_ETHERNET:
+ kref_put(&mc->mcref, release_mc);
+ break;
+ default:
+ break;
+ }
+ }
return;
}
}
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index bfead5b..2a1e9ae 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -506,6 +506,8 @@ int iw_cm_accept(struct iw_cm_id *cm_id,
qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
if (!qp) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+ wake_up_all(&cm_id_priv->connect_wait);
return -EINVAL;
}
cm_id->device->iwcm->add_ref(qp);
@@ -565,6 +567,8 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
if (!qp) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+ wake_up_all(&cm_id_priv->connect_wait);
return -EINVAL;
}
cm_id->device->iwcm->add_ref(qp);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ef1304f..822cfdc 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -2598,6 +2598,9 @@ static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info)
struct ib_mad_private *recv;
struct ib_mad_list_head *mad_list;
+ if (!qp_info->qp)
+ return;
+
while (!list_empty(&qp_info->recv_queue.list)) {
mad_list = list_entry(qp_info->recv_queue.list.next,
@@ -2639,6 +2642,9 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
for (i = 0; i < IB_MAD_QPS_CORE; i++) {
qp = port_priv->qp_info[i].qp;
+ if (!qp)
+ continue;
+
/*
* PKey index for QP1 is irrelevant but
* one is needed for the Reset to Init transition
@@ -2680,6 +2686,9 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
}
for (i = 0; i < IB_MAD_QPS_CORE; i++) {
+ if (!port_priv->qp_info[i].qp)
+ continue;
+
ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
if (ret) {
printk(KERN_ERR PFX "Couldn't post receive WRs\n");
@@ -2758,6 +2767,9 @@ error:
static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
{
+ if (!qp_info->qp)
+ return;
+
ib_destroy_qp(qp_info->qp);
kfree(qp_info->snoop_table);
}
@@ -2773,6 +2785,7 @@ static int ib_mad_port_open(struct ib_device *device,
struct ib_mad_port_private *port_priv;
unsigned long flags;
char name[sizeof "ib_mad123"];
+ int has_smi;
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
@@ -2788,7 +2801,11 @@ static int ib_mad_port_open(struct ib_device *device,
init_mad_qp(port_priv, &port_priv->qp_info[0]);
init_mad_qp(port_priv, &port_priv->qp_info[1]);
- cq_size = (mad_sendq_size + mad_recvq_size) * 2;
+ cq_size = mad_sendq_size + mad_recvq_size;
+ has_smi = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND;
+ if (has_smi)
+ cq_size *= 2;
+
port_priv->cq = ib_create_cq(port_priv->device,
ib_mad_thread_completion_handler,
NULL, port_priv, cq_size, 0);
@@ -2812,9 +2829,11 @@ static int ib_mad_port_open(struct ib_device *device,
goto error5;
}
- ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI);
- if (ret)
- goto error6;
+ if (has_smi) {
+ ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI);
+ if (ret)
+ goto error6;
+ }
ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
if (ret)
goto error7;
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index a519801..68b4162 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -774,6 +774,10 @@ static void mcast_event_handler(struct ib_event_handler *handler,
int index;
dev = container_of(handler, struct mcast_device, event_handler);
+ if (rdma_port_get_link_layer(dev->device, event->element.port_num) !=
+ IB_LINK_LAYER_INFINIBAND)
+ return;
+
index = event->element.port_num - dev->start_port;
switch (event->event) {
@@ -796,6 +800,7 @@ static void mcast_add_one(struct ib_device *device)
struct mcast_device *dev;
struct mcast_port *port;
int i;
+ int count = 0;
if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
return;
@@ -813,6 +818,9 @@ static void mcast_add_one(struct ib_device *device)
}
for (i = 0; i <= dev->end_port - dev->start_port; i++) {
+ if (rdma_port_get_link_layer(device, dev->start_port + i) !=
+ IB_LINK_LAYER_INFINIBAND)
+ continue;
port = &dev->port[i];
port->dev = dev;
port->port_num = dev->start_port + i;
@@ -820,6 +828,12 @@ static void mcast_add_one(struct ib_device *device)
port->table = RB_ROOT;
init_completion(&port->comp);
atomic_set(&port->refcount, 1);
+ ++count;
+ }
+
+ if (!count) {
+ kfree(dev);
+ return;
}
dev->device = device;
@@ -843,9 +857,12 @@ static void mcast_remove_one(struct ib_device *device)
flush_workqueue(mcast_wq);
for (i = 0; i <= dev->end_port - dev->start_port; i++) {
- port = &dev->port[i];
- deref_port(port);
- wait_for_completion(&port->comp);
+ if (rdma_port_get_link_layer(device, dev->start_port + i) ==
+ IB_LINK_LAYER_INFINIBAND) {
+ port = &dev->port[i];
+ deref_port(port);
+ wait_for_completion(&port->comp);
+ }
}
kfree(dev);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 7e1ffd8c..91a6603 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -416,6 +416,9 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
struct ib_sa_port *port =
&sa_dev->port[event->element.port_num - sa_dev->start_port];
+ if (rdma_port_get_link_layer(handler->device, port->port_num) != IB_LINK_LAYER_INFINIBAND)
+ return;
+
spin_lock_irqsave(&port->ah_lock, flags);
if (port->sm_ah)
kref_put(&port->sm_ah->ref, free_sm_ah);
@@ -493,6 +496,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
{
int ret;
u16 gid_index;
+ int force_grh;
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->dlid = be16_to_cpu(rec->dlid);
@@ -502,7 +506,9 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
ah_attr->port_num = port_num;
ah_attr->static_rate = rec->rate;
- if (rec->hop_limit > 1) {
+ force_grh = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_ETHERNET;
+
+ if (rec->hop_limit > 1 || force_grh) {
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.dgid = rec->dgid;
@@ -1007,7 +1013,7 @@ static void ib_sa_add_one(struct ib_device *device)
e = device->phys_port_cnt;
}
- sa_dev = kmalloc(sizeof *sa_dev +
+ sa_dev = kzalloc(sizeof *sa_dev +
(e - s + 1) * sizeof (struct ib_sa_port),
GFP_KERNEL);
if (!sa_dev)
@@ -1017,9 +1023,12 @@ static void ib_sa_add_one(struct ib_device *device)
sa_dev->end_port = e;
for (i = 0; i <= e - s; ++i) {
+ spin_lock_init(&sa_dev->port[i].ah_lock);
+ if (rdma_port_get_link_layer(device, i + 1) != IB_LINK_LAYER_INFINIBAND)
+ continue;
+
sa_dev->port[i].sm_ah = NULL;
sa_dev->port[i].port_num = i + s;
- spin_lock_init(&sa_dev->port[i].ah_lock);
sa_dev->port[i].agent =
ib_register_mad_agent(device, i + s, IB_QPT_GSI,
@@ -1045,13 +1054,15 @@ static void ib_sa_add_one(struct ib_device *device)
goto err;
for (i = 0; i <= e - s; ++i)
- update_sm_ah(&sa_dev->port[i].update_task);
+ if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND)
+ update_sm_ah(&sa_dev->port[i].update_task);
return;
err:
while (--i >= 0)
- ib_unregister_mad_agent(sa_dev->port[i].agent);
+ if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND)
+ ib_unregister_mad_agent(sa_dev->port[i].agent);
kfree(sa_dev);
@@ -1071,9 +1082,12 @@ static void ib_sa_remove_one(struct ib_device *device)
flush_scheduled_work();
for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
- ib_unregister_mad_agent(sa_dev->port[i].agent);
- if (sa_dev->port[i].sm_ah)
- kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
+ if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) {
+ ib_unregister_mad_agent(sa_dev->port[i].agent);
+ if (sa_dev->port[i].sm_ah)
+ kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
+ }
+
}
kfree(sa_dev);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 3627300..9ab5df7 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -222,6 +222,19 @@ static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
}
}
+static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused,
+ char *buf)
+{
+ switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) {
+ case IB_LINK_LAYER_INFINIBAND:
+ return sprintf(buf, "%s\n", "InfiniBand");
+ case IB_LINK_LAYER_ETHERNET:
+ return sprintf(buf, "%s\n", "Ethernet");
+ default:
+ return sprintf(buf, "%s\n", "Unknown");
+ }
+}
+
static PORT_ATTR_RO(state);
static PORT_ATTR_RO(lid);
static PORT_ATTR_RO(lid_mask_count);
@@ -230,6 +243,7 @@ static PORT_ATTR_RO(sm_sl);
static PORT_ATTR_RO(cap_mask);
static PORT_ATTR_RO(rate);
static PORT_ATTR_RO(phys_state);
+static PORT_ATTR_RO(link_layer);
static struct attribute *port_default_attrs[] = {
&port_attr_state.attr,
@@ -240,6 +254,7 @@ static struct attribute *port_default_attrs[] = {
&port_attr_cap_mask.attr,
&port_attr_rate.attr,
&port_attr_phys_state.attr,
+ &port_attr_link_layer.attr,
NULL
};
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index ac7edc2..ca12acf 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -40,6 +40,7 @@
#include <linux/in6.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
+#include <linux/sysctl.h>
#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
@@ -50,8 +51,24 @@ MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
MODULE_LICENSE("Dual BSD/GPL");
-enum {
- UCMA_MAX_BACKLOG = 128
+static unsigned int max_backlog = 1024;
+
+static struct ctl_table_header *ucma_ctl_table_hdr;
+static ctl_table ucma_ctl_table[] = {
+ {
+ .procname = "max_backlog",
+ .data = &max_backlog,
+ .maxlen = sizeof max_backlog,
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ { }
+};
+
+static struct ctl_path ucma_ctl_path[] = {
+ { .procname = "net" },
+ { .procname = "rdma_ucm" },
+ { }
};
struct ucma_file {
@@ -583,6 +600,42 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
}
}
+static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
+ struct rdma_route *route)
+{
+ struct rdma_dev_addr *dev_addr;
+ struct net_device *dev;
+ u16 vid = 0;
+
+ resp->num_paths = route->num_paths;
+ switch (route->num_paths) {
+ case 0:
+ dev_addr = &route->addr.dev_addr;
+ dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ if (dev) {
+ vid = rdma_vlan_dev_vlan_id(dev);
+ dev_put(dev);
+ }
+
+ iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid,
+ dev_addr->dst_dev_addr, vid);
+ iboe_addr_get_sgid(dev_addr,
+ (union ib_gid *) &resp->ib_route[0].sgid);
+ resp->ib_route[0].pkey = cpu_to_be16(0xffff);
+ break;
+ case 2:
+ ib_copy_path_rec_to_user(&resp->ib_route[1],
+ &route->path_rec[1]);
+ /* fall through */
+ case 1:
+ ib_copy_path_rec_to_user(&resp->ib_route[0],
+ &route->path_rec[0]);
+ break;
+ default:
+ break;
+ }
+}
+
static ssize_t ucma_query_route(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len)
@@ -617,12 +670,17 @@ static ssize_t ucma_query_route(struct ucma_file *file,
resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
resp.port_num = ctx->cm_id->port_num;
- switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) {
- case RDMA_TRANSPORT_IB:
- ucma_copy_ib_route(&resp, &ctx->cm_id->route);
- break;
- default:
- break;
+ if (rdma_node_get_transport(ctx->cm_id->device->node_type) == RDMA_TRANSPORT_IB) {
+ switch (rdma_port_get_link_layer(ctx->cm_id->device, ctx->cm_id->port_num)) {
+ case IB_LINK_LAYER_INFINIBAND:
+ ucma_copy_ib_route(&resp, &ctx->cm_id->route);
+ break;
+ case IB_LINK_LAYER_ETHERNET:
+ ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
+ break;
+ default:
+ break;
+ }
}
out:
@@ -686,8 +744,8 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ctx->backlog = cmd.backlog > 0 && cmd.backlog < UCMA_MAX_BACKLOG ?
- cmd.backlog : UCMA_MAX_BACKLOG;
+ ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
+ cmd.backlog : max_backlog;
ret = rdma_listen(ctx->cm_id, ctx->backlog);
ucma_put_ctx(ctx);
return ret;
@@ -1279,16 +1337,26 @@ static int __init ucma_init(void)
ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
if (ret) {
printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n");
- goto err;
+ goto err1;
+ }
+
+ ucma_ctl_table_hdr = register_sysctl_paths(ucma_ctl_path, ucma_ctl_table);
+ if (!ucma_ctl_table_hdr) {
+ printk(KERN_ERR "rdma_ucm: couldn't register sysctl paths\n");
+ ret = -ENOMEM;
+ goto err2;
}
return 0;
-err:
+err2:
+ device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
+err1:
misc_deregister(&ucma_misc);
return ret;
}
static void __exit ucma_cleanup(void)
{
+ unregister_sysctl_table(ucma_ctl_table_hdr);
device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
misc_deregister(&ucma_misc);
idr_destroy(&ctx_idr);
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 650b501..bb7e192 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -33,6 +33,7 @@
#include <linux/errno.h>
#include <linux/string.h>
+#include <linux/if_ether.h>
#include <rdma/ib_pack.h>
@@ -80,6 +81,40 @@ static const struct ib_field lrh_table[] = {
.size_bits = 16 }
};
+static const struct ib_field eth_table[] = {
+ { STRUCT_FIELD(eth, dmac_h),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { STRUCT_FIELD(eth, dmac_l),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { STRUCT_FIELD(eth, smac_h),
+ .offset_words = 1,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { STRUCT_FIELD(eth, smac_l),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { STRUCT_FIELD(eth, type),
+ .offset_words = 3,
+ .offset_bits = 0,
+ .size_bits = 16 }
+};
+
+static const struct ib_field vlan_table[] = {
+ { STRUCT_FIELD(vlan, tag),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { STRUCT_FIELD(vlan, type),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 }
+};
+
static const struct ib_field grh_table[] = {
{ STRUCT_FIELD(grh, ip_version),
.offset_words = 0,
@@ -180,38 +215,43 @@ static const struct ib_field deth_table[] = {
/**
* ib_ud_header_init - Initialize UD header structure
* @payload_bytes:Length of packet payload
+ * @lrh_present: specify if LRH is present
+ * @eth_present: specify if Eth header is present
+ * @vlan_present: packet is tagged vlan
* @grh_present:GRH flag (if non-zero, GRH will be included)
- * @immediate_present: specify if immediate data should be used
+ * @immediate_present: specify if immediate data is present
* @header:Structure to initialize
- *
- * ib_ud_header_init() initializes the lrh.link_version, lrh.link_next_header,
- * lrh.packet_length, grh.ip_version, grh.payload_length,
- * grh.next_header, bth.opcode, bth.pad_count and
- * bth.transport_header_version fields of a &struct ib_ud_header given
- * the payload length and whether a GRH will be included.
*/
void ib_ud_header_init(int payload_bytes,
+ int lrh_present,
+ int eth_present,
+ int vlan_present,
int grh_present,
int immediate_present,
struct ib_ud_header *header)
{
- u16 packet_length;
-
memset(header, 0, sizeof *header);
- header->lrh.link_version = 0;
- header->lrh.link_next_header =
- grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL;
- packet_length = (IB_LRH_BYTES +
- IB_BTH_BYTES +
- IB_DETH_BYTES +
- payload_bytes +
- 4 + /* ICRC */
- 3) / 4; /* round up */
-
- header->grh_present = grh_present;
+ if (lrh_present) {
+ u16 packet_length;
+
+ header->lrh.link_version = 0;
+ header->lrh.link_next_header =
+ grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL;
+ packet_length = (IB_LRH_BYTES +
+ IB_BTH_BYTES +
+ IB_DETH_BYTES +
+ (grh_present ? IB_GRH_BYTES : 0) +
+ payload_bytes +
+ 4 + /* ICRC */
+ 3) / 4; /* round up */
+ header->lrh.packet_length = cpu_to_be16(packet_length);
+ }
+
+ if (vlan_present)
+ header->eth.type = cpu_to_be16(ETH_P_8021Q);
+
if (grh_present) {
- packet_length += IB_GRH_BYTES / 4;
header->grh.ip_version = 6;
header->grh.payload_length =
cpu_to_be16((IB_BTH_BYTES +
@@ -222,19 +262,52 @@ void ib_ud_header_init(int payload_bytes,
header->grh.next_header = 0x1b;
}
- header->lrh.packet_length = cpu_to_be16(packet_length);
-
- header->immediate_present = immediate_present;
if (immediate_present)
header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
else
header->bth.opcode = IB_OPCODE_UD_SEND_ONLY;
header->bth.pad_count = (4 - payload_bytes) & 3;
header->bth.transport_header_version = 0;
+
+ header->lrh_present = lrh_present;
+ header->eth_present = eth_present;
+ header->vlan_present = vlan_present;
+ header->grh_present = grh_present;
+ header->immediate_present = immediate_present;
}
EXPORT_SYMBOL(ib_ud_header_init);
/**
+ * ib_lrh_header_pack - Pack LRH header struct into wire format
+ * @lrh:unpacked LRH header struct
+ * @buf:Buffer to pack into
+ *
+ * ib_lrh_header_pack() packs the LRH header structure @lrh into
+ * wire format in the buffer @buf.
+ */
+int ib_lrh_header_pack(struct ib_unpacked_lrh *lrh, void *buf)
+{
+ ib_pack(lrh_table, ARRAY_SIZE(lrh_table), lrh, buf);
+ return 0;
+}
+EXPORT_SYMBOL(ib_lrh_header_pack);
+
+/**
+ * ib_lrh_header_unpack - Unpack LRH structure from wire format
+ * @lrh:unpacked LRH header struct
+ * @buf:Buffer to pack into
+ *
+ * ib_lrh_header_unpack() unpacks the LRH header structure from
+ * wire format (in buf) into @lrh.
+ */
+int ib_lrh_header_unpack(void *buf, struct ib_unpacked_lrh *lrh)
+{
+ ib_unpack(lrh_table, ARRAY_SIZE(lrh_table), buf, lrh);
+ return 0;
+}
+EXPORT_SYMBOL(ib_lrh_header_unpack);
+
+/**
* ib_ud_header_pack - Pack UD header struct into wire format
* @header:UD header struct
* @buf:Buffer to pack into
@@ -247,10 +320,21 @@ int ib_ud_header_pack(struct ib_ud_header *header,
{
int len = 0;
- ib_pack(lrh_table, ARRAY_SIZE(lrh_table),
- &header->lrh, buf);
- len += IB_LRH_BYTES;
-
+ if (header->lrh_present) {
+ ib_pack(lrh_table, ARRAY_SIZE(lrh_table),
+ &header->lrh, buf + len);
+ len += IB_LRH_BYTES;
+ }
+ if (header->eth_present) {
+ ib_pack(eth_table, ARRAY_SIZE(eth_table),
+ &header->eth, buf + len);
+ len += IB_ETH_BYTES;
+ }
+ if (header->vlan_present) {
+ ib_pack(vlan_table, ARRAY_SIZE(vlan_table),
+ &header->vlan, buf + len);
+ len += IB_VLAN_BYTES;
+ }
if (header->grh_present) {
ib_pack(grh_table, ARRAY_SIZE(grh_table),
&header->grh, buf + len);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 5fa8569..cd1996d 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1022,7 +1022,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
port->ib_dev = device;
port->port_num = port_num;
- init_MUTEX(&port->sm_sem);
+ sema_init(&port->sm_sem, 1);
mutex_init(&port->file_mutex);
INIT_LIST_HEAD(&port->file_list);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 6fcfbeb..b342248 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -460,6 +460,8 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
resp.active_width = attr.active_width;
resp.active_speed = attr.active_speed;
resp.phys_state = attr.phys_state;
+ resp.link_layer = rdma_port_get_link_layer(file->device->ib_dev,
+ cmd.port_num);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index e0fa222..af7a8b0 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -94,6 +94,22 @@ rdma_node_get_transport(enum rdma_node_type node_type)
}
EXPORT_SYMBOL(rdma_node_get_transport);
+enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num)
+{
+ if (device->get_link_layer)
+ return device->get_link_layer(device, port_num);
+
+ switch (rdma_node_get_transport(device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ return IB_LINK_LAYER_INFINIBAND;
+ case RDMA_TRANSPORT_IWARP:
+ return IB_LINK_LAYER_ETHERNET;
+ default:
+ return IB_LINK_LAYER_UNSPECIFIED;
+ }
+}
+EXPORT_SYMBOL(rdma_port_get_link_layer);
+
/* Protection domains */
struct ib_pd *ib_alloc_pd(struct ib_device *device)