From 4139032b4860c06ff3a7687041f06535fed901ed Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Mon, 29 Jun 2015 09:57:00 -0400 Subject: IB: Add rdma_cap_ib_switch helper and use where appropriate Persuant to Liran's comments on node_type on linux-rdma mailing list: In an effort to reform the RDMA core and ULPs to minimize use of node_type in struct ib_device, an additional bit is added to struct ib_device for is_switch (IB switch). This is needed to be initialized by any IB switch device driver. This is a NEW requirement on such device drivers which are all "out of tree". In addition, an ib_switch helper was added to ib_verbs.h based on the is_switch device bit rather than node_type (although those should be consistent). The RDMA core (MAD, SMI, agent, sa_query, multicast, sysfs) as well as (IPoIB and SRP) ULPs are updated where appropriate to use this new helper. In some cases, the helper is now used under the covers of using rdma_[start end]_port rather than the open coding previously used. Reviewed-by: Sean Hefty Reviewed-By: Jason Gunthorpe Reviewed-by: Ira Weiny Tested-by: Ira Weiny Signed-off-by: Hal Rosenstock Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index c7dcfe4..0429040 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -88,7 +88,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh * struct ib_ah *ah; struct ib_mad_send_wr_private *mad_send_wr; - if (device->node_type == RDMA_NODE_IB_SWITCH) + if (rdma_cap_ib_switch(device)) port_priv = ib_get_agent_port(device, 0); else port_priv = ib_get_agent_port(device, port_num); @@ -122,7 +122,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh * memcpy(send_buf->mad, mad_hdr, resp_mad_len); send_buf->ah = ah; - if (device->node_type == RDMA_NODE_IB_SWITCH) { + if (rdma_cap_ib_switch(device)) { mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, send_buf); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index a4b1466..c82d751 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -769,7 +769,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, mad_agent_priv->qp_info->port_priv->port_num); - if (device->node_type == RDMA_NODE_IB_SWITCH && + if (rdma_cap_ib_switch(device) && smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) port_num = send_wr->wr.ud.port_num; else @@ -787,7 +787,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, if ((opa_get_smp_direction(opa_smp) ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) == OPA_LID_PERMISSIVE && - opa_smi_handle_dr_smp_send(opa_smp, device->node_type, + opa_smi_handle_dr_smp_send(opa_smp, + rdma_cap_ib_switch(device), port_num) == IB_SMI_DISCARD) { ret = -EINVAL; dev_err(&device->dev, "OPA Invalid directed route\n"); @@ -810,7 +811,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, } else { if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) == IB_LID_PERMISSIVE && - smi_handle_dr_smp_send(smp, device->node_type, port_num) == + smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) == IB_SMI_DISCARD) { ret = -EINVAL; dev_err(&device->dev, "Invalid directed route\n"); @@ -2030,7 +2031,7 @@ static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv struct ib_smp *smp = (struct ib_smp *)recv->mad; if (smi_handle_dr_smp_recv(smp, - port_priv->device->node_type, + rdma_cap_ib_switch(port_priv->device), port_num, port_priv->device->phys_port_cnt) == IB_SMI_DISCARD) @@ -2042,13 +2043,13 @@ static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv if (retsmi == IB_SMI_SEND) { /* don't forward */ if (smi_handle_dr_smp_send(smp, - port_priv->device->node_type, + rdma_cap_ib_switch(port_priv->device), port_num) == IB_SMI_DISCARD) return IB_SMI_DISCARD; if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD) return IB_SMI_DISCARD; - } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { + } else if (rdma_cap_ib_switch(port_priv->device)) { /* forward case for switches */ memcpy(response, recv, mad_priv_size(response)); response->header.recv_wc.wc = &response->header.wc; @@ -2115,7 +2116,7 @@ handle_opa_smi(struct ib_mad_port_private *port_priv, struct opa_smp *smp = (struct opa_smp *)recv->mad; if (opa_smi_handle_dr_smp_recv(smp, - port_priv->device->node_type, + rdma_cap_ib_switch(port_priv->device), port_num, port_priv->device->phys_port_cnt) == IB_SMI_DISCARD) @@ -2127,7 +2128,7 @@ handle_opa_smi(struct ib_mad_port_private *port_priv, if (retsmi == IB_SMI_SEND) { /* don't forward */ if (opa_smi_handle_dr_smp_send(smp, - port_priv->device->node_type, + rdma_cap_ib_switch(port_priv->device), port_num) == IB_SMI_DISCARD) return IB_SMI_DISCARD; @@ -2135,7 +2136,7 @@ handle_opa_smi(struct ib_mad_port_private *port_priv, IB_SMI_DISCARD) return IB_SMI_DISCARD; - } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { + } else if (rdma_cap_ib_switch(port_priv->device)) { /* forward case for switches */ memcpy(response, recv, mad_priv_size(response)); response->header.recv_wc.wc = &response->header.wc; @@ -2235,7 +2236,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, goto out; } - if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) + if (rdma_cap_ib_switch(port_priv->device)) port_num = wc->port_num; else port_num = port_priv->port_num; @@ -3297,17 +3298,11 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) static void ib_mad_init_device(struct ib_device *device) { - int start, end, i; + int start, i; - if (device->node_type == RDMA_NODE_IB_SWITCH) { - start = 0; - end = 0; - } else { - start = 1; - end = device->phys_port_cnt; - } + start = rdma_start_port(device); - for (i = start; i <= end; i++) { + for (i = start; i <= rdma_end_port(device); i++) { if (!rdma_cap_ib_mad(device, i)) continue; @@ -3342,17 +3337,9 @@ error: static void ib_mad_remove_device(struct ib_device *device) { - int start, end, i; - - if (device->node_type == RDMA_NODE_IB_SWITCH) { - start = 0; - end = 0; - } else { - start = 1; - end = device->phys_port_cnt; - } + int i; - for (i = start; i <= end; i++) { + for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { if (!rdma_cap_ib_mad(device, i)) continue; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 1244f02..2cb865c 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -812,12 +812,8 @@ static void mcast_add_one(struct ib_device *device) if (!dev) return; - if (device->node_type == RDMA_NODE_IB_SWITCH) - dev->start_port = dev->end_port = 0; - else { - dev->start_port = 1; - dev->end_port = device->phys_port_cnt; - } + dev->start_port = rdma_start_port(device); + dev->end_port = rdma_end_port(device); for (i = 0; i <= dev->end_port - dev->start_port; i++) { if (!rdma_cap_ib_mcast(device, dev->start_port + i)) diff --git a/drivers/infiniband/core/opa_smi.h b/drivers/infiniband/core/opa_smi.h index 62d91bf..3bfab35 100644 --- a/drivers/infiniband/core/opa_smi.h +++ b/drivers/infiniband/core/opa_smi.h @@ -39,12 +39,12 @@ #include "smi.h" -enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, u8 node_type, +enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch, int port_num, int phys_port_cnt); int opa_smi_get_fwd_port(struct opa_smp *smp); extern enum smi_forward_action opa_smi_check_forward_dr_smp(struct opa_smp *smp); extern enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, - u8 node_type, int port_num); + bool is_switch, int port_num); /* * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 0fae850..ca919f4 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1156,12 +1156,8 @@ static void ib_sa_add_one(struct ib_device *device) int s, e, i; int count = 0; - if (device->node_type == RDMA_NODE_IB_SWITCH) - s = e = 0; - else { - s = 1; - e = device->phys_port_cnt; - } + s = rdma_start_port(device); + e = rdma_end_port(device); sa_dev = kzalloc(sizeof *sa_dev + (e - s + 1) * sizeof (struct ib_sa_port), diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c index 368a561..f19b238 100644 --- a/drivers/infiniband/core/smi.c +++ b/drivers/infiniband/core/smi.c @@ -41,7 +41,7 @@ #include "smi.h" #include "opa_smi.h" -static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, +static enum smi_action __smi_handle_dr_smp_send(bool is_switch, int port_num, u8 *hop_ptr, u8 hop_cnt, const u8 *initial_path, const u8 *return_path, @@ -64,7 +64,7 @@ static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, /* C14-9:2 */ if (*hop_ptr && *hop_ptr < hop_cnt) { - if (node_type != RDMA_NODE_IB_SWITCH) + if (!is_switch) return IB_SMI_DISCARD; /* return_path set when received */ @@ -77,7 +77,7 @@ static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, if (*hop_ptr == hop_cnt) { /* return_path set when received */ (*hop_ptr)++; - return (node_type == RDMA_NODE_IB_SWITCH || + return (is_switch || dr_dlid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } @@ -96,7 +96,7 @@ static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, /* C14-13:2 */ if (2 <= *hop_ptr && *hop_ptr <= hop_cnt) { - if (node_type != RDMA_NODE_IB_SWITCH) + if (!is_switch) return IB_SMI_DISCARD; (*hop_ptr)--; @@ -108,7 +108,7 @@ static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, if (*hop_ptr == 1) { (*hop_ptr)--; /* C14-13:3 -- SMPs destined for SM shouldn't be here */ - return (node_type == RDMA_NODE_IB_SWITCH || + return (is_switch || dr_slid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } @@ -127,9 +127,9 @@ static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, * Return IB_SMI_DISCARD if the SMP should be discarded */ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, - u8 node_type, int port_num) + bool is_switch, int port_num) { - return __smi_handle_dr_smp_send(node_type, port_num, + return __smi_handle_dr_smp_send(is_switch, port_num, &smp->hop_ptr, smp->hop_cnt, smp->initial_path, smp->return_path, @@ -139,9 +139,9 @@ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, } enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, - u8 node_type, int port_num) + bool is_switch, int port_num) { - return __smi_handle_dr_smp_send(node_type, port_num, + return __smi_handle_dr_smp_send(is_switch, port_num, &smp->hop_ptr, smp->hop_cnt, smp->route.dr.initial_path, smp->route.dr.return_path, @@ -152,7 +152,7 @@ enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, OPA_LID_PERMISSIVE); } -static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, +static enum smi_action __smi_handle_dr_smp_recv(bool is_switch, int port_num, int phys_port_cnt, u8 *hop_ptr, u8 hop_cnt, const u8 *initial_path, @@ -173,7 +173,7 @@ static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, /* C14-9:2 -- intermediate hop */ if (*hop_ptr && *hop_ptr < hop_cnt) { - if (node_type != RDMA_NODE_IB_SWITCH) + if (!is_switch) return IB_SMI_DISCARD; return_path[*hop_ptr] = port_num; @@ -188,7 +188,7 @@ static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, return_path[*hop_ptr] = port_num; /* hop_ptr updated when sending */ - return (node_type == RDMA_NODE_IB_SWITCH || + return (is_switch || dr_dlid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } @@ -208,7 +208,7 @@ static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, /* C14-13:2 */ if (2 <= *hop_ptr && *hop_ptr <= hop_cnt) { - if (node_type != RDMA_NODE_IB_SWITCH) + if (!is_switch) return IB_SMI_DISCARD; /* hop_ptr updated when sending */ @@ -224,8 +224,7 @@ static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, return IB_SMI_HANDLE; } /* hop_ptr updated when sending */ - return (node_type == RDMA_NODE_IB_SWITCH ? - IB_SMI_HANDLE : IB_SMI_DISCARD); + return (is_switch ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:4 -- hop_ptr = 0 -> give to SM */ @@ -238,10 +237,10 @@ static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, * Adjust information for a received SMP * Return IB_SMI_DISCARD if the SMP should be dropped */ -enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, +enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch, int port_num, int phys_port_cnt) { - return __smi_handle_dr_smp_recv(node_type, port_num, phys_port_cnt, + return __smi_handle_dr_smp_recv(is_switch, port_num, phys_port_cnt, &smp->hop_ptr, smp->hop_cnt, smp->initial_path, smp->return_path, @@ -254,10 +253,10 @@ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, * Adjust information for a received SMP * Return IB_SMI_DISCARD if the SMP should be dropped */ -enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, u8 node_type, +enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch, int port_num, int phys_port_cnt) { - return __smi_handle_dr_smp_recv(node_type, port_num, phys_port_cnt, + return __smi_handle_dr_smp_recv(is_switch, port_num, phys_port_cnt, &smp->hop_ptr, smp->hop_cnt, smp->route.dr.initial_path, smp->route.dr.return_path, diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h index aff96ba..33c91c8 100644 --- a/drivers/infiniband/core/smi.h +++ b/drivers/infiniband/core/smi.h @@ -51,12 +51,12 @@ enum smi_forward_action { IB_SMI_FORWARD /* SMP should be forwarded (for switches only) */ }; -enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, +enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch, int port_num, int phys_port_cnt); int smi_get_fwd_port(struct ib_smp *smp); extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp); extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, - u8 node_type, int port_num); + bool is_switch, int port_num); /* * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index ed6b6c8..0b84a9c 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -870,7 +870,7 @@ int ib_device_register_sysfs(struct ib_device *device, goto err_put; } - if (device->node_type == RDMA_NODE_IB_SWITCH) { + if (rdma_cap_ib_switch(device)) { ret = add_port(device, 0, port_callback); if (ret) goto err_put; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index da149c2..0d8336e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1684,7 +1684,7 @@ static void ipoib_add_one(struct ib_device *device) struct list_head *dev_list; struct net_device *dev; struct ipoib_dev_priv *priv; - int s, e, p; + int p; int count = 0; dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); @@ -1693,15 +1693,7 @@ static void ipoib_add_one(struct ib_device *device) INIT_LIST_HEAD(dev_list); - if (device->node_type == RDMA_NODE_IB_SWITCH) { - s = 0; - e = 0; - } else { - s = 1; - e = device->phys_port_cnt; - } - - for (p = s; p <= e; ++p) { + for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { if (!rdma_protocol_ib(device, p)) continue; dev = ipoib_add_port("ib%d", device, p); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 267dc4f..d40e321 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3379,7 +3379,7 @@ static void srp_add_one(struct ib_device *device) struct srp_device *srp_dev; struct ib_device_attr *dev_attr; struct srp_host *host; - int mr_page_shift, s, e, p; + int mr_page_shift, p; u64 max_pages_per_mr; dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL); @@ -3443,15 +3443,7 @@ static void srp_add_one(struct ib_device *device) if (IS_ERR(srp_dev->mr)) goto err_pd; - if (device->node_type == RDMA_NODE_IB_SWITCH) { - s = 0; - e = 0; - } else { - s = 1; - e = device->phys_port_cnt; - } - - for (p = s; p <= e; ++p) { + for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { host = srp_add_port(srp_dev, p); if (host) list_add_tail(&host->list, &srp_dev->dev_list); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 986fddb..b0f898e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1745,6 +1745,7 @@ struct ib_device { char node_desc[64]; __be64 node_guid; u32 local_dma_lkey; + u16 is_switch:1; u8 node_type; u8 phys_port_cnt; @@ -1824,6 +1825,20 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num); /** + * rdma_cap_ib_switch - Check if the device is IB switch + * @device: Device to check + * + * Device driver is responsible for setting is_switch bit on + * in ib_device structure at init time. + * + * Return: true if the device is IB switch. + */ +static inline bool rdma_cap_ib_switch(const struct ib_device *device) +{ + return device->is_switch; +} + +/** * rdma_start_port - Return the first valid port number for the device * specified * @@ -1833,7 +1848,7 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, */ static inline u8 rdma_start_port(const struct ib_device *device) { - return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; + return rdma_cap_ib_switch(device) ? 0 : 1; } /** @@ -1846,8 +1861,7 @@ static inline u8 rdma_start_port(const struct ib_device *device) */ static inline u8 rdma_end_port(const struct ib_device *device) { - return (device->node_type == RDMA_NODE_IB_SWITCH) ? - 0 : device->phys_port_cnt; + return rdma_cap_ib_switch(device) ? 0 : device->phys_port_cnt; } static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num) -- cgit v0.10.2 From cd4cd565e049c6e734a12754d91d1142c25f6a64 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 25 Jun 2015 12:04:49 -0400 Subject: IB/mad: Fix compare between big endian and cpu endian The define OPA_LID_PERMISSIVE is big endian and was compared to the cpu endian variable opa_drslid. Problem caught by 0-day build infrastructure. Fixes: 8e4349d13f33 (IB/mad: Add final OPA MAD processing) Signed-off-by: Ira Weiny Reviewed-by: Mike Marciniszyn Reviewed-by: John, Jubin Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index c82d751..786fc51 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -795,7 +795,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, goto out; } opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid); - if (opa_drslid != OPA_LID_PERMISSIVE && + if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) && opa_drslid & 0xffff0000) { ret = -EINVAL; dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n", -- cgit v0.10.2 From 3b8ab700af0a5c5e59c833f8a88f94b97499f5e5 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 25 Jun 2015 09:52:50 -0400 Subject: IB/mad: Remove improper use of BUG_ON We recently added BUG_ON's which were inappropriate for a condition which should never happen. Change these to be WARN_ON_ONCE as a debugging aid. Fixes: 4cd7c9479aff ('IB/mad: Add support for additional MAD info to/from drivers') Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c index 12b5bc2..376b031 100644 --- a/drivers/infiniband/hw/ehca/ehca_sqp.c +++ b/drivers/infiniband/hw/ehca/ehca_sqp.c @@ -226,8 +226,9 @@ int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) return IB_MAD_RESULT_FAILURE; diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 948188e..ad3a926 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -1499,8 +1499,9 @@ int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 85a50df..9a810e3 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -861,8 +861,9 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; switch (rdma_port_get_link_layer(ibdev, port_num)) { case IB_LINK_LAYER_INFINIBAND: diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 01fc97d..b84d13a 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -68,8 +68,9 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 6b2418b..7c3f2fb 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -209,8 +209,9 @@ int mthca_process_mad(struct ib_device *ibdev, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; /* Forward locally generated traps to the SM */ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 4bafa15..29b2767 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -215,8 +215,9 @@ int ocrdma_process_mad(struct ib_device *ibdev, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_PERF_MGMT: diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 05e3242..9625e7c 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2412,8 +2412,9 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: -- cgit v0.10.2 From b356c1c1f90a347b6f67d9272dda7ecf47e0b46c Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Wed, 24 Jun 2015 10:12:13 +0530 Subject: IB/srpt: Convert use of __constant_cpu_to_beXX to cpu_to_beXX In little endian cases, the macro cpu_to_be{16,32,64} unfolds to __swab{16,32,64} which provides special case for constants. In big endian cases, __constant_cpu_to_be{16,32,64} and cpu_to_be{16,32,64} expand directly to the same expression. So, replace __constant_cpu_to_be{16,32,64} with cpu_to_be{16,32,64} with the goal of getting rid of the definitions of __constant_cpu_to_be{16,32,64} completely. The Coccinelle semantic patch that performs this transformation is as follows: @@expression x;@@ ( - __constant_cpu_to_be16(x) + cpu_to_be16(x) | - __constant_cpu_to_be32(x) + cpu_to_be32(x) | - __constant_cpu_to_be64(x) + cpu_to_be64(x) ) Signed-off-by: Vaishali Thakkar Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 82897ca..60ff0a2 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -302,7 +302,7 @@ static void srpt_get_iou(struct ib_dm_mad *mad) int i; ioui = (struct ib_dm_iou_info *)mad->data; - ioui->change_id = __constant_cpu_to_be16(1); + ioui->change_id = cpu_to_be16(1); ioui->max_controllers = 16; /* set present for slot 1 and empty for the rest */ @@ -330,13 +330,13 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, if (!slot || slot > 16) { mad->mad_hdr.status - = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); + = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); return; } if (slot > 2) { mad->mad_hdr.status - = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC); + = cpu_to_be16(DM_MAD_STATUS_NO_IOC); return; } @@ -348,10 +348,10 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver); iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id); iocp->subsys_device_id = 0x0; - iocp->io_class = __constant_cpu_to_be16(SRP_REV16A_IB_IO_CLASS); - iocp->io_subclass = __constant_cpu_to_be16(SRP_IO_SUBCLASS); - iocp->protocol = __constant_cpu_to_be16(SRP_PROTOCOL); - iocp->protocol_version = __constant_cpu_to_be16(SRP_PROTOCOL_VERSION); + iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS); + iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS); + iocp->protocol = cpu_to_be16(SRP_PROTOCOL); + iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION); iocp->send_queue_depth = cpu_to_be16(sdev->srq_size); iocp->rdma_read_depth = 4; iocp->send_size = cpu_to_be32(srp_max_req_size); @@ -379,13 +379,13 @@ static void srpt_get_svc_entries(u64 ioc_guid, if (!slot || slot > 16) { mad->mad_hdr.status - = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); + = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); return; } if (slot > 2 || lo > hi || hi > 1) { mad->mad_hdr.status - = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC); + = cpu_to_be16(DM_MAD_STATUS_NO_IOC); return; } @@ -436,7 +436,7 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad, break; default: rsp_mad->mad_hdr.status = - __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); + cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); break; } } @@ -493,11 +493,11 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, break; case IB_MGMT_METHOD_SET: dm_mad->mad_hdr.status = - __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); + cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); break; default: dm_mad->mad_hdr.status = - __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD); + cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD); break; } @@ -1535,7 +1535,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, memset(srp_rsp, 0, sizeof *srp_rsp); srp_rsp->opcode = SRP_RSP; srp_rsp->req_lim_delta = - __constant_cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); + cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); srp_rsp->tag = tag; srp_rsp->status = status; @@ -1585,8 +1585,8 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, memset(srp_rsp, 0, sizeof *srp_rsp); srp_rsp->opcode = SRP_RSP; - srp_rsp->req_lim_delta = __constant_cpu_to_be32(1 - + atomic_xchg(&ch->req_lim_delta, 0)); + srp_rsp->req_lim_delta = + cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); srp_rsp->tag = tag; srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID; @@ -1630,7 +1630,7 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len) switch (len) { case 8: if ((*((__be64 *)lun) & - __constant_cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0) + cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0) goto out_err; break; case 4: @@ -2449,8 +2449,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, } if (it_iu_len > srp_max_req_size || it_iu_len < 64) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE); ret = -EINVAL; pr_err("rejected SRP_LOGIN_REQ because its" " length (%d bytes) is out of range (%d .. %d)\n", @@ -2459,8 +2459,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, } if (!sport->enabled) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); ret = -EINVAL; pr_err("rejected SRP_LOGIN_REQ because the target port" " has not yet been enabled\n"); @@ -2505,8 +2505,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, if (*(__be64 *)req->target_port_id != cpu_to_be64(srpt_service_guid) || *(__be64 *)(req->target_port_id + 8) != cpu_to_be64(srpt_service_guid)) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL); ret = -ENOMEM; pr_err("rejected SRP_LOGIN_REQ because it" " has an invalid target port identifier.\n"); @@ -2515,8 +2515,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ch = kzalloc(sizeof *ch, GFP_KERNEL); if (!ch) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because no memory.\n"); ret = -ENOMEM; goto reject; @@ -2552,8 +2552,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ret = srpt_create_ch_ib(ch); if (ret) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because creating" " a new RDMA channel failed.\n"); goto free_ring; @@ -2561,8 +2561,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ret = srpt_ch_qp_rtr(ch, ch->qp); if (ret) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because enabling" " RTR failed (error code = %d)\n", ret); goto destroy_ib; @@ -2580,15 +2579,15 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, if (!nacl) { pr_info("Rejected login because no ACL has been" " configured yet for initiator %s.\n", ch->sess_name); - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); goto destroy_ib; } ch->sess = transport_init_session(TARGET_PROT_NORMAL); if (IS_ERR(ch->sess)) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_debug("Failed to create session\n"); goto deregister_session; } @@ -2604,8 +2603,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, rsp->max_it_iu_len = req->req_it_iu_len; rsp->max_ti_iu_len = req->req_it_iu_len; ch->max_ti_iu_len = it_iu_len; - rsp->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT - | SRP_BUF_FORMAT_INDIRECT); + rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT + | SRP_BUF_FORMAT_INDIRECT); rsp->req_lim_delta = cpu_to_be32(ch->rq_size); atomic_set(&ch->req_lim, ch->rq_size); atomic_set(&ch->req_lim_delta, 0); @@ -2655,8 +2654,8 @@ free_ch: reject: rej->opcode = SRP_LOGIN_REJ; rej->tag = req->tag; - rej->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT - | SRP_BUF_FORMAT_INDIRECT); + rej->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT + | SRP_BUF_FORMAT_INDIRECT); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, (void *)rej, sizeof *rej); -- cgit v0.10.2 From 3fdf70acec13efc7ecf254f5a364df06348bfd2c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 25 Jun 2015 13:34:15 +0300 Subject: IB/srp: Avoid using uninitialized variable We might return res which is not initialized. Also reduce code duplication by exporting srp_parse_tmo so srp_tmo_set can reuse it. Detected by Coverity. Signed-off-by: Sagi Grimberg Signed-off-by: Jenny Falkovich Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index d40e321..31a20b4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -161,13 +161,10 @@ static int srp_tmo_set(const char *val, const struct kernel_param *kp) { int tmo, res; - if (strncmp(val, "off", 3) != 0) { - res = kstrtoint(val, 0, &tmo); - if (res) - goto out; - } else { - tmo = -1; - } + res = srp_parse_tmo(&tmo, val); + if (res) + goto out; + if (kp->arg == &srp_reconnect_delay) res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo, srp_dev_loss_tmo); diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index a85292b..e3cd3ec 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -203,7 +203,7 @@ static ssize_t srp_show_tmo(char *buf, int tmo) return tmo >= 0 ? sprintf(buf, "%d\n", tmo) : sprintf(buf, "off\n"); } -static int srp_parse_tmo(int *tmo, const char *buf) +int srp_parse_tmo(int *tmo, const char *buf) { int res = 0; @@ -214,6 +214,7 @@ static int srp_parse_tmo(int *tmo, const char *buf) return res; } +EXPORT_SYMBOL(srp_parse_tmo); static ssize_t show_reconnect_delay(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h index cdb05dd1..d40d3ef 100644 --- a/include/scsi/scsi_transport_srp.h +++ b/include/scsi/scsi_transport_srp.h @@ -119,6 +119,7 @@ extern struct srp_rport *srp_rport_add(struct Scsi_Host *, extern void srp_rport_del(struct srp_rport *); extern int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo, int dev_loss_tmo); +int srp_parse_tmo(int *tmo, const char *buf); extern int srp_reconnect_rport(struct srp_rport *rport); extern void srp_start_tl_fail_timers(struct srp_rport *rport); extern void srp_remove_host(struct Scsi_Host *); -- cgit v0.10.2 From be4b499323bf7291b491c6df51baae62f45b8404 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 25 Jun 2015 17:13:22 +0300 Subject: IB/cm: Do not queue work to a device that's going away Whenever ib_cm gets remove_one call, like when there is a hot-unplug event, the driver should mark itself as going_down and confirm that no new works are going to be queued for that device. so, the order of the actions are: 1. mark the going_down bit. 2. flush the wq. 3. [make sure no new works for that device.] 4. unregister mad agent. otherwise, works that are already queued can be scheduled after the mad agent was freed. Signed-off-by: Erez Shitrit Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index dbddddd..3a972eb 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -169,6 +169,7 @@ struct cm_device { struct ib_device *ib_device; struct device *device; u8 ack_delay; + int going_down; struct cm_port *port[0]; }; @@ -805,6 +806,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) { int wait_time; unsigned long flags; + struct cm_device *cm_dev; + + cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client); + if (!cm_dev) + return; spin_lock_irqsave(&cm.lock, flags); cm_cleanup_timewait(cm_id_priv->timewait_info); @@ -818,8 +824,14 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) */ cm_id_priv->id.state = IB_CM_TIMEWAIT; wait_time = cm_convert_to_ms(cm_id_priv->av.timeout); - queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, - msecs_to_jiffies(wait_time)); + + /* Check if the device started its remove_one */ + spin_lock_irq(&cm.lock); + if (!cm_dev->going_down) + queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, + msecs_to_jiffies(wait_time)); + spin_unlock_irq(&cm.lock); + cm_id_priv->timewait_info = NULL; } @@ -3305,6 +3317,11 @@ static int cm_establish(struct ib_cm_id *cm_id) struct cm_work *work; unsigned long flags; int ret = 0; + struct cm_device *cm_dev; + + cm_dev = ib_get_client_data(cm_id->device, &cm_client); + if (!cm_dev) + return -ENODEV; work = kmalloc(sizeof *work, GFP_ATOMIC); if (!work) @@ -3343,7 +3360,17 @@ static int cm_establish(struct ib_cm_id *cm_id) work->remote_id = cm_id->remote_id; work->mad_recv_wc = NULL; work->cm_event.event = IB_CM_USER_ESTABLISHED; - queue_delayed_work(cm.wq, &work->work, 0); + + /* Check if the device started its remove_one */ + spin_lock_irq(&cm.lock); + if (!cm_dev->going_down) { + queue_delayed_work(cm.wq, &work->work, 0); + } else { + kfree(work); + ret = -ENODEV; + } + spin_unlock_irq(&cm.lock); + out: return ret; } @@ -3394,6 +3421,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, enum ib_cm_event_type event; u16 attr_id; int paths = 0; + int going_down = 0; switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) { case CM_REQ_ATTR_ID: @@ -3452,7 +3480,19 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, work->cm_event.event = event; work->mad_recv_wc = mad_recv_wc; work->port = port; - queue_delayed_work(cm.wq, &work->work, 0); + + /* Check if the device started its remove_one */ + spin_lock_irq(&cm.lock); + if (!port->cm_dev->going_down) + queue_delayed_work(cm.wq, &work->work, 0); + else + going_down = 1; + spin_unlock_irq(&cm.lock); + + if (going_down) { + kfree(work); + ib_free_recv_mad(mad_recv_wc); + } } static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, @@ -3771,7 +3811,7 @@ static void cm_add_one(struct ib_device *ib_device) cm_dev->ib_device = ib_device; cm_get_ack_delay(cm_dev); - + cm_dev->going_down = 0; cm_dev->device = device_create(&cm_class, &ib_device->dev, MKDEV(0, 0), NULL, "%s", ib_device->name); @@ -3864,14 +3904,23 @@ static void cm_remove_one(struct ib_device *ib_device) list_del(&cm_dev->list); write_unlock_irqrestore(&cm.device_lock, flags); + spin_lock_irq(&cm.lock); + cm_dev->going_down = 1; + spin_unlock_irq(&cm.lock); + for (i = 1; i <= ib_device->phys_port_cnt; i++) { if (!rdma_cap_ib_cm(ib_device, i)) continue; port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); - ib_unregister_mad_agent(port->mad_agent); + /* + * We flush the queue here after the going_down set, this + * verify that no new works will be queued in the recv handler, + * after that we can call the unregister_mad_agent + */ flush_workqueue(cm.wq); + ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); } device_unregister(cm_dev->device); -- cgit v0.10.2 From 8a7ff14dcb40bade309cd2ad04c746bb1d169c4e Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 1 Jul 2015 14:31:02 +0300 Subject: IB/mlx4: Do not attemp to report HCA clock offset on VFs mlx4 VFs can provide CQE raw time-stamping services, but they don't have the hca core clock mapped to their PCI bars. As such, we should not attempt to query and report the clock offset to user space for VFs. Doing so causes query_device over VFs to fail with -ENOSUPP. Fixes: 4b664c4355b2 ('IB/mlx4: Add support for CQ time-stamping') Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 067a691..f419a72 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -253,14 +253,15 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL; props->timestamp_mask = 0xFFFFFFFFFFFFULL; - err = mlx4_get_internal_clock_params(dev->dev, &clock_params); - if (err) - goto out; + if (!mlx4_is_slave(dev->dev)) + err = mlx4_get_internal_clock_params(dev->dev, &clock_params); if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) { - resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE; resp.response_length += sizeof(resp.hca_core_clock_offset); - resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP; + if (!err && !mlx4_is_slave(dev->dev)) { + resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP; + resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE; + } } if (uhw->outlen) { -- cgit v0.10.2 From 58e9cc90cda7bc732856a2ad3210328fbc4f6ca2 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Wed, 1 Jul 2015 14:31:01 +0300 Subject: IB/IPoIB: Fix bad error flow in ipoib_add_port() Error values of ib_query_port() and ib_query_device() weren't propagated correctly. Because of that, ipoib_add_port() could return NULL value, which escaped the IS_ERR() check in ipoib_add_one() and we crashed. Signed-off-by: Amir Vadai Signed-off-by: Or Gerlitz Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 0d8336e..da3e7e7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1577,7 +1577,8 @@ static struct net_device *ipoib_add_port(const char *format, SET_NETDEV_DEV(priv->dev, hca->dma_device); priv->dev->dev_id = port - 1; - if (!ib_query_port(hca, port, &attr)) + result = ib_query_port(hca, port, &attr); + if (!result) priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); else { printk(KERN_WARNING "%s: ib_query_port %d failed\n", @@ -1598,7 +1599,8 @@ static struct net_device *ipoib_add_port(const char *format, goto device_init_failed; } - if (ipoib_set_dev_features(priv, hca)) + result = ipoib_set_dev_features(priv, hca); + if (result) goto device_init_failed; /* -- cgit v0.10.2 From a7f2f24cd716d7f1119d46929f41a8436f9c252f Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Thu, 2 Jul 2015 12:47:44 -0500 Subject: RDMA/core: Fixes for port mapper client registration Fixes to allow clients to make remove mapping requests, after they have provided the user space service with the mapping information, they are using when the service is restarted. 1) Adding IWPM_REG_VALID, IWPM_REG_INCOMPL and IWPM_REG_UNDEF registration types for the port mapper clients and functions to set/check the registration type. 2) If the port mapper user space service is not available to register the client, then its registration stays IWPM_REG_UNDEF and the registration isn't checked until the service becomes available (no mappings are possible, if the user space service isn't running). 3) After the service is restarted, the user space port mapper pid is set to valid and the client registration is set to IWPM_REG_INCOMPL to allow the client to make remove mapping requests. Signed-off-by: Tatyana Nikolova Reviewed-by: Steve Wise Tested-by: Steve Wise Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index e6ffa2e..22a3abe 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -67,7 +67,8 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) err_str = "Invalid port mapper client"; goto pid_query_error; } - if (iwpm_registered_client(nl_client)) + if (iwpm_check_registration(nl_client, IWPM_REG_VALID) || + iwpm_user_pid == IWPM_PID_UNAVAILABLE) return 0; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REG_PID, &nlh, nl_client); if (!skb) { @@ -106,7 +107,6 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ - iwpm_set_registered(nl_client, 1); iwpm_user_pid = IWPM_PID_UNAVAILABLE; err_str = "Unable to send a nlmsg"; goto pid_query_error; @@ -144,12 +144,12 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) err_str = "Invalid port mapper client"; goto add_mapping_error; } - if (!iwpm_registered_client(nl_client)) { + if (!iwpm_valid_pid()) + return 0; + if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) { err_str = "Unregistered port mapper client"; goto add_mapping_error; } - if (!iwpm_valid_pid()) - return 0; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_ADD_MAPPING, &nlh, nl_client); if (!skb) { err_str = "Unable to create a nlmsg"; @@ -214,12 +214,12 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) err_str = "Invalid port mapper client"; goto query_mapping_error; } - if (!iwpm_registered_client(nl_client)) { + if (!iwpm_valid_pid()) + return 0; + if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) { err_str = "Unregistered port mapper client"; goto query_mapping_error; } - if (!iwpm_valid_pid()) - return 0; ret = -ENOMEM; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_QUERY_MAPPING, &nlh, nl_client); if (!skb) { @@ -288,12 +288,12 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client) err_str = "Invalid port mapper client"; goto remove_mapping_error; } - if (!iwpm_registered_client(nl_client)) { + if (!iwpm_valid_pid()) + return 0; + if (iwpm_check_registration(nl_client, IWPM_REG_UNDEF)) { err_str = "Unregistered port mapper client"; goto remove_mapping_error; } - if (!iwpm_valid_pid()) - return 0; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REMOVE_MAPPING, &nlh, nl_client); if (!skb) { ret = -ENOMEM; @@ -388,7 +388,7 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb) pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n", __func__, iwpm_user_pid); if (iwpm_valid_client(nl_client)) - iwpm_set_registered(nl_client, 1); + iwpm_set_registration(nl_client, IWPM_REG_VALID); register_pid_response_exit: nlmsg_request->request_done = 1; /* always for found nlmsg_request */ @@ -644,7 +644,6 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) { struct nlattr *nltb[IWPM_NLA_MAPINFO_REQ_MAX]; const char *msg_type = "Mapping Info response"; - int iwpm_pid; u8 nl_client; char *iwpm_name; u16 iwpm_version; @@ -669,14 +668,14 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) __func__, nl_client); return ret; } - iwpm_set_registered(nl_client, 0); + iwpm_set_registration(nl_client, IWPM_REG_INCOMPL); atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + iwpm_user_pid = cb->nlh->nlmsg_pid; if (!iwpm_mapinfo_available()) return 0; - iwpm_pid = cb->nlh->nlmsg_pid; pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n", - __func__, iwpm_pid); - ret = iwpm_send_mapinfo(nl_client, iwpm_pid); + __func__, iwpm_user_pid); + ret = iwpm_send_mapinfo(nl_client, iwpm_user_pid); return ret; } EXPORT_SYMBOL(iwpm_mapping_info_cb); diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index a626795..5fb089e 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -78,6 +78,7 @@ init_exit: mutex_unlock(&iwpm_admin_lock); if (!ret) { iwpm_set_valid(nl_client, 1); + iwpm_set_registration(nl_client, IWPM_REG_UNDEF); pr_debug("%s: Mapinfo and reminfo tables are created\n", __func__); } @@ -106,6 +107,7 @@ int iwpm_exit(u8 nl_client) } mutex_unlock(&iwpm_admin_lock); iwpm_set_valid(nl_client, 0); + iwpm_set_registration(nl_client, IWPM_REG_UNDEF); return 0; } EXPORT_SYMBOL(iwpm_exit); @@ -397,17 +399,23 @@ void iwpm_set_valid(u8 nl_client, int valid) } /* valid client */ -int iwpm_registered_client(u8 nl_client) +u32 iwpm_get_registration(u8 nl_client) { return iwpm_admin.reg_list[nl_client]; } /* valid client */ -void iwpm_set_registered(u8 nl_client, int reg) +void iwpm_set_registration(u8 nl_client, u32 reg) { iwpm_admin.reg_list[nl_client] = reg; } +/* valid client */ +u32 iwpm_check_registration(u8 nl_client, u32 reg) +{ + return (iwpm_get_registration(nl_client) & reg); +} + int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr, struct sockaddr_storage *b_sockaddr) { diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index ee2d9ff..b7b9e19 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -58,6 +58,10 @@ #define IWPM_PID_UNDEFINED -1 #define IWPM_PID_UNAVAILABLE -2 +#define IWPM_REG_UNDEF 0x01 +#define IWPM_REG_VALID 0x02 +#define IWPM_REG_INCOMPL 0x04 + struct iwpm_nlmsg_request { struct list_head inprocess_list; __u32 nlmsg_seq; @@ -88,7 +92,7 @@ struct iwpm_admin_data { atomic_t refcount; atomic_t nlmsg_seq; int client_list[RDMA_NL_NUM_CLIENTS]; - int reg_list[RDMA_NL_NUM_CLIENTS]; + u32 reg_list[RDMA_NL_NUM_CLIENTS]; }; /** @@ -159,19 +163,31 @@ int iwpm_valid_client(u8 nl_client); void iwpm_set_valid(u8 nl_client, int valid); /** - * iwpm_registered_client - Check if the port mapper client is registered + * iwpm_check_registration - Check if the client registration + * matches the given one * @nl_client: The index of the netlink client + * @reg: The given registration type to compare with * * Call iwpm_register_pid() to register a client + * Returns true if the client registration matches reg, + * otherwise returns false + */ +u32 iwpm_check_registration(u8 nl_client, u32 reg); + +/** + * iwpm_set_registration - Set the client registration + * @nl_client: The index of the netlink client + * @reg: Registration type to set */ -int iwpm_registered_client(u8 nl_client); +void iwpm_set_registration(u8 nl_client, u32 reg); /** - * iwpm_set_registered - Set the port mapper client to registered or not + * iwpm_get_registration * @nl_client: The index of the netlink client - * @reg: 1 if registered or 0 if not + * + * Returns the client registration type */ -void iwpm_set_registered(u8 nl_client, int reg); +u32 iwpm_get_registration(u8 nl_client); /** * iwpm_send_mapinfo - Send local and mapped IPv4/IPv6 address info of -- cgit v0.10.2 From 165d68228906f2866a52069ef1176ab70fa9e216 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Thu, 2 Jul 2015 12:49:40 -0500 Subject: RDMA/nes: Fix for resolving the neigh Neighbor resolution doesn't work without this fix Signed-off-by: Tatyana Nikolova Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 9047af4..8a3ad17 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1520,8 +1520,9 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi int rc = arpindex; struct net_device *netdev; struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; + __be32 dst_ipaddr = htonl(dst_ip); - rt = ip_route_output(&init_net, htonl(dst_ip), 0, 0, 0); + rt = ip_route_output(&init_net, dst_ipaddr, nesvnic->local_ipaddr, 0, 0); if (IS_ERR(rt)) { printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n", __func__, dst_ip); @@ -1533,7 +1534,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi else netdev = nesvnic->netdev; - neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, netdev); + neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr); rcu_read_lock(); if (neigh) { -- cgit v0.10.2 From 0a691272509d59ea0adbaa0c420ca0a71c9d0419 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Thu, 2 Jul 2015 12:52:29 -0500 Subject: RDMA/nes: Fix for incorrect recording of the MAC address Fix for incorrect recording of the MAC address Signed-off-by: Tatyana Nikolova Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 02120d3..4713dd7 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -3861,7 +3861,7 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, (((u32)mac_addr[2]) << 24) | (((u32)mac_addr[3]) << 16) | (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]); cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = cpu_to_le32( - (((u32)mac_addr[0]) << 16) | (u32)mac_addr[1]); + (((u32)mac_addr[0]) << 8) | (u32)mac_addr[1]); } else { cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = 0; -- cgit v0.10.2 From 4fabb59449aa44a585b3603ffdadd4c5f4d0c033 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Mon, 6 Jul 2015 14:35:11 +0800 Subject: rds: rds_ib_device.refcount overflow Fixes: 3e0249f9c05c ("RDS/IB: add refcount tracking to struct rds_ib_device") There lacks a dropping on rds_ib_device.refcount in case rds_ib_alloc_fmr failed(mr pool running out). this lead to the refcount overflow. A complain in line 117(see following) is seen. From vmcore: s_ib_rdma_mr_pool_depleted is 2147485544 and rds_ibdev->refcount is -2147475448. That is the evidence the mr pool is used up. so rds_ib_alloc_fmr is very likely to return ERR_PTR(-EAGAIN). 115 void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) 116 { 117 BUG_ON(atomic_read(&rds_ibdev->refcount) <= 0); 118 if (atomic_dec_and_test(&rds_ibdev->refcount)) 119 queue_work(rds_wq, &rds_ibdev->free_work); 120 } fix is to drop refcount when rds_ib_alloc_fmr failed. Signed-off-by: Wengang Wang Reviewed-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 273b8bf..657ba9f 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -759,8 +759,10 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, } ibmr = rds_ib_alloc_fmr(rds_ibdev); - if (IS_ERR(ibmr)) + if (IS_ERR(ibmr)) { + rds_ib_dev_put(rds_ibdev); return ibmr; + } ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents); if (ret == 0) -- cgit v0.10.2 From 31b57b87fddf547bf3e98306b41bc82e111396fa Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Tue, 7 Jul 2015 17:45:12 +0300 Subject: IB/ucma: Fix lockdep warning in ucma_lock_files The ucma_lock_files() locks the mut mutex on two files, e.g. for migrating an ID. Use mutex_lock_nested() to prevent the warning below. ============================================= [ INFO: possible recursive locking detected ] 4.1.0-rc6-hmm+ #40 Tainted: G O --------------------------------------------- pingpong_rpc_se/10260 is trying to acquire lock: (&file->mut){+.+.+.}, at: [] ucma_migrate_id+0xc5/0x248 [rdma_ucm] but task is already holding lock: (&file->mut){+.+.+.}, at: [] ucma_migrate_id+0xbb/0x248 [rdma_ucm] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&file->mut); lock(&file->mut); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by pingpong_rpc_se/10260: #0: (&file->mut){+.+.+.}, at: [] ucma_migrate_id+0xbb/0x248 [rdma_ucm] stack backtrace: CPU: 0 PID: 10260 Comm: pingpong_rpc_se Tainted: G O 4.1.0-rc6-hmm+ #40 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007 ffff8801f85b63d0 ffff880195677b58 ffffffff81668f49 0000000000000001 ffffffff825cbbe0 ffff880195677c38 ffffffff810bb991 ffff880100000000 ffff880100000000 ffff880100000001 ffff8801f85b7010 ffffffff8121bee9 Call Trace: [] dump_stack+0x4f/0x6e [] __lock_acquire+0x741/0x1820 [] ? dput+0x29/0x320 [] lock_acquire+0xc8/0x240 [] ? ucma_migrate_id+0xc5/0x248 [rdma_ucm] [] ? mutex_lock_nested+0x291/0x3e0 [] mutex_lock_nested+0x65/0x3e0 [] ? ucma_migrate_id+0xc5/0x248 [rdma_ucm] [] ? trace_hardirqs_on+0xd/0x10 [] ? mutex_unlock+0xe/0x10 [] ucma_migrate_id+0xc5/0x248 [rdma_ucm] [] ucma_write+0xa4/0xb0 [rdma_ucm] [] __vfs_write+0x34/0x100 [] ? __audit_syscall_entry+0xac/0x110 [] ? current_kernel_time+0xc5/0xe0 [] ? security_file_permission+0x23/0x90 [] ? rw_verify_area+0x5d/0xe0 [] vfs_write+0xab/0x120 [] SyS_write+0x59/0xd0 [] ? __audit_syscall_entry+0xac/0x110 [] system_call_fastpath+0x12/0x76 Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index ad45469..a700cdb 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1354,10 +1354,10 @@ static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2) /* Acquire mutex's based on pointer comparison to prevent deadlock. */ if (file1 < file2) { mutex_lock(&file1->mut); - mutex_lock(&file2->mut); + mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING); } else { mutex_lock(&file2->mut); - mutex_lock(&file1->mut); + mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING); } } -- cgit v0.10.2 From 8b7cce0dae956b329d09099f8739821936cf7c0f Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Tue, 7 Jul 2015 17:45:13 +0300 Subject: IB/ipoib: Prevent lockdep warning in __ipoib_ib_dev_flush __ipoib_ib_dev_flush calls itself recursively on child devices, and lockdep complains about locking vlan_rwsem twice (see below). Use down_read_nested instead of down_read to prevent the warning. ============================================= [ INFO: possible recursive locking detected ] 4.1.0-rc4+ #36 Tainted: G O --------------------------------------------- kworker/u20:2/261 is trying to acquire lock: (&priv->vlan_rwsem){.+.+..}, at: [] __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] but task is already holding lock: (&priv->vlan_rwsem){.+.+..}, at: [] __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&priv->vlan_rwsem); lock(&priv->vlan_rwsem); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by kworker/u20:2/261: #0: ("%s""ipoib_flush"){.+.+..}, at: [] process_one_work+0x15c/0x760 #1: ((&priv->flush_heavy)){+.+...}, at: [] process_one_work+0x15c/0x760 #2: (&priv->vlan_rwsem){.+.+..}, at: [] __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] stack backtrace: CPU: 3 PID: 261 Comm: kworker/u20:2 Tainted: G O 4.1.0-rc4+ #36 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007 Workqueue: ipoib_flush ipoib_ib_dev_flush_heavy [ib_ipoib] ffff8801c6c54790 ffff8801c9927af8 ffffffff81665238 0000000000000001 ffffffff825b5b30 ffff8801c9927bd8 ffffffff810bba51 ffff880100000000 ffffffff00000001 ffff880100000001 ffff8801c6c55428 ffff8801c6c54790 Call Trace: [] dump_stack+0x4f/0x6f [] __lock_acquire+0x741/0x1820 [] lock_acquire+0xc8/0x240 [] ? __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] [] down_read+0x4c/0x70 [] ? __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] [] __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] [] __ipoib_ib_dev_flush+0x5a/0x2b0 [ib_ipoib] [] ipoib_ib_dev_flush_heavy+0x1a/0x20 [ib_ipoib] [] process_one_work+0x201/0x760 [] ? process_one_work+0x15c/0x760 [] worker_thread+0x120/0x4d0 [] ? process_one_work+0x760/0x760 [] ? process_one_work+0x760/0x760 [] kthread+0xfe/0x120 [] ? __init_kthread_worker+0x70/0x70 [] ret_from_fork+0x42/0x70 [] ? __init_kthread_worker+0x70/0x70 Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 63b92cb..058150f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -985,20 +985,21 @@ static inline int update_child_pkey(struct ipoib_dev_priv *priv) } static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, - enum ipoib_flush_level level) + enum ipoib_flush_level level, + int nesting) { struct ipoib_dev_priv *cpriv; struct net_device *dev = priv->dev; int result; - down_read(&priv->vlan_rwsem); + down_read_nested(&priv->vlan_rwsem, nesting); /* * Flush any child interfaces too -- they might be up even if * the parent is down. */ list_for_each_entry(cpriv, &priv->child_intfs, list) - __ipoib_ib_dev_flush(cpriv, level); + __ipoib_ib_dev_flush(cpriv, level, nesting + 1); up_read(&priv->vlan_rwsem); @@ -1076,7 +1077,7 @@ void ipoib_ib_dev_flush_light(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_light); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT, 0); } void ipoib_ib_dev_flush_normal(struct work_struct *work) @@ -1084,7 +1085,7 @@ void ipoib_ib_dev_flush_normal(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_normal); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL, 0); } void ipoib_ib_dev_flush_heavy(struct work_struct *work) @@ -1092,7 +1093,7 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_heavy); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0); } void ipoib_ib_dev_cleanup(struct net_device *dev) -- cgit v0.10.2 From 59d40dd92ca00df317ad44fa44c27fe38c58e477 Mon Sep 17 00:00:00 2001 From: Carol L Soto Date: Thu, 11 Jun 2015 11:06:41 -0500 Subject: IB/ucm: Fix bitmap wrap when devnum > IB_UCM_MAX_DEVICES ib_ucm_release_dev clears the wrong bit if devnum is greater than IB_UCM_MAX_DEVICES. Signed-off-by: Carol L Soto Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 62c24b1..0094810 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1193,6 +1193,7 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) return 0; } +static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES); static void ib_ucm_release_dev(struct device *dev) { struct ib_ucm_device *ucm_dev; @@ -1202,7 +1203,7 @@ static void ib_ucm_release_dev(struct device *dev) if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) clear_bit(ucm_dev->devnum, dev_map); else - clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, dev_map); + clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map); kfree(ucm_dev); } @@ -1226,7 +1227,6 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static dev_t overflow_maj; -static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES); static int find_overflow_devnum(void) { int ret; -- cgit v0.10.2 From c42687784b9a6b9733fee701ed236a5fe088fac4 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Sun, 12 Jul 2015 01:24:09 -0700 Subject: IB/ipoib: Scatter-Gather support in connected mode By default, IPoIB-CM driver uses 64k MTU. Larger MTU gives better performance. This MTU plus overhead puts the memory allocation for IP based packets at 32 4k pages (order 5), which have to be contiguous. When the system memory under pressure, it was observed that allocating 128k contiguous physical memory is difficult and causes serious errors (such as system becomes unusable). This enhancement resolve the issue by removing the physically contiguous memory requirement using Scatter/Gather feature that exists in Linux stack. With this fix Scatter-Gather will be supported also in connected mode. This change reverts some of the change made in commit e112373fd6aa ("IPoIB/cm: Reduce connected mode TX object size"). The ability to use SG in IPoIB CM is possible because the coupling between NETIF_F_SG and NETIF_F_CSUM was removed in commit ec5f06156423 ("net: Kill link between CSUM and SG features.") Signed-off-by: Yuval Shaia Acked-by: Christian Marie Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index bd94b0a..79859c4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -239,7 +239,7 @@ struct ipoib_cm_tx { struct net_device *dev; struct ipoib_neigh *neigh; struct ipoib_path *path; - struct ipoib_cm_tx_buf *tx_ring; + struct ipoib_tx_buf *tx_ring; unsigned tx_head; unsigned tx_tail; unsigned long flags; @@ -504,6 +504,33 @@ int ipoib_mcast_stop_thread(struct net_device *dev); void ipoib_mcast_dev_down(struct net_device *dev); void ipoib_mcast_dev_flush(struct net_device *dev); +int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req); +void ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv, + struct ipoib_tx_buf *tx_req); + +static inline void ipoib_build_sge(struct ipoib_dev_priv *priv, + struct ipoib_tx_buf *tx_req) +{ + int i, off; + struct sk_buff *skb = tx_req->skb; + skb_frag_t *frags = skb_shinfo(skb)->frags; + int nr_frags = skb_shinfo(skb)->nr_frags; + u64 *mapping = tx_req->mapping; + + if (skb_headlen(skb)) { + priv->tx_sge[0].addr = mapping[0]; + priv->tx_sge[0].length = skb_headlen(skb); + off = 1; + } else + off = 0; + + for (i = 0; i < nr_frags; ++i) { + priv->tx_sge[i + off].addr = mapping[i + off]; + priv->tx_sge[i + off].length = skb_frag_size(&frags[i]); + } + priv->tx_wr.num_sge = nr_frags + off; +} + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev); int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index cf32a778..ee39be6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -694,14 +694,12 @@ repost: static inline int post_send(struct ipoib_dev_priv *priv, struct ipoib_cm_tx *tx, unsigned int wr_id, - u64 addr, int len) + struct ipoib_tx_buf *tx_req) { struct ib_send_wr *bad_wr; - priv->tx_sge[0].addr = addr; - priv->tx_sge[0].length = len; + ipoib_build_sge(priv, tx_req); - priv->tx_wr.num_sge = 1; priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM; return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); @@ -710,8 +708,7 @@ static inline int post_send(struct ipoib_dev_priv *priv, void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_cm_tx_buf *tx_req; - u64 addr; + struct ipoib_tx_buf *tx_req; int rc; if (unlikely(skb->len > tx->mtu)) { @@ -735,24 +732,21 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ */ tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)]; tx_req->skb = skb; - addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE); - if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { + + if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) { ++dev->stats.tx_errors; dev_kfree_skb_any(skb); return; } - tx_req->mapping = addr; - skb_orphan(skb); skb_dst_drop(skb); - rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), - addr, skb->len); + rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req); if (unlikely(rc)) { ipoib_warn(priv, "post_send failed, error %d\n", rc); ++dev->stats.tx_errors; - ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE); + ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(skb); } else { dev->trans_start = jiffies; @@ -777,7 +771,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_cm_tx *tx = wc->qp->qp_context; unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM; - struct ipoib_cm_tx_buf *tx_req; + struct ipoib_tx_buf *tx_req; unsigned long flags; ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n", @@ -791,7 +785,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) tx_req = &tx->tx_ring[wr_id]; - ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE); + ipoib_dma_unmap_tx(priv, tx_req); /* FIXME: is this right? Shouldn't we only increment on success? */ ++dev->stats.tx_packets; @@ -1036,6 +1030,9 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ struct ib_qp *tx_qp; + if (dev->features & NETIF_F_SG) + attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; + tx_qp = ib_create_qp(priv->pd, &attr); if (PTR_ERR(tx_qp) == -EINVAL) { ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n", @@ -1170,7 +1167,7 @@ err_tx: static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) { struct ipoib_dev_priv *priv = netdev_priv(p->dev); - struct ipoib_cm_tx_buf *tx_req; + struct ipoib_tx_buf *tx_req; unsigned long begin; ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n", @@ -1197,8 +1194,7 @@ timeout: while ((int) p->tx_tail - (int) p->tx_head < 0) { tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; - ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, - DMA_TO_DEVICE); + ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(tx_req->skb); ++p->tx_tail; netif_tx_lock_bh(p->dev); @@ -1455,7 +1451,6 @@ static void ipoib_cm_stale_task(struct work_struct *work) spin_unlock_irq(&priv->lock); } - static ssize_t show_mode(struct device *d, struct device_attribute *attr, char *buf) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 058150f..d266667 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -263,8 +263,7 @@ repost: "for buf %d\n", wr_id); } -static int ipoib_dma_map_tx(struct ib_device *ca, - struct ipoib_tx_buf *tx_req) +int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req) { struct sk_buff *skb = tx_req->skb; u64 *mapping = tx_req->mapping; @@ -305,8 +304,8 @@ partial_error: return -EIO; } -static void ipoib_dma_unmap_tx(struct ib_device *ca, - struct ipoib_tx_buf *tx_req) +void ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv, + struct ipoib_tx_buf *tx_req) { struct sk_buff *skb = tx_req->skb; u64 *mapping = tx_req->mapping; @@ -314,7 +313,8 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca, int off; if (skb_headlen(skb)) { - ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE); + ib_dma_unmap_single(priv->ca, mapping[0], skb_headlen(skb), + DMA_TO_DEVICE); off = 1; } else off = 0; @@ -322,8 +322,8 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca, for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - ib_dma_unmap_page(ca, mapping[i + off], skb_frag_size(frag), - DMA_TO_DEVICE); + ib_dma_unmap_page(priv->ca, mapping[i + off], + skb_frag_size(frag), DMA_TO_DEVICE); } } @@ -389,7 +389,7 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) tx_req = &priv->tx_ring[wr_id]; - ipoib_dma_unmap_tx(priv->ca, tx_req); + ipoib_dma_unmap_tx(priv, tx_req); ++dev->stats.tx_packets; dev->stats.tx_bytes += tx_req->skb->len; @@ -514,24 +514,10 @@ static inline int post_send(struct ipoib_dev_priv *priv, void *head, int hlen) { struct ib_send_wr *bad_wr; - int i, off; struct sk_buff *skb = tx_req->skb; - skb_frag_t *frags = skb_shinfo(skb)->frags; - int nr_frags = skb_shinfo(skb)->nr_frags; - u64 *mapping = tx_req->mapping; - if (skb_headlen(skb)) { - priv->tx_sge[0].addr = mapping[0]; - priv->tx_sge[0].length = skb_headlen(skb); - off = 1; - } else - off = 0; + ipoib_build_sge(priv, tx_req); - for (i = 0; i < nr_frags; ++i) { - priv->tx_sge[i + off].addr = mapping[i + off]; - priv->tx_sge[i + off].length = skb_frag_size(&frags[i]); - } - priv->tx_wr.num_sge = nr_frags + off; priv->tx_wr.wr_id = wr_id; priv->tx_wr.wr.ud.remote_qpn = qpn; priv->tx_wr.wr.ud.ah = address; @@ -617,7 +603,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ipoib_warn(priv, "post_send failed, error %d\n", rc); ++dev->stats.tx_errors; --priv->tx_outstanding; - ipoib_dma_unmap_tx(priv->ca, tx_req); + ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(skb); if (netif_queue_stopped(dev)) netif_wake_queue(dev); @@ -868,7 +854,7 @@ int ipoib_ib_dev_stop(struct net_device *dev) while ((int) priv->tx_tail - (int) priv->tx_head < 0) { tx_req = &priv->tx_ring[priv->tx_tail & (ipoib_sendq_size - 1)]; - ipoib_dma_unmap_tx(priv->ca, tx_req); + ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(tx_req->skb); ++priv->tx_tail; --priv->tx_outstanding; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index da3e7e7..59604a7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -190,7 +190,7 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu struct ipoib_dev_priv *priv = netdev_priv(dev); if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) - features &= ~(NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO); + features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO); return features; } -- cgit v0.10.2 From edcd2a7474ba3b47e54c3c9a300287342de74766 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sun, 7 Jun 2015 13:36:11 +0300 Subject: IB/ipoib: Set MTU to max allowed by mode when mode changes When switching between modes (datagram / connected) change the MTU accordingly. datagram mode up to 4K, connected mode up to (64K - 0x10). Signed-off-by: ELi Cohen Signed-off-by: Erez Shitrit Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 59604a7..b2943c8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -232,6 +232,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) ipoib_warn(priv, "enabling connected mode " "will cause multicast packet drops\n"); netdev_update_features(dev); + dev_set_mtu(dev, ipoib_cm_max_mtu(dev)); rtnl_unlock(); priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; -- cgit v0.10.2 From cb1ff431c3dd904cda37d6d07d4a3ea29840d621 Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Tue, 16 Jun 2015 17:13:05 +0530 Subject: IB/ipath: Convert use of __constant_ to In little endian cases, the macros be16_to_cpu and cpu_to_be64 unfolds to __swab{16,64} which provides special case for constants. In big endian cases, __constant_be16_to_cpu and be16_to_cpu expand directly to the same expression. The same applies for __constant_cpu_to_be64 and cpu_to_be64. So, replace __constant_be16_to_cpu with be16_to_cpu and __constant_cpu_to_be64 with cpu_to_be64, with the goal of getting rid of the definition of __constant_be16_to_cpu and __constant_cpu_to_be64 completely. Signed-off-by: Vaishali Thakkar Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 48253b8..30ba49c 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -2044,9 +2044,9 @@ int ipath_register_ib_device(struct ipath_devdata *dd) spin_lock_init(&idev->qp_table.lock); spin_lock_init(&idev->lk_table.lock); - idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE); + idev->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE); /* Set the prefix to the default value (see ch. 4.1.1) */ - idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL); + idev->gid_prefix = cpu_to_be64(0xfe80000000000000ULL); ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size); if (ret) -- cgit v0.10.2 From 43bfb9729ea88d46e3f4d3ad7b17106c7b071fcb Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 25 Jun 2015 17:45:38 +0300 Subject: IB/mlx4: Fix use of flow-counters for process_mad For IB links, reading HCA flow counters through iboe_process_mad() should be used when mlx4_ib_process_mad() is invoked only for VFs PMA queries and exactly nothing else. Fixes: 7193a141eb74 ('IB/mlx4: Set VF to read from QP counters') Reported-by: Linus Torvalds Signed-off-by: Or Gerlitz Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 9a810e3..68b3dfa 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -860,22 +860,31 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct mlx4_ib_dev *dev = to_mdev(ibdev); const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; + enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num); if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || *out_mad_size != sizeof(*out_mad))) return IB_MAD_RESULT_FAILURE; - switch (rdma_port_get_link_layer(ibdev, port_num)) { - case IB_LINK_LAYER_INFINIBAND: - if (!mlx4_is_slave(dev->dev)) - return ib_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); - case IB_LINK_LAYER_ETHERNET: - return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); - default: - return -EINVAL; + /* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA + * queries, should be called only by VFs and for that specific purpose + */ + if (link == IB_LINK_LAYER_INFINIBAND) { + if (mlx4_is_slave(dev->dev) && + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && + in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS) + return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, + in_grh, in_mad, out_mad); + + return ib_process_mad(ibdev, mad_flags, port_num, in_wc, + in_grh, in_mad, out_mad); } + + if (link == IB_LINK_LAYER_ETHERNET) + return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, + in_grh, in_mad, out_mad); + + return -EINVAL; } static void send_handler(struct ib_mad_agent *agent, -- cgit v0.10.2 From a39a98ff4cc8b514fe6fa551f6ed59cd60e07da2 Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Wed, 8 Jul 2015 09:43:35 +0530 Subject: IB/mlx4: Optimize freeing of items on error unwind On failure, we loop through all possible pointers and test them before calling kfree. But really, why even attempt to free items we didn't allocate when we can easily loop through exactly and only the devices for which the original memory allocation succeeded and free just those. Signed-off-by: Maninder Singh Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f419a72..064454a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2670,17 +2670,15 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC); if (!dm) { pr_err("failed to allocate memory for tunneling qp update\n"); - goto out; + return; } for (i = 0; i < ports; i++) { dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); if (!dm[i]) { pr_err("failed to allocate memory for tunneling qp update work struct\n"); - for (i = 0; i < dev->caps.num_ports; i++) { - if (dm[i]) - kfree(dm[i]); - } + while (--i >= 0) + kfree(dm[i]); goto out; } } -- cgit v0.10.2 From 9bbf282da87294e1bda0ccb4e351bfdf5fc076cd Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Thu, 9 Jul 2015 10:16:12 -0400 Subject: IB/mlx4: Fix memory leak in do_slave_init We create a number of work structs to be queued up to a workqueue, and on completion of the workqueue handler, the workqueue handler frees the allocated memory. If, however, we don't queue the work struct because the device is going down, then we need to free the memory ourselves. Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 064454a..1c59e47 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2692,6 +2692,8 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); if (!ibdev->sriov.is_going_down) queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work); + else + kfree(dm[i]); spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); } out: -- cgit v0.10.2 From d9a047aeffcef5755952d18f2901d8777d84019d Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Thu, 9 Jul 2015 10:21:08 -0400 Subject: IB/mlx4: Optimize do_slave_init There is little chance our memory allocation will fail, so we can combine initializing the work structs with allocating them instead of looping through all of them once to allocate and again to initialize. Then when we need to actually find out if our device is up or in the process of going down, have all of our work structs batched up, take the spin_lock once and only once, and do all of the batch under the one spin_lock invocation instead of incurring all of the locked memory cycles we would otherwise incur to take/release the spin_lock over and over again. Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1c59e47..8be6db8 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2681,20 +2681,22 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) kfree(dm[i]); goto out; } - } - /* initialize or tear down tunnel QPs for the slave */ - for (i = 0; i < ports; i++) { INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work); dm[i]->port = first_port + i + 1; dm[i]->slave = slave; dm[i]->do_init = do_init; dm[i]->dev = ibdev; - spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); - if (!ibdev->sriov.is_going_down) + } + /* initialize or tear down tunnel QPs for the slave */ + spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); + if (!ibdev->sriov.is_going_down) { + for (i = 0; i < ports; i++) queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work); - else - kfree(dm[i]); spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); + } else { + spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); + for (i = 0; i < ports; i++) + kfree(dm[i]); } out: kfree(dm); -- cgit v0.10.2 From 45d254206f141f560e76319191f912d0a3d27bd3 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 8 Jul 2015 17:21:15 +0200 Subject: IB/core: Destroy multcast_idr on module exit Destroy multcast_idr on module exit, reclaiming the allocated memory. This was detected by the following semantic patch (written by Luis Rodriguez ) @ defines_module_init @ declarer name module_init, module_exit; declarer name DEFINE_IDR; identifier init; @@ module_init(init); @ defines_module_exit @ identifier exit; @@ module_exit(exit); @ declares_idr depends on defines_module_init && defines_module_exit @ identifier idr; @@ DEFINE_IDR(idr); @ on_exit_calls_destroy depends on declares_idr && defines_module_exit @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... idr_destroy(&idr); ... } @ missing_module_idr_destroy depends on declares_idr && defines_module_exit && !on_exit_calls_destroy @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... +idr_destroy(&idr); } Signed-off-by: Johannes Thumshirn Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index a700cdb..29b2121 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1616,6 +1616,7 @@ static void __exit ucma_cleanup(void) device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); misc_deregister(&ucma_misc); idr_destroy(&ctx_idr); + idr_destroy(&multicast_idr); } module_init(ucma_init); -- cgit v0.10.2 From d8b2ba7c5928173fe1c12bd2545f5ed85d1c3c7a Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 8 Jul 2015 17:23:00 +0200 Subject: IB/core: Destroy ocrdma_dev_id IDR on module exit Destroy ocrdma_dev_id IDR on module exit, reclaiming the allocated memory. This was detected by the following semantic patch (written by Luis Rodriguez ) @ defines_module_init @ declarer name module_init, module_exit; declarer name DEFINE_IDR; identifier init; @@ module_init(init); @ defines_module_exit @ identifier exit; @@ module_exit(exit); @ declares_idr depends on defines_module_init && defines_module_exit @ identifier idr; @@ DEFINE_IDR(idr); @ on_exit_calls_destroy depends on declares_idr && defines_module_exit @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... idr_destroy(&idr); ... } @ missing_module_idr_destroy depends on declares_idr && defines_module_exit && !on_exit_calls_destroy @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... +idr_destroy(&idr); } Signed-off-by: Johannes Thumshirn Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 8a1398b..d98a707 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -696,6 +696,7 @@ static void __exit ocrdma_exit_module(void) ocrdma_unregister_inet6addr_notifier(); ocrdma_unregister_inetaddr_notifier(); ocrdma_rem_debugfs(); + idr_destroy(&ocrdma_dev_id); } module_init(ocrdma_init_module); -- cgit v0.10.2