From ceb7decb366591e9b67d70832e07f5d240572a3d Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 27 Nov 2012 16:24:29 +0000 Subject: IB/mlx4: Fix spinlock order to avoid lockdep warnings lockdep warns about taking a hard-irq-unsafe lock (sriov->id_map_lock) inside a hard-irq-safe lock (sriov->going_down_lock). Since id_map_lock is never taken in the interrupt context, we can simply reverse the order of taking the two spinlocks, thus avoiding the warning and the depencency. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Cc: Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index 80079e5..dbc99d4 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -268,15 +268,15 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id) struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; unsigned long flags; - spin_lock_irqsave(&sriov->going_down_lock, flags); spin_lock(&sriov->id_map_lock); + spin_lock_irqsave(&sriov->going_down_lock, flags); /*make sure that there is no schedule inside the scheduled work.*/ if (!sriov->is_going_down) { id->scheduled_delete = 1; schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT); } - spin_unlock(&sriov->id_map_lock); spin_unlock_irqrestore(&sriov->going_down_lock, flags); + spin_unlock(&sriov->id_map_lock); } int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id, -- cgit v0.10.2 From 311f813a2daefcba03f706a692fe0c67888d7622 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 27 Nov 2012 16:24:30 +0000 Subject: mlx4_core: Fix potential deadlock in mlx4_eq_int() The slave_state_lock spinlock is used in both interrupt context and process context, hence irq locking must be used. Found by lockdep. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Cc: Signed-off-by: Roland Dreier diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index e791e70..fdc5f23 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1498,6 +1498,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, u32 reply; u8 is_going_down = 0; int i; + unsigned long flags; slave_state[slave].comm_toggle ^= 1; reply = (u32) slave_state[slave].comm_toggle << 31; @@ -1576,12 +1577,12 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave); goto reset_slave; } - spin_lock(&priv->mfunc.master.slave_state_lock); + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (!slave_state[slave].is_slave_going_down) slave_state[slave].last_cmd = cmd; else is_going_down = 1; - spin_unlock(&priv->mfunc.master.slave_state_lock); + spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); if (is_going_down) { mlx4_warn(dev, "Slave is going down aborting command(%d)" " executing from slave:%d\n", @@ -1597,10 +1598,10 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, reset_slave: /* cleanup any slave resources */ mlx4_delete_all_resources_for_slave(dev, slave); - spin_lock(&priv->mfunc.master.slave_state_lock); + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (!slave_state[slave].is_slave_going_down) slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET; - spin_unlock(&priv->mfunc.master.slave_state_lock); + spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); /*with slave in the middle of flr, no need to clean resources again.*/ inform_slave_state: memset(&slave_state[slave].event_eq, 0, diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index c509a86..3cb7727 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -407,6 +407,7 @@ void mlx4_master_handle_slave_flr(struct work_struct *work) struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; int i; int err; + unsigned long flags; mlx4_dbg(dev, "mlx4_handle_slave_flr\n"); @@ -418,10 +419,10 @@ void mlx4_master_handle_slave_flr(struct work_struct *work) mlx4_delete_all_resources_for_slave(dev, i); /*return the slave to running mode*/ - spin_lock(&priv->mfunc.master.slave_state_lock); + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); slave_state[i].last_cmd = MLX4_COMM_CMD_RESET; slave_state[i].is_slave_going_down = 0; - spin_unlock(&priv->mfunc.master.slave_state_lock); + spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); /*notify the FW:*/ err = mlx4_cmd(dev, 0, i, 0, MLX4_CMD_INFORM_FLR_DONE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); @@ -446,6 +447,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) u8 update_slave_state; int i; enum slave_port_gen_event gen_event; + unsigned long flags; while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) { /* @@ -653,13 +655,13 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) } else update_slave_state = 1; - spin_lock(&priv->mfunc.master.slave_state_lock); + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (update_slave_state) { priv->mfunc.master.slave_state[flr_slave].active = false; priv->mfunc.master.slave_state[flr_slave].last_cmd = MLX4_COMM_CMD_FLR; priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1; } - spin_unlock(&priv->mfunc.master.slave_state_lock); + spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); queue_work(priv->mfunc.master.comm_wq, &priv->mfunc.master.slave_flr_event_work); break; -- cgit v0.10.2 From 63f05be2c075022d1b3227037f82ad75c4d5ab1d Mon Sep 17 00:00:00 2001 From: shefty Date: Wed, 28 Nov 2012 20:39:52 +0000 Subject: RDMA/cm: Change return value from find_gid_port() Problem reported by Dan Carpenter : The patch 3c86aa70bf67: "RDMA/cm: Add RDMA CM support for IBoE devices" from Oct 13, 2010, leads to the following warning: net/sunrpc/xprtrdma/svc_rdma_transport.c:722 svc_rdma_create() error: passing non neg 1 to ERR_PTR This bug would result in a NULL dereference. svc_rdma_create() is supposed to return ERR_PTRs or valid pointers, but instead it returns ERR_PTRs, valid pointers and 1. The call tree is: svc_rdma_create() => rdma_bind_addr() => cma_acquire_dev() => find_gid_port() rdma_bind_addr() should return a valid errno. Fix this by having find_gid_port() also return a valid errno. If we can't find the specified GID on a given port, return -EADDRNOTAVAIL, rather than -EAGAIN, to better indicate the error. We also drop using the special return value of '1' and instead pass through the error returned by the underlying verbs call. On such errors, rather than aborting the search, we simply continue to check the next device/port. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index a7568c3..d789eea 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -345,17 +345,17 @@ static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_nu err = ib_query_port(device, port_num, &props); if (err) - return 1; + return err; for (i = 0; i < props.gid_tbl_len; ++i) { err = ib_query_gid(device, port_num, i, &tmp); if (err) - return 1; + return err; if (!memcmp(&tmp, gid, sizeof tmp)) return 0; } - return -EAGAIN; + return -EADDRNOTAVAIL; } static int cma_acquire_dev(struct rdma_id_private *id_priv) @@ -388,8 +388,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) if (!ret) { id_priv->id.port_num = port; goto out; - } else if (ret == 1) - break; + } } } } -- cgit v0.10.2