summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-07-01 10:12:12 (GMT)
committerDavid S. Miller <davem@davemloft.net>2016-07-01 10:12:12 (GMT)
commit08df7bc5001e7fc8bdb0a74e04ad2db5f797ed3d (patch)
tree88890189376e33b6dff72fe0b5b1a7f76cc9d08b
parentf87fda00b6ed232a817c655b8d179b48bde8fdbe (diff)
parent87424ad52d76535234f217637fcaadcd2ef2334d (diff)
downloadlinux-08df7bc5001e7fc8bdb0a74e04ad2db5f797ed3d.tar.xz
Merge branch 'mlx5-fixes'
Saeed Mahameed says: ==================== Mellanox 100G mlx5 resiliency and xmit path fixes This series provides two set of fixes to the mlx5 driver: - Resiliency fixes for reset flow and internal pci errors - xmit path fixes Please consider queuing those patches for -stable (4.6). Reset flow fixes for core driver: - Add more commands to the list of error simulated commands when pci errors occur - Avoid calling sleeping function by the health poll thread - Fix incorrect page count when in internal error - Fix timeout in wait vital for VFs - Deadlock fix and Timeout handling in commands interface Reset flow and resiliency fixes for mlx5e netdev driver: - Handle RQ flush in error cases - Implement ndo_tx_timeout callback - Timeout if SQ doesn't flush during close - Log link state changes - Validate BW weight values of ETS xmit path fixes: - Fix wrong fallback assumption in select queue callback - Account for all L2 headers when copying headers into inline segment ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c129
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c99
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c41
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c52
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c41
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c63
-rw-r--r--include/linux/mlx5/driver.h1
10 files changed, 335 insertions, 121 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 0b49862..d6e2a1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -295,6 +295,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_DESTROY_FLOW_GROUP:
case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER:
+ case MLX5_CMD_OP_2ERR_QP:
+ case MLX5_CMD_OP_2RST_QP:
+ case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
+ case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
return MLX5_CMD_STAT_OK;
case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -321,8 +327,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_RTR2RTS_QP:
case MLX5_CMD_OP_RTS2RTS_QP:
case MLX5_CMD_OP_SQERR2RTS_QP:
- case MLX5_CMD_OP_2ERR_QP:
- case MLX5_CMD_OP_2RST_QP:
case MLX5_CMD_OP_QUERY_QP:
case MLX5_CMD_OP_SQD_RTS_QP:
case MLX5_CMD_OP_INIT2INIT_QP:
@@ -342,7 +346,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT:
case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
- case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
case MLX5_CMD_OP_SET_ROCE_ADDRESS:
case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
@@ -390,11 +393,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_CREATE_RQT:
case MLX5_CMD_OP_MODIFY_RQT:
case MLX5_CMD_OP_QUERY_RQT:
+
case MLX5_CMD_OP_CREATE_FLOW_TABLE:
case MLX5_CMD_OP_QUERY_FLOW_TABLE:
case MLX5_CMD_OP_CREATE_FLOW_GROUP:
case MLX5_CMD_OP_QUERY_FLOW_GROUP:
- case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+
case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
@@ -602,11 +606,36 @@ static void dump_command(struct mlx5_core_dev *dev,
pr_debug("\n");
}
+static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
+{
+ struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data);
+
+ return be16_to_cpu(hdr->opcode);
+}
+
+static void cb_timeout_handler(struct work_struct *work)
+{
+ struct delayed_work *dwork = container_of(work, struct delayed_work,
+ work);
+ struct mlx5_cmd_work_ent *ent = container_of(dwork,
+ struct mlx5_cmd_work_ent,
+ cb_timeout_work);
+ struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev,
+ cmd);
+
+ ent->ret = -ETIMEDOUT;
+ mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
+ mlx5_command_str(msg_to_opcode(ent->in)),
+ msg_to_opcode(ent->in));
+ mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+}
+
static void cmd_work_handler(struct work_struct *work)
{
struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
struct mlx5_cmd *cmd = ent->cmd;
struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd);
+ unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
struct mlx5_cmd_layout *lay;
struct semaphore *sem;
unsigned long flags;
@@ -647,6 +676,9 @@ static void cmd_work_handler(struct work_struct *work)
dump_command(dev, ent, 1);
ent->ts1 = ktime_get_ns();
+ if (ent->callback)
+ schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
+
/* ring doorbell after the descriptor is valid */
mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
wmb();
@@ -691,13 +723,6 @@ static const char *deliv_status_to_str(u8 status)
}
}
-static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
-{
- struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data);
-
- return be16_to_cpu(hdr->opcode);
-}
-
static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
{
unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
@@ -706,13 +731,13 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
if (cmd->mode == CMD_MODE_POLLING) {
wait_for_completion(&ent->done);
- err = ent->ret;
- } else {
- if (!wait_for_completion_timeout(&ent->done, timeout))
- err = -ETIMEDOUT;
- else
- err = 0;
+ } else if (!wait_for_completion_timeout(&ent->done, timeout)) {
+ ent->ret = -ETIMEDOUT;
+ mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
}
+
+ err = ent->ret;
+
if (err == -ETIMEDOUT) {
mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
mlx5_command_str(msg_to_opcode(ent->in)),
@@ -761,6 +786,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
if (!callback)
init_completion(&ent->done);
+ INIT_DELAYED_WORK(&ent->cb_timeout_work, cb_timeout_handler);
INIT_WORK(&ent->work, cmd_work_handler);
if (page_queue) {
cmd_work_handler(&ent->work);
@@ -770,28 +796,26 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
goto out_free;
}
- if (!callback) {
- err = wait_func(dev, ent);
- if (err == -ETIMEDOUT)
- goto out;
-
- ds = ent->ts2 - ent->ts1;
- op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
- if (op < ARRAY_SIZE(cmd->stats)) {
- stats = &cmd->stats[op];
- spin_lock_irq(&stats->lock);
- stats->sum += ds;
- ++stats->n;
- spin_unlock_irq(&stats->lock);
- }
- mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
- "fw exec time for %s is %lld nsec\n",
- mlx5_command_str(op), ds);
- *status = ent->status;
- free_cmd(ent);
- }
+ if (callback)
+ goto out;
- return err;
+ err = wait_func(dev, ent);
+ if (err == -ETIMEDOUT)
+ goto out_free;
+
+ ds = ent->ts2 - ent->ts1;
+ op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
+ if (op < ARRAY_SIZE(cmd->stats)) {
+ stats = &cmd->stats[op];
+ spin_lock_irq(&stats->lock);
+ stats->sum += ds;
+ ++stats->n;
+ spin_unlock_irq(&stats->lock);
+ }
+ mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
+ "fw exec time for %s is %lld nsec\n",
+ mlx5_command_str(op), ds);
+ *status = ent->status;
out_free:
free_cmd(ent);
@@ -1181,41 +1205,30 @@ err_dbg:
return err;
}
-void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
+static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
{
struct mlx5_cmd *cmd = &dev->cmd;
int i;
for (i = 0; i < cmd->max_reg_cmds; i++)
down(&cmd->sem);
-
down(&cmd->pages_sem);
- flush_workqueue(cmd->wq);
-
- cmd->mode = CMD_MODE_EVENTS;
+ cmd->mode = mode;
up(&cmd->pages_sem);
for (i = 0; i < cmd->max_reg_cmds; i++)
up(&cmd->sem);
}
-void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
{
- struct mlx5_cmd *cmd = &dev->cmd;
- int i;
-
- for (i = 0; i < cmd->max_reg_cmds; i++)
- down(&cmd->sem);
-
- down(&cmd->pages_sem);
-
- flush_workqueue(cmd->wq);
- cmd->mode = CMD_MODE_POLLING;
+ mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
+}
- up(&cmd->pages_sem);
- for (i = 0; i < cmd->max_reg_cmds; i++)
- up(&cmd->sem);
+void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+{
+ mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
}
static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
@@ -1251,6 +1264,8 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
struct semaphore *sem;
ent = cmd->ent_arr[i];
+ if (ent->callback)
+ cancel_delayed_work(&ent->cb_timeout_work);
if (ent->page_queue)
sem = &cmd->pages_sem;
else
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index baa991a..943b1bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -145,7 +145,6 @@ struct mlx5e_umr_wqe {
#ifdef CONFIG_MLX5_CORE_EN_DCB
#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
-#define MLX5E_MIN_BW_ALLOC 1 /* Min percentage of BW allocation */
#endif
struct mlx5e_params {
@@ -191,6 +190,7 @@ struct mlx5e_tstamp {
enum {
MLX5E_RQ_STATE_POST_WQES_ENABLE,
MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS,
+ MLX5E_RQ_STATE_FLUSH_TIMEOUT,
};
struct mlx5e_cq {
@@ -220,6 +220,8 @@ typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq,
typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe,
u16 ix);
+typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq *rq, u16 ix);
+
struct mlx5e_dma_info {
struct page *page;
dma_addr_t addr;
@@ -241,6 +243,7 @@ struct mlx5e_rq {
struct mlx5e_cq cq;
mlx5e_fp_handle_rx_cqe handle_rx_cqe;
mlx5e_fp_alloc_wqe alloc_wqe;
+ mlx5e_fp_dealloc_wqe dealloc_wqe;
unsigned long state;
int ix;
@@ -305,6 +308,7 @@ struct mlx5e_sq_dma {
enum {
MLX5E_SQ_STATE_WAKE_TXQ_ENABLE,
MLX5E_SQ_STATE_BF_ENABLE,
+ MLX5E_SQ_STATE_TX_TIMEOUT,
};
struct mlx5e_ico_wqe_info {
@@ -538,6 +542,7 @@ struct mlx5e_priv {
struct workqueue_struct *wq;
struct work_struct update_carrier_work;
struct work_struct set_rx_mode_work;
+ struct work_struct tx_timeout_work;
struct delayed_work update_stats_work;
struct mlx5_core_dev *mdev;
@@ -589,12 +594,16 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
int mlx5e_napi_poll(struct napi_struct *napi, int budget);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
+void mlx5e_free_tx_descs(struct mlx5e_sq *sq);
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
+void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
+void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq);
void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq,
struct mlx5_cqe64 *cqe,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index b2db180..c585349 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -96,7 +96,7 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw,
tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
break;
case IEEE_8021QAZ_TSA_ETS:
- tc_tx_bw[i] = ets->tc_tx_bw[i] ?: MLX5E_MIN_BW_ALLOC;
+ tc_tx_bw[i] = ets->tc_tx_bw[i];
break;
}
}
@@ -140,8 +140,12 @@ static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets)
/* Validate Bandwidth Sum */
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
- if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS)
+ if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+ if (!ets->tc_tx_bw[i])
+ return -EINVAL;
+
bw_sum += ets->tc_tx_bw[i];
+ }
}
if (bw_sum != 0 && bw_sum != 100)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index cb6defd..7a0dca2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -39,6 +39,13 @@
#include "eswitch.h"
#include "vxlan.h"
+enum {
+ MLX5_EN_QP_FLUSH_TIMEOUT_MS = 5000,
+ MLX5_EN_QP_FLUSH_MSLEEP_QUANT = 20,
+ MLX5_EN_QP_FLUSH_MAX_ITER = MLX5_EN_QP_FLUSH_TIMEOUT_MS /
+ MLX5_EN_QP_FLUSH_MSLEEP_QUANT,
+};
+
struct mlx5e_rq_param {
u32 rqc[MLX5_ST_SZ_DW(rqc)];
struct mlx5_wq_param wq;
@@ -74,10 +81,13 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv)
port_state = mlx5_query_vport_state(mdev,
MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
- if (port_state == VPORT_STATE_UP)
+ if (port_state == VPORT_STATE_UP) {
+ netdev_info(priv->netdev, "Link up\n");
netif_carrier_on(priv->netdev);
- else
+ } else {
+ netdev_info(priv->netdev, "Link down\n");
netif_carrier_off(priv->netdev);
+ }
}
static void mlx5e_update_carrier_work(struct work_struct *work)
@@ -91,6 +101,26 @@ static void mlx5e_update_carrier_work(struct work_struct *work)
mutex_unlock(&priv->state_lock);
}
+static void mlx5e_tx_timeout_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ tx_timeout_work);
+ int err;
+
+ rtnl_lock();
+ mutex_lock(&priv->state_lock);
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+ mlx5e_close_locked(priv->netdev);
+ err = mlx5e_open_locked(priv->netdev);
+ if (err)
+ netdev_err(priv->netdev, "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
+ err);
+unlock:
+ mutex_unlock(&priv->state_lock);
+ rtnl_unlock();
+}
+
static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
{
struct mlx5e_sw_stats *s = &priv->stats.sw;
@@ -305,6 +335,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
}
rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
+ rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
@@ -320,6 +351,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
}
rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
rq->alloc_wqe = mlx5e_alloc_rx_wqe;
+ rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
rq->wqe_sz = (priv->params.lro_en) ?
priv->params.lro_wqe_sz :
@@ -525,17 +557,25 @@ err_destroy_rq:
static void mlx5e_close_rq(struct mlx5e_rq *rq)
{
+ int tout = 0;
+ int err;
+
clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state);
napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
- mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
- while (!mlx5_wq_ll_is_empty(&rq->wq))
- msleep(20);
+ err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
+ while (!mlx5_wq_ll_is_empty(&rq->wq) && !err &&
+ tout++ < MLX5_EN_QP_FLUSH_MAX_ITER)
+ msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
+
+ if (err || tout == MLX5_EN_QP_FLUSH_MAX_ITER)
+ set_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state);
/* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */
napi_synchronize(&rq->channel->napi);
mlx5e_disable_rq(rq);
+ mlx5e_free_rx_descs(rq);
mlx5e_destroy_rq(rq);
}
@@ -782,6 +822,9 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq)
static void mlx5e_close_sq(struct mlx5e_sq *sq)
{
+ int tout = 0;
+ int err;
+
if (sq->txq) {
clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
/* prevent netif_tx_wake_queue */
@@ -792,15 +835,24 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq)
if (mlx5e_sq_has_room_for(sq, 1))
mlx5e_send_nop(sq, true);
- mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
+ err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY,
+ MLX5_SQC_STATE_ERR);
+ if (err)
+ set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
}
- while (sq->cc != sq->pc) /* wait till sq is empty */
- msleep(20);
+ /* wait till sq is empty, unless a TX timeout occurred on this SQ */
+ while (sq->cc != sq->pc &&
+ !test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)) {
+ msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
+ if (tout++ > MLX5_EN_QP_FLUSH_MAX_ITER)
+ set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
+ }
/* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */
napi_synchronize(&sq->channel->napi);
+ mlx5e_free_tx_descs(sq);
mlx5e_disable_sq(sq);
mlx5e_destroy_sq(sq);
}
@@ -1658,8 +1710,11 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev)
netdev_set_num_tc(netdev, ntc);
+ /* Map netdev TCs to offset 0
+ * We have our own UP to TXQ mapping for QoS
+ */
for (tc = 0; tc < ntc; tc++)
- netdev_set_tc_queue(netdev, tc, nch, tc * nch);
+ netdev_set_tc_queue(netdev, tc, nch, 0);
}
int mlx5e_open_locked(struct net_device *netdev)
@@ -2590,6 +2645,29 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
return features;
}
+static void mlx5e_tx_timeout(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ bool sched_work = false;
+ int i;
+
+ netdev_err(dev, "TX timeout detected\n");
+
+ for (i = 0; i < priv->params.num_channels * priv->params.num_tc; i++) {
+ struct mlx5e_sq *sq = priv->txq_to_sq_map[i];
+
+ if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)))
+ continue;
+ sched_work = true;
+ set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
+ netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n",
+ i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc);
+ }
+
+ if (sched_work && test_bit(MLX5E_STATE_OPENED, &priv->state))
+ schedule_work(&priv->tx_timeout_work);
+}
+
static const struct net_device_ops mlx5e_netdev_ops_basic = {
.ndo_open = mlx5e_open,
.ndo_stop = mlx5e_close,
@@ -2607,6 +2685,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = {
#ifdef CONFIG_RFS_ACCEL
.ndo_rx_flow_steer = mlx5e_rx_flow_steer,
#endif
+ .ndo_tx_timeout = mlx5e_tx_timeout,
};
static const struct net_device_ops mlx5e_netdev_ops_sriov = {
@@ -2636,6 +2715,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = {
.ndo_get_vf_config = mlx5e_get_vf_config,
.ndo_set_vf_link_state = mlx5e_set_vf_link_state,
.ndo_get_vf_stats = mlx5e_get_vf_stats,
+ .ndo_tx_timeout = mlx5e_tx_timeout,
};
static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@ -2838,6 +2918,7 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
+ INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 022acc2..9f2a16a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -212,6 +212,20 @@ err_free_skb:
return -ENOMEM;
}
+void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
+{
+ struct sk_buff *skb = rq->skb[ix];
+
+ if (skb) {
+ rq->skb[ix] = NULL;
+ dma_unmap_single(rq->pdev,
+ *((dma_addr_t *)skb->cb),
+ rq->wqe_sz,
+ DMA_FROM_DEVICE);
+ dev_kfree_skb(skb);
+ }
+}
+
static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
{
return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
@@ -574,6 +588,30 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
return 0;
}
+void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
+{
+ struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
+
+ wi->free_wqe(rq, wi);
+}
+
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
+{
+ struct mlx5_wq_ll *wq = &rq->wq;
+ struct mlx5e_rx_wqe *wqe;
+ __be16 wqe_ix_be;
+ u16 wqe_ix;
+
+ while (!mlx5_wq_ll_is_empty(wq)) {
+ wqe_ix_be = *wq->tail_next;
+ wqe_ix = be16_to_cpu(wqe_ix_be);
+ wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix);
+ rq->dealloc_wqe(rq, wqe_ix);
+ mlx5_wq_ll_pop(&rq->wq, wqe_ix_be,
+ &wqe->next.next_wqe_index);
+ }
+}
+
#define RQ_CANNOT_POST(rq) \
(!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state) || \
test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
@@ -878,6 +916,9 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
int work_done = 0;
+ if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state)))
+ return 0;
+
if (cq->decmprs_left)
work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 5a750b9cd..5740b46 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -110,8 +110,20 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
{
struct mlx5e_priv *priv = netdev_priv(dev);
int channel_ix = fallback(dev, skb);
- int up = (netdev_get_num_tc(dev) && skb_vlan_tag_present(skb)) ?
- skb->vlan_tci >> VLAN_PRIO_SHIFT : 0;
+ int up = 0;
+
+ if (!netdev_get_num_tc(dev))
+ return channel_ix;
+
+ if (skb_vlan_tag_present(skb))
+ up = skb->vlan_tci >> VLAN_PRIO_SHIFT;
+
+ /* channel_ix can be larger than num_channels since
+ * dev->num_real_tx_queues = num_channels * num_tc
+ */
+ if (channel_ix >= priv->params.num_channels)
+ channel_ix = reciprocal_scale(channel_ix,
+ priv->params.num_channels);
return priv->channeltc_to_txq_map[channel_ix][up];
}
@@ -123,7 +135,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq,
* headers and occur before the data gather.
* Therefore these headers must be copied into the WQE
*/
-#define MLX5E_MIN_INLINE ETH_HLEN
+#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
if (bf) {
u16 ihs = skb_headlen(skb);
@@ -135,7 +147,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq,
return skb_headlen(skb);
}
- return MLX5E_MIN_INLINE;
+ return max(skb_network_offset(skb), MLX5E_MIN_INLINE);
}
static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data,
@@ -341,6 +353,35 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
return mlx5e_sq_xmit(sq, skb);
}
+void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
+{
+ struct mlx5e_tx_wqe_info *wi;
+ struct sk_buff *skb;
+ u16 ci;
+ int i;
+
+ while (sq->cc != sq->pc) {
+ ci = sq->cc & sq->wq.sz_m1;
+ skb = sq->skb[ci];
+ wi = &sq->wqe_info[ci];
+
+ if (!skb) { /* nop */
+ sq->cc++;
+ continue;
+ }
+
+ for (i = 0; i < wi->num_dma; i++) {
+ struct mlx5e_sq_dma *dma =
+ mlx5e_dma_get(sq, sq->dma_fifo_cc++);
+
+ mlx5e_tx_dma_unmap(sq->pdev, dma);
+ }
+
+ dev_kfree_skb_any(skb);
+ sq->cc += wi->num_wqebbs;
+ }
+}
+
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
struct mlx5e_sq *sq;
@@ -352,6 +393,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
sq = container_of(cq, struct mlx5e_sq, cq);
+ if (unlikely(test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)))
+ return false;
+
npkts = 0;
nbytes = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 42d16b9..96a5946 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -108,15 +108,21 @@ static int in_fatal(struct mlx5_core_dev *dev)
void mlx5_enter_error_state(struct mlx5_core_dev *dev)
{
+ mutex_lock(&dev->intf_state_mutex);
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
- return;
+ goto unlock;
mlx5_core_err(dev, "start\n");
- if (pci_channel_offline(dev->pdev) || in_fatal(dev))
+ if (pci_channel_offline(dev->pdev) || in_fatal(dev)) {
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+ trigger_cmd_completions(dev);
+ }
mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0);
mlx5_core_err(dev, "end\n");
+
+unlock:
+ mutex_unlock(&dev->intf_state_mutex);
}
static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
@@ -245,7 +251,6 @@ static void poll_health(unsigned long data)
u32 count;
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
- trigger_cmd_completions(dev);
mod_timer(&health->timer, get_next_poll_jiffies());
return;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c65f4a1..6695893 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1422,46 +1422,31 @@ void mlx5_disable_device(struct mlx5_core_dev *dev)
mlx5_pci_err_detected(dev->pdev, 0);
}
-/* wait for the device to show vital signs. For now we check
- * that we can read the device ID and that the health buffer
- * shows a non zero value which is different than 0xffffffff
+/* wait for the device to show vital signs by waiting
+ * for the health counter to start counting.
*/
-static void wait_vital(struct pci_dev *pdev)
+static int wait_vital(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct mlx5_core_health *health = &dev->priv.health;
const int niter = 100;
+ u32 last_count = 0;
u32 count;
- u16 did;
int i;
- /* Wait for firmware to be ready after reset */
- msleep(1000);
- for (i = 0; i < niter; i++) {
- if (pci_read_config_word(pdev, 2, &did)) {
- dev_warn(&pdev->dev, "failed reading config word\n");
- break;
- }
- if (did == pdev->device) {
- dev_info(&pdev->dev, "device ID correctly read after %d iterations\n", i);
- break;
- }
- msleep(50);
- }
- if (i == niter)
- dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);
-
for (i = 0; i < niter; i++) {
count = ioread32be(health->health_counter);
if (count && count != 0xffffffff) {
- dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
- break;
+ if (last_count && last_count != count) {
+ dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
+ return 0;
+ }
+ last_count = count;
}
msleep(50);
}
- if (i == niter)
- dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);
+ return -ETIMEDOUT;
}
static void mlx5_pci_resume(struct pci_dev *pdev)
@@ -1473,7 +1458,11 @@ static void mlx5_pci_resume(struct pci_dev *pdev)
dev_info(&pdev->dev, "%s was called\n", __func__);
pci_save_state(pdev);
- wait_vital(pdev);
+ err = wait_vital(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
+ return;
+ }
err = mlx5_load_one(dev, priv);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 9eeee05..32dea35 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -345,7 +345,6 @@ retry:
func_id, npages, err);
goto out_4k;
}
- dev->priv.fw_pages += npages;
err = mlx5_cmd_status_to_err(&out.hdr);
if (err) {
@@ -373,6 +372,33 @@ out_free:
return err;
}
+static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_manage_pages_inbox *in, int in_size,
+ struct mlx5_manage_pages_outbox *out, int out_size)
+{
+ struct fw_page *fwp;
+ struct rb_node *p;
+ u32 npages;
+ u32 i = 0;
+
+ if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ return mlx5_cmd_exec_check_status(dev, (u32 *)in, in_size,
+ (u32 *)out, out_size);
+
+ npages = be32_to_cpu(in->num_entries);
+
+ p = rb_first(&dev->priv.page_root);
+ while (p && i < npages) {
+ fwp = rb_entry(p, struct fw_page, rb_node);
+ out->pas[i] = cpu_to_be64(fwp->addr);
+ p = rb_next(p);
+ i++;
+ }
+
+ out->num_entries = cpu_to_be32(i);
+ return 0;
+}
+
static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
int *nclaimed)
{
@@ -398,15 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
in.func_id = cpu_to_be16(func_id);
in.num_entries = cpu_to_be32(npages);
mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
- err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
+ err = reclaim_pages_cmd(dev, &in, sizeof(in), out, outlen);
if (err) {
- mlx5_core_err(dev, "failed reclaiming pages\n");
- goto out_free;
- }
- dev->priv.fw_pages -= npages;
-
- if (out->hdr.status) {
- err = mlx5_cmd_status_to_err(&out->hdr);
+ mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
goto out_free;
}
@@ -417,13 +437,15 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
err = -EINVAL;
goto out_free;
}
- if (nclaimed)
- *nclaimed = num_claimed;
for (i = 0; i < num_claimed; i++) {
addr = be64_to_cpu(out->pas[i]);
free_4k(dev, addr);
}
+
+ if (nclaimed)
+ *nclaimed = num_claimed;
+
dev->priv.fw_pages -= num_claimed;
if (func_id)
dev->priv.vfs_pages -= num_claimed;
@@ -514,14 +536,10 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
p = rb_first(&dev->priv.page_root);
if (p) {
fwp = rb_entry(p, struct fw_page, rb_node);
- if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
- free_4k(dev, fwp->addr);
- nclaimed = 1;
- } else {
- err = reclaim_pages(dev, fwp->func_id,
- optimal_reclaimed_pages(),
- &nclaimed);
- }
+ err = reclaim_pages(dev, fwp->func_id,
+ optimal_reclaimed_pages(),
+ &nclaimed);
+
if (err) {
mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
err);
@@ -536,6 +554,13 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
}
} while (p);
+ WARN(dev->priv.fw_pages,
+ "FW pages counter is %d after reclaiming all pages\n",
+ dev->priv.fw_pages);
+ WARN(dev->priv.vfs_pages,
+ "VFs FW pages counter is %d after reclaiming all pages\n",
+ dev->priv.vfs_pages);
+
return 0;
}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 80776d0..fd72ecf 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -629,6 +629,7 @@ struct mlx5_cmd_work_ent {
void *uout;
int uout_size;
mlx5_cmd_cbk_t callback;
+ struct delayed_work cb_timeout_work;
void *context;
int idx;
struct completion done;