net/mlx5_core: Use tasklet for user-space CQ completion events

Previously, we fired all completion callbacks straight from the ISR.

Some of those callbacks were lightweight (for example, the mlx5 Ethernet
NAPI callbacks), but some did more work (for example, the user-space RDMA
stack's uverbs completion handler). Beyond that, doing more than the
minimal work in an ISR is generally considered wrong, and it can even lead
to a hard lockup of the system: when the hardware generates a large number
of completion events, the loop over those events can run long enough for
the system watchdog to detect a hard lockup.

To avoid that, add a new way of invoking completion event callbacks. In
the interrupt handler itself, we only add the CQs that received a
completion event to a per-EQ list and schedule a tasklet. In tasklet
context we then loop over all the CQs in the list and invoke the user
callback (see the sketch below).

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Author:    Matan Barak <matanb@mellanox.com>, 2016-04-17 17:08:40 +03:00
Committer: Doug Ledford
Parent:    e3b6d8cf8d
Commit:    94c6825e0f
6 changed files, 104 insertions(+), 1 deletion(-)
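
Before the per-file hunks, here is a minimal, self-contained sketch of the scheme described above. It is illustrative only, with made-up demo_* names, and is not the driver code that follows: the ISR merely queues the CQ on a per-EQ list and schedules a tasklet; the tasklet splices that list under the lock and runs the completion handlers outside hard-IRQ context.

#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/spinlock.h>

/* Illustrative stand-ins; the real driver uses mlx5_eq_tasklet and mlx5_core_cq. */
struct demo_cq {
        struct list_head node;                  /* init with INIT_LIST_HEAD() at CQ creation */
        void (*comp)(struct demo_cq *cq);       /* completion handler, run from the tasklet */
};

struct demo_eq {
        struct list_head list;                  /* filled from the ISR */
        struct list_head process_list;          /* drained by the tasklet */
        spinlock_t lock;
        struct tasklet_struct task;
};

/* Tasklet (softirq) context: take the queued CQs and run their handlers. */
static void demo_tasklet_cb(unsigned long data)
{
        struct demo_eq *eq = (struct demo_eq *)data;
        struct demo_cq *cq, *tmp;
        unsigned long flags;

        spin_lock_irqsave(&eq->lock, flags);
        list_splice_tail_init(&eq->list, &eq->process_list);
        spin_unlock_irqrestore(&eq->lock, flags);

        list_for_each_entry_safe(cq, tmp, &eq->process_list, node) {
                list_del_init(&cq->node);
                cq->comp(cq);
        }
}

/* ISR context: only remember the CQ and defer the heavy work. */
static void demo_isr_completion(struct demo_eq *eq, struct demo_cq *cq)
{
        unsigned long flags;

        spin_lock_irqsave(&eq->lock, flags);
        if (list_empty(&cq->node))              /* not queued yet */
                list_add_tail(&cq->node, &eq->list);
        spin_unlock_irqrestore(&eq->lock, flags);

        tasklet_schedule(&eq->task);
}

static void demo_eq_init(struct demo_eq *eq)
{
        INIT_LIST_HEAD(&eq->list);
        INIT_LIST_HEAD(&eq->process_list);
        spin_lock_init(&eq->lock);
        tasklet_init(&eq->task, demo_tasklet_cb, (unsigned long)eq);
}

The real mlx5_cq_tasklet_cb() below additionally bounds the time spent in one tasklet run (TASKLET_MAX_TIME) and reschedules itself if CQs remain, and it holds a reference on each CQ while it sits on the list.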

drivers/net/ethernet/mellanox/mlx5/core/cq.c

@@ -39,6 +39,53 @@
#include <linux/mlx5/cq.h>
#include "mlx5_core.h"

#define TASKLET_MAX_TIME 2
#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)

void mlx5_cq_tasklet_cb(unsigned long data)
{
        unsigned long flags;
        unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES;
        struct mlx5_eq_tasklet *ctx = (struct mlx5_eq_tasklet *)data;
        struct mlx5_core_cq *mcq;
        struct mlx5_core_cq *temp;

        spin_lock_irqsave(&ctx->lock, flags);
        list_splice_tail_init(&ctx->list, &ctx->process_list);
        spin_unlock_irqrestore(&ctx->lock, flags);

        list_for_each_entry_safe(mcq, temp, &ctx->process_list,
                                 tasklet_ctx.list) {
                list_del_init(&mcq->tasklet_ctx.list);
                mcq->tasklet_ctx.comp(mcq);
                if (atomic_dec_and_test(&mcq->refcount))
                        complete(&mcq->free);
                if (time_after(jiffies, end))
                        break;
        }

        if (!list_empty(&ctx->process_list))
                tasklet_schedule(&ctx->task);
}

static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
{
        unsigned long flags;
        struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv;

        spin_lock_irqsave(&tasklet_ctx->lock, flags);
        /* When migrating CQs between EQs will be implemented, please note
         * that you need to sync this point. It is possible that
         * while migrating a CQ, completions on the old EQs could
         * still arrive.
         */
        if (list_empty_careful(&cq->tasklet_ctx.list)) {
                atomic_inc(&cq->refcount);
                list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
        }
        spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
}

void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
{
        struct mlx5_core_cq *cq;
@@ -96,6 +143,13 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
        struct mlx5_create_cq_mbox_out out;
        struct mlx5_destroy_cq_mbox_in din;
        struct mlx5_destroy_cq_mbox_out dout;
        int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
                           c_eqn);
        struct mlx5_eq *eq;

        eq = mlx5_eqn2eq(dev, eqn);
        if (IS_ERR(eq))
                return PTR_ERR(eq);

        in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ);
        memset(&out, 0, sizeof(out));
@@ -111,6 +165,11 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
        cq->arm_sn = 0;
        atomic_set(&cq->refcount, 1);
        init_completion(&cq->free);
        if (!cq->comp)
                cq->comp = mlx5_add_cq_to_tasklet;
        /* assuming CQ will be deleted before the EQ */
        cq->tasklet_ctx.priv = &eq->tasklet_ctx;
        INIT_LIST_HEAD(&cq->tasklet_ctx.list);

        spin_lock_irq(&table->lock);
        err = radix_tree_insert(&table->tree, cq->cqn, cq);
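
For context on the cq.c change above: a CQ owner that wants the deferred path simply does not set cq->comp before calling mlx5_core_create_cq(); the core then installs mlx5_add_cq_to_tasklet() as the IRQ-time handler, and the tasklet later invokes cq->tasklet_ctx.comp. A hypothetical consumer (handler and function names invented for illustration) might look like this:

/* Hypothetical consumer sketch; my_cq_comp_deferred is not part of this patch. */
static void my_cq_comp_deferred(struct mlx5_core_cq *mcq)
{
        /* Runs from mlx5_cq_tasklet_cb() in softirq context, not from the EQ ISR. */
}

static int my_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
                        struct mlx5_create_cq_mbox_in *in, int inlen)
{
        /* assumes *cq was zero-initialised by the caller */
        cq->tasklet_ctx.comp = my_cq_comp_deferred;     /* called by the tasklet */
        /* cq->comp is left NULL, so the core plugs in mlx5_add_cq_to_tasklet() */
        return mlx5_core_create_cq(dev, cq, in, inlen);
}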

drivers/net/ethernet/mellanox/mlx5/core/eq.c

@@ -202,7 +202,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
        struct mlx5_eqe *eqe;
        int eqes_found = 0;
        int set_ci = 0;
-       u32 cqn;
+       u32 cqn = -1;
        u32 rsn;
        u8 port;
@@ -320,6 +320,9 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
        eq_update_ci(eq, 1);

        if (cqn != -1)
                tasklet_schedule(&eq->tasklet_ctx.task);

        return eqes_found;
}
@@ -403,6 +406,12 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
        if (err)
                goto err_irq;

        INIT_LIST_HEAD(&eq->tasklet_ctx.list);
        INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
        spin_lock_init(&eq->tasklet_ctx.lock);
        tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
                     (unsigned long)&eq->tasklet_ctx);

        /* EQs are created in ARMED state
         */
        eq_update_ci(eq, 1);
@@ -436,6 +445,7 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
                mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
                               eq->eqn);
        synchronize_irq(eq->irqn);
        tasklet_disable(&eq->tasklet_ctx.task);
        mlx5_buf_free(dev, &eq->buf);

        return err;
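
The eq.c changes tie the tasklet to the EQ lifecycle: it is initialised in mlx5_create_map_eq(), scheduled at the end of mlx5_eq_int() only when a completion EQE was seen (cqn != -1), and quiesced in mlx5_destroy_unmap_eq() after synchronize_irq(), so no new schedule can race with teardown. A generic sketch of that init/schedule/disable ordering, using the pre-4.9 unsigned-long-cookie tasklet API and made-up demo_* names, could look like:

#include <linux/interrupt.h>

struct demo_irq_ctx {
        struct tasklet_struct task;
};

static void demo_bottom_half(unsigned long data)
{
        struct demo_irq_ctx *ctx = (struct demo_irq_ctx *)data;

        /* deferred (softirq) work for ctx goes here */
        (void)ctx;
}

static irqreturn_t demo_hard_irq(int irq, void *dev_id)
{
        struct demo_irq_ctx *ctx = dev_id;

        tasklet_schedule(&ctx->task);   /* cheap, safe from hard-IRQ context */
        return IRQ_HANDLED;
}

static void demo_setup(struct demo_irq_ctx *ctx)
{
        tasklet_init(&ctx->task, demo_bottom_half, (unsigned long)ctx);
}

static void demo_teardown(struct demo_irq_ctx *ctx, unsigned int irq)
{
        synchronize_irq(irq);           /* no handler still running ...  */
        tasklet_disable(&ctx->task);    /* ... then park the bottom half */
}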

drivers/net/ethernet/mellanox/mlx5/core/main.c

@@ -660,6 +660,23 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
}
EXPORT_SYMBOL(mlx5_vector2eqn);

struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn)
{
        struct mlx5_eq_table *table = &dev->priv.eq_table;
        struct mlx5_eq *eq;

        spin_lock(&table->lock);
        list_for_each_entry(eq, &table->comp_eqs_list, list)
                if (eq->eqn == eqn) {
                        spin_unlock(&table->lock);
                        return eq;
                }
        spin_unlock(&table->lock);

        return ERR_PTR(-ENOENT);
}

static void free_comp_eqs(struct mlx5_core_dev *dev)
{
        struct mlx5_eq_table *table = &dev->priv.eq_table;

drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h

@@ -100,6 +100,8 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev);
u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx);
struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
void mlx5_cq_tasklet_cb(unsigned long data);

void mlx5e_init(void);
void mlx5e_cleanup(void);

include/linux/mlx5/cq.h

@@ -53,6 +53,11 @@ struct mlx5_core_cq {
        unsigned arm_sn;
        struct mlx5_rsc_debug *dbg;
        int pid;
        struct {
                struct list_head list;
                void (*comp)(struct mlx5_core_cq *);
                void *priv;
        } tasklet_ctx;
};

include/linux/mlx5/driver.h

@@ -41,6 +41,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/radix-tree.h>
#include <linux/interrupt.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/doorbell.h>
@@ -304,6 +305,14 @@ struct mlx5_buf {
        u8 page_shift;
};

struct mlx5_eq_tasklet {
        struct list_head list;
        struct list_head process_list;
        struct tasklet_struct task;
        /* lock on completion tasklet list */
        spinlock_t lock;
};

struct mlx5_eq {
        struct mlx5_core_dev *dev;
        __be32 __iomem *doorbell;

@@ -317,6 +326,7 @@ struct mlx5_eq {
        struct list_head list;
        int index;
        struct mlx5_rsc_debug *dbg;
        struct mlx5_eq_tasklet tasklet_ctx;
};

struct mlx5_core_psv {