forked from Minki/linux
CXL: Return error to PSL if IRQ demultiplexing fails & print clearer warning
If an AFU has a hardware bug that causes it to acknowledge a context terminate or remove while that context has outstanding transactions, it is possible for the kernel to receive an interrupt for that context after we have removed it from the context list. In that case the kernel cannot demultiplex the interrupt (or worse - if we have already reallocated the process handle, we could mis-attribute it to the new context), and until now it only printed a big scary warning. It did not acknowledge the interrupt, which would effectively halt further translation fault processing on the PSL. This patch makes the warning clearer about the likely cause of the issue (i.e. a hardware bug), to make it obvious to future AFU designers what needs to be fixed. It also prints out the process handle, which can then be matched up with hardware and software traces for debugging. It also acknowledges the interrupt to the PSL with either an address error or an acknowledge, so that the PSL can continue with other translations. Signed-off-by: Ian Munsie <imunsie@au1.ibm.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent
ac610cd7c2
commit
27bbcef20a
@ -612,7 +612,7 @@ int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed,
|
|||||||
u64 amr);
|
u64 amr);
|
||||||
int cxl_detach_process(struct cxl_context *ctx);
|
int cxl_detach_process(struct cxl_context *ctx);
|
||||||
|
|
||||||
int cxl_get_irq(struct cxl_context *ctx, struct cxl_irq_info *info);
|
int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info);
|
||||||
int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask);
|
int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask);
|
||||||
|
|
||||||
int cxl_check_error(struct cxl_afu *afu);
|
int cxl_check_error(struct cxl_afu *afu);
|
||||||
|
@ -92,20 +92,13 @@ static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 da
|
|||||||
return IRQ_HANDLED;
|
return IRQ_HANDLED;
|
||||||
}
|
}
|
||||||
|
|
||||||
static irqreturn_t cxl_irq(int irq, void *data)
|
static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info)
|
||||||
{
|
{
|
||||||
struct cxl_context *ctx = data;
|
struct cxl_context *ctx = data;
|
||||||
struct cxl_irq_info irq_info;
|
|
||||||
u64 dsisr, dar;
|
u64 dsisr, dar;
|
||||||
int result;
|
|
||||||
|
|
||||||
if ((result = cxl_get_irq(ctx, &irq_info))) {
|
dsisr = irq_info->dsisr;
|
||||||
WARN(1, "Unable to get CXL IRQ Info: %i\n", result);
|
dar = irq_info->dar;
|
||||||
return IRQ_HANDLED;
|
|
||||||
}
|
|
||||||
|
|
||||||
dsisr = irq_info.dsisr;
|
|
||||||
dar = irq_info.dar;
|
|
||||||
|
|
||||||
pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar);
|
pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar);
|
||||||
|
|
||||||
@ -149,9 +142,9 @@ static irqreturn_t cxl_irq(int irq, void *data)
|
|||||||
if (dsisr & CXL_PSL_DSISR_An_UR)
|
if (dsisr & CXL_PSL_DSISR_An_UR)
|
||||||
pr_devel("CXL interrupt: AURP PTE not found\n");
|
pr_devel("CXL interrupt: AURP PTE not found\n");
|
||||||
if (dsisr & CXL_PSL_DSISR_An_PE)
|
if (dsisr & CXL_PSL_DSISR_An_PE)
|
||||||
return handle_psl_slice_error(ctx, dsisr, irq_info.errstat);
|
return handle_psl_slice_error(ctx, dsisr, irq_info->errstat);
|
||||||
if (dsisr & CXL_PSL_DSISR_An_AE) {
|
if (dsisr & CXL_PSL_DSISR_An_AE) {
|
||||||
pr_devel("CXL interrupt: AFU Error %.llx\n", irq_info.afu_err);
|
pr_devel("CXL interrupt: AFU Error %.llx\n", irq_info->afu_err);
|
||||||
|
|
||||||
if (ctx->pending_afu_err) {
|
if (ctx->pending_afu_err) {
|
||||||
/*
|
/*
|
||||||
@ -163,10 +156,10 @@ static irqreturn_t cxl_irq(int irq, void *data)
|
|||||||
*/
|
*/
|
||||||
dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error "
|
dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error "
|
||||||
"undelivered to pe %i: %.llx\n",
|
"undelivered to pe %i: %.llx\n",
|
||||||
ctx->pe, irq_info.afu_err);
|
ctx->pe, irq_info->afu_err);
|
||||||
} else {
|
} else {
|
||||||
spin_lock(&ctx->lock);
|
spin_lock(&ctx->lock);
|
||||||
ctx->afu_err = irq_info.afu_err;
|
ctx->afu_err = irq_info->afu_err;
|
||||||
ctx->pending_afu_err = 1;
|
ctx->pending_afu_err = 1;
|
||||||
spin_unlock(&ctx->lock);
|
spin_unlock(&ctx->lock);
|
||||||
|
|
||||||
@ -182,24 +175,43 @@ static irqreturn_t cxl_irq(int irq, void *data)
|
|||||||
return IRQ_HANDLED;
|
return IRQ_HANDLED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static irqreturn_t fail_psl_irq(struct cxl_afu *afu, struct cxl_irq_info *irq_info)
|
||||||
|
{
|
||||||
|
if (irq_info->dsisr & CXL_PSL_DSISR_TRANS)
|
||||||
|
cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
|
||||||
|
else
|
||||||
|
cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
|
||||||
|
|
||||||
|
return IRQ_HANDLED;
|
||||||
|
}
|
||||||
|
|
||||||
static irqreturn_t cxl_irq_multiplexed(int irq, void *data)
|
static irqreturn_t cxl_irq_multiplexed(int irq, void *data)
|
||||||
{
|
{
|
||||||
struct cxl_afu *afu = data;
|
struct cxl_afu *afu = data;
|
||||||
struct cxl_context *ctx;
|
struct cxl_context *ctx;
|
||||||
|
struct cxl_irq_info irq_info;
|
||||||
int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff;
|
int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
if ((ret = cxl_get_irq(afu, &irq_info))) {
|
||||||
|
WARN(1, "Unable to get CXL IRQ Info: %i\n", ret);
|
||||||
|
return fail_psl_irq(afu, &irq_info);
|
||||||
|
}
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
ctx = idr_find(&afu->contexts_idr, ph);
|
ctx = idr_find(&afu->contexts_idr, ph);
|
||||||
if (ctx) {
|
if (ctx) {
|
||||||
ret = cxl_irq(irq, ctx);
|
ret = cxl_irq(irq, ctx, &irq_info);
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
||||||
WARN(1, "Unable to demultiplex CXL PSL IRQ\n");
|
WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %.16llx DAR"
|
||||||
return IRQ_HANDLED;
|
" %.16llx\n(Possible AFU HW issue - was a term/remove acked"
|
||||||
|
" with outstanding transactions?)\n", ph, irq_info.dsisr,
|
||||||
|
irq_info.dar);
|
||||||
|
return fail_psl_irq(afu, &irq_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
static irqreturn_t cxl_irq_afu(int irq, void *data)
|
static irqreturn_t cxl_irq_afu(int irq, void *data)
|
||||||
|
@ -637,18 +637,18 @@ int cxl_detach_process(struct cxl_context *ctx)
|
|||||||
return detach_process_native_afu_directed(ctx);
|
return detach_process_native_afu_directed(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
int cxl_get_irq(struct cxl_context *ctx, struct cxl_irq_info *info)
|
int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info)
|
||||||
{
|
{
|
||||||
u64 pidtid;
|
u64 pidtid;
|
||||||
|
|
||||||
info->dsisr = cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An);
|
info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
|
||||||
info->dar = cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An);
|
info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An);
|
||||||
info->dsr = cxl_p2n_read(ctx->afu, CXL_PSL_DSR_An);
|
info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An);
|
||||||
pidtid = cxl_p2n_read(ctx->afu, CXL_PSL_PID_TID_An);
|
pidtid = cxl_p2n_read(afu, CXL_PSL_PID_TID_An);
|
||||||
info->pid = pidtid >> 32;
|
info->pid = pidtid >> 32;
|
||||||
info->tid = pidtid & 0xffffffff;
|
info->tid = pidtid & 0xffffffff;
|
||||||
info->afu_err = cxl_p2n_read(ctx->afu, CXL_AFU_ERR_An);
|
info->afu_err = cxl_p2n_read(afu, CXL_AFU_ERR_An);
|
||||||
info->errstat = cxl_p2n_read(ctx->afu, CXL_PSL_ErrStat_An);
|
info->errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user