iommu/amd: Improve Interrupt Remapping Table Invalidation

Invalidating the Interrupt Remapping Table (IRT) requires the AMD IOMMU
driver to issue INVALIDATE_INTERRUPT_TABLE and COMPLETION_WAIT commands.
Currently, the driver issues the two commands separately, which requires
calling raw_spin_lock_irqsave() twice. In addition, the COMPLETION_WAIT
could potentially be interleaved with other commands, delaying the
COMPLETION_WAIT command.

Therefore, combine issuing of the two commands under one spin lock, and
change struct amd_iommu.cmd_sem_val to use atomic64 to minimize locking.

Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Link: https://lore.kernel.org/r/20230530141137.14376-6-suravee.suthikulpanit@amd.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
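
The patch boils down to one pattern: reserve a completion-wait sequence
number with an atomic increment, build both commands outside the lock, and
then queue the INVALIDATE_INTERRUPT_TABLE entry and its COMPLETION_WAIT
back-to-back under a single lock acquisition so no other command can slip
in between them. Below is a minimal, self-contained userspace sketch of
that pattern (an assumption for illustration, not the driver's code); the
cmd_t type and the queue_cmd()/wait_on_sem() helpers are hypothetical
stand-ins for the driver's command structures and queue helpers, and a
pthread mutex plus C11 atomic stand in for the raw spinlock and atomic64_t.

/*
 * Simplified userspace sketch of the "two commands, one lock" pattern.
 * cmd_t, queue_cmd() and wait_on_sem() are hypothetical stand-ins.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { const char *name; uint64_t data; } cmd_t;

static pthread_mutex_t cmd_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_uint_fast64_t cmd_sem_val;	/* analogue of iommu->cmd_sem_val */

static void queue_cmd(const cmd_t *cmd)
{
	/* Stand-in for __iommu_queue_command_sync(): just log the command. */
	printf("queued %s (data=%llu)\n", cmd->name, (unsigned long long)cmd->data);
}

static void wait_on_sem(uint64_t data)
{
	/* Stand-in for polling the completion-wait semaphore until the
	 * hardware writes 'data' back; nothing to poll in this sketch. */
	(void)data;
}

static void flush_irt_and_complete(uint16_t devid)
{
	cmd_t inv = { "INVALIDATE_INTERRUPT_TABLE", devid };

	/* Reserve the sequence number without holding the lock
	 * (analogue of atomic64_add_return() on cmd_sem_val). */
	uint64_t data = atomic_fetch_add(&cmd_sem_val, 1) + 1;
	cmd_t wait = { "COMPLETION_WAIT", data };

	/* One lock acquisition covers both commands, so no other command
	 * can be interleaved between the invalidation and its wait. */
	pthread_mutex_lock(&cmd_lock);
	queue_cmd(&inv);
	queue_cmd(&wait);
	wait_on_sem(data);
	pthread_mutex_unlock(&cmd_lock);
}

int main(void)
{
	flush_irt_and_complete(0x10);
	return 0;
}

The sequence counter can be bumped outside the lock precisely because it is
atomic, which is what the first two hunks below prepare for by converting
cmd_sem_val to atomic64_t.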
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -752,7 +752,7 @@ struct amd_iommu {
 	u32 flags;
 	volatile u64 *cmd_sem;
-	u64 cmd_sem_val;
+	atomic64_t cmd_sem_val;
 
 #ifdef CONFIG_AMD_IOMMU_DEBUGFS
 	/* DebugFS Info */

--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1733,7 +1733,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
 	iommu->pci_seg = pci_seg;
 
 	raw_spin_lock_init(&iommu->lock);
-	iommu->cmd_sem_val = 0;
+	atomic64_set(&iommu->cmd_sem_val, 0);
 
 	/* Add IOMMU to internal data structures */
 	list_add_tail(&iommu->list, &amd_iommu_list);

--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1175,11 +1175,11 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 	if (!iommu->need_sync)
 		return 0;
 
-	raw_spin_lock_irqsave(&iommu->lock, flags);
-
-	data = ++iommu->cmd_sem_val;
+	data = atomic64_add_return(1, &iommu->cmd_sem_val);
 	build_completion_wait(&cmd, iommu, data);
 
+	raw_spin_lock_irqsave(&iommu->lock, flags);
+
 	ret = __iommu_queue_command_sync(iommu, &cmd, false);
 	if (ret)
 		goto out_unlock;
@@ -1277,11 +1277,28 @@ static void amd_iommu_flush_irt_all(struct amd_iommu *iommu)
 
 static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid)
 {
+	int ret;
+	u64 data;
+	unsigned long flags;
+	struct iommu_cmd cmd, cmd2;
+
 	if (iommu->irtcachedis_enabled)
 		return;
 
-	iommu_flush_irt(iommu, devid);
-	iommu_completion_wait(iommu);
+	build_inv_irt(&cmd, devid);
+	data = atomic64_add_return(1, &iommu->cmd_sem_val);
+	build_completion_wait(&cmd2, iommu, data);
+
+	raw_spin_lock_irqsave(&iommu->lock, flags);
+	ret = __iommu_queue_command_sync(iommu, &cmd, true);
+	if (ret)
+		goto out;
+	ret = __iommu_queue_command_sync(iommu, &cmd2, false);
+	if (ret)
+		goto out;
+	wait_on_sem(iommu, data);
+out:
+	raw_spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
 void iommu_flush_all_caches(struct amd_iommu *iommu)