Merge branch 'x86/pasid' into x86/core, to resolve conflicts

Conflicts:
	tools/objtool/arch/x86/decode.c

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2022-03-15 12:50:49 +01:00
commit 8c490b42fe
19 changed files with 185 additions and 117 deletions

View File

@ -104,18 +104,47 @@ The MSR must be configured on each logical CPU before any application
thread can interact with a device. Threads that belong to the same
process share the same page tables, thus the same MSR value.
PASID is cleared when a process is created. The PASID allocation and MSR
programming may occur long after a process and its threads have been created.
One thread must call iommu_sva_bind_device() to allocate the PASID for the
process. If a thread uses ENQCMD without the MSR first being populated, a #GP
will be raised. The kernel will update the PASID MSR with the PASID for all
threads in the process. A single process PASID can be used simultaneously
with multiple devices since they all share the same address space.
PASID Life Cycle Management
===========================
One thread can call iommu_sva_unbind_device() to free the allocated PASID.
The kernel will clear the PASID MSR for all threads belonging to the process.
PASID is initialized as INVALID_IOASID (-1) when a process is created.
New threads inherit the MSR value from the parent.
Only processes that access SVA-capable devices need to have a PASID
allocated. This allocation happens when a process opens/binds an SVA-capable
device but finds no PASID for this process. Subsequent binds of the same or
other devices will share the same PASID.
Although the PASID is allocated to the process by opening a device,
it is not active in any of the threads of that process. It's loaded to the
IA32_PASID MSR lazily when a thread tries to submit a work descriptor
to a device using the ENQCMD instruction.
That first access will trigger a #GP fault because the IA32_PASID MSR
has not been initialized with the PASID value assigned to the process
when the device was opened. The Linux #GP handler notes that a PASID has
been allocated for the process, and so initializes the IA32_PASID MSR
and returns so that the ENQCMD instruction is re-executed.
On fork(2) or exec(2) the PASID is removed from the process as it no
longer has the same address space that it had when the device was opened.
On clone(2) the new task shares the same address space, so will be
able to use the PASID allocated to the process. The IA32_PASID MSR is not
preemptively initialized for the new task, because the PASID value might not
be allocated yet and the kernel does not know whether this thread is going
to access the device. In addition, a cleared IA32_PASID MSR reduces context
switch overhead through the xstate init optimization. Since #GP faults have
to be handled on any threads that were created before the PASID was assigned
to the mm of the process, newly created threads might as well be treated in
a consistent way.
Because freeing the PASID and clearing the IA32_PASID MSR in all threads at
unbind time is complex, the PASID is freed lazily, only on mm exit.
If a process does a close(2) of the device file descriptor and munmap(2)
of the device MMIO portal, then the driver will unbind the device. The
PASID is still marked VALID in the IA32_PASID MSR for any threads in the
process that accessed the device. But this is harmless as without the
MMIO portal they cannot submit new work to the device.
Relationships
=============

View File

@ -56,8 +56,11 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
/* Force disable because it's broken beyond repair */
#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0
#else
# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
#endif
#ifdef CONFIG_X86_SGX
# define DISABLE_SGX 0

View File

@ -612,6 +612,13 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags)
fpu_inherit_perms(dst_fpu);
fpregs_unlock();
/*
* Children never inherit PASID state.
* Force it to have its init value:
*/
if (use_xsave())
dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID;
trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);

View File

@ -39,6 +39,7 @@
#include <linux/io.h>
#include <linux/hardirq.h>
#include <linux/atomic.h>
#include <linux/ioasid.h>
#include <asm/stacktrace.h>
#include <asm/processor.h>
@ -634,6 +635,57 @@ static bool fixup_iopl_exception(struct pt_regs *regs)
return true;
}
/*
 * ENQCMD executed from user space raises #GP while the IA32_PASID MSR
 * is still unpopulated. When the mm already owns a PASID, load it into
 * the MSR here.
 *
 * Returns true when the MSR was written by this call, false when the
 * #GP could not be fixed up this way.
 */
static bool try_fixup_enqcmd_gp(void)
{
#ifndef CONFIG_IOMMU_SVA
	return false;
#else
	u32 mm_pasid;

	/*
	 * MSR_IA32_PASID is XSAVE-managed state, so a direct write to
	 * the MSR is only possible while fpregs are valid and the
	 * fpstate is not. That is guaranteed when handling a userspace
	 * exception *before* interrupts are re-enabled.
	 */
	lockdep_assert_irqs_disabled();

	/* Hardware without ENQCMD cannot raise a #GP fixable here. */
	if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
		return false;

	mm_pasid = current->mm->pasid;

	/*
	 * Give up when the mm was never allocated a PASID, or when this
	 * thread already had its PASID activated (in which case the #GP
	 * must come from something else).
	 */
	if (!pasid_valid(mm_pasid) || current->pasid_activated)
		return false;

	wrmsrl(MSR_IA32_PASID, mm_pasid | MSR_IA32_PASID_VALID);
	current->pasid_activated = 1;

	return true;
#endif
}
DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
{
char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
@ -642,6 +694,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
unsigned long gp_addr;
int ret;
if (user_mode(regs) && try_fixup_enqcmd_gp())
return;
cond_local_irq_enable(regs);
if (static_cpu_has(X86_FEATURE_UMIP)) {

View File

@ -144,8 +144,8 @@ config IOMMU_DMA
select IRQ_MSI_IOMMU
select NEED_SG_DMA_LENGTH
# Shared Virtual Addressing library
config IOMMU_SVA_LIB
# Shared Virtual Addressing
config IOMMU_SVA
bool
select IOASID
@ -379,7 +379,7 @@ config ARM_SMMU_V3
config ARM_SMMU_V3_SVA
bool "Shared Virtual Addressing support for the ARM SMMUv3"
depends on ARM_SMMU_V3
select IOMMU_SVA_LIB
select IOMMU_SVA
select MMU_NOTIFIER
help
Support for sharing process address spaces with devices using the

View File

@ -27,6 +27,6 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o
obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o
obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
obj-$(CONFIG_APPLE_DART) += apple-dart.o

View File

@ -340,14 +340,12 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm);
if (IS_ERR(bond->smmu_mn)) {
ret = PTR_ERR(bond->smmu_mn);
goto err_free_pasid;
goto err_free_bond;
}
list_add(&bond->list, &master->bonds);
return &bond->sva;
err_free_pasid:
iommu_sva_free_pasid(mm);
err_free_bond:
kfree(bond);
return ERR_PTR(ret);
@ -377,7 +375,6 @@ void arm_smmu_sva_unbind(struct iommu_sva *handle)
if (refcount_dec_and_test(&bond->refs)) {
list_del(&bond->list);
arm_smmu_mmu_notifier_put(bond->smmu_mn);
iommu_sva_free_pasid(bond->mm);
kfree(bond);
}
mutex_unlock(&sva_lock);

View File

@ -52,7 +52,7 @@ config INTEL_IOMMU_SVM
select PCI_PRI
select MMU_NOTIFIER
select IOASID
select IOMMU_SVA_LIB
select IOMMU_SVA
help
Shared Virtual Memory (SVM) provides a facility for devices
to access DMA resources through process address space by

View File

@ -4781,7 +4781,7 @@ attach_failed:
link_failed:
spin_unlock_irqrestore(&device_domain_lock, flags);
if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
ioasid_put(domain->default_pasid);
ioasid_free(domain->default_pasid);
return ret;
}
@ -4811,7 +4811,7 @@ static void aux_domain_remove_dev(struct dmar_domain *domain,
spin_unlock_irqrestore(&device_domain_lock, flags);
if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
ioasid_put(domain->default_pasid);
ioasid_free(domain->default_pasid);
}
static int prepare_domain_attach_device(struct iommu_domain *domain,

View File

@ -514,11 +514,6 @@ static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
}
static void intel_svm_free_pasid(struct mm_struct *mm)
{
iommu_sva_free_pasid(mm);
}
static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
struct device *dev,
struct mm_struct *mm,
@ -662,8 +657,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
kfree(svm);
}
}
/* Drop a PASID reference and free it if no reference. */
intel_svm_free_pasid(mm);
}
out:
return ret;
@ -1047,8 +1040,6 @@ struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void
}
sva = intel_svm_bind_mm(iommu, dev, mm, flags);
if (IS_ERR_OR_NULL(sva))
intel_svm_free_pasid(mm);
mutex_unlock(&pasid_mutex);
return sva;

View File

@ -2,7 +2,7 @@
/*
* I/O Address Space ID allocator. There is one global IOASID space, split into
* subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and
* free IOASIDs with ioasid_alloc and ioasid_put.
* free IOASIDs with ioasid_alloc() and ioasid_free().
*/
#include <linux/ioasid.h>
#include <linux/module.h>
@ -15,7 +15,6 @@ struct ioasid_data {
struct ioasid_set *set;
void *private;
struct rcu_head rcu;
refcount_t refs;
};
/*
@ -315,7 +314,6 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
data->set = set;
data->private = private;
refcount_set(&data->refs, 1);
/*
* Custom allocator needs allocator data to perform platform specific
@ -348,35 +346,11 @@ exit_free:
EXPORT_SYMBOL_GPL(ioasid_alloc);
/**
* ioasid_get - obtain a reference to the IOASID
* @ioasid: the ID to get
*/
void ioasid_get(ioasid_t ioasid)
{
struct ioasid_data *ioasid_data;
spin_lock(&ioasid_allocator_lock);
ioasid_data = xa_load(&active_allocator->xa, ioasid);
if (ioasid_data)
refcount_inc(&ioasid_data->refs);
else
WARN_ON(1);
spin_unlock(&ioasid_allocator_lock);
}
EXPORT_SYMBOL_GPL(ioasid_get);
/**
* ioasid_put - Release a reference to an ioasid
* ioasid_free - Free an ioasid
* @ioasid: the ID to remove
*
* Put a reference to the IOASID, free it when the number of references drops to
* zero.
*
* Return: %true if the IOASID was freed, %false otherwise.
*/
bool ioasid_put(ioasid_t ioasid)
void ioasid_free(ioasid_t ioasid)
{
bool free = false;
struct ioasid_data *ioasid_data;
spin_lock(&ioasid_allocator_lock);
@ -386,10 +360,6 @@ bool ioasid_put(ioasid_t ioasid)
goto exit_unlock;
}
free = refcount_dec_and_test(&ioasid_data->refs);
if (!free)
goto exit_unlock;
active_allocator->ops->free(ioasid, active_allocator->ops->pdata);
/* Custom allocator needs additional steps to free the xa element */
if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) {
@ -399,9 +369,8 @@ bool ioasid_put(ioasid_t ioasid)
exit_unlock:
spin_unlock(&ioasid_allocator_lock);
return free;
}
EXPORT_SYMBOL_GPL(ioasid_put);
EXPORT_SYMBOL_GPL(ioasid_free);
/**
* ioasid_find - Find IOASID data

View File

@ -18,8 +18,7 @@ static DECLARE_IOASID_SET(iommu_sva_pasid);
*
* Try to allocate a PASID for this mm, or reuse the existing one
* provided it fits within the [@min, @max] range. On success the PASID is
* available in mm->pasid, and must be released with iommu_sva_free_pasid().
* @min must be greater than 0, because 0 indicates an unused mm->pasid.
* available in mm->pasid and will be available for the lifetime of the mm.
*
* Returns 0 on success and < 0 on error.
*/
@ -33,38 +32,24 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
return -EINVAL;
mutex_lock(&iommu_sva_lock);
if (mm->pasid) {
if (mm->pasid >= min && mm->pasid <= max)
ioasid_get(mm->pasid);
else
/* Is a PASID already associated with this mm? */
if (pasid_valid(mm->pasid)) {
if (mm->pasid < min || mm->pasid >= max)
ret = -EOVERFLOW;
} else {
pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
if (pasid == INVALID_IOASID)
ret = -ENOMEM;
else
mm->pasid = pasid;
goto out;
}
pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
if (!pasid_valid(pasid))
ret = -ENOMEM;
else
mm_pasid_set(mm, pasid);
out:
mutex_unlock(&iommu_sva_lock);
return ret;
}
EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid);
/**
* iommu_sva_free_pasid - Release the mm's PASID
* @mm: the mm
*
* Drop one reference to a PASID allocated with iommu_sva_alloc_pasid()
*/
void iommu_sva_free_pasid(struct mm_struct *mm)
{
mutex_lock(&iommu_sva_lock);
if (ioasid_put(mm->pasid))
mm->pasid = 0;
mutex_unlock(&iommu_sva_lock);
}
EXPORT_SYMBOL_GPL(iommu_sva_free_pasid);
/* ioasid_find getter() requires a void * argument */
static bool __mmget_not_zero(void *mm)
{

View File

@ -9,7 +9,6 @@
#include <linux/mm_types.h>
int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max);
void iommu_sva_free_pasid(struct mm_struct *mm);
struct mm_struct *iommu_sva_find(ioasid_t pasid);
/* I/O Page fault */
@ -17,7 +16,7 @@ struct device;
struct iommu_fault;
struct iopf_queue;
#ifdef CONFIG_IOMMU_SVA_LIB
#ifdef CONFIG_IOMMU_SVA
int iommu_queue_iopf(struct iommu_fault *fault, void *cookie);
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev);
@ -28,7 +27,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name);
void iopf_queue_free(struct iopf_queue *queue);
int iopf_queue_discard_partial(struct iopf_queue *queue);
#else /* CONFIG_IOMMU_SVA_LIB */
#else /* CONFIG_IOMMU_SVA */
static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
{
return -ENODEV;
@ -64,5 +63,5 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
{
return -ENODEV;
}
#endif /* CONFIG_IOMMU_SVA_LIB */
#endif /* CONFIG_IOMMU_SVA */
#endif /* _IOMMU_SVA_LIB_H */

View File

@ -34,13 +34,16 @@ struct ioasid_allocator_ops {
#if IS_ENABLED(CONFIG_IOASID)
ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
void *private);
void ioasid_get(ioasid_t ioasid);
bool ioasid_put(ioasid_t ioasid);
void ioasid_free(ioasid_t ioasid);
void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
bool (*getter)(void *));
int ioasid_register_allocator(struct ioasid_allocator_ops *allocator);
void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator);
int ioasid_set_data(ioasid_t ioasid, void *data);
/* Return true if @ioasid is anything other than INVALID_IOASID. */
static inline bool pasid_valid(ioasid_t ioasid)
{
return ioasid != INVALID_IOASID;
}
#else /* !CONFIG_IOASID */
static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min,
@ -49,14 +52,7 @@ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min,
return INVALID_IOASID;
}
static inline void ioasid_get(ioasid_t ioasid)
{
}
static inline bool ioasid_put(ioasid_t ioasid)
{
return false;
}
static inline void ioasid_free(ioasid_t ioasid) { }
static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
bool (*getter)(void *))
@ -78,5 +74,10 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data)
return -ENOTSUPP;
}
/* Stub for !CONFIG_IOASID builds: no PASID is ever reported as valid. */
static inline bool pasid_valid(ioasid_t ioasid)
{
return false;
}
#endif /* CONFIG_IOASID */
#endif /* __LINUX_IOASID_H */

View File

@ -634,7 +634,7 @@ struct mm_struct {
#endif
struct work_struct async_put_work;
#ifdef CONFIG_IOMMU_SUPPORT
#ifdef CONFIG_IOMMU_SVA
u32 pasid;
#endif
} __randomize_layout;

View File

@ -938,6 +938,9 @@ struct task_struct {
/* Recursion prevention for eventfd_signal() */
unsigned in_eventfd_signal:1;
#endif
#ifdef CONFIG_IOMMU_SVA
unsigned pasid_activated:1;
#endif
unsigned long atomic_flags; /* Flags requiring atomic access. */

View File

@ -8,6 +8,7 @@
#include <linux/mm_types.h>
#include <linux/gfp.h>
#include <linux/sync_core.h>
#include <linux/ioasid.h>
/*
* Routines for handling mm_structs
@ -433,4 +434,29 @@ static inline void membarrier_update_current_mm(struct mm_struct *next_mm)
}
#endif
#ifdef CONFIG_IOMMU_SVA
/* Start @mm out with no PASID (INVALID_IOASID). */
static inline void mm_pasid_init(struct mm_struct *mm)
{
	mm->pasid = INVALID_IOASID;
}

/* Record @pasid as the PASID associated with @mm. */
static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid)
{
	mm->pasid = pasid;
}

/*
 * Free the PASID held by @mm, if it has a valid one, and reset
 * mm->pasid to INVALID_IOASID. Does nothing otherwise.
 */
static inline void mm_pasid_drop(struct mm_struct *mm)
{
	if (!pasid_valid(mm->pasid))
		return;

	ioasid_free(mm->pasid);
	mm->pasid = INVALID_IOASID;
}
#else
/* Without CONFIG_IOMMU_SVA these helpers are no-ops. */
static inline void mm_pasid_init(struct mm_struct *mm) {}
static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {}
static inline void mm_pasid_drop(struct mm_struct *mm) {}
#endif
#endif /* _LINUX_SCHED_MM_H */

View File

@ -97,6 +97,7 @@
#include <linux/scs.h>
#include <linux/io_uring.h>
#include <linux/bpf.h>
#include <linux/sched/mm.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@ -967,6 +968,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->use_memdelay = 0;
#endif
#ifdef CONFIG_IOMMU_SVA
tsk->pasid_activated = 0;
#endif
#ifdef CONFIG_MEMCG
tsk->active_memcg = NULL;
#endif
@ -1019,13 +1024,6 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
#endif
}
static void mm_init_pasid(struct mm_struct *mm)
{
#ifdef CONFIG_IOMMU_SUPPORT
mm->pasid = INIT_PASID;
#endif
}
static void mm_init_uprobes_state(struct mm_struct *mm)
{
#ifdef CONFIG_UPROBES
@ -1054,7 +1052,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
mm_init_cpumask(mm);
mm_init_aio(mm);
mm_init_owner(mm, p);
mm_init_pasid(mm);
mm_pasid_init(mm);
RCU_INIT_POINTER(mm->exe_file, NULL);
mmu_notifier_subscriptions_init(mm);
init_tlb_flush_pending(mm);
@ -1121,6 +1119,7 @@ static inline void __mmput(struct mm_struct *mm)
}
if (mm->binfmt)
module_put(mm->binfmt->module);
mm_pasid_drop(mm);
mmdrop(mm);
}

View File

@ -10,6 +10,7 @@
#include <linux/atomic.h>
#include <linux/user_namespace.h>
#include <linux/ioasid.h>
#include <asm/mmu.h>
#ifndef INIT_MM_CONTEXT
@ -38,6 +39,9 @@ struct mm_struct init_mm = {
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
.user_ns = &init_user_ns,
.cpu_bitmap = CPU_BITS_NONE,
#ifdef CONFIG_IOMMU_SVA
.pasid = INVALID_IOASID,
#endif
INIT_MM_CONTEXT(init_mm)
};