drm/amdkfd: fix zero reading of VMID and PASID for Hawaii
Upon a VM fault, the VMID and PASID written by the HW into the IH ring entry are zero on Hawaii. Instead of reading them from ih_ring_entry, read them directly from the registers. This workaround fixes the soft hang issues caused by mishandled VM faults on Hawaii. Signed-off-by: Lan Xiao <Lan.Xiao@amd.com> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
This commit is contained in:
parent
2640c3facb
commit
58e6988612
@ -145,6 +145,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
|
|||||||
uint32_t page_table_base);
|
uint32_t page_table_base);
|
||||||
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
|
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
|
||||||
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
|
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
|
||||||
|
static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
|
||||||
|
|
||||||
/* Because of REG_GET_FIELD() being used, we put this function in the
|
/* Because of REG_GET_FIELD() being used, we put this function in the
|
||||||
* asic specific file.
|
* asic specific file.
|
||||||
@ -216,7 +217,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
|
|||||||
.invalidate_tlbs = invalidate_tlbs,
|
.invalidate_tlbs = invalidate_tlbs,
|
||||||
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
||||||
.submit_ib = amdgpu_amdkfd_submit_ib,
|
.submit_ib = amdgpu_amdkfd_submit_ib,
|
||||||
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
|
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
|
||||||
|
.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg
|
||||||
};
|
};
|
||||||
|
|
||||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
|
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
|
||||||
@ -912,3 +914,19 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
|
|||||||
RREG32(mmVM_INVALIDATE_RESPONSE);
|
RREG32(mmVM_INVALIDATE_RESPONSE);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* read_vmid_from_vmfault_reg - read vmid from register
|
||||||
|
*
|
||||||
|
* adev: amdgpu_device pointer
|
||||||
|
* @vmid: vmid pointer
|
||||||
|
* read vmid from register (CIK).
|
||||||
|
*/
|
||||||
|
static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
|
||||||
|
{
|
||||||
|
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||||
|
|
||||||
|
uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
|
||||||
|
|
||||||
|
return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
|
||||||
|
}
|
||||||
|
@ -25,12 +25,39 @@
|
|||||||
#include "cik_int.h"
|
#include "cik_int.h"
|
||||||
|
|
||||||
static bool cik_event_interrupt_isr(struct kfd_dev *dev,
|
static bool cik_event_interrupt_isr(struct kfd_dev *dev,
|
||||||
const uint32_t *ih_ring_entry)
|
const uint32_t *ih_ring_entry,
|
||||||
|
uint32_t *patched_ihre,
|
||||||
|
bool *patched_flag)
|
||||||
{
|
{
|
||||||
const struct cik_ih_ring_entry *ihre =
|
const struct cik_ih_ring_entry *ihre =
|
||||||
(const struct cik_ih_ring_entry *)ih_ring_entry;
|
(const struct cik_ih_ring_entry *)ih_ring_entry;
|
||||||
|
const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
|
||||||
unsigned int vmid, pasid;
|
unsigned int vmid, pasid;
|
||||||
|
|
||||||
|
/* This workaround is due to HW/FW limitation on Hawaii that
|
||||||
|
* VMID and PASID are not written into ih_ring_entry
|
||||||
|
*/
|
||||||
|
if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
|
||||||
|
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
|
||||||
|
dev->device_info->asic_family == CHIP_HAWAII) {
|
||||||
|
struct cik_ih_ring_entry *tmp_ihre =
|
||||||
|
(struct cik_ih_ring_entry *)patched_ihre;
|
||||||
|
|
||||||
|
*patched_flag = true;
|
||||||
|
*tmp_ihre = *ihre;
|
||||||
|
|
||||||
|
vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);
|
||||||
|
pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid);
|
||||||
|
|
||||||
|
tmp_ihre->ring_id &= 0x000000ff;
|
||||||
|
tmp_ihre->ring_id |= vmid << 8;
|
||||||
|
tmp_ihre->ring_id |= pasid << 16;
|
||||||
|
|
||||||
|
return (pasid != 0) &&
|
||||||
|
vmid >= dev->vm_info.first_vmid_kfd &&
|
||||||
|
vmid <= dev->vm_info.last_vmid_kfd;
|
||||||
|
}
|
||||||
|
|
||||||
/* Only handle interrupts from KFD VMIDs */
|
/* Only handle interrupts from KFD VMIDs */
|
||||||
vmid = (ihre->ring_id & 0x0000ff00) >> 8;
|
vmid = (ihre->ring_id & 0x0000ff00) >> 8;
|
||||||
if (vmid < dev->vm_info.first_vmid_kfd ||
|
if (vmid < dev->vm_info.first_vmid_kfd ||
|
||||||
|
@ -577,14 +577,24 @@ dqm_start_error:
|
|||||||
/* This is called directly from KGD at ISR. */
|
/* This is called directly from KGD at ISR. */
|
||||||
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
|
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
|
||||||
{
|
{
|
||||||
|
uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
|
||||||
|
bool is_patched = false;
|
||||||
|
|
||||||
if (!kfd->init_complete)
|
if (!kfd->init_complete)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
|
||||||
|
dev_err_once(kfd_device, "Ring entry too small\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
spin_lock(&kfd->interrupt_lock);
|
spin_lock(&kfd->interrupt_lock);
|
||||||
|
|
||||||
if (kfd->interrupts_active
|
if (kfd->interrupts_active
|
||||||
&& interrupt_is_wanted(kfd, ih_ring_entry)
|
&& interrupt_is_wanted(kfd, ih_ring_entry,
|
||||||
&& enqueue_ih_ring_entry(kfd, ih_ring_entry))
|
patched_ihre, &is_patched)
|
||||||
|
&& enqueue_ih_ring_entry(kfd,
|
||||||
|
is_patched ? patched_ihre : ih_ring_entry))
|
||||||
queue_work(kfd->ih_wq, &kfd->interrupt_work);
|
queue_work(kfd->ih_wq, &kfd->interrupt_work);
|
||||||
|
|
||||||
spin_unlock(&kfd->interrupt_lock);
|
spin_unlock(&kfd->interrupt_lock);
|
||||||
|
@ -26,7 +26,9 @@
|
|||||||
|
|
||||||
|
|
||||||
static bool event_interrupt_isr_v9(struct kfd_dev *dev,
|
static bool event_interrupt_isr_v9(struct kfd_dev *dev,
|
||||||
const uint32_t *ih_ring_entry)
|
const uint32_t *ih_ring_entry,
|
||||||
|
uint32_t *patched_ihre,
|
||||||
|
bool *patched_flag)
|
||||||
{
|
{
|
||||||
uint16_t source_id, client_id, pasid, vmid;
|
uint16_t source_id, client_id, pasid, vmid;
|
||||||
const uint32_t *data = ih_ring_entry;
|
const uint32_t *data = ih_ring_entry;
|
||||||
|
@ -151,13 +151,15 @@ static void interrupt_wq(struct work_struct *work)
|
|||||||
ih_ring_entry);
|
ih_ring_entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry)
|
bool interrupt_is_wanted(struct kfd_dev *dev,
|
||||||
|
const uint32_t *ih_ring_entry,
|
||||||
|
uint32_t *patched_ihre, bool *flag)
|
||||||
{
|
{
|
||||||
/* integer and bitwise OR so there is no boolean short-circuiting */
|
/* integer and bitwise OR so there is no boolean short-circuiting */
|
||||||
unsigned int wanted = 0;
|
unsigned int wanted = 0;
|
||||||
|
|
||||||
wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
|
wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
|
||||||
ih_ring_entry);
|
ih_ring_entry, patched_ihre, flag);
|
||||||
|
|
||||||
return wanted != 0;
|
return wanted != 0;
|
||||||
}
|
}
|
||||||
|
@ -180,9 +180,10 @@ enum cache_policy {
|
|||||||
|
|
||||||
struct kfd_event_interrupt_class {
|
struct kfd_event_interrupt_class {
|
||||||
bool (*interrupt_isr)(struct kfd_dev *dev,
|
bool (*interrupt_isr)(struct kfd_dev *dev,
|
||||||
const uint32_t *ih_ring_entry);
|
const uint32_t *ih_ring_entry, uint32_t *patched_ihre,
|
||||||
|
bool *patched_flag);
|
||||||
void (*interrupt_wq)(struct kfd_dev *dev,
|
void (*interrupt_wq)(struct kfd_dev *dev,
|
||||||
const uint32_t *ih_ring_entry);
|
const uint32_t *ih_ring_entry);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct kfd_device_info {
|
struct kfd_device_info {
|
||||||
@ -806,7 +807,9 @@ int kfd_interrupt_init(struct kfd_dev *dev);
|
|||||||
void kfd_interrupt_exit(struct kfd_dev *dev);
|
void kfd_interrupt_exit(struct kfd_dev *dev);
|
||||||
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
|
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
|
||||||
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
|
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
|
||||||
bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry);
|
bool interrupt_is_wanted(struct kfd_dev *dev,
|
||||||
|
const uint32_t *ih_ring_entry,
|
||||||
|
uint32_t *patched_ihre, bool *flag);
|
||||||
|
|
||||||
/* Power Management */
|
/* Power Management */
|
||||||
void kgd2kfd_suspend(struct kfd_dev *kfd);
|
void kgd2kfd_suspend(struct kfd_dev *kfd);
|
||||||
|
@ -276,6 +276,10 @@ struct tile_config {
|
|||||||
* faults. On GFXv9 VM fault information is fully contained in the IH
|
* faults. On GFXv9 VM fault information is fully contained in the IH
|
||||||
* packet and this function is not needed.
|
* packet and this function is not needed.
|
||||||
*
|
*
|
||||||
|
* @read_vmid_from_vmfault_reg: On Hawaii the VMID is not set in the
|
||||||
|
* IH ring entry. This function allows the KFD ISR to get the VMID
|
||||||
|
* from the fault status register as early as possible.
|
||||||
|
*
|
||||||
* This structure contains function pointers to services that the kgd driver
|
* This structure contains function pointers to services that the kgd driver
|
||||||
* provides to amdkfd driver.
|
* provides to amdkfd driver.
|
||||||
*
|
*
|
||||||
@ -394,6 +398,7 @@ struct kfd2kgd_calls {
|
|||||||
|
|
||||||
int (*get_vm_fault_info)(struct kgd_dev *kgd,
|
int (*get_vm_fault_info)(struct kgd_dev *kgd,
|
||||||
struct kfd_vm_fault_info *info);
|
struct kfd_vm_fault_info *info);
|
||||||
|
uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
Reference in New Issue
Block a user