drm/amdkfd: fix dma mapping leaking warning

For xnack off, restore work dma unmap previous system memory page, and
dma map the updated system memory page to update GPU mapping, this is
not dma mapping leaking, remove the WARN_ONCE for dma mapping leaking.

prange->dma_addr store the VRAM page pfn after the range migrated to
VRAM, should not dma unmap VRAM page when updating GPU mapping or
remove prange. Add helper svm_is_valid_dma_mapping_addr to check VRAM
page and error cases.

Mask out SVM_RANGE_VRAM_DOMAIN flag in dma_addr before calling amdgpu vm
update to avoid BUG_ON(*addr & 0xFFFF00000000003FULL), and set it again
immediately after. This flag is used to know the type of page later to
dma unmapping system memory page.

Fixes: 1d5dbfe6c0 ("drm/amdkfd: classify and map mixed svm range pages in GPU")
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Philip Yang 2021-09-14 16:33:40 -04:00 committed by Alex Deucher
parent 7beb26dced
commit f63251184a

View File

@ -118,6 +118,13 @@ static void svm_range_remove_notifier(struct svm_range *prange)
mmu_interval_notifier_remove(&prange->notifier);
}
static bool
svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr)
{
return dma_addr && !dma_mapping_error(dev, dma_addr) &&
!(dma_addr & SVM_RANGE_VRAM_DOMAIN);
}
static int
svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
unsigned long offset, unsigned long npages,
@ -139,8 +146,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
addr += offset;
for (i = 0; i < npages; i++) {
if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
"leaking dma mapping\n"))
if (svm_is_valid_dma_mapping_addr(dev, addr[i]))
dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
page = hmm_pfn_to_page(hmm_pfns[i]);
@ -209,7 +215,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
return;
for (i = offset; i < offset + npages; i++) {
if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i]))
if (!svm_is_valid_dma_mapping_addr(dev, dma_addr[i]))
continue;
pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT);
dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir);
@ -1165,7 +1171,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned long last_start;
int last_domain;
int r = 0;
int64_t i;
int64_t i, j;
last_start = prange->start + offset;
@ -1205,6 +1211,10 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
NULL, dma_addr,
&vm->last_update,
&table_freed);
for (j = last_start - prange->start; j <= i; j++)
dma_addr[j] |= last_domain;
if (r) {
pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
goto out;