4d77f36f2c
If one GTT BO has been evicted/swapped out, it should sit in the CPU domain. TTM only allocates a struct ttm_resource, not a struct ttm_range_mgr_node, for system memory. So when we update the mapping for such an invalidated BO, we may walk out of bounds of the struct ttm_resource.

Three possible fixes:
1) Let the system memory manager allocate a struct ttm_range_mgr_node, like ttm_range_manager does.
2) Pass pages_addr to the update_mapping function too, but then pages_addr[] needs to be memset to zero on unpopulate.
3) Initialize the amdgpu_res_cursor directly.

This patch takes option 3 and initializes the cursor directly for resources in the system domain.

The bug was detected by KFENCE:

==================================================================
BUG: KFENCE: out-of-bounds read in amdgpu_vm_bo_update_mapping+0x564/0x6e0

Out-of-bounds read at 0x000000008ea93fe9 (64B right of kfence-#167):
 amdgpu_vm_bo_update_mapping+0x564/0x6e0 [amdgpu]
 amdgpu_vm_bo_update+0x282/0xa40 [amdgpu]
 amdgpu_vm_handle_moved+0x19e/0x1f0 [amdgpu]
 amdgpu_cs_vm_handling+0x4e4/0x640 [amdgpu]
 amdgpu_cs_ioctl+0x19e7/0x23c0 [amdgpu]
 drm_ioctl_kernel+0xf3/0x180 [drm]
 drm_ioctl+0x2cb/0x550 [drm]
 amdgpu_drm_ioctl+0x5e/0xb0 [amdgpu]

kfence-#167 [0x000000008e11c055-0x000000001f676b3e
 ttm_sys_man_alloc+0x35/0x80 [ttm]
 ttm_resource_alloc+0x39/0x50 [ttm]
 ttm_bo_swapout+0x252/0x5a0 [ttm]
 ttm_device_swapout+0x107/0x180 [ttm]
 ttm_global_swapout+0x6f/0x130 [ttm]
 ttm_tt_populate+0xb1/0x2a0 [ttm]
 ttm_bo_handle_move_mem+0x17e/0x1d0 [ttm]
 ttm_mem_evict_first+0x59d/0x9c0 [ttm]
 ttm_bo_mem_space+0x39f/0x400 [ttm]
 ttm_bo_validate+0x13c/0x340 [ttm]
 ttm_bo_init_reserved+0x269/0x540 [ttm]
 amdgpu_bo_create+0x1d1/0xa30 [amdgpu]
 amdgpu_bo_create_user+0x40/0x80 [amdgpu]
 amdgpu_gem_object_create+0x71/0xc0 [amdgpu]
 amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x2f2/0xcd0 [amdgpu]
 kfd_ioctl_alloc_memory_of_gpu+0xe2/0x330 [amdgpu]
 kfd_ioctl+0x461/0x690 [amdgpu]

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
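A minimal sketch (not the actual driver code; the real consumer, amdgpu_vm_bo_update_mapping(), is considerably more involved, and example_walk_resource() is a made-up name) of how a caller walks a resource with the cursor after this change. For a swapped-out BO the backing resource is a bare struct ttm_resource in TTM_PL_SYSTEM, so amdgpu_res_first() now sets up a plain linear range with node == NULL instead of blindly casting to struct ttm_range_mgr_node:

#include <linux/printk.h>
#include "amdgpu_res_cursor.h"

static void example_walk_resource(struct ttm_resource *res, uint64_t size)
{
	struct amdgpu_res_cursor cursor;

	amdgpu_res_first(res, 0, size, &cursor);
	while (cursor.remaining) {
		/* For NULL/TTM_PL_SYSTEM resources cursor.node stays NULL and
		 * cursor.start is just the byte offset into the BO, which the
		 * caller resolves through its own pages_addr[] array. */
		pr_debug("chunk: start=%llu size=%llu (%s)\n",
			 cursor.start, cursor.size,
			 cursor.node ? "range managed" : "system");
		amdgpu_res_next(&cursor, cursor.size);
	}
}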
108 lines
3.1 KiB
C
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Christian König
 */

#ifndef __AMDGPU_RES_CURSOR_H__
#define __AMDGPU_RES_CURSOR_H__

#include <drm/drm_mm.h>
#include <drm/ttm/ttm_resource.h>
#include <drm/ttm/ttm_range_manager.h>

/* state for walking over vram_mgr and gtt_mgr allocations */
struct amdgpu_res_cursor {
	uint64_t		start;
	uint64_t		size;
	uint64_t		remaining;
	struct drm_mm_node	*node;
};

/**
 * amdgpu_res_first - initialize an amdgpu_res_cursor
 *
 * @res: TTM resource object to walk
 * @start: Start of the range
 * @size: Size of the range
 * @cur: cursor object to initialize
 *
 * Start walking over the range of allocations between @start and @size.
 */
static inline void amdgpu_res_first(struct ttm_resource *res,
				    uint64_t start, uint64_t size,
				    struct amdgpu_res_cursor *cur)
{
	struct drm_mm_node *node;

	if (!res || res->mem_type == TTM_PL_SYSTEM) {
		cur->start = start;
		cur->size = size;
		cur->remaining = size;
		cur->node = NULL;
		WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
		return;
	}

	BUG_ON(start + size > res->num_pages << PAGE_SHIFT);

	node = to_ttm_range_mgr_node(res)->mm_nodes;
	while (start >= node->size << PAGE_SHIFT)
		start -= node++->size << PAGE_SHIFT;

	cur->start = (node->start << PAGE_SHIFT) + start;
	cur->size = min((node->size << PAGE_SHIFT) - start, size);
	cur->remaining = size;
	cur->node = node;
}
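
/*
 * Illustrative sketch, not part of the original header: looking up the
 * physical start of an arbitrary, in-bounds byte offset inside a
 * range-managed (VRAM/GTT) resource by walking a one-byte range. The
 * node-skipping loop in amdgpu_res_first() above handles resources that are
 * split across several drm_mm_nodes. For NULL/TTM_PL_SYSTEM resources the
 * returned value is simply the byte offset itself, not a physical address.
 */
static inline uint64_t amdgpu_res_example_offset_to_phys(struct ttm_resource *res,
							 uint64_t offset)
{
	struct amdgpu_res_cursor cur;

	amdgpu_res_first(res, offset, 1, &cur);
	return cur.start;
}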

/**
 * amdgpu_res_next - advance the cursor
 *
 * @cur: the cursor to advance
 * @size: number of bytes to move forward
 *
 * Move the cursor @size bytes forward, walking to the next node if necessary.
 */
static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
{
	struct drm_mm_node *node = cur->node;

	BUG_ON(size > cur->remaining);

	cur->remaining -= size;
	if (!cur->remaining)
		return;

	cur->size -= size;
	if (cur->size) {
		cur->start += size;
		return;
	}

	cur->node = ++node;
	cur->start = node->start << PAGE_SHIFT;
	cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
}
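
/*
 * Illustrative sketch, not part of the original header: amdgpu_res_next()
 * also accepts steps smaller than the current block, so a caller can walk a
 * resource one page at a time; stepping into the next drm_mm_node is handled
 * transparently. Assumes @size is page aligned (BO sizes are), otherwise the
 * final step would trip the BUG_ON above. Returns the number of pages seen.
 */
static inline unsigned long amdgpu_res_example_count_pages(struct ttm_resource *res,
							   uint64_t size)
{
	struct amdgpu_res_cursor cur;
	unsigned long pages = 0;

	for (amdgpu_res_first(res, 0, size, &cur); cur.remaining;
	     amdgpu_res_next(&cur, PAGE_SIZE))
		pages++;

	return pages;
}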

#endif