Merge tag 'drm-amdkfd-next-2018-05-14' of git://people.freedesktop.org/~gabbayo/linux into drm-next
This is amdkfd pull for 4.18. The major new features are:

- Add support for GFXv9 dGPUs (VEGA)
- Add support for userptr memory mapping

In addition, there are a couple of small fixes and improvements, such as:

- Fix lock handling
- Fix rollback packet in kernel kfd_queue
- Optimize kfd signal handling
- Fix CP hang in APU

Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180514070126.GA1827@odedg-x270
commit c76f0b2cc2
@@ -767,12 +767,14 @@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
F: drivers/gpu/drm/amd/amdkfd/
F: drivers/gpu/drm/amd/include/cik_structs.h
F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h
F: drivers/gpu/drm/amd/include/vi_structs.h
F: drivers/gpu/drm/amd/include/v9_structs.h
F: include/uapi/linux/kfd_ioctl.h

AMD SEATTLE DEVICE TREE SUPPORT
@@ -130,7 +130,8 @@ amdgpu-y += \
	amdgpu_amdkfd.o \
	amdgpu_amdkfd_fence.o \
	amdgpu_amdkfd_gpuvm.o \
	amdgpu_amdkfd_gfx_v8.o
	amdgpu_amdkfd_gfx_v8.o \
	amdgpu_amdkfd_gfx_v9.o

# add cgs
amdgpu-y += amdgpu_cgs.o
@@ -92,6 +92,10 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
	case CHIP_POLARIS11:
		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
		break;
	case CHIP_VEGA10:
	case CHIP_RAVEN:
		kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
		break;
	default:
		dev_dbg(adev->dev, "kfd not supported on this ASIC\n");
		return;
@@ -175,6 +179,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
			&gpu_resources.doorbell_physical_address,
			&gpu_resources.doorbell_aperture_size,
			&gpu_resources.doorbell_start_offset);
		if (adev->asic_type >= CHIP_VEGA10) {
			/* On SOC15 the BIF is involved in routing
			 * doorbells using the low 12 bits of the
			 * address. Communicate the assignments to
			 * KFD. KFD uses two doorbell pages per
			 * process in case of 64-bit doorbells so we
			 * can use each doorbell assignment twice.
			 */
			gpu_resources.sdma_doorbell[0][0] =
				AMDGPU_DOORBELL64_sDMA_ENGINE0;
			gpu_resources.sdma_doorbell[0][1] =
				AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
			gpu_resources.sdma_doorbell[1][0] =
				AMDGPU_DOORBELL64_sDMA_ENGINE1;
			gpu_resources.sdma_doorbell[1][1] =
				AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
			/* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
			 * SDMA, IH and VCN. So don't use them for the CP.
			 */
			gpu_resources.reserved_doorbell_mask = 0x1f0;
			gpu_resources.reserved_doorbell_val = 0x0f0;
		}

		kgd2kfd->device_init(adev->kfd, &gpu_resources);
}
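The reserved mask/value pair above encodes a simple congruence test: a doorbell index is treated as reserved when (index & reserved_doorbell_mask) == reserved_doorbell_val. A minimal sketch of that test (illustrative only, not part of the patch; the helper name is made up):

/* Illustrative: (index & 0x1f0) == 0x0f0 matches exactly 0x0f0-0x0ff
 * and 0x2f0-0x2ff, because the low four bits and bit 9 fall outside
 * the mask.
 */
static inline bool doorbell_index_is_reserved(unsigned int index)
{
	return (index & 0x1f0) == 0x0f0;
}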
@@ -28,6 +28,7 @@
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/workqueue.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include "amdgpu_sync.h"
@@ -59,7 +60,9 @@ struct kgd_mem {

	uint32_t mapping_flags;

	atomic_t invalid;
	struct amdkfd_process_info *process_info;
	struct page **user_pages;

	struct amdgpu_sync sync;

@@ -84,6 +87,9 @@ struct amdkfd_process_info {
	struct list_head vm_list_head;
	/* List head for all KFD BOs that belong to a KFD process. */
	struct list_head kfd_bo_list;
	/* List of userptr BOs that are valid or invalid */
	struct list_head userptr_valid_list;
	struct list_head userptr_inval_list;
	/* Lock to protect kfd_bo_list */
	struct mutex lock;

@@ -91,6 +97,11 @@ struct amdkfd_process_info {
	unsigned int n_vms;
	/* Eviction Fence */
	struct amdgpu_amdkfd_fence *eviction_fence;

	/* MMU-notifier related fields */
	atomic_t evicted_bos;
	struct delayed_work restore_userptr_work;
	struct pid *pid;
};

int amdgpu_amdkfd_init(void);
@@ -104,12 +115,14 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);

int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
				uint32_t vmid, uint64_t gpu_addr,
				uint32_t *ib_cmd, uint32_t ib_len);

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
@@ -98,8 +98,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid);

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
@@ -183,7 +181,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
	.free_pasid = amdgpu_pasid_free,
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_pipeline = kgd_init_pipeline,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
@@ -309,13 +306,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
	return 0;
}

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
	/* amdgpu owns the per-pipe state */
	return 0;
}

static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
@@ -57,8 +57,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
			uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid);
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
@@ -141,7 +139,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
	.free_pasid = amdgpu_pasid_free,
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_pipeline = kgd_init_pipeline,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
@@ -270,13 +267,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
	return 0;
}

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
	/* amdgpu owns the per-pipe state */
	return 0;
}

static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
1043	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c	(new file)
File diff suppressed because it is too large
@@ -23,6 +23,7 @@
#define pr_fmt(fmt) "kfd2kgd: " fmt

#include <linux/list.h>
#include <linux/sched/mm.h>
#include <drm/drmP.h>
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
@@ -33,10 +34,20 @@
 */
#define VI_BO_SIZE_ALIGN (0x8000)

/* BO flag to indicate a KFD userptr BO */
#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)

/* Userptr restore delay, just long enough to allow consecutive VM
 * changes to accumulate
 */
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1

/* Impose limit on how much memory KFD can use */
static struct {
	uint64_t max_system_mem_limit;
	uint64_t max_userptr_mem_limit;
	int64_t system_mem_used;
	int64_t userptr_mem_used;
	spinlock_t mem_limit_lock;
} kfd_mem_limit;

@@ -57,6 +68,7 @@ static const char * const domain_bit_to_string[] = {

#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]

static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);


static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@ -78,6 +90,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,

/* Set memory usage limits. Current, limits are
 * System (kernel) memory - 3/8th System RAM
 * Userptr memory - 3/4th System RAM
 */
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
@@ -90,8 +103,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)

	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
	kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
	pr_debug("Kernel memory limit %lluM\n",
		(kfd_mem_limit.max_system_mem_limit >> 20));
	kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
	pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
		(kfd_mem_limit.max_system_mem_limit >> 20),
		(kfd_mem_limit.max_userptr_mem_limit >> 20));
}

static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
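To put the limit arithmetic in concrete terms: (mem >> 1) - (mem >> 3) is 3/8 of system RAM and mem - (mem >> 2) is 3/4 of it. A self-contained sketch of the arithmetic (illustrative only, not kernel code; the 16 GiB figure is an assumption):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t mem = 16ULL << 30;		/* assume 16 GiB of system RAM */
	uint64_t sys = (mem >> 1) - (mem >> 3);	/* mem/2 - mem/8 = 3/8 of RAM */
	uint64_t usr = mem - (mem >> 2);	/* mem - mem/4  = 3/4 of RAM */

	/* prints: system limit 6144M, userptr limit 12288M */
	printf("system limit %lluM, userptr limit %lluM\n",
	       (unsigned long long)(sys >> 20),
	       (unsigned long long)(usr >> 20));
	return 0;
}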
@@ -111,6 +126,16 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
			goto err_no_mem;
		}
		kfd_mem_limit.system_mem_used += (acc_size + size);
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
		if ((kfd_mem_limit.system_mem_used + acc_size >
			kfd_mem_limit.max_system_mem_limit) ||
			(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
			kfd_mem_limit.max_userptr_mem_limit)) {
			ret = -ENOMEM;
			goto err_no_mem;
		}
		kfd_mem_limit.system_mem_used += acc_size;
		kfd_mem_limit.userptr_mem_used += size;
	}
err_no_mem:
	spin_unlock(&kfd_mem_limit.mem_limit_lock);
@@ -126,10 +151,16 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
			sizeof(struct amdgpu_bo));

	spin_lock(&kfd_mem_limit.mem_limit_lock);
	if (domain == AMDGPU_GEM_DOMAIN_GTT)
	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		kfd_mem_limit.system_mem_used -= (acc_size + size);
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
		kfd_mem_limit.system_mem_used -= acc_size;
		kfd_mem_limit.userptr_mem_used -= size;
	}
	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
		"kfd system memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
		"kfd userptr memory accounting unbalanced");

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -138,12 +169,17 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
{
	spin_lock(&kfd_mem_limit.mem_limit_lock);

	if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
		kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
	} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
		kfd_mem_limit.system_mem_used -=
			(bo->tbo.acc_size + amdgpu_bo_size(bo));
	}
	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
		"kfd system memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
		"kfd userptr memory accounting unbalanced");

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -506,7 +542,8 @@ static void remove_bo_from_vm(struct amdgpu_device *adev,
}

static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
				struct amdkfd_process_info *process_info)
				struct amdkfd_process_info *process_info,
				bool userptr)
{
	struct ttm_validate_buffer *entry = &mem->validate_list;
	struct amdgpu_bo *bo = mem->bo;
@@ -515,10 +552,95 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
	entry->shared = true;
	entry->bo = &bo->tbo;
	mutex_lock(&process_info->lock);
	list_add_tail(&entry->head, &process_info->kfd_bo_list);
	if (userptr)
		list_add_tail(&entry->head, &process_info->userptr_valid_list);
	else
		list_add_tail(&entry->head, &process_info->kfd_bo_list);
	mutex_unlock(&process_info->lock);
}

/* Initializes user pages. It registers the MMU notifier and validates
 * the userptr BO in the GTT domain.
 *
 * The BO must already be on the userptr_valid_list. Otherwise an
 * eviction and restore may happen that leaves the new BO unmapped
 * with the user mode queues running.
 *
 * Takes the process_info->lock to protect against concurrent restore
 * workers.
 *
 * Returns 0 for success, negative errno for errors.
 */
static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
			uint64_t user_addr)
{
	struct amdkfd_process_info *process_info = mem->process_info;
	struct amdgpu_bo *bo = mem->bo;
	struct ttm_operation_ctx ctx = { true, false };
	int ret = 0;

	mutex_lock(&process_info->lock);

	ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
	if (ret) {
		pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
		goto out;
	}

	ret = amdgpu_mn_register(bo, user_addr);
	if (ret) {
		pr_err("%s: Failed to register MMU notifier: %d\n",
			__func__, ret);
		goto out;
	}

	/* If no restore worker is running concurrently, user_pages
	 * should not be allocated
	 */
	WARN(mem->user_pages, "Leaking user_pages array");

	mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
					sizeof(struct page *),
					GFP_KERNEL | __GFP_ZERO);
	if (!mem->user_pages) {
		pr_err("%s: Failed to allocate pages array\n", __func__);
		ret = -ENOMEM;
		goto unregister_out;
	}

	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
	if (ret) {
		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
		goto free_out;
	}

	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);

	ret = amdgpu_bo_reserve(bo, true);
	if (ret) {
		pr_err("%s: Failed to reserve BO\n", __func__);
		goto release_out;
	}
	amdgpu_ttm_placement_from_domain(bo, mem->domain);
	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (ret)
		pr_err("%s: failed to validate BO\n", __func__);
	amdgpu_bo_unreserve(bo);

release_out:
	if (ret)
		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
free_out:
	kvfree(mem->user_pages);
	mem->user_pages = NULL;
unregister_out:
	if (ret)
		amdgpu_mn_unregister(bo);
out:
	mutex_unlock(&process_info->lock);
	return ret;
}

/* Reserving a BO and its page table BOs must happen atomically to
 * avoid deadlocks. Some operations update multiple VMs at once. Track
 * all the reservation info in a context structure. Optionally a sync
@@ -748,7 +870,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
}

static int map_bo_to_gpuvm(struct amdgpu_device *adev,
		struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
		struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
		bool no_update_pte)
{
	int ret;

@@ -762,6 +885,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
		return ret;
	}

	if (no_update_pte)
		return 0;

	ret = update_gpuvm_pte(adev, entry, sync);
	if (ret) {
		pr_err("update_gpuvm_pte() failed\n");
@@ -820,6 +946,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
	mutex_init(&info->lock);
	INIT_LIST_HEAD(&info->vm_list_head);
	INIT_LIST_HEAD(&info->kfd_bo_list);
	INIT_LIST_HEAD(&info->userptr_valid_list);
	INIT_LIST_HEAD(&info->userptr_inval_list);

	info->eviction_fence =
		amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
@@ -830,6 +958,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
		goto create_evict_fence_fail;
	}

	info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
	atomic_set(&info->evicted_bos, 0);
	INIT_DELAYED_WORK(&info->restore_userptr_work,
			amdgpu_amdkfd_restore_userptr_worker);

	*process_info = info;
	*ef = dma_fence_get(&info->eviction_fence->base);
}
@@ -872,6 +1005,7 @@ reserve_pd_fail:
	dma_fence_put(*ef);
	*ef = NULL;
	*process_info = NULL;
	put_pid(info->pid);
create_evict_fence_fail:
	mutex_destroy(&info->lock);
	kfree(info);
@@ -967,8 +1101,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
	/* Release per-process resources when last compute VM is destroyed */
	if (!process_info->n_vms) {
		WARN_ON(!list_empty(&process_info->kfd_bo_list));
		WARN_ON(!list_empty(&process_info->userptr_valid_list));
		WARN_ON(!list_empty(&process_info->userptr_inval_list));

		dma_fence_put(&process_info->eviction_fence->base);
		cancel_delayed_work_sync(&process_info->restore_userptr_work);
		put_pid(process_info->pid);
		mutex_destroy(&process_info->lock);
		kfree(process_info);
	}
@@ -1003,9 +1141,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
	uint64_t user_addr = 0;
	struct amdgpu_bo *bo;
	int byte_align;
	u32 alloc_domain;
	u32 domain, alloc_domain;
	u64 alloc_flags;
	uint32_t mapping_flags;
	int ret;
@@ -1014,14 +1153,21 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
	 * Check on which domain to allocate BO
	 */
	if (flags & ALLOC_MEM_FLAGS_VRAM) {
		alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
		domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
		alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
		alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
			AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	} else if (flags & ALLOC_MEM_FLAGS_GTT) {
		alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
		domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
		alloc_flags = 0;
	} else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
		domain = AMDGPU_GEM_DOMAIN_GTT;
		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
		alloc_flags = 0;
		if (!offset || !*offset)
			return -EINVAL;
		user_addr = *offset;
	} else {
		return -EINVAL;
	}
@@ -1078,18 +1224,34 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
	}
	bo->kfd_bo = *mem;
	(*mem)->bo = bo;
	if (user_addr)
		bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;

	(*mem)->va = va;
	(*mem)->domain = alloc_domain;
	(*mem)->domain = domain;
	(*mem)->mapped_to_gpu_memory = 0;
	(*mem)->process_info = avm->process_info;
	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);

	if (user_addr) {
		ret = init_user_pages(*mem, current->mm, user_addr);
		if (ret) {
			mutex_lock(&avm->process_info->lock);
			list_del(&(*mem)->validate_list.head);
			mutex_unlock(&avm->process_info->lock);
			goto allocate_init_user_pages_failed;
		}
	}

	if (offset)
		*offset = amdgpu_bo_mmap_offset(bo);

	return 0;

allocate_init_user_pages_failed:
	amdgpu_bo_unref(&bo);
	/* Don't unreserve system mem limit twice */
	goto err_reserve_system_mem;
err_bo_create:
	unreserve_system_mem_limit(adev, size, alloc_domain);
err_reserve_system_mem:
@@ -1122,12 +1284,24 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
	 * be freed anyway
	 */

	/* No more MMU notifiers */
	amdgpu_mn_unregister(mem->bo);

	/* Make sure restore workers don't access the BO any more */
	bo_list_entry = &mem->validate_list;
	mutex_lock(&process_info->lock);
	list_del(&bo_list_entry->head);
	mutex_unlock(&process_info->lock);

	/* Free user pages if necessary */
	if (mem->user_pages) {
		pr_debug("%s: Freeing user_pages array\n", __func__);
		if (mem->user_pages[0])
			release_pages(mem->user_pages,
					mem->bo->tbo.ttm->num_pages);
		kvfree(mem->user_pages);
	}

	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
	if (unlikely(ret))
		return ret;
@@ -1173,21 +1347,32 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
	struct kfd_bo_va_list *bo_va_entry = NULL;
	struct kfd_bo_va_list *bo_va_entry_aql = NULL;
	unsigned long bo_size;
	bool is_invalid_userptr = false;

	/* Make sure restore is not running concurrently.
	bo = mem->bo;
	if (!bo) {
		pr_err("Invalid BO when mapping memory to GPU\n");
		return -EINVAL;
	}

	/* Make sure restore is not running concurrently. Since we
	 * don't map invalid userptr BOs, we rely on the next restore
	 * worker to do the mapping
	 */
	mutex_lock(&mem->process_info->lock);

	mutex_lock(&mem->lock);

	bo = mem->bo;

	if (!bo) {
		pr_err("Invalid BO when mapping memory to GPU\n");
		ret = -EINVAL;
		goto out;
	/* Lock mmap-sem. If we find an invalid userptr BO, we can be
	 * sure that the MMU notifier is no longer running
	 * concurrently and the queues are actually stopped
	 */
	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		down_write(&current->mm->mmap_sem);
		is_invalid_userptr = atomic_read(&mem->invalid);
		up_write(&current->mm->mmap_sem);
	}

	mutex_lock(&mem->lock);

	domain = mem->domain;
	bo_size = bo->tbo.mem.size;

@@ -1200,6 +1385,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
	if (unlikely(ret))
		goto out;

	/* Userptr can be marked as "not invalid", but not actually be
	 * validated yet (still in the system domain). In that case
	 * the queues are still stopped and we can leave mapping for
	 * the next restore worker
	 */
	if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
		is_invalid_userptr = true;

	if (check_if_add_bo_to_vm(avm, mem)) {
		ret = add_bo_to_vm(adev, mem, avm, false,
				&bo_va_entry);
@@ -1217,7 +1410,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
			goto add_bo_to_vm_failed;
	}

	if (mem->mapped_to_gpu_memory == 0) {
	if (mem->mapped_to_gpu_memory == 0 &&
			!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		/* Validate BO only once. The eviction fence gets added to BO
		 * the first time it is mapped. Validate will wait for all
		 * background evictions to complete.
@@ -1235,7 +1429,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
				entry->va, entry->va + bo_size,
				entry);

		ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
		ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
				is_invalid_userptr);
		if (ret) {
			pr_err("Failed to map radeon bo to gpuvm\n");
			goto map_bo_to_gpuvm_failed;
@@ -1418,6 +1613,337 @@ bo_reserve_failed:
	return ret;
}

/* Evict a userptr BO by stopping the queues if necessary
 *
 * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
 * cannot do any memory allocations, and cannot take any locks that
 * are held elsewhere while allocating memory. Therefore this is as
 * simple as possible, using atomic counters.
 *
 * It doesn't do anything to the BO itself. The real work happens in
 * restore, where we get updated page addresses. This function only
 * ensures that GPU access to the BO is stopped.
 */
int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
				struct mm_struct *mm)
{
	struct amdkfd_process_info *process_info = mem->process_info;
	int invalid, evicted_bos;
	int r = 0;

	invalid = atomic_inc_return(&mem->invalid);
	evicted_bos = atomic_inc_return(&process_info->evicted_bos);
	if (evicted_bos == 1) {
		/* First eviction, stop the queues */
		r = kgd2kfd->quiesce_mm(mm);
		if (r)
			pr_err("Failed to quiesce KFD\n");
		schedule_delayed_work(&process_info->restore_userptr_work,
			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
	}

	return r;
}
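The eviction path above is deliberately minimal: each eviction bumps atomic counters, only the 0 -> 1 transition of evicted_bos quiesces the queues, and the restore worker later resets the counter with a compare-exchange so a racing eviction forces another pass instead of resuming too early. A stripped-down model of that handshake (illustrative only; on_evict and try_resume are hypothetical names, this is not the kernel code):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int evicted_bos;

/* Eviction side: cheap enough for MMU-notifier context. */
void on_evict(void)
{
	if (atomic_fetch_add(&evicted_bos, 1) == 0) {
		/* first eviction: quiesce the user mode queues */
	}
}

/* Restore side: 'seen' is the counter value read before revalidating. */
bool try_resume(int seen)
{
	if (!atomic_compare_exchange_strong(&evicted_bos, &seen, 0))
		return false;	/* concurrent eviction: reschedule */
	/* no eviction raced with us: resume the user mode queues */
	return true;
}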
/* Update invalid userptr BOs
 *
 * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
 * userptr_inval_list and updates user pages for all BOs that have
 * been invalidated since their last update.
 */
static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
				struct mm_struct *mm)
{
	struct kgd_mem *mem, *tmp_mem;
	struct amdgpu_bo *bo;
	struct ttm_operation_ctx ctx = { false, false };
	int invalid, ret;

	/* Move all invalidated BOs to the userptr_inval_list and
	 * release their user pages by migration to the CPU domain
	 */
	list_for_each_entry_safe(mem, tmp_mem,
				&process_info->userptr_valid_list,
				validate_list.head) {
		if (!atomic_read(&mem->invalid))
			continue; /* BO is still valid */

		bo = mem->bo;

		if (amdgpu_bo_reserve(bo, true))
			return -EAGAIN;
		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
		amdgpu_bo_unreserve(bo);
		if (ret) {
			pr_err("%s: Failed to invalidate userptr BO\n",
				__func__);
			return -EAGAIN;
		}

		list_move_tail(&mem->validate_list.head,
				&process_info->userptr_inval_list);
	}

	if (list_empty(&process_info->userptr_inval_list))
		return 0; /* All evicted userptr BOs were freed */

	/* Go through userptr_inval_list and update any invalid user_pages */
	list_for_each_entry(mem, &process_info->userptr_inval_list,
			validate_list.head) {
		invalid = atomic_read(&mem->invalid);
		if (!invalid)
			/* BO hasn't been invalidated since the last
			 * revalidation attempt. Keep its BO list.
			 */
			continue;

		bo = mem->bo;

		if (!mem->user_pages) {
			mem->user_pages =
				kvmalloc_array(bo->tbo.ttm->num_pages,
						sizeof(struct page *),
						GFP_KERNEL | __GFP_ZERO);
			if (!mem->user_pages) {
				pr_err("%s: Failed to allocate pages array\n",
					__func__);
				return -ENOMEM;
			}
		} else if (mem->user_pages[0]) {
			release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
		}

		/* Get updated user pages */
		ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
						mem->user_pages);
		if (ret) {
			mem->user_pages[0] = NULL;
			pr_info("%s: Failed to get user pages: %d\n",
				__func__, ret);
			/* Pretend it succeeded. It will fail later
			 * with a VM fault if the GPU tries to access
			 * it. Better than hanging indefinitely with
			 * stalled user mode queues.
			 */
		}

		/* Mark the BO as valid unless it was invalidated
		 * again concurrently
		 */
		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
			return -EAGAIN;
	}

	return 0;
}

/* Validate invalid userptr BOs
 *
 * Validates BOs on the userptr_inval_list, and moves them back to the
 * userptr_valid_list. Also updates GPUVM page tables with new page
 * addresses and waits for the page table updates to complete.
 */
static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
{
	struct amdgpu_bo_list_entry *pd_bo_list_entries;
	struct list_head resv_list, duplicates;
	struct ww_acquire_ctx ticket;
	struct amdgpu_sync sync;

	struct amdgpu_vm *peer_vm;
	struct kgd_mem *mem, *tmp_mem;
	struct amdgpu_bo *bo;
	struct ttm_operation_ctx ctx = { false, false };
	int i, ret;

	pd_bo_list_entries = kcalloc(process_info->n_vms,
				sizeof(struct amdgpu_bo_list_entry),
				GFP_KERNEL);
	if (!pd_bo_list_entries) {
		pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
		return -ENOMEM;
	}

	INIT_LIST_HEAD(&resv_list);
	INIT_LIST_HEAD(&duplicates);

	/* Get all the page directory BOs that need to be reserved */
	i = 0;
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			vm_list_node)
		amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
				&pd_bo_list_entries[i++]);
	/* Add the userptr_inval_list entries to resv_list */
	list_for_each_entry(mem, &process_info->userptr_inval_list,
			validate_list.head) {
		list_add_tail(&mem->resv_list.head, &resv_list);
		mem->resv_list.bo = mem->validate_list.bo;
		mem->resv_list.shared = mem->validate_list.shared;
	}

	/* Reserve all BOs and page tables for validation */
	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
	WARN(!list_empty(&duplicates), "Duplicates should be empty");
	if (ret)
		goto out;

	amdgpu_sync_create(&sync);

	/* Avoid triggering eviction fences when unmapping invalid
	 * userptr BOs (waits for all fences, doesn't use
	 * FENCE_OWNER_VM)
	 */
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			vm_list_node)
		amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
						process_info->eviction_fence,
						NULL, NULL);

	ret = process_validate_vms(process_info);
	if (ret)
		goto unreserve_out;

	/* Validate BOs and update GPUVM page tables */
	list_for_each_entry_safe(mem, tmp_mem,
				&process_info->userptr_inval_list,
				validate_list.head) {
		struct kfd_bo_va_list *bo_va_entry;

		bo = mem->bo;

		/* Copy pages array and validate the BO if we got user pages */
		if (mem->user_pages[0]) {
			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
						mem->user_pages);
			amdgpu_ttm_placement_from_domain(bo, mem->domain);
			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			if (ret) {
				pr_err("%s: failed to validate BO\n", __func__);
				goto unreserve_out;
			}
		}

		/* Validate succeeded, now the BO owns the pages, free
		 * our copy of the pointer array. Put this BO back on
		 * the userptr_valid_list. If we need to revalidate
		 * it, we need to start from scratch.
		 */
		kvfree(mem->user_pages);
		mem->user_pages = NULL;
		list_move_tail(&mem->validate_list.head,
				&process_info->userptr_valid_list);

		/* Update mapping. If the BO was not validated
		 * (because we couldn't get user pages), this will
		 * clear the page table entries, which will result in
		 * VM faults if the GPU tries to access the invalid
		 * memory.
		 */
		list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
			if (!bo_va_entry->is_mapped)
				continue;

			ret = update_gpuvm_pte((struct amdgpu_device *)
					bo_va_entry->kgd_dev,
					bo_va_entry, &sync);
			if (ret) {
				pr_err("%s: update PTE failed\n", __func__);
				/* make sure this gets validated again */
				atomic_inc(&mem->invalid);
				goto unreserve_out;
			}
		}
	}

	/* Update page directories */
	ret = process_update_pds(process_info, &sync);

unreserve_out:
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			vm_list_node)
		amdgpu_bo_fence(peer_vm->root.base.bo,
				&process_info->eviction_fence->base, true);
	ttm_eu_backoff_reservation(&ticket, &resv_list);
	amdgpu_sync_wait(&sync, false);
	amdgpu_sync_free(&sync);
out:
	kfree(pd_bo_list_entries);

	return ret;
}

/* Worker callback to restore evicted userptr BOs
 *
 * Tries to update and validate all userptr BOs. If successful and no
 * concurrent evictions happened, the queues are restarted. Otherwise,
 * reschedule for another attempt later.
 */
static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct amdkfd_process_info *process_info =
		container_of(dwork, struct amdkfd_process_info,
			restore_userptr_work);
	struct task_struct *usertask;
	struct mm_struct *mm;
	int evicted_bos;

	evicted_bos = atomic_read(&process_info->evicted_bos);
	if (!evicted_bos)
		return;

	/* Reference task and mm in case of concurrent process termination */
	usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
	if (!usertask)
		return;
	mm = get_task_mm(usertask);
	if (!mm) {
		put_task_struct(usertask);
		return;
	}

	mutex_lock(&process_info->lock);

	if (update_invalid_user_pages(process_info, mm))
		goto unlock_out;
	/* userptr_inval_list can be empty if all evicted userptr BOs
	 * have been freed. In that case there is nothing to validate
	 * and we can just restart the queues.
	 */
	if (!list_empty(&process_info->userptr_inval_list)) {
		if (atomic_read(&process_info->evicted_bos) != evicted_bos)
			goto unlock_out; /* Concurrent eviction, try again */

		if (validate_invalid_user_pages(process_info))
			goto unlock_out;
	}
	/* Final check for concurrent evicton and atomic update. If
	 * another eviction happens after successful update, it will
	 * be a first eviction that calls quiesce_mm. The eviction
	 * reference counting inside KFD will handle this case.
	 */
	if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
		evicted_bos)
		goto unlock_out;
	evicted_bos = 0;
	if (kgd2kfd->resume_mm(mm)) {
		pr_err("%s: Failed to resume KFD\n", __func__);
		/* No recovery from this failure. Probably the CP is
		 * hanging. No point trying again.
		 */
	}
unlock_out:
	mutex_unlock(&process_info->lock);
	mmput(mm);
	put_task_struct(usertask);

	/* If validation failed, reschedule another attempt */
	if (evicted_bos)
		schedule_delayed_work(&process_info->restore_userptr_work,
			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
}

/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
 * KFD process identified by process_info
 *
@@ -536,7 +536,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
	if (p->bo_list) {
		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
		if (p->bo_list->first_userptr != p->bo_list->num_entries)
			p->mn = amdgpu_mn_get(p->adev);
			p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
	}

	INIT_LIST_HEAD(&duplicates);
@@ -36,12 +36,14 @@
#include <drm/drm.h>

#include "amdgpu.h"
#include "amdgpu_amdkfd.h"

struct amdgpu_mn {
	/* constant after initialisation */
	struct amdgpu_device *adev;
	struct mm_struct *mm;
	struct mmu_notifier mn;
	enum amdgpu_mn_type type;

	/* only used on destruction */
	struct work_struct work;
@@ -185,7 +187,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
}

/**
 * amdgpu_mn_invalidate_range_start - callback to notify about mm change
 * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
 *
 * @mn: our notifier
 * @mn: the mm this callback is about
@@ -195,10 +197,10 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 * We block for all BOs between start and end to be idle and
 * unmap them by move them into system domain again.
 */
static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
						struct mm_struct *mm,
						unsigned long start,
						unsigned long end)
static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
						struct mm_struct *mm,
						unsigned long start,
						unsigned long end)
{
	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
	struct interval_tree_node *it;
@@ -219,6 +221,49 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
	}
}

/**
 * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
 *
 * @mn: our notifier
 * @mn: the mm this callback is about
 * @start: start of updated range
 * @end: end of updated range
 *
 * We temporarily evict all BOs between start and end. This
 * necessitates evicting all user-mode queues of the process. The BOs
 * are restorted in amdgpu_mn_invalidate_range_end_hsa.
 */
static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
						struct mm_struct *mm,
						unsigned long start,
						unsigned long end)
{
	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
	struct interval_tree_node *it;

	/* notification is exclusive, but interval is inclusive */
	end -= 1;

	amdgpu_mn_read_lock(rmn);

	it = interval_tree_iter_first(&rmn->objects, start, end);
	while (it) {
		struct amdgpu_mn_node *node;
		struct amdgpu_bo *bo;

		node = container_of(it, struct amdgpu_mn_node, it);
		it = interval_tree_iter_next(it, start, end);

		list_for_each_entry(bo, &node->bos, mn_list) {
			struct kgd_mem *mem = bo->kfd_bo;

			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
							start, end))
				amdgpu_amdkfd_evict_userptr(mem, mm);
		}
	}
}

/**
 * amdgpu_mn_invalidate_range_end - callback to notify about mm change
 *
@@ -239,23 +284,39 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
	amdgpu_mn_read_unlock(rmn);
}

static const struct mmu_notifier_ops amdgpu_mn_ops = {
	.release = amdgpu_mn_release,
	.invalidate_range_start = amdgpu_mn_invalidate_range_start,
	.invalidate_range_end = amdgpu_mn_invalidate_range_end,
static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
	[AMDGPU_MN_TYPE_GFX] = {
		.release = amdgpu_mn_release,
		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
	},
	[AMDGPU_MN_TYPE_HSA] = {
		.release = amdgpu_mn_release,
		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
	},
};

/* Low bits of any reasonable mm pointer will be unused due to struct
 * alignment. Use these bits to make a unique key from the mm pointer
 * and notifier type.
 */
#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
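A quick illustration of why that key is collision-free (illustrative, not part of the patch; the pointer value is invented): struct mm_struct is at least word-aligned, so the low bits of any mm pointer are zero and adding the small enum value (0 or 1) yields distinct keys per (mm, type) pair.

unsigned long mm_addr = 0xffff888012345600UL;	/* hypothetical mm pointer */
unsigned long key_gfx = mm_addr + 0;	/* AMDGPU_MN_KEY(mm, AMDGPU_MN_TYPE_GFX) */
unsigned long key_hsa = mm_addr + 1;	/* AMDGPU_MN_KEY(mm, AMDGPU_MN_TYPE_HSA) */
/* key_gfx != key_hsa, so one process can hold both notifier contexts
 * in adev->mn_hash without them shadowing each other.
 */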
/**
 * amdgpu_mn_get - create notifier context
 *
 * @adev: amdgpu device pointer
 * @type: type of MMU notifier context
 *
 * Creates a notifier context for current->mm.
 */
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
				enum amdgpu_mn_type type)
{
	struct mm_struct *mm = current->mm;
	struct amdgpu_mn *rmn;
	unsigned long key = AMDGPU_MN_KEY(mm, type);
	int r;

	mutex_lock(&adev->mn_lock);
@@ -264,8 +325,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
		return ERR_PTR(-EINTR);
	}

	hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
		if (rmn->mm == mm)
	hash_for_each_possible(adev->mn_hash, rmn, node, key)
		if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key)
			goto release_locks;

	rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
@@ -276,8 +337,9 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)

	rmn->adev = adev;
	rmn->mm = mm;
	rmn->mn.ops = &amdgpu_mn_ops;
	init_rwsem(&rmn->lock);
	rmn->type = type;
	rmn->mn.ops = &amdgpu_mn_ops[type];
	rmn->objects = RB_ROOT_CACHED;
	mutex_init(&rmn->read_lock);
	atomic_set(&rmn->recursion, 0);
@@ -286,7 +348,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
	if (r)
		goto free_rmn;

	hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
	hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type));

release_locks:
	up_write(&mm->mmap_sem);
@@ -315,15 +377,21 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
	unsigned long end = addr + amdgpu_bo_size(bo) - 1;
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	enum amdgpu_mn_type type =
		bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
	struct amdgpu_mn *rmn;
	struct amdgpu_mn_node *node = NULL;
	struct amdgpu_mn_node *node = NULL, *new_node;
	struct list_head bos;
	struct interval_tree_node *it;

	rmn = amdgpu_mn_get(adev);
	rmn = amdgpu_mn_get(adev, type);
	if (IS_ERR(rmn))
		return PTR_ERR(rmn);

	new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
	if (!new_node)
		return -ENOMEM;

	INIT_LIST_HEAD(&bos);

	down_write(&rmn->lock);
@@ -337,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
		list_splice(&node->bos, &bos);
	}

	if (!node) {
		node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
		if (!node) {
			up_write(&rmn->lock);
			return -ENOMEM;
		}
	}
	if (!node)
		node = new_node;
	else
		kfree(new_node);

	bo->mn = rmn;

@@ -29,16 +29,23 @@
 */
struct amdgpu_mn;

enum amdgpu_mn_type {
	AMDGPU_MN_TYPE_GFX,
	AMDGPU_MN_TYPE_HSA,
};

#if defined(CONFIG_MMU_NOTIFIER)
void amdgpu_mn_lock(struct amdgpu_mn *mn);
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev);
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
				enum amdgpu_mn_type type);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
#else
static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
						enum amdgpu_mn_type type)
{
	return NULL;
}
|
||||
struct ttm_dma_tt ttm;
|
||||
u64 offset;
|
||||
uint64_t userptr;
|
||||
struct mm_struct *usermm;
|
||||
struct task_struct *usertask;
|
||||
uint32_t userflags;
|
||||
spinlock_t guptasklock;
|
||||
struct list_head guptasks;
|
||||
@ -706,14 +706,18 @@ struct amdgpu_ttm_tt {
|
||||
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
|
||||
{
|
||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
||||
struct mm_struct *mm = gtt->usertask->mm;
|
||||
unsigned int flags = 0;
|
||||
unsigned pinned = 0;
|
||||
int r;
|
||||
|
||||
if (!mm) /* Happens during process shutdown */
|
||||
return -ESRCH;
|
||||
|
||||
if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
|
||||
flags |= FOLL_WRITE;
|
||||
|
||||
down_read(¤t->mm->mmap_sem);
|
||||
down_read(&mm->mmap_sem);
|
||||
|
||||
if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
|
||||
/* check that we only use anonymous memory
|
||||
@ -721,9 +725,9 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
|
||||
unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
vma = find_vma(gtt->usermm, gtt->userptr);
|
||||
vma = find_vma(mm, gtt->userptr);
|
||||
if (!vma || vma->vm_file || vma->vm_end < end) {
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
up_read(&mm->mmap_sem);
|
||||
return -EPERM;
|
||||
}
|
||||
}
|
||||
@ -739,7 +743,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
|
||||
list_add(&guptask.list, >t->guptasks);
|
||||
spin_unlock(>t->guptasklock);
|
||||
|
||||
r = get_user_pages(userptr, num_pages, flags, p, NULL);
|
||||
if (mm == current->mm)
|
||||
r = get_user_pages(userptr, num_pages, flags, p, NULL);
|
||||
else
|
||||
r = get_user_pages_remote(gtt->usertask,
|
||||
mm, userptr, num_pages,
|
||||
flags, p, NULL, NULL);
|
||||
|
||||
spin_lock(>t->guptasklock);
|
||||
list_del(&guptask.list);
|
||||
@ -752,12 +761,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
|
||||
|
||||
} while (pinned < ttm->num_pages);
|
||||
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
up_read(&mm->mmap_sem);
|
||||
return 0;
|
||||
|
||||
release_pages:
|
||||
release_pages(pages, pinned);
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
up_read(&mm->mmap_sem);
|
||||
return r;
|
||||
}
|
||||
|
||||
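The hunk above chooses between the two GUP flavors based on whose address space is being pinned: the userptr restore worker runs in a kernel worker thread, where current->mm is not the tracked process's mm, so get_user_pages_remote() must be used. A condensed sketch of that choice, reusing the 4.18-era call signatures visible in the diff (pin_pages is a hypothetical helper, not part of the patch):

static long pin_pages(struct task_struct *task, struct mm_struct *mm,
		      unsigned long addr, unsigned long n,
		      unsigned int flags, struct page **pages)
{
	/* caller holds mm->mmap_sem for read, as in the code above */
	if (mm == current->mm)
		/* in the owning process context: plain GUP */
		return get_user_pages(addr, n, flags, pages, NULL);

	/* worker thread: pin pages of another task's address space */
	return get_user_pages_remote(task, mm, addr, n, flags,
				     pages, NULL, NULL);
}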
@@ -978,6 +987,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt->usertask)
		put_task_struct(gtt->usertask);

	ttm_dma_tt_fini(&gtt->ttm);
	kfree(gtt);
}
@@ -1079,8 +1091,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
		return -EINVAL;

	gtt->userptr = addr;
	gtt->usermm = current->mm;
	gtt->userflags = flags;

	if (gtt->usertask)
		put_task_struct(gtt->usertask);
	gtt->usertask = current->group_leader;
	get_task_struct(gtt->usertask);

	spin_lock_init(&gtt->guptasklock);
	INIT_LIST_HEAD(&gtt->guptasks);
	atomic_set(&gtt->mmu_invalidations, 0);
@@ -1096,7 +1113,10 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
	if (gtt == NULL)
		return NULL;

	return gtt->usermm;
	if (gtt->usertask == NULL)
		return NULL;

	return gtt->usertask->mm;
}

bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
@@ -4686,6 +4686,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}
@@ -268,6 +268,11 @@
 * x=1: tmz_end
 */

#define PACKET3_INVALIDATE_TLBS 0x98
#	define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
#	define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
#	define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
#	define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
#define PACKET3_SET_RESOURCES 0xA0
/* 1. header
 * 2. CONTROL
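For reference, the CONTROL dword of an INVALIDATE_TLBS packet is assembled by OR-ing the shifted fields above. A worked example (illustrative only; the field values are invented):

uint32_t control = PACKET3_INVALIDATE_TLBS_DST_SEL(1) |		/* 1 << 0 */
		   PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |		/* 1 << 4 */
		   PACKET3_INVALIDATE_TLBS_PASID(0x42) |	/* 0x42 << 5 */
		   PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0);	/* 0 << 29 */
/* = 0x00000001 | 0x00000010 | 0x00000840 | 0x0 = 0x00000851 */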
@@ -30,12 +30,14 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
		kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
		kfd_process.o kfd_queue.o kfd_mqd_manager.o \
		kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
		kfd_mqd_manager_v9.o \
		kfd_kernel_queue.o kfd_kernel_queue_cik.o \
		kfd_kernel_queue_vi.o kfd_packet_manager.o \
		kfd_process_queue_manager.o kfd_device_queue_manager.o \
		kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
		kfd_kernel_queue_vi.o kfd_kernel_queue_v9.o \
		kfd_packet_manager.o kfd_process_queue_manager.o \
		kfd_device_queue_manager.o kfd_device_queue_manager_cik.o \
		kfd_device_queue_manager_vi.o kfd_device_queue_manager_v9.o \
		kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
		kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
		kfd_int_process_v9.o kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o

ifneq ($(CONFIG_AMD_IOMMU_V2),)
amdkfd-y += kfd_iommu.o
@@ -27,18 +27,28 @@
static bool cik_event_interrupt_isr(struct kfd_dev *dev,
					const uint32_t *ih_ring_entry)
{
	unsigned int pasid;
	const struct cik_ih_ring_entry *ihre =
			(const struct cik_ih_ring_entry *)ih_ring_entry;
	unsigned int vmid, pasid;

	/* Only handle interrupts from KFD VMIDs */
	vmid = (ihre->ring_id & 0x0000ff00) >> 8;
	if (vmid < dev->vm_info.first_vmid_kfd ||
		vmid > dev->vm_info.last_vmid_kfd)
		return 0;

	/* If there is no valid PASID, it's likely a firmware bug */
	pasid = (ihre->ring_id & 0xffff0000) >> 16;
	if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
		return 0;

	/* Do not process in ISR, just request it to be forwarded to WQ. */
	return (pasid != 0) &&
		(ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
	/* Interrupt types we care about: various signals and faults.
	 * They will be forwarded to a work queue (see below).
	 */
	return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
		ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
		ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
		ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
		ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE;
}

static void cik_event_interrupt_wq(struct kfd_dev *dev,
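A worked example of the VMID/PASID unpacking above (the ring_id value is invented for illustration):

uint32_t ring_id = 0x00420800;	/* hypothetical IH ring entry field */
unsigned int vmid  = (ring_id & 0x0000ff00) >> 8;	/* = 0x08 */
unsigned int pasid = (ring_id & 0xffff0000) >> 16;	/* = 0x0042 */
/* vmid is then checked against the KFD VMID range; pasid == 0 would
 * indicate a firmware bug, per the WARN_ONCE above.
 */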
@@ -33,7 +33,8 @@
#define APE1_MTYPE(x) ((x) << 7)

/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
#define MTYPE_CACHED 0
#define MTYPE_CACHED_NV 0
#define MTYPE_CACHED 1
#define MTYPE_NONCACHED 3

#define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8)
560	drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h	(new file)
@@ -0,0 +1,560 @@
|
||||
/*
|
||||
* Copyright 2018 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
static const uint32_t cwsr_trap_gfx8_hex[] = {
    0xbf820001, 0xbf820125,
    0xb8f4f802, 0x89748674,
    0xb8f5f803, 0x8675ff75,
    0x00000400, 0xbf850011,
    0xc00a1e37, 0x00000000,
    0xbf8c007f, 0x87777978,
    0xbf840002, 0xb974f802,
    0xbe801d78, 0xb8f5f803,
    0x8675ff75, 0x000001ff,
    0xbf850002, 0x80708470,
    0x82718071, 0x8671ff71,
    0x0000ffff, 0xb974f802,
    0xbe801f70, 0xb8f5f803,
    0x8675ff75, 0x00000100,
    0xbf840006, 0xbefa0080,
    0xb97a0203, 0x8671ff71,
    0x0000ffff, 0x80f08870,
    0x82f18071, 0xbefa0080,
    0xb97a0283, 0xbef60068,
    0xbef70069, 0xb8fa1c07,
    0x8e7a9c7a, 0x87717a71,
    0xb8fa03c7, 0x8e7a9b7a,
    0x87717a71, 0xb8faf807,
    0x867aff7a, 0x00007fff,
    0xb97af807, 0xbef2007e,
    0xbef3007f, 0xbefe0180,
    0xbf900004, 0x877a8474,
    0xb97af802, 0xbf8e0002,
    0xbf88fffe, 0xbef8007e,
    0x8679ff7f, 0x0000ffff,
    0x8779ff79, 0x00040000,
    0xbefa0080, 0xbefb00ff,
    0x00807fac, 0x867aff7f,
    0x08000000, 0x8f7a837a,
    0x877b7a7b, 0x867aff7f,
    0x70000000, 0x8f7a817a,
    0x877b7a7b, 0xbeef007c,
    0xbeee0080, 0xb8ee2a05,
    0x806e816e, 0x8e6e8a6e,
    0xb8fa1605, 0x807a817a,
    0x8e7a867a, 0x806e7a6e,
    0xbefa0084, 0xbefa00ff,
    0x01000000, 0xbefe007c,
    0xbefc006e, 0xc0611bfc,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0xbefe007c,
    0xbefc006e, 0xc0611c3c,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0xbefe007c,
    0xbefc006e, 0xc0611c7c,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0xbefe007c,
    0xbefc006e, 0xc0611cbc,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0xbefe007c,
    0xbefc006e, 0xc0611cfc,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0xbefe007c,
    0xbefc006e, 0xc0611d3c,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0xb8f5f803,
    0xbefe007c, 0xbefc006e,
    0xc0611d7c, 0x0000007c,
    0x806e846e, 0xbefc007e,
    0xbefe007c, 0xbefc006e,
    0xc0611dbc, 0x0000007c,
    0x806e846e, 0xbefc007e,
    0xbefe007c, 0xbefc006e,
    0xc0611dfc, 0x0000007c,
    0x806e846e, 0xbefc007e,
    0xb8eff801, 0xbefe007c,
    0xbefc006e, 0xc0611bfc,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0xbefe007c,
    0xbefc006e, 0xc0611b3c,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0xbefe007c,
    0xbefc006e, 0xc0611b7c,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0x867aff7f,
    0x04000000, 0xbef30080,
    0x8773737a, 0xb8ee2a05,
    0x806e816e, 0x8e6e8a6e,
    0xb8f51605, 0x80758175,
    0x8e758475, 0x8e7a8275,
    0xbefa00ff, 0x01000000,
    0xbef60178, 0x80786e78,
    0x82798079, 0xbefc0080,
    0xbe802b00, 0xbe822b02,
    0xbe842b04, 0xbe862b06,
    0xbe882b08, 0xbe8a2b0a,
    0xbe8c2b0c, 0xbe8e2b0e,
    0xc06b003c, 0x00000000,
    0xc06b013c, 0x00000010,
    0xc06b023c, 0x00000020,
    0xc06b033c, 0x00000030,
    0x8078c078, 0x82798079,
    0x807c907c, 0xbf0a757c,
    0xbf85ffeb, 0xbef80176,
    0xbeee0080, 0xbefe00c1,
    0xbeff00c1, 0xbefa00ff,
    0x01000000, 0xe0724000,
    0x6e1e0000, 0xe0724100,
    0x6e1e0100, 0xe0724200,
    0x6e1e0200, 0xe0724300,
    0x6e1e0300, 0xbefe00c1,
    0xbeff00c1, 0xb8f54306,
    0x8675c175, 0xbf84002c,
    0xbf8a0000, 0x867aff73,
    0x04000000, 0xbf840028,
    0x8e758675, 0x8e758275,
    0xbefa0075, 0xb8ee2a05,
    0x806e816e, 0x8e6e8a6e,
    0xb8fa1605, 0x807a817a,
    0x8e7a867a, 0x806e7a6e,
    0x806eff6e, 0x00000080,
    0xbefa00ff, 0x01000000,
    0xbefc0080, 0xd28c0002,
    0x000100c1, 0xd28d0003,
    0x000204c1, 0xd1060002,
    0x00011103, 0x7e0602ff,
    0x00000200, 0xbefc00ff,
    0x00010000, 0xbe80007b,
    0x867bff7b, 0xff7fffff,
    0x877bff7b, 0x00058000,
    0xd8ec0000, 0x00000002,
    0xbf8c007f, 0xe0765000,
    0x6e1e0002, 0x32040702,
    0xd0c9006a, 0x0000eb02,
    0xbf87fff7, 0xbefb0000,
    0xbeee00ff, 0x00000400,
    0xbefe00c1, 0xbeff00c1,
    0xb8f52a05, 0x80758175,
    0x8e758275, 0x8e7a8875,
    0xbefa00ff, 0x01000000,
    0xbefc0084, 0xbf0a757c,
    0xbf840015, 0xbf11017c,
    0x8075ff75, 0x00001000,
    0x7e000300, 0x7e020301,
    0x7e040302, 0x7e060303,
    0xe0724000, 0x6e1e0000,
    0xe0724100, 0x6e1e0100,
    0xe0724200, 0x6e1e0200,
    0xe0724300, 0x6e1e0300,
    0x807c847c, 0x806eff6e,
    0x00000400, 0xbf0a757c,
    0xbf85ffef, 0xbf9c0000,
    0xbf8200ca, 0xbef8007e,
    0x8679ff7f, 0x0000ffff,
    0x8779ff79, 0x00040000,
    0xbefa0080, 0xbefb00ff,
    0x00807fac, 0x8676ff7f,
    0x08000000, 0x8f768376,
    0x877b767b, 0x8676ff7f,
    0x70000000, 0x8f768176,
    0x877b767b, 0x8676ff7f,
    0x04000000, 0xbf84001e,
    0xbefe00c1, 0xbeff00c1,
    0xb8f34306, 0x8673c173,
    0xbf840019, 0x8e738673,
    0x8e738273, 0xbefa0073,
    0xb8f22a05, 0x80728172,
    0x8e728a72, 0xb8f61605,
    0x80768176, 0x8e768676,
    0x80727672, 0x8072ff72,
    0x00000080, 0xbefa00ff,
    0x01000000, 0xbefc0080,
    0xe0510000, 0x721e0000,
    0xe0510100, 0x721e0000,
    0x807cff7c, 0x00000200,
    0x8072ff72, 0x00000200,
    0xbf0a737c, 0xbf85fff6,
    0xbef20080, 0xbefe00c1,
    0xbeff00c1, 0xb8f32a05,
    0x80738173, 0x8e738273,
    0x8e7a8873, 0xbefa00ff,
    0x01000000, 0xbef60072,
    0x8072ff72, 0x00000400,
    0xbefc0084, 0xbf11087c,
    0x8073ff73, 0x00008000,
    0xe0524000, 0x721e0000,
    0xe0524100, 0x721e0100,
    0xe0524200, 0x721e0200,
    0xe0524300, 0x721e0300,
    0xbf8c0f70, 0x7e000300,
    0x7e020301, 0x7e040302,
    0x7e060303, 0x807c847c,
    0x8072ff72, 0x00000400,
    0xbf0a737c, 0xbf85ffee,
    0xbf9c0000, 0xe0524000,
    0x761e0000, 0xe0524100,
    0x761e0100, 0xe0524200,
    0x761e0200, 0xe0524300,
    0x761e0300, 0xb8f22a05,
    0x80728172, 0x8e728a72,
    0xb8f61605, 0x80768176,
    0x8e768676, 0x80727672,
    0x80f2c072, 0xb8f31605,
    0x80738173, 0x8e738473,
    0x8e7a8273, 0xbefa00ff,
    0x01000000, 0xbefc0073,
    0xc031003c, 0x00000072,
    0x80f2c072, 0xbf8c007f,
    0x80fc907c, 0xbe802d00,
    0xbe822d02, 0xbe842d04,
    0xbe862d06, 0xbe882d08,
    0xbe8a2d0a, 0xbe8c2d0c,
    0xbe8e2d0e, 0xbf06807c,
    0xbf84fff1, 0xb8f22a05,
    0x80728172, 0x8e728a72,
    0xb8f61605, 0x80768176,
    0x8e768676, 0x80727672,
    0xbefa0084, 0xbefa00ff,
    0x01000000, 0xc0211cfc,
    0x00000072, 0x80728472,
    0xc0211c3c, 0x00000072,
    0x80728472, 0xc0211c7c,
    0x00000072, 0x80728472,
    0xc0211bbc, 0x00000072,
    0x80728472, 0xc0211bfc,
    0x00000072, 0x80728472,
    0xc0211d3c, 0x00000072,
    0x80728472, 0xc0211d7c,
    0x00000072, 0x80728472,
    0xc0211a3c, 0x00000072,
    0x80728472, 0xc0211a7c,
    0x00000072, 0x80728472,
    0xc0211dfc, 0x00000072,
    0x80728472, 0xc0211b3c,
    0x00000072, 0x80728472,
    0xc0211b7c, 0x00000072,
    0x80728472, 0xbf8c007f,
    0xbefc0073, 0xbefe006e,
    0xbeff006f, 0x867375ff,
    0x000003ff, 0xb9734803,
    0x867375ff, 0xfffff800,
    0x8f738b73, 0xb973a2c3,
    0xb977f801, 0x8673ff71,
    0xf0000000, 0x8f739c73,
    0x8e739073, 0xbef60080,
    0x87767376, 0x8673ff71,
    0x08000000, 0x8f739b73,
    0x8e738f73, 0x87767376,
    0x8673ff74, 0x00800000,
    0x8f739773, 0xb976f807,
    0x8671ff71, 0x0000ffff,
    0x86fe7e7e, 0x86ea6a6a,
    0xb974f802, 0xbf8a0000,
    0x95807370, 0xbf810000,
};


static const uint32_t cwsr_trap_gfx9_hex[] = {
    0xbf820001, 0xbf82015a,
    0xb8f8f802, 0x89788678,
    0xb8f1f803, 0x866eff71,
    0x00000400, 0xbf850034,
    0x866eff71, 0x00000800,
    0xbf850003, 0x866eff71,
    0x00000100, 0xbf840008,
    0x866eff78, 0x00002000,
    0xbf840001, 0xbf810000,
    0x8778ff78, 0x00002000,
    0x80ec886c, 0x82ed806d,
    0xb8eef807, 0x866fff6e,
    0x001f8000, 0x8e6f8b6f,
    0x8977ff77, 0xfc000000,
    0x87776f77, 0x896eff6e,
    0x001f8000, 0xb96ef807,
    0xb8f0f812, 0xb8f1f813,
    0x8ef08870, 0xc0071bb8,
    0x00000000, 0xbf8cc07f,
    0xc0071c38, 0x00000008,
    0xbf8cc07f, 0x86ee6e6e,
    0xbf840001, 0xbe801d6e,
    0xb8f1f803, 0x8671ff71,
    0x000001ff, 0xbf850002,
    0x806c846c, 0x826d806d,
    0x866dff6d, 0x0000ffff,
    0x8f6e8b77, 0x866eff6e,
    0x001f8000, 0xb96ef807,
    0x86fe7e7e, 0x86ea6a6a,
    0xb978f802, 0xbe801f6c,
    0x866dff6d, 0x0000ffff,
    0xbef00080, 0xb9700283,
    0xb8f02407, 0x8e709c70,
    0x876d706d, 0xb8f003c7,
    0x8e709b70, 0x876d706d,
    0xb8f0f807, 0x8670ff70,
    0x00007fff, 0xb970f807,
    0xbeee007e, 0xbeef007f,
    0xbefe0180, 0xbf900004,
    0x87708478, 0xb970f802,
    0xbf8e0002, 0xbf88fffe,
    0xb8f02a05, 0x80708170,
    0x8e708a70, 0xb8f11605,
    0x80718171, 0x8e718671,
    0x80707170, 0x80707e70,
    0x8271807f, 0x8671ff71,
    0x0000ffff, 0xc0471cb8,
    0x00000040, 0xbf8cc07f,
    0xc04b1d38, 0x00000048,
    0xbf8cc07f, 0xc0431e78,
    0x00000058, 0xbf8cc07f,
    0xc0471eb8, 0x0000005c,
    0xbf8cc07f, 0xbef4007e,
    0x8675ff7f, 0x0000ffff,
    0x8775ff75, 0x00040000,
    0xbef60080, 0xbef700ff,
    0x00807fac, 0x8670ff7f,
    0x08000000, 0x8f708370,
    0x87777077, 0x8670ff7f,
    0x70000000, 0x8f708170,
    0x87777077, 0xbefb007c,
    0xbefa0080, 0xb8fa2a05,
    0x807a817a, 0x8e7a8a7a,
    0xb8f01605, 0x80708170,
    0x8e708670, 0x807a707a,
    0xbef60084, 0xbef600ff,
    0x01000000, 0xbefe007c,
    0xbefc007a, 0xc0611efa,
    0x0000007c, 0xbf8cc07f,
    0x807a847a, 0xbefc007e,
    0xbefe007c, 0xbefc007a,
    0xc0611b3a, 0x0000007c,
    0xbf8cc07f, 0x807a847a,
    0xbefc007e, 0xbefe007c,
    0xbefc007a, 0xc0611b7a,
    0x0000007c, 0xbf8cc07f,
    0x807a847a, 0xbefc007e,
    0xbefe007c, 0xbefc007a,
    0xc0611bba, 0x0000007c,
    0xbf8cc07f, 0x807a847a,
    0xbefc007e, 0xbefe007c,
    0xbefc007a, 0xc0611bfa,
    0x0000007c, 0xbf8cc07f,
    0x807a847a, 0xbefc007e,
    0xbefe007c, 0xbefc007a,
    0xc0611e3a, 0x0000007c,
    0xbf8cc07f, 0x807a847a,
    0xbefc007e, 0xb8f1f803,
    0xbefe007c, 0xbefc007a,
    0xc0611c7a, 0x0000007c,
    0xbf8cc07f, 0x807a847a,
    0xbefc007e, 0xbefe007c,
    0xbefc007a, 0xc0611a3a,
    0x0000007c, 0xbf8cc07f,
    0x807a847a, 0xbefc007e,
    0xbefe007c, 0xbefc007a,
    0xc0611a7a, 0x0000007c,
    0xbf8cc07f, 0x807a847a,
    0xbefc007e, 0xb8fbf801,
    0xbefe007c, 0xbefc007a,
    0xc0611efa, 0x0000007c,
    0xbf8cc07f, 0x807a847a,
    0xbefc007e, 0x8670ff7f,
    0x04000000, 0xbeef0080,
    0x876f6f70, 0xb8fa2a05,
    0x807a817a, 0x8e7a8a7a,
    0xb8f11605, 0x80718171,
    0x8e718471, 0x8e768271,
    0xbef600ff, 0x01000000,
    0xbef20174, 0x80747a74,
    0x82758075, 0xbefc0080,
    0xbf800000, 0xbe802b00,
    0xbe822b02, 0xbe842b04,
    0xbe862b06, 0xbe882b08,
    0xbe8a2b0a, 0xbe8c2b0c,
    0xbe8e2b0e, 0xc06b003a,
    0x00000000, 0xbf8cc07f,
    0xc06b013a, 0x00000010,
    0xbf8cc07f, 0xc06b023a,
    0x00000020, 0xbf8cc07f,
    0xc06b033a, 0x00000030,
    0xbf8cc07f, 0x8074c074,
    0x82758075, 0x807c907c,
    0xbf0a717c, 0xbf85ffe7,
    0xbef40172, 0xbefa0080,
    0xbefe00c1, 0xbeff00c1,
    0xbee80080, 0xbee90080,
    0xbef600ff, 0x01000000,
    0xe0724000, 0x7a1d0000,
    0xe0724100, 0x7a1d0100,
    0xe0724200, 0x7a1d0200,
    0xe0724300, 0x7a1d0300,
    0xbefe00c1, 0xbeff00c1,
    0xb8f14306, 0x8671c171,
    0xbf84002c, 0xbf8a0000,
    0x8670ff6f, 0x04000000,
    0xbf840028, 0x8e718671,
    0x8e718271, 0xbef60071,
    0xb8fa2a05, 0x807a817a,
    0x8e7a8a7a, 0xb8f01605,
    0x80708170, 0x8e708670,
    0x807a707a, 0x807aff7a,
    0x00000080, 0xbef600ff,
    0x01000000, 0xbefc0080,
    0xd28c0002, 0x000100c1,
    0xd28d0003, 0x000204c1,
    0xd1060002, 0x00011103,
    0x7e0602ff, 0x00000200,
    0xbefc00ff, 0x00010000,
    0xbe800077, 0x8677ff77,
    0xff7fffff, 0x8777ff77,
    0x00058000, 0xd8ec0000,
    0x00000002, 0xbf8cc07f,
    0xe0765000, 0x7a1d0002,
    0x68040702, 0xd0c9006a,
    0x0000e302, 0xbf87fff7,
    0xbef70000, 0xbefa00ff,
    0x00000400, 0xbefe00c1,
    0xbeff00c1, 0xb8f12a05,
    0x80718171, 0x8e718271,
    0x8e768871, 0xbef600ff,
    0x01000000, 0xbefc0084,
    0xbf0a717c, 0xbf840015,
    0xbf11017c, 0x8071ff71,
    0x00001000, 0x7e000300,
    0x7e020301, 0x7e040302,
    0x7e060303, 0xe0724000,
    0x7a1d0000, 0xe0724100,
    0x7a1d0100, 0xe0724200,
    0x7a1d0200, 0xe0724300,
    0x7a1d0300, 0x807c847c,
    0x807aff7a, 0x00000400,
    0xbf0a717c, 0xbf85ffef,
    0xbf9c0000, 0xbf8200d9,
    0xbef4007e, 0x8675ff7f,
    0x0000ffff, 0x8775ff75,
    0x00040000, 0xbef60080,
    0xbef700ff, 0x00807fac,
    0x866eff7f, 0x08000000,
    0x8f6e836e, 0x87776e77,
    0x866eff7f, 0x70000000,
    0x8f6e816e, 0x87776e77,
    0x866eff7f, 0x04000000,
    0xbf84001e, 0xbefe00c1,
    0xbeff00c1, 0xb8ef4306,
    0x866fc16f, 0xbf840019,
    0x8e6f866f, 0x8e6f826f,
    0xbef6006f, 0xb8f82a05,
    0x80788178, 0x8e788a78,
    0xb8ee1605, 0x806e816e,
    0x8e6e866e, 0x80786e78,
    0x8078ff78, 0x00000080,
    0xbef600ff, 0x01000000,
    0xbefc0080, 0xe0510000,
    0x781d0000, 0xe0510100,
    0x781d0000, 0x807cff7c,
    0x00000200, 0x8078ff78,
    0x00000200, 0xbf0a6f7c,
    0xbf85fff6, 0xbef80080,
    0xbefe00c1, 0xbeff00c1,
    0xb8ef2a05, 0x806f816f,
    0x8e6f826f, 0x8e76886f,
    0xbef600ff, 0x01000000,
    0xbeee0078, 0x8078ff78,
    0x00000400, 0xbefc0084,
    0xbf11087c, 0x806fff6f,
    0x00008000, 0xe0524000,
    0x781d0000, 0xe0524100,
    0x781d0100, 0xe0524200,
    0x781d0200, 0xe0524300,
    0x781d0300, 0xbf8c0f70,
    0x7e000300, 0x7e020301,
    0x7e040302, 0x7e060303,
    0x807c847c, 0x8078ff78,
    0x00000400, 0xbf0a6f7c,
    0xbf85ffee, 0xbf9c0000,
    0xe0524000, 0x6e1d0000,
    0xe0524100, 0x6e1d0100,
    0xe0524200, 0x6e1d0200,
    0xe0524300, 0x6e1d0300,
    0xb8f82a05, 0x80788178,
    0x8e788a78, 0xb8ee1605,
    0x806e816e, 0x8e6e866e,
    0x80786e78, 0x80f8c078,
    0xb8ef1605, 0x806f816f,
    0x8e6f846f, 0x8e76826f,
    0xbef600ff, 0x01000000,
    0xbefc006f, 0xc031003a,
    0x00000078, 0x80f8c078,
    0xbf8cc07f, 0x80fc907c,
    0xbf800000, 0xbe802d00,
    0xbe822d02, 0xbe842d04,
    0xbe862d06, 0xbe882d08,
    0xbe8a2d0a, 0xbe8c2d0c,
    0xbe8e2d0e, 0xbf06807c,
    0xbf84fff0, 0xb8f82a05,
    0x80788178, 0x8e788a78,
    0xb8ee1605, 0x806e816e,
    0x8e6e866e, 0x80786e78,
    0xbef60084, 0xbef600ff,
    0x01000000, 0xc0211bfa,
    0x00000078, 0x80788478,
    0xc0211b3a, 0x00000078,
    0x80788478, 0xc0211b7a,
    0x00000078, 0x80788478,
    0xc0211eba, 0x00000078,
    0x80788478, 0xc0211efa,
    0x00000078, 0x80788478,
    0xc0211c3a, 0x00000078,
    0x80788478, 0xc0211c7a,
    0x00000078, 0x80788478,
    0xc0211a3a, 0x00000078,
    0x80788478, 0xc0211a7a,
    0x00000078, 0x80788478,
    0xc0211cfa, 0x00000078,
    0x80788478, 0xbf8cc07f,
    0xbefc006f, 0xbefe007a,
    0xbeff007b, 0x866f71ff,
    0x000003ff, 0xb96f4803,
    0x866f71ff, 0xfffff800,
    0x8f6f8b6f, 0xb96fa2c3,
    0xb973f801, 0xb8ee2a05,
    0x806e816e, 0x8e6e8a6e,
    0xb8ef1605, 0x806f816f,
    0x8e6f866f, 0x806e6f6e,
    0x806e746e, 0x826f8075,
    0x866fff6f, 0x0000ffff,
    0xc0071cb7, 0x00000040,
    0xc00b1d37, 0x00000048,
    0xc0031e77, 0x00000058,
    0xc0071eb7, 0x0000005c,
    0xbf8cc07f, 0x866fff6d,
    0xf0000000, 0x8f6f9c6f,
    0x8e6f906f, 0xbeee0080,
    0x876e6f6e, 0x866fff6d,
    0x08000000, 0x8f6f9b6f,
    0x8e6f8f6f, 0x876e6f6e,
    0x866fff70, 0x00800000,
    0x8f6f976f, 0xb96ef807,
    0x866dff6d, 0x0000ffff,
    0x86fe7e7e, 0x86ea6a6a,
    0xb970f802, 0xbf8a0000,
    0x95806f6c, 0xbf810000,
};
@@ -20,9 +20,12 @@
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#if 0
HW (VI) source code for CWSR trap handler
#Version 18 + multiple trap handler
/* To compile this assembly code:
 * PROJECT=vi ./sp3 cwsr_trap_handler_gfx8.asm -hex tmp.hex
 */

/* HW (VI) source code for CWSR trap handler */
/* Version 18 + multiple trap handler */

// this performance-optimal version was originally from Seven Xu at SRDC

@@ -98,6 +101,7 @@ var SWIZZLE_EN = 0 //whether we use swi
/**************************************************************************/
var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006

var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
@@ -149,7 +153,7 @@ var s_save_spi_init_lo = exec_lo
var s_save_spi_init_hi = exec_hi

//tba_lo and tba_hi need to be saved/restored
var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3??h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
var s_save_pc_hi = ttmp1
var s_save_exec_lo = ttmp2
var s_save_exec_hi = ttmp3
@@ -319,6 +323,10 @@ end
    s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
end

// Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp

L_SLEEP:
s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0

@@ -1007,8 +1015,6 @@ end

    s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS

    s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS

    //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise:
    if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
        s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore)
@@ -1044,6 +1050,7 @@ end
    s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
    s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp

    s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
    s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
    s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
    s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
@@ -1127,258 +1134,3 @@ end
function get_hwreg_size_bytes
    return 128 //HWREG size 128 bytes
end


#endif

static const uint32_t cwsr_trap_gfx8_hex[] = {
    0xbf820001, 0xbf820123,
    0xb8f4f802, 0x89748674,
    0xb8f5f803, 0x8675ff75,
    0x00000400, 0xbf850011,
    0xc00a1e37, 0x00000000,
    0xbf8c007f, 0x87777978,
    0xbf840002, 0xb974f802,
    0xbe801d78, 0xb8f5f803,
    0x8675ff75, 0x000001ff,
    0xbf850002, 0x80708470,
    0x82718071, 0x8671ff71,
    0x0000ffff, 0xb974f802,
    0xbe801f70, 0xb8f5f803,
    0x8675ff75, 0x00000100,
    0xbf840006, 0xbefa0080,
    0xb97a0203, 0x8671ff71,
    0x0000ffff, 0x80f08870,
    0x82f18071, 0xbefa0080,
    0xb97a0283, 0xbef60068,
    0xbef70069, 0xb8fa1c07,
    0x8e7a9c7a, 0x87717a71,
    0xb8fa03c7, 0x8e7a9b7a,
    0x87717a71, 0xb8faf807,
    0x867aff7a, 0x00007fff,
    0xb97af807, 0xbef2007e,
    0xbef3007f, 0xbefe0180,
    0xbf900004, 0xbf8e0002,
    0xbf88fffe, 0xbef8007e,
    0x8679ff7f, 0x0000ffff,
    0x8779ff79, 0x00040000,
    0xbefa0080, 0xbefb00ff,
    0x00807fac, 0x867aff7f,
    0x08000000, 0x8f7a837a,
    0x877b7a7b, 0x867aff7f,
    0x70000000, 0x8f7a817a,
    0x877b7a7b, 0xbeef007c,
    0xbeee0080, 0xb8ee2a05,
    0x806e816e, 0x8e6e8a6e,
    0xb8fa1605, 0x807a817a,
    0x8e7a867a, 0x806e7a6e,
    0xbefa0084, 0xbefa00ff,
    0x01000000, 0xbefe007c,
    0xbefc006e, 0xc0611bfc,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0xbefe007c,
    0xbefc006e, 0xc0611c3c,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0xbefe007c,
    0xbefc006e, 0xc0611c7c,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0xbefe007c,
    0xbefc006e, 0xc0611cbc,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0xbefe007c,
    0xbefc006e, 0xc0611cfc,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0xbefe007c,
    0xbefc006e, 0xc0611d3c,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0xb8f5f803,
    0xbefe007c, 0xbefc006e,
    0xc0611d7c, 0x0000007c,
    0x806e846e, 0xbefc007e,
    0xbefe007c, 0xbefc006e,
    0xc0611dbc, 0x0000007c,
    0x806e846e, 0xbefc007e,
    0xbefe007c, 0xbefc006e,
    0xc0611dfc, 0x0000007c,
    0x806e846e, 0xbefc007e,
    0xb8eff801, 0xbefe007c,
    0xbefc006e, 0xc0611bfc,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0xbefe007c,
    0xbefc006e, 0xc0611b3c,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0xbefe007c,
    0xbefc006e, 0xc0611b7c,
    0x0000007c, 0x806e846e,
    0xbefc007e, 0x867aff7f,
    0x04000000, 0xbef30080,
    0x8773737a, 0xb8ee2a05,
    0x806e816e, 0x8e6e8a6e,
    0xb8f51605, 0x80758175,
    0x8e758475, 0x8e7a8275,
    0xbefa00ff, 0x01000000,
    0xbef60178, 0x80786e78,
    0x82798079, 0xbefc0080,
    0xbe802b00, 0xbe822b02,
    0xbe842b04, 0xbe862b06,
    0xbe882b08, 0xbe8a2b0a,
    0xbe8c2b0c, 0xbe8e2b0e,
    0xc06b003c, 0x00000000,
    0xc06b013c, 0x00000010,
    0xc06b023c, 0x00000020,
    0xc06b033c, 0x00000030,
    0x8078c078, 0x82798079,
    0x807c907c, 0xbf0a757c,
    0xbf85ffeb, 0xbef80176,
    0xbeee0080, 0xbefe00c1,
    0xbeff00c1, 0xbefa00ff,
    0x01000000, 0xe0724000,
    0x6e1e0000, 0xe0724100,
    0x6e1e0100, 0xe0724200,
    0x6e1e0200, 0xe0724300,
    0x6e1e0300, 0xbefe00c1,
    0xbeff00c1, 0xb8f54306,
    0x8675c175, 0xbf84002c,
    0xbf8a0000, 0x867aff73,
    0x04000000, 0xbf840028,
    0x8e758675, 0x8e758275,
    0xbefa0075, 0xb8ee2a05,
    0x806e816e, 0x8e6e8a6e,
    0xb8fa1605, 0x807a817a,
    0x8e7a867a, 0x806e7a6e,
    0x806eff6e, 0x00000080,
    0xbefa00ff, 0x01000000,
    0xbefc0080, 0xd28c0002,
    0x000100c1, 0xd28d0003,
    0x000204c1, 0xd1060002,
    0x00011103, 0x7e0602ff,
    0x00000200, 0xbefc00ff,
    0x00010000, 0xbe80007b,
    0x867bff7b, 0xff7fffff,
    0x877bff7b, 0x00058000,
    0xd8ec0000, 0x00000002,
    0xbf8c007f, 0xe0765000,
    0x6e1e0002, 0x32040702,
    0xd0c9006a, 0x0000eb02,
    0xbf87fff7, 0xbefb0000,
    0xbeee00ff, 0x00000400,
    0xbefe00c1, 0xbeff00c1,
    0xb8f52a05, 0x80758175,
    0x8e758275, 0x8e7a8875,
    0xbefa00ff, 0x01000000,
    0xbefc0084, 0xbf0a757c,
    0xbf840015, 0xbf11017c,
    0x8075ff75, 0x00001000,
    0x7e000300, 0x7e020301,
    0x7e040302, 0x7e060303,
    0xe0724000, 0x6e1e0000,
    0xe0724100, 0x6e1e0100,
    0xe0724200, 0x6e1e0200,
    0xe0724300, 0x6e1e0300,
    0x807c847c, 0x806eff6e,
    0x00000400, 0xbf0a757c,
    0xbf85ffef, 0xbf9c0000,
    0xbf8200ca, 0xbef8007e,
    0x8679ff7f, 0x0000ffff,
    0x8779ff79, 0x00040000,
    0xbefa0080, 0xbefb00ff,
    0x00807fac, 0x8676ff7f,
    0x08000000, 0x8f768376,
    0x877b767b, 0x8676ff7f,
    0x70000000, 0x8f768176,
    0x877b767b, 0x8676ff7f,
    0x04000000, 0xbf84001e,
    0xbefe00c1, 0xbeff00c1,
    0xb8f34306, 0x8673c173,
    0xbf840019, 0x8e738673,
    0x8e738273, 0xbefa0073,
    0xb8f22a05, 0x80728172,
    0x8e728a72, 0xb8f61605,
    0x80768176, 0x8e768676,
    0x80727672, 0x8072ff72,
    0x00000080, 0xbefa00ff,
    0x01000000, 0xbefc0080,
    0xe0510000, 0x721e0000,
    0xe0510100, 0x721e0000,
    0x807cff7c, 0x00000200,
    0x8072ff72, 0x00000200,
    0xbf0a737c, 0xbf85fff6,
    0xbef20080, 0xbefe00c1,
    0xbeff00c1, 0xb8f32a05,
    0x80738173, 0x8e738273,
    0x8e7a8873, 0xbefa00ff,
    0x01000000, 0xbef60072,
    0x8072ff72, 0x00000400,
    0xbefc0084, 0xbf11087c,
    0x8073ff73, 0x00008000,
    0xe0524000, 0x721e0000,
    0xe0524100, 0x721e0100,
    0xe0524200, 0x721e0200,
    0xe0524300, 0x721e0300,
    0xbf8c0f70, 0x7e000300,
    0x7e020301, 0x7e040302,
    0x7e060303, 0x807c847c,
    0x8072ff72, 0x00000400,
    0xbf0a737c, 0xbf85ffee,
    0xbf9c0000, 0xe0524000,
    0x761e0000, 0xe0524100,
    0x761e0100, 0xe0524200,
    0x761e0200, 0xe0524300,
    0x761e0300, 0xb8f22a05,
    0x80728172, 0x8e728a72,
    0xb8f61605, 0x80768176,
    0x8e768676, 0x80727672,
    0x80f2c072, 0xb8f31605,
    0x80738173, 0x8e738473,
    0x8e7a8273, 0xbefa00ff,
    0x01000000, 0xbefc0073,
    0xc031003c, 0x00000072,
    0x80f2c072, 0xbf8c007f,
    0x80fc907c, 0xbe802d00,
    0xbe822d02, 0xbe842d04,
    0xbe862d06, 0xbe882d08,
    0xbe8a2d0a, 0xbe8c2d0c,
    0xbe8e2d0e, 0xbf06807c,
    0xbf84fff1, 0xb8f22a05,
    0x80728172, 0x8e728a72,
    0xb8f61605, 0x80768176,
    0x8e768676, 0x80727672,
    0xbefa0084, 0xbefa00ff,
    0x01000000, 0xc0211cfc,
    0x00000072, 0x80728472,
    0xc0211c3c, 0x00000072,
    0x80728472, 0xc0211c7c,
    0x00000072, 0x80728472,
    0xc0211bbc, 0x00000072,
    0x80728472, 0xc0211bfc,
    0x00000072, 0x80728472,
    0xc0211d3c, 0x00000072,
    0x80728472, 0xc0211d7c,
    0x00000072, 0x80728472,
    0xc0211a3c, 0x00000072,
    0x80728472, 0xc0211a7c,
    0x00000072, 0x80728472,
    0xc0211dfc, 0x00000072,
    0x80728472, 0xc0211b3c,
    0x00000072, 0x80728472,
    0xc0211b7c, 0x00000072,
    0x80728472, 0xbf8c007f,
    0x8671ff71, 0x0000ffff,
    0xbefc0073, 0xbefe006e,
    0xbeff006f, 0x867375ff,
    0x000003ff, 0xb9734803,
    0x867375ff, 0xfffff800,
    0x8f738b73, 0xb973a2c3,
    0xb977f801, 0x8673ff71,
    0xf0000000, 0x8f739c73,
    0x8e739073, 0xbef60080,
    0x87767376, 0x8673ff71,
    0x08000000, 0x8f739b73,
    0x8e738f73, 0x87767376,
    0x8673ff74, 0x00800000,
    0x8f739773, 0xb976f807,
    0x86fe7e7e, 0x86ea6a6a,
    0xb974f802, 0xbf8a0000,
    0x95807370, 0xbf810000,
};

drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm (new file, 1214 lines; diff suppressed because it is too large)
@@ -233,7 +233,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
    pr_debug("Queue Size: 0x%llX, %u\n",
            q_properties->queue_size, args->ring_size);

    pr_debug("Queue r/w Pointers: %p, %p\n",
    pr_debug("Queue r/w Pointers: %px, %px\n",
            q_properties->read_ptr,
            q_properties->write_ptr);

@@ -292,8 +292,16 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,

    /* Return gpu_id as doorbell offset for mmap usage */
    args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id);
    args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
    args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
    args->doorbell_offset <<= PAGE_SHIFT;
    if (KFD_IS_SOC15(dev->device_info->asic_family))
        /* On SOC15 ASICs, doorbell allocation must be
         * per-device, and independent from the per-process
         * queue_id. Return the doorbell offset within the
         * doorbell aperture to user mode.
         */
        args->doorbell_offset |= q_properties.doorbell_off;

    mutex_unlock(&p->mutex);

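The new offset encoding is easiest to see end to end. Below is a hedged, userspace-only C sketch of the scheme; the field positions are assumptions for illustration (the real constants live in kfd_priv.h), but the round trip — pack a type and a gpu_id, shift everything above the page-offset bits, then recover both from vma->vm_pgoff — matches this hunk and the kfd_mmap() hunk further down. The per-queue doorbell_off that SOC15 ORs into the low bits is consumed by user mode, not by mmap itself, so it is omitted here.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define MMAP_TYPE_SHIFT 50 /* assumed position */
#define MMAP_TYPE_MASK (0x3ULL << MMAP_TYPE_SHIFT)
#define MMAP_TYPE_DOORBELL (0x3ULL << MMAP_TYPE_SHIFT)
#define MMAP_GPU_ID_SHIFT 34 /* assumed position */
#define MMAP_GPU_ID_MASK (0xffffULL << MMAP_GPU_ID_SHIFT)
#define MMAP_GPU_ID(id) (((uint64_t)(id) << MMAP_GPU_ID_SHIFT) & MMAP_GPU_ID_MASK)
#define MMAP_GPU_ID_GET(pgoff) (((pgoff) & MMAP_GPU_ID_MASK) >> MMAP_GPU_ID_SHIFT)

int main(void)
{
    uint32_t gpu_id = 0x2f1c;

    /* Encode, as in kfd_ioctl_create_queue(). */
    uint64_t offset = MMAP_TYPE_DOORBELL;
    offset |= MMAP_GPU_ID(gpu_id);
    offset <<= PAGE_SHIFT;

    /* Decode, as in kfd_mmap(): the core mmap path hands the driver
     * offset >> PAGE_SHIFT in vma->vm_pgoff.
     */
    uint64_t vm_pgoff = offset >> PAGE_SHIFT;
    assert((vm_pgoff & MMAP_TYPE_MASK) == MMAP_TYPE_DOORBELL);
    assert(MMAP_GPU_ID_GET(vm_pgoff) == gpu_id);
    printf("recovered gpu_id 0x%x\n", (unsigned)MMAP_GPU_ID_GET(vm_pgoff));
    return 0;
}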
@@ -1296,8 +1304,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
        return -EINVAL;
    }

    devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
            GFP_KERNEL);
    devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
            GFP_KERNEL);
    if (!devices_arr)
        return -ENOMEM;

@@ -1405,8 +1413,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
        return -EINVAL;
    }

    devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
            GFP_KERNEL);
    devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
            GFP_KERNEL);
    if (!devices_arr)
        return -ENOMEM;

@@ -1645,23 +1653,33 @@ err_i1:
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
    struct kfd_process *process;
    struct kfd_dev *dev = NULL;
    unsigned long vm_pgoff;
    unsigned int gpu_id;

    process = kfd_get_process(current);
    if (IS_ERR(process))
        return PTR_ERR(process);

    if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) ==
            KFD_MMAP_DOORBELL_MASK) {
        vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK;
        return kfd_doorbell_mmap(process, vma);
    } else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) ==
            KFD_MMAP_EVENTS_MASK) {
        vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
    vm_pgoff = vma->vm_pgoff;
    vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
    gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
    if (gpu_id)
        dev = kfd_device_by_id(gpu_id);

    switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
    case KFD_MMAP_TYPE_DOORBELL:
        if (!dev)
            return -ENODEV;
        return kfd_doorbell_mmap(dev, process, vma);

    case KFD_MMAP_TYPE_EVENTS:
        return kfd_event_mmap(process, vma);
    } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
            KFD_MMAP_RESERVED_MEM_MASK) {
        vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
        return kfd_reserved_mem_mmap(process, vma);

    case KFD_MMAP_TYPE_RESERVED_MEM:
        if (!dev)
            return -ENODEV;
        return kfd_reserved_mem_mmap(dev, process, vma);
    }

    return -EFAULT;

@@ -132,6 +132,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
#define fiji_cache_info carrizo_cache_info
#define polaris10_cache_info carrizo_cache_info
#define polaris11_cache_info carrizo_cache_info
/* TODO - check & update Vega10 cache details */
#define vega10_cache_info carrizo_cache_info
#define raven_cache_info carrizo_cache_info

static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
        struct crat_subtype_computeunit *cu)
@@ -603,6 +606,14 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
        pcache_info = polaris11_cache_info;
        num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
        break;
    case CHIP_VEGA10:
        pcache_info = vega10_cache_info;
        num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
        break;
    case CHIP_RAVEN:
        pcache_info = raven_cache_info;
        num_of_cache_types = ARRAY_SIZE(raven_cache_info);
        break;
    default:
        return -EINVAL;
    }

@@ -20,16 +20,13 @@
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
#include <linux/amd-iommu.h>
#endif
#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "cwsr_trap_handler_gfx8.asm"
#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"

#define MQD_SIZE_ALIGNED 768
@@ -41,6 +38,7 @@ static const struct kfd_device_info kaveri_device_info = {
    .max_pasid_bits = 16,
    /* max num of queues for KV.TODO should be a dynamic value */
    .max_no_of_hqd = 24,
    .doorbell_size = 4,
    .ih_ring_entry_size = 4 * sizeof(uint32_t),
    .event_interrupt_class = &event_interrupt_class_cik,
    .num_of_watch_points = 4,
@@ -55,6 +53,7 @@ static const struct kfd_device_info carrizo_device_info = {
    .max_pasid_bits = 16,
    /* max num of queues for CZ.TODO should be a dynamic value */
    .max_no_of_hqd = 24,
    .doorbell_size = 4,
    .ih_ring_entry_size = 4 * sizeof(uint32_t),
    .event_interrupt_class = &event_interrupt_class_cik,
    .num_of_watch_points = 4,
@@ -70,6 +69,7 @@ static const struct kfd_device_info hawaii_device_info = {
    .max_pasid_bits = 16,
    /* max num of queues for KV.TODO should be a dynamic value */
    .max_no_of_hqd = 24,
    .doorbell_size = 4,
    .ih_ring_entry_size = 4 * sizeof(uint32_t),
    .event_interrupt_class = &event_interrupt_class_cik,
    .num_of_watch_points = 4,
@@ -83,6 +83,7 @@ static const struct kfd_device_info tonga_device_info = {
    .asic_family = CHIP_TONGA,
    .max_pasid_bits = 16,
    .max_no_of_hqd = 24,
    .doorbell_size = 4,
    .ih_ring_entry_size = 4 * sizeof(uint32_t),
    .event_interrupt_class = &event_interrupt_class_cik,
    .num_of_watch_points = 4,
@@ -96,6 +97,7 @@ static const struct kfd_device_info tonga_vf_device_info = {
    .asic_family = CHIP_TONGA,
    .max_pasid_bits = 16,
    .max_no_of_hqd = 24,
    .doorbell_size = 4,
    .ih_ring_entry_size = 4 * sizeof(uint32_t),
    .event_interrupt_class = &event_interrupt_class_cik,
    .num_of_watch_points = 4,
@@ -109,6 +111,7 @@ static const struct kfd_device_info fiji_device_info = {
    .asic_family = CHIP_FIJI,
    .max_pasid_bits = 16,
    .max_no_of_hqd = 24,
    .doorbell_size = 4,
    .ih_ring_entry_size = 4 * sizeof(uint32_t),
    .event_interrupt_class = &event_interrupt_class_cik,
    .num_of_watch_points = 4,
@@ -122,6 +125,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
    .asic_family = CHIP_FIJI,
    .max_pasid_bits = 16,
    .max_no_of_hqd = 24,
    .doorbell_size = 4,
    .ih_ring_entry_size = 4 * sizeof(uint32_t),
    .event_interrupt_class = &event_interrupt_class_cik,
    .num_of_watch_points = 4,
@@ -136,6 +140,7 @@ static const struct kfd_device_info polaris10_device_info = {
    .asic_family = CHIP_POLARIS10,
    .max_pasid_bits = 16,
    .max_no_of_hqd = 24,
    .doorbell_size = 4,
    .ih_ring_entry_size = 4 * sizeof(uint32_t),
    .event_interrupt_class = &event_interrupt_class_cik,
    .num_of_watch_points = 4,
@@ -149,6 +154,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
    .asic_family = CHIP_POLARIS10,
    .max_pasid_bits = 16,
    .max_no_of_hqd = 24,
    .doorbell_size = 4,
    .ih_ring_entry_size = 4 * sizeof(uint32_t),
    .event_interrupt_class = &event_interrupt_class_cik,
    .num_of_watch_points = 4,
@@ -162,6 +168,7 @@ static const struct kfd_device_info polaris11_device_info = {
    .asic_family = CHIP_POLARIS11,
    .max_pasid_bits = 16,
    .max_no_of_hqd = 24,
    .doorbell_size = 4,
    .ih_ring_entry_size = 4 * sizeof(uint32_t),
    .event_interrupt_class = &event_interrupt_class_cik,
    .num_of_watch_points = 4,
@@ -171,6 +178,34 @@ static const struct kfd_device_info polaris11_device_info = {
    .needs_pci_atomics = true,
};

static const struct kfd_device_info vega10_device_info = {
    .asic_family = CHIP_VEGA10,
    .max_pasid_bits = 16,
    .max_no_of_hqd = 24,
    .doorbell_size = 8,
    .ih_ring_entry_size = 8 * sizeof(uint32_t),
    .event_interrupt_class = &event_interrupt_class_v9,
    .num_of_watch_points = 4,
    .mqd_size_aligned = MQD_SIZE_ALIGNED,
    .supports_cwsr = true,
    .needs_iommu_device = false,
    .needs_pci_atomics = false,
};

static const struct kfd_device_info vega10_vf_device_info = {
    .asic_family = CHIP_VEGA10,
    .max_pasid_bits = 16,
    .max_no_of_hqd = 24,
    .doorbell_size = 8,
    .ih_ring_entry_size = 8 * sizeof(uint32_t),
    .event_interrupt_class = &event_interrupt_class_v9,
    .num_of_watch_points = 4,
    .mqd_size_aligned = MQD_SIZE_ALIGNED,
    .supports_cwsr = true,
    .needs_iommu_device = false,
    .needs_pci_atomics = false,
};


struct kfd_deviceid {
    unsigned short did;
@@ -250,6 +285,15 @@ static const struct kfd_deviceid supported_devices[] = {
    { 0x67EB, &polaris11_device_info }, /* Polaris11 */
    { 0x67EF, &polaris11_device_info }, /* Polaris11 */
    { 0x67FF, &polaris11_device_info }, /* Polaris11 */
    { 0x6860, &vega10_device_info },    /* Vega10 */
    { 0x6861, &vega10_device_info },    /* Vega10 */
    { 0x6862, &vega10_device_info },    /* Vega10 */
    { 0x6863, &vega10_device_info },    /* Vega10 */
    { 0x6864, &vega10_device_info },    /* Vega10 */
    { 0x6867, &vega10_device_info },    /* Vega10 */
    { 0x6868, &vega10_device_info },    /* Vega10 */
    { 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/
    { 0x687F, &vega10_device_info },    /* Vega10 */
};

static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
@@ -279,7 +323,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
        struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
{
    struct kfd_dev *kfd;

    int ret;
    const struct kfd_device_info *device_info =
        lookup_device_info(pdev->device);

@@ -288,19 +332,18 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
        return NULL;
    }

    if (device_info->needs_pci_atomics) {
        /* Allow BIF to recode atomics to PCIe 3.0
         * AtomicOps. 32 and 64-bit requests are possible and
         * must be supported.
         */
        if (pci_enable_atomic_ops_to_root(pdev,
                PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
                PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) {
            dev_info(kfd_device,
                "skipped device %x:%x, PCI rejects atomics",
                pdev->vendor, pdev->device);
            return NULL;
        }
    /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
     * 32 and 64-bit requests are possible and must be
     * supported.
     */
    ret = pci_enable_atomic_ops_to_root(pdev,
            PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
            PCI_EXP_DEVCAP2_ATOMIC_COMP64);
    if (device_info->needs_pci_atomics && ret < 0) {
        dev_info(kfd_device,
            "skipped device %x:%x, PCI rejects atomics\n",
            pdev->vendor, pdev->device);
        return NULL;
    }

    kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
@@ -323,10 +366,16 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
    if (cwsr_enable && kfd->device_info->supports_cwsr) {
        BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
        if (kfd->device_info->asic_family < CHIP_VEGA10) {
            BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
            kfd->cwsr_isa = cwsr_trap_gfx8_hex;
            kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
        } else {
            BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
            kfd->cwsr_isa = cwsr_trap_gfx9_hex;
            kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
        }

        kfd->cwsr_isa = cwsr_trap_gfx8_hex;
        kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
        kfd->cwsr_enabled = true;
    }
}
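The two BUILD_BUG_ON() lines pin down a contract worth noting: each trap-handler image must fit in the single page the driver reserves for it. A minimal userspace analog of that compile-time guard, assuming a 4 KiB page and a stand-in blob:

#include <stdint.h>

#define PAGE_SIZE 4096 /* assumed */

/* Stand-in blob; the real arrays are the cwsr_trap_gfx{8,9}_hex above. */
static const uint32_t isa_image[] = { 0xbf820001, 0xbf810000 };

/* C11 equivalent of the kernel's BUILD_BUG_ON(): compilation fails if
 * the image ever outgrows its page.
 */
_Static_assert(sizeof(isa_image) <= PAGE_SIZE,
        "CWSR trap handler must fit in one page");

int main(void) { return 0; }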
@@ -541,6 +590,44 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
    spin_unlock(&kfd->interrupt_lock);
}

int kgd2kfd_quiesce_mm(struct mm_struct *mm)
{
    struct kfd_process *p;
    int r;

    /* Because we are called from arbitrary context (workqueue) as opposed
     * to process context, kfd_process could attempt to exit while we are
     * running so the lookup function increments the process ref count.
     */
    p = kfd_lookup_process_by_mm(mm);
    if (!p)
        return -ESRCH;

    r = kfd_process_evict_queues(p);

    kfd_unref_process(p);
    return r;
}

int kgd2kfd_resume_mm(struct mm_struct *mm)
{
    struct kfd_process *p;
    int r;

    /* Because we are called from arbitrary context (workqueue) as opposed
     * to process context, kfd_process could attempt to exit while we are
     * running so the lookup function increments the process ref count.
     */
    p = kfd_lookup_process_by_mm(mm);
    if (!p)
        return -ESRCH;

    r = kfd_process_restore_queues(p);

    kfd_unref_process(p);
    return r;
}

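Both helpers lean on the pattern the comment spells out: take a counted reference before touching the process, because a workqueue caller cannot assume the process sticks around. A hedged sketch of that shape, with a plain integer standing in for the kernel's kref:

#include <stdio.h>
#include <stdlib.h>

/* Hedged stand-ins for kfd_lookup_process_by_mm()/kfd_unref_process(). */
struct process {
    int refcount;
};

static struct process *lookup_get(struct process *p)
{
    if (p)
        p->refcount++; /* pin: the owner may be exiting concurrently */
    return p;
}

static void unref(struct process *p)
{
    if (--p->refcount == 0)
        free(p); /* the last reference frees the object */
}

static int quiesce(struct process *maybe)
{
    struct process *p = lookup_get(maybe);

    if (!p)
        return -1; /* stand-in for -ESRCH */
    /* ... evict queues here; p cannot vanish while we hold the ref ... */
    unref(p);
    return 0;
}

int main(void)
{
    struct process *p = calloc(1, sizeof(*p));

    p->refcount = 1; /* creator's reference */
    printf("quiesce: %d\n", quiesce(p));
    unref(p); /* drop the creator's reference; frees p */
    return 0;
}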
/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
 * prepare for safe eviction of KFD BOs that belong to the specified
 * process.
@@ -652,7 +739,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
    if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
        return -ENOMEM;

    *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
    *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
    if ((*mem_obj) == NULL)
        return -ENOMEM;

@@ -110,6 +110,57 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
            qpd->sh_mem_bases);
}

static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
    struct kfd_dev *dev = qpd->dqm->dev;

    if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
        /* On pre-SOC15 chips we need to use the queue ID to
         * preserve the user mode ABI.
         */
        q->doorbell_id = q->properties.queue_id;
    } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
        /* For SDMA queues on SOC15, use static doorbell
         * assignments based on the engine and queue.
         */
        q->doorbell_id = dev->shared_resources.sdma_doorbell
            [q->properties.sdma_engine_id]
            [q->properties.sdma_queue_id];
    } else {
        /* For CP queues on SOC15 reserve a free doorbell ID */
        unsigned int found;

        found = find_first_zero_bit(qpd->doorbell_bitmap,
                KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
        if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
            pr_debug("No doorbells available");
            return -EBUSY;
        }
        set_bit(found, qpd->doorbell_bitmap);
        q->doorbell_id = found;
    }

    q->properties.doorbell_off =
        kfd_doorbell_id_to_offset(dev, q->process,
                q->doorbell_id);

    return 0;
}

static void deallocate_doorbell(struct qcm_process_device *qpd,
            struct queue *q)
{
    unsigned int old;
    struct kfd_dev *dev = qpd->dqm->dev;

    if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
        q->properties.type == KFD_QUEUE_TYPE_SDMA)
        return;

    old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
    WARN_ON(!old);
}

static int allocate_vmid(struct device_queue_manager *dqm,
            struct qcm_process_device *qpd,
            struct queue *q)
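For CP queues the per-process doorbell IDs now come from a bitmap, the same first-fit pattern as the kernel's find_first_zero_bit()/set_bit()/test_and_clear_bit() trio. A hedged userspace sketch; the 1024-slot size mirrors KFD_MAX_NUM_OF_QUEUES_PER_PROCESS but should be treated as an assumption here:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_QUEUES 1024 /* assumed KFD_MAX_NUM_OF_QUEUES_PER_PROCESS */

static uint64_t bitmap[MAX_QUEUES / 64];

/* First-fit allocation, like find_first_zero_bit() + set_bit(). */
static int alloc_doorbell_id(void)
{
    for (int i = 0; i < MAX_QUEUES; i++) {
        if (!(bitmap[i / 64] & (1ULL << (i % 64)))) {
            bitmap[i / 64] |= 1ULL << (i % 64);
            return i;
        }
    }
    return -1; /* no doorbells available, the kernel returns -EBUSY */
}

/* Like test_and_clear_bit(): reports whether the bit was actually set. */
static bool free_doorbell_id(int id)
{
    uint64_t mask = 1ULL << (id % 64);
    bool was_set = bitmap[id / 64] & mask;

    bitmap[id / 64] &= ~mask;
    return was_set; /* deallocate_doorbell() WARNs when this is false */
}

int main(void)
{
    int a = alloc_doorbell_id(), b = alloc_doorbell_id();

    printf("ids: %d %d\n", a, b);                     /* 0 1 */
    printf("free ok: %d\n", free_doorbell_id(a));     /* 1 */
    printf("double free: %d\n", free_doorbell_id(a)); /* 0 -> would WARN */
    return 0;
}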
@@ -145,15 +196,19 @@ static int allocate_vmid(struct device_queue_manager *dqm,
static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
            struct qcm_process_device *qpd)
{
    uint32_t len;
    const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
    int ret;

    if (!qpd->ib_kaddr)
        return -ENOMEM;

    len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
    ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
    if (ret)
        return ret;

    return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
            qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
            qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
            pmf->release_mem_size / sizeof(uint32_t));
}

static void deallocate_vmid(struct device_queue_manager *dqm,
@@ -301,10 +356,14 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
    if (retval)
        return retval;

    retval = allocate_doorbell(qpd, q);
    if (retval)
        goto out_deallocate_hqd;

    retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
            &q->gart_mqd_addr, &q->properties);
    if (retval)
        goto out_deallocate_hqd;
        goto out_deallocate_doorbell;

    pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
            q->pipe, q->queue);
@@ -324,6 +383,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,

out_uninit_mqd:
    mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
    deallocate_doorbell(qpd, q);
out_deallocate_hqd:
    deallocate_hqd(dqm, q);

@@ -357,6 +418,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
    }
    dqm->total_queue_count--;

    deallocate_doorbell(qpd, q);

    retval = mqd->destroy_mqd(mqd, q->mqd,
            KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
            KFD_UNMAP_LATENCY_MS,
@@ -861,6 +924,10 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
    q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
    q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;

    retval = allocate_doorbell(qpd, q);
    if (retval)
        goto out_deallocate_sdma_queue;

    pr_debug("SDMA id is: %d\n", q->sdma_id);
    pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
    pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
@@ -869,7 +936,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
    retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
            &q->gart_mqd_addr, &q->properties);
    if (retval)
        goto out_deallocate_sdma_queue;
        goto out_deallocate_doorbell;

    retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
    if (retval)
@@ -879,6 +946,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,

out_uninit_mqd:
    mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
    deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
    deallocate_sdma_queue(dqm, q->sdma_id);

@@ -1070,12 +1139,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
        q->properties.sdma_engine_id =
            q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
    }

    retval = allocate_doorbell(qpd, q);
    if (retval)
        goto out_deallocate_sdma_queue;

    mqd = dqm->ops.get_mqd_manager(dqm,
            get_mqd_type_from_queue_type(q->properties.type));

    if (!mqd) {
        retval = -ENOMEM;
        goto out_deallocate_sdma_queue;
        goto out_deallocate_doorbell;
    }
    /*
     * Eviction state logic: we only mark active queues as evicted
@@ -1093,7 +1167,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
    retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
            &q->gart_mqd_addr, &q->properties);
    if (retval)
        goto out_deallocate_sdma_queue;
        goto out_deallocate_doorbell;

    list_add(&q->list, &qpd->queues_list);
    qpd->queue_count++;
@@ -1117,6 +1191,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
    mutex_unlock(&dqm->lock);
    return retval;

out_deallocate_doorbell:
    deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
    if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
        deallocate_sdma_queue(dqm, q->sdma_id);
@@ -1257,6 +1333,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
        goto failed;
    }

    deallocate_doorbell(qpd, q);

    if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
        dqm->sdma_queue_count--;
        deallocate_sdma_queue(dqm, q->sdma_id);
@@ -1308,7 +1386,10 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
        void __user *alternate_aperture_base,
        uint64_t alternate_aperture_size)
{
    bool retval;
    bool retval = true;

    if (!dqm->asic_ops.set_cache_memory_policy)
        return retval;

    mutex_lock(&dqm->lock);

@@ -1577,6 +1658,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
    case CHIP_POLARIS11:
        device_queue_manager_init_vi_tonga(&dqm->asic_ops);
        break;

    case CHIP_VEGA10:
    case CHIP_RAVEN:
        device_queue_manager_init_v9(&dqm->asic_ops);
        break;
    default:
        WARN(1, "Unexpected ASIC family %u",
            dev->device_info->asic_family);
@@ -1627,6 +1713,18 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
    int pipe, queue;
    int r = 0;

    r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
            KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs);
    if (!r) {
        seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n",
                KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
                KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
                KFD_CIK_HIQ_QUEUE);
        seq_reg_dump(m, dump, n_regs);

        kfree(dump);
    }

    for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
        int pipe_offset = pipe * get_queues_per_pipe(dqm);

@@ -200,6 +200,8 @@ void device_queue_manager_init_vi(
        struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_vi_tonga(
        struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_v9(
        struct device_queue_manager_asic_ops *asic_ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
        struct qcm_process_device *qpd);
unsigned int get_queues_num(struct device_queue_manager *dqm);
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c (new file, 84 lines)
@@ -0,0 +1,84 @@
/*
 * Copyright 2016-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "kfd_device_queue_manager.h"
#include "vega10_enum.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "sdma0/sdma0_4_0_sh_mask.h"

static int update_qpd_v9(struct device_queue_manager *dqm,
            struct qcm_process_device *qpd);
static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
            struct qcm_process_device *qpd);

void device_queue_manager_init_v9(
    struct device_queue_manager_asic_ops *asic_ops)
{
    asic_ops->update_qpd = update_qpd_v9;
    asic_ops->init_sdma_vm = init_sdma_vm_v9;
}

static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
{
    uint32_t shared_base = pdd->lds_base >> 48;
    uint32_t private_base = pdd->scratch_base >> 48;

    return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) |
        private_base;
}

static int update_qpd_v9(struct device_queue_manager *dqm,
            struct qcm_process_device *qpd)
{
    struct kfd_process_device *pdd;

    pdd = qpd_to_pdd(qpd);

    /* check if sh_mem_config register already configured */
    if (qpd->sh_mem_config == 0) {
        qpd->sh_mem_config =
            SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
            SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
        if (vega10_noretry &&
            !dqm->dev->device_info->needs_iommu_device)
            qpd->sh_mem_config |=
                1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;

        qpd->sh_mem_ape1_limit = 0;
        qpd->sh_mem_ape1_base = 0;
    }

    qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);

    pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);

    return 0;
}

static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
            struct qcm_process_device *qpd)
{
    /* Not needed on SDMAv4 any more */
    q->properties.sdma_vm_addr = 0;
}
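compute_sh_mem_bases_64bit() packs the top 16 bits of the two aperture bases into one 32-bit register. A hedged arithmetic check; the shift is assumed to mirror SH_MEM_BASES__SHARED_BASE__SHIFT from the vega10 headers (SHARED_BASE in the high half, PRIVATE_BASE in the low half):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define SHARED_BASE_SHIFT 16 /* assumed SH_MEM_BASES__SHARED_BASE__SHIFT */

static uint32_t compute_sh_mem_bases_64bit(uint64_t lds_base,
        uint64_t scratch_base)
{
    uint32_t shared_base = lds_base >> 48;     /* top 16 bits of aperture */
    uint32_t private_base = scratch_base >> 48;

    return (shared_base << SHARED_BASE_SHIFT) | private_base;
}

int main(void)
{
    /* Illustrative aperture bases whose top 16 bits differ. */
    uint64_t lds_base     = 0x0001000000000000ULL;
    uint64_t scratch_base = 0x0002000000000000ULL;
    uint32_t bases = compute_sh_mem_bases_64bit(lds_base, scratch_base);

    assert(bases == 0x00010002u);
    printf("SH_MEM_BASES = 0x%08X\n", bases);
    return 0;
}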
@@ -33,7 +33,6 @@

static DEFINE_IDA(doorbell_ida);
static unsigned int max_doorbell_slices;
#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4

/*
 * Each device exposes a doorbell aperture, a PCI MMIO aperture that
@@ -50,9 +49,9 @@ static unsigned int max_doorbell_slices;
 */

/* # of doorbell bytes allocated for each process. */
static inline size_t doorbell_process_allocation(void)
size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
{
    return roundup(KFD_SIZE_OF_DOORBELL_IN_BYTES *
    return roundup(kfd->device_info->doorbell_size *
            KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
            PAGE_SIZE);
}
@@ -72,16 +71,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd)

    doorbell_start_offset =
        roundup(kfd->shared_resources.doorbell_start_offset,
            doorbell_process_allocation());
            kfd_doorbell_process_slice(kfd));

    doorbell_aperture_size =
        rounddown(kfd->shared_resources.doorbell_aperture_size,
            doorbell_process_allocation());
            kfd_doorbell_process_slice(kfd));

    if (doorbell_aperture_size > doorbell_start_offset)
        doorbell_process_limit =
            (doorbell_aperture_size - doorbell_start_offset) /
            doorbell_process_allocation();
            kfd_doorbell_process_slice(kfd);
    else
        return -ENOSPC;

@@ -95,7 +94,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
    kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);

    kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
            doorbell_process_allocation());
            kfd_doorbell_process_slice(kfd));

    if (!kfd->doorbell_kernel_ptr)
        return -ENOMEM;
@@ -127,21 +126,16 @@ void kfd_doorbell_fini(struct kfd_dev *kfd)
    iounmap(kfd->doorbell_kernel_ptr);
}

int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
            struct vm_area_struct *vma)
{
    phys_addr_t address;
    struct kfd_dev *dev;

    /*
     * For simplicitly we only allow mapping of the entire doorbell
     * allocation of a single device & process.
     */
    if (vma->vm_end - vma->vm_start != doorbell_process_allocation())
        return -EINVAL;

    /* Find kfd device according to gpu id */
    dev = kfd_device_by_id(vma->vm_pgoff);
    if (!dev)
    if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
        return -EINVAL;

    /* Calculate physical address of doorbell */
@@ -158,19 +152,19 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
            " vm_flags == 0x%04lX\n"
            " size == 0x%04lX\n",
            (unsigned long long) vma->vm_start, address, vma->vm_flags,
            doorbell_process_allocation());
            kfd_doorbell_process_slice(dev));


    return io_remap_pfn_range(vma,
            vma->vm_start,
            address >> PAGE_SHIFT,
            doorbell_process_allocation(),
            kfd_doorbell_process_slice(dev),
            vma->vm_page_prot);
}


/* get kernel iomem pointer for a doorbell */
u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
            unsigned int *doorbell_off)
{
    u32 inx;
@@ -185,6 +179,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
    if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
        return NULL;

    inx *= kfd->device_info->doorbell_size / sizeof(u32);

    /*
     * Calculating the kernel doorbell offset using the first
     * doorbell page.
@@ -210,7 +206,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
    mutex_unlock(&kfd->doorbell_mutex);
}

inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
void write_kernel_doorbell(void __iomem *db, u32 value)
{
    if (db) {
        writel(value, db);
@@ -218,30 +214,37 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
    }
}

/*
 * queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1
 * to doorbells with the process's doorbell page
 */
unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
void write_kernel_doorbell64(void __iomem *db, u64 value)
{
    if (db) {
        WARN(((unsigned long)db & 7) != 0,
            "Unaligned 64-bit doorbell");
        writeq(value, (u64 __iomem *)db);
        pr_debug("writing %llu to doorbell address %p\n", value, db);
    }
}

unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
            struct kfd_process *process,
            unsigned int queue_id)
            unsigned int doorbell_id)
{
    /*
     * doorbell_id_offset accounts for doorbells taken by KGD.
     * index * doorbell_process_allocation/sizeof(u32) adjusts to
     * the process's doorbells.
     * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
     * the process's doorbells. The offset returned is in dword
     * units regardless of the ASIC-dependent doorbell size.
     */
    return kfd->doorbell_id_offset +
        process->doorbell_index
        * doorbell_process_allocation() / sizeof(u32) +
        queue_id;
        * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
        doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
}
|
||||
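The dword-unit convention documented above can be sanity-checked with a small sketch; every number below is an illustrative assumption, not a value from the patch:

    #include <stdio.h>

    int main(void)
    {
        unsigned id_offset     = 1024;     /* doorbells reserved for KGD, in dwords (assumed) */
        unsigned process_index = 2;        /* this process's doorbell page pair (assumed) */
        unsigned process_slice = 8 * 1024; /* bytes per process: 1024 doorbells * 8 bytes */
        unsigned doorbell_size = 8;        /* bytes, 64-bit doorbells on SOC15 */
        unsigned doorbell_id   = 5;

        /* Same arithmetic as kfd_doorbell_id_to_offset(), all in dword units */
        unsigned off = id_offset
                     + process_index * process_slice / 4
                     + doorbell_id * doorbell_size / 4;
        printf("doorbell offset = %u dwords\n", off); /* 1024 + 4096 + 10 = 5130 */
        return 0;
    }
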
uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
{
	uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
				kfd->shared_resources.doorbell_start_offset) /
				doorbell_process_allocation() + 1;
				kfd_doorbell_process_slice(kfd) + 1;

	return num_of_elems;

@ -251,7 +254,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
					struct kfd_process *process)
{
	return dev->doorbell_base +
		process->doorbell_index * doorbell_process_allocation();
		process->doorbell_index * kfd_doorbell_process_slice(dev);
}

int kfd_alloc_process_doorbells(struct kfd_process *process)

@ -345,7 +345,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
	case KFD_EVENT_TYPE_DEBUG:
		ret = create_signal_event(devkfd, p, ev);
		if (!ret) {
			*event_page_offset = KFD_MMAP_EVENTS_MASK;
			*event_page_offset = KFD_MMAP_TYPE_EVENTS;
			*event_page_offset <<= PAGE_SHIFT;
			*event_slot_index = ev->event_id;
		}

@ -496,7 +496,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
		pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
				     partial_id, valid_id_bits);

	if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
	if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT / 64) {
		/* With relatively few events, it's faster to
		 * iterate over the event IDR
		 */

@ -275,23 +275,35 @@
 * for FLAT_* / S_LOAD operations.
 */

#define MAKE_GPUVM_APP_BASE(gpu_num) \
#define MAKE_GPUVM_APP_BASE_VI(gpu_num) \
	(((uint64_t)(gpu_num) << 61) + 0x1000000000000L)

#define MAKE_GPUVM_APP_LIMIT(base, size) \
	(((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1)

#define MAKE_SCRATCH_APP_BASE() \
#define MAKE_SCRATCH_APP_BASE_VI() \
	(((uint64_t)(0x1UL) << 61) + 0x100000000L)

#define MAKE_SCRATCH_APP_LIMIT(base) \
	(((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)

#define MAKE_LDS_APP_BASE() \
#define MAKE_LDS_APP_BASE_VI() \
	(((uint64_t)(0x1UL) << 61) + 0x0)
#define MAKE_LDS_APP_LIMIT(base) \
	(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)

/* On GFXv9 the LDS and scratch apertures are programmed independently
 * using the high 16 bits of the 64-bit virtual address. They must be
 * in the hole, which will be the case as long as the high 16 bits are
 * not 0.
 *
 * The aperture sizes are still 4GB implicitly.
 *
 * A GPUVM aperture is not applicable on GFXv9.
 */
#define MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48)
#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48)
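A quick sketch confirming that both GFXv9 bases have non-zero high 16 bits and therefore land in the non-canonical hole, as the comment above requires:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t lds     = (uint64_t)0x1UL << 48; /* 0x0001000000000000 */
        uint64_t scratch = (uint64_t)0x2UL << 48; /* 0x0002000000000000 */

        /* With 48-bit canonical VAs, any address whose high 16 bits are
         * non-zero (and not all-ones sign extension) sits in the hole;
         * both aperture bases qualify. */
        printf("LDS base     0x%016llx\n", (unsigned long long)lds);
        printf("scratch base 0x%016llx\n", (unsigned long long)scratch);
        return 0;
    }
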
/* User mode manages most of the SVM aperture address space. The low
 * 16MB are reserved for kernel use (CWSR trap handler and kernel IB
 * for now).

@ -300,6 +312,55 @@
#define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE)
#define SVM_IB_BASE (SVM_CWSR_BASE - PAGE_SIZE)

static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
{
	/*
	 * The node id cannot be 0 - the three MSBs of
	 * the aperture shouldn't be 0
	 */
	pdd->lds_base = MAKE_LDS_APP_BASE_VI();
	pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);

	if (!pdd->dev->device_info->needs_iommu_device) {
		/* dGPUs: SVM aperture starting at 0
		 * with small reserved space for kernel.
		 * Set them to CANONICAL addresses.
		 */
		pdd->gpuvm_base = SVM_USER_BASE;
		pdd->gpuvm_limit =
			pdd->dev->shared_resources.gpuvm_size - 1;
	} else {
		/* Set them to non-CANONICAL addresses; no SVM is
		 * allocated.
		 */
		pdd->gpuvm_base = MAKE_GPUVM_APP_BASE_VI(id + 1);
		pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base,
				pdd->dev->shared_resources.gpuvm_size);
	}

	pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
	pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
}

static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
{
	pdd->lds_base = MAKE_LDS_APP_BASE_V9();
	pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);

	/* Raven needs SVM to support graphic handle, etc. Leave the small
	 * reserved space before SVM on Raven as well, even though we don't
	 * have to.
	 * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they
	 * are used in Thunk to reserve SVM.
	 */
	pdd->gpuvm_base = SVM_USER_BASE;
	pdd->gpuvm_limit =
		pdd->dev->shared_resources.gpuvm_size - 1;

	pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
	pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
}

int kfd_init_apertures(struct kfd_process *process)
{
	uint8_t id = 0;

@ -307,9 +368,7 @@ int kfd_init_apertures(struct kfd_process *process)
	struct kfd_process_device *pdd;

	/* Iterating over all devices */
	while (kfd_topology_enum_kfd_devices(id, &dev) == 0 &&
		id < NUM_OF_SUPPORTED_GPUS) {

	while (kfd_topology_enum_kfd_devices(id, &dev) == 0) {
		if (!dev) {
			id++; /* Skip non GPU devices */
			continue;

@ -318,7 +377,7 @@ int kfd_init_apertures(struct kfd_process *process)
		pdd = kfd_create_process_device_data(dev, process);
		if (!pdd) {
			pr_err("Failed to create process device data\n");
			return -1;
			return -ENOMEM;
		}
		/*
		 * For 64 bit process apertures will be statically reserved in

@ -330,32 +389,30 @@ int kfd_init_apertures(struct kfd_process *process)
			pdd->gpuvm_base = pdd->gpuvm_limit = 0;
			pdd->scratch_base = pdd->scratch_limit = 0;
		} else {
			/* Same LDS and scratch apertures can be used
			 * on all GPUs. This allows using more dGPUs
			 * than placement options for apertures.
			 */
			pdd->lds_base = MAKE_LDS_APP_BASE();
			pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
			switch (dev->device_info->asic_family) {
			case CHIP_KAVERI:
			case CHIP_HAWAII:
			case CHIP_CARRIZO:
			case CHIP_TONGA:
			case CHIP_FIJI:
			case CHIP_POLARIS10:
			case CHIP_POLARIS11:
				kfd_init_apertures_vi(pdd, id);
				break;
			case CHIP_VEGA10:
			case CHIP_RAVEN:
				kfd_init_apertures_v9(pdd, id);
				break;
			default:
				WARN(1, "Unexpected ASIC family %u",
				     dev->device_info->asic_family);
				return -EINVAL;
			}

			pdd->scratch_base = MAKE_SCRATCH_APP_BASE();
			pdd->scratch_limit =
				MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);

			if (dev->device_info->needs_iommu_device) {
				/* APUs: GPUVM aperture in
				 * non-canonical address space
			if (!dev->device_info->needs_iommu_device) {
				/* dGPUs: the reserved space for kernel
				 * before SVM
				 */
				pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
				pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(
					pdd->gpuvm_base,
					dev->shared_resources.gpuvm_size);
			} else {
				/* dGPUs: SVM aperture starting at 0
				 * with small reserved space for kernel
				 */
				pdd->gpuvm_base = SVM_USER_BASE;
				pdd->gpuvm_limit =
					dev->shared_resources.gpuvm_size - 1;
				pdd->qpd.cwsr_base = SVM_CWSR_BASE;
				pdd->qpd.ib_base = SVM_IB_BASE;
			}

92	drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c	Normal file

@ -0,0 +1,92 @@
/*
 * Copyright 2016-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "kfd_priv.h"
#include "kfd_events.h"
#include "soc15_int.h"

static bool event_interrupt_isr_v9(struct kfd_dev *dev,
					const uint32_t *ih_ring_entry)
{
	uint16_t source_id, client_id, pasid, vmid;
	const uint32_t *data = ih_ring_entry;

	/* Only handle interrupts from KFD VMIDs */
	vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
	if (vmid < dev->vm_info.first_vmid_kfd ||
	    vmid > dev->vm_info.last_vmid_kfd)
		return 0;

	/* If there is no valid PASID, it's likely a firmware bug */
	pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
	if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
		return 0;

	source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
	client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);

	pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n",
		 client_id, source_id, pasid);
	pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
		 data[0], data[1], data[2], data[3],
		 data[4], data[5], data[6], data[7]);

	/* Interrupt types we care about: various signals and faults.
	 * They will be forwarded to a work queue (see below).
	 */
	return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
		source_id == SOC15_INTSRC_SDMA_TRAP ||
		source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
		source_id == SOC15_INTSRC_CP_BAD_OPCODE;
}

static void event_interrupt_wq_v9(struct kfd_dev *dev,
					const uint32_t *ih_ring_entry)
{
	uint16_t source_id, client_id, pasid, vmid;
	uint32_t context_id;

	source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
	client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
	pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
	vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
	context_id = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);

	if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
		kfd_signal_event_interrupt(pasid, context_id, 32);
	else if (source_id == SOC15_INTSRC_SDMA_TRAP)
		kfd_signal_event_interrupt(pasid, context_id & 0xfffffff, 28);
	else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG)
		kfd_signal_event_interrupt(pasid, context_id & 0xffffff, 24);
	else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
		kfd_signal_hw_exception_event(pasid);
	else if (client_id == SOC15_IH_CLIENTID_VMC ||
		 client_id == SOC15_IH_CLIENTID_UTCL2) {
		/* TODO */
	}
}

const struct kfd_event_interrupt_class event_interrupt_class_v9 = {
	.interrupt_isr = event_interrupt_isr_v9,
	.interrupt_wq = event_interrupt_wq_v9,
};

@ -139,10 +139,12 @@ static void interrupt_wq(struct work_struct *work)
{
	struct kfd_dev *dev = container_of(work, struct kfd_dev,
						interrupt_work);
	uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];

	uint32_t ih_ring_entry[DIV_ROUND_UP(
				dev->device_info->ih_ring_entry_size,
				sizeof(uint32_t))];
	if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) {
		dev_err_once(kfd_chardev(), "Ring entry too small\n");
		return;
	}

	while (dequeue_ih_ring_entry(dev, ih_ring_entry))
		dev->device_info->event_interrupt_class->interrupt_wq(dev,

@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
	kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
	kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;

	retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel),
	retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
					&kq->wptr_mem);

	if (retval != 0)

@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
	size_t available_size;
	size_t queue_size_dwords;
	uint32_t wptr, rptr;
	uint64_t wptr64;
	unsigned int *queue_address;

	/* When rptr == wptr, the buffer is empty.

@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
	 * the opposite. So we can only use up to queue_size_dwords - 1 dwords.
	 */
	rptr = *kq->rptr_kernel;
	wptr = *kq->wptr_kernel;
	wptr = kq->pending_wptr;
	wptr64 = kq->pending_wptr64;
	queue_address = (unsigned int *)kq->pq_kernel_addr;
	queue_size_dwords = kq->queue->properties.queue_size / 4;

@ -232,29 +234,33 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
		 * make sure calling functions know
		 * acquire_packet_buffer() failed
		 */
		*buffer_ptr = NULL;
		return -ENOMEM;
		goto err_no_space;
	}

	if (wptr + packet_size_in_dwords >= queue_size_dwords) {
		/* make sure after rolling back to position 0, there is
		 * still enough space.
		 */
		if (packet_size_in_dwords >= rptr) {
			*buffer_ptr = NULL;
			return -ENOMEM;
		}
		if (packet_size_in_dwords >= rptr)
			goto err_no_space;

		/* fill nops, roll back and start at position 0 */
		while (wptr > 0) {
			queue_address[wptr] = kq->nop_packet;
			wptr = (wptr + 1) % queue_size_dwords;
			wptr64++;
		}
	}

	*buffer_ptr = &queue_address[wptr];
	kq->pending_wptr = wptr + packet_size_in_dwords;
	kq->pending_wptr64 = wptr64 + packet_size_in_dwords;

	return 0;

err_no_space:
	*buffer_ptr = NULL;
	return -ENOMEM;
}

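A toy model of the free-space rule described in the comments above (rptr == wptr means empty, so only queue_size_dwords - 1 dwords are usable, and a wrap is legal only if the packet also fits below rptr); the concrete numbers are assumptions for illustration:

    #include <stdio.h>

    /* rptr == wptr means "empty", so at most size - 1 dwords are occupied */
    static int fits(unsigned rptr, unsigned wptr, unsigned need, unsigned size)
    {
        unsigned free_dw = (rptr > wptr) ? rptr - wptr - 1
                                         : size - 1 - wptr + rptr;
        return need <= free_dw;
    }

    int main(void)
    {
        /* Illustrative values: 256-dword ring, reader at 10, writer at 250 */
        unsigned size = 256, rptr = 10, wptr = 250, need = 8;

        if (!fits(rptr, wptr, need, size))
            printf("no space\n");
        else if (wptr + need >= size)
            /* Wrapping is only legal if the packet also fits below rptr
             * (need < rptr), mirroring the rollback check in the patch. */
            printf(need >= rptr ? "no space after wrap\n"
                                : "fill NOPs to end, restart at 0\n");
        return 0;
    }
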
static void submit_packet(struct kernel_queue *kq)

@ -270,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq)
	pr_debug("\n");
#endif

	*kq->wptr_kernel = kq->pending_wptr;
	write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
				kq->pending_wptr);
	kq->ops_asic_specific.submit_packet(kq);
}

static void rollback_packet(struct kernel_queue *kq)
{
	kq->pending_wptr = *kq->queue->properties.write_ptr;
	if (kq->dev->device_info->doorbell_size == 8) {
		kq->pending_wptr64 = *kq->wptr64_kernel;
		kq->pending_wptr = *kq->wptr_kernel %
			(kq->queue->properties.queue_size / 4);
	} else {
		kq->pending_wptr = *kq->wptr_kernel;
	}
}

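The relationship maintained here between the monotonically increasing 64-bit write pointer and the wrapped 32-bit ring index can be restated in a few lines; the queue size and pointer value below are assumed for illustration:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t wptr64 = 0x100403;          /* total dwords ever written (assumed) */
        uint32_t queue_size_dw = 0x1000 / 4; /* 0x400-dword ring (assumed) */

        /* The 64-bit doorbell carries wptr64; the in-ring dword index is
         * its remainder, as in the rollback path above. */
        printf("ring index = 0x%llx\n",
               (unsigned long long)(wptr64 % queue_size_dw)); /* 0x3 */
        return 0;
    }
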
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,

@ -308,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
	case CHIP_HAWAII:
		kernel_queue_init_cik(&kq->ops_asic_specific);
		break;

	case CHIP_VEGA10:
	case CHIP_RAVEN:
		kernel_queue_init_v9(&kq->ops_asic_specific);
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->device_info->asic_family);

@ -72,6 +72,7 @@ struct kernel_queue {
	struct kfd_dev *dev;
	struct mqd_manager *mqd;
	struct queue *queue;
	uint64_t pending_wptr64;
	uint32_t pending_wptr;
	unsigned int nop_packet;

@ -79,7 +80,10 @@ struct kernel_queue {
	uint32_t *rptr_kernel;
	uint64_t rptr_gpu_addr;
	struct kfd_mem_obj *wptr_mem;
	uint32_t *wptr_kernel;
	union {
		uint64_t *wptr64_kernel;
		uint32_t *wptr_kernel;
	};
	uint64_t wptr_gpu_addr;
	struct kfd_mem_obj *pq;
	uint64_t pq_gpu_addr;

@ -97,5 +101,6 @@ struct kernel_queue {

void kernel_queue_init_cik(struct kernel_queue_ops *ops);
void kernel_queue_init_vi(struct kernel_queue_ops *ops);
void kernel_queue_init_v9(struct kernel_queue_ops *ops);

#endif /* KFD_KERNEL_QUEUE_H_ */

@ -26,11 +26,13 @@
static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
			enum kfd_queue_type type, unsigned int queue_size);
static void uninitialize_cik(struct kernel_queue *kq);
static void submit_packet_cik(struct kernel_queue *kq);

void kernel_queue_init_cik(struct kernel_queue_ops *ops)
{
	ops->initialize = initialize_cik;
	ops->uninitialize = uninitialize_cik;
	ops->submit_packet = submit_packet_cik;
}

static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,

@ -42,3 +44,10 @@ static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
static void uninitialize_cik(struct kernel_queue *kq)
{
}

static void submit_packet_cik(struct kernel_queue *kq)
{
	*kq->wptr_kernel = kq->pending_wptr;
	write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
				kq->pending_wptr);
}

340	drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c	Normal file

@ -0,0 +1,340 @@
/*
 * Copyright 2016-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "kfd_kernel_queue.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_ai.h"
#include "kfd_pm4_opcodes.h"

static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
			enum kfd_queue_type type, unsigned int queue_size);
static void uninitialize_v9(struct kernel_queue *kq);
static void submit_packet_v9(struct kernel_queue *kq);

void kernel_queue_init_v9(struct kernel_queue_ops *ops)
{
	ops->initialize = initialize_v9;
	ops->uninitialize = uninitialize_v9;
	ops->submit_packet = submit_packet_v9;
}

static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
			enum kfd_queue_type type, unsigned int queue_size)
{
	int retval;

	retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
	if (retval)
		return false;

	kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
	kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;

	memset(kq->eop_kernel_addr, 0, PAGE_SIZE);

	return true;
}

static void uninitialize_v9(struct kernel_queue *kq)
{
	kfd_gtt_sa_free(kq->dev, kq->eop_mem);
}

static void submit_packet_v9(struct kernel_queue *kq)
{
	*kq->wptr64_kernel = kq->pending_wptr64;
	write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
				kq->pending_wptr64);
}

static int pm_map_process_v9(struct packet_manager *pm,
		uint32_t *buffer, struct qcm_process_device *qpd)
{
	struct pm4_mes_map_process *packet;
	uint64_t vm_page_table_base_addr =
		(uint64_t)(qpd->page_table_base) << 12;

	packet = (struct pm4_mes_map_process *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_map_process));

	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
					sizeof(struct pm4_mes_map_process));
	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
	packet->bitfields2.process_quantum = 1;
	packet->bitfields2.pasid = qpd->pqm->process->pasid;
	packet->bitfields14.gds_size = qpd->gds_size;
	packet->bitfields14.num_gws = qpd->num_gws;
	packet->bitfields14.num_oac = qpd->num_oac;
	packet->bitfields14.sdma_enable = 1;
	packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;

	packet->sh_mem_config = qpd->sh_mem_config;
	packet->sh_mem_bases = qpd->sh_mem_bases;
	packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
	packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
	packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
	packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);

	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);

	packet->vm_context_page_table_base_addr_lo32 =
			lower_32_bits(vm_page_table_base_addr);
	packet->vm_context_page_table_base_addr_hi32 =
			upper_32_bits(vm_page_table_base_addr);

	return 0;
}

static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
			uint64_t ib, size_t ib_size_in_dwords, bool chain)
{
	struct pm4_mes_runlist *packet;

	int concurrent_proc_cnt = 0;
	struct kfd_dev *kfd = pm->dqm->dev;

	/* Determine the number of processes to map together to HW:
	 * it cannot exceed the number of VMIDs available to the
	 * scheduler, and it is determined by the smaller of the number
	 * of processes in the runlist and kfd module parameter
	 * hws_max_conc_proc.
	 * Note: the arbitration between the number of VMIDs and
	 * hws_max_conc_proc has been done in
	 * kgd2kfd_device_init().
	 */
	concurrent_proc_cnt = min(pm->dqm->processes_count,
			kfd->max_proc_per_quantum);

	packet = (struct pm4_mes_runlist *)buffer;

	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
	packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
					sizeof(struct pm4_mes_runlist));

	packet->bitfields4.ib_size = ib_size_in_dwords;
	packet->bitfields4.chain = chain ? 1 : 0;
	packet->bitfields4.offload_polling = 0;
	packet->bitfields4.valid = 1;
	packet->bitfields4.process_cnt = concurrent_proc_cnt;
	packet->ordinal2 = lower_32_bits(ib);
	packet->ib_base_hi = upper_32_bits(ib);

	return 0;
}

static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
		struct queue *q, bool is_static)
{
	struct pm4_mes_map_queues *packet;
	bool use_static = is_static;

	packet = (struct pm4_mes_map_queues *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));

	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
					sizeof(struct pm4_mes_map_queues));
	packet->bitfields2.alloc_format =
		alloc_format__mes_map_queues__one_per_pipe_vi;
	packet->bitfields2.num_queues = 1;
	packet->bitfields2.queue_sel =
		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;

	packet->bitfields2.engine_sel =
		engine_sel__mes_map_queues__compute_vi;
	packet->bitfields2.queue_type =
		queue_type__mes_map_queues__normal_compute_vi;

	switch (q->properties.type) {
	case KFD_QUEUE_TYPE_COMPUTE:
		if (use_static)
			packet->bitfields2.queue_type =
		queue_type__mes_map_queues__normal_latency_static_queue_vi;
		break;
	case KFD_QUEUE_TYPE_DIQ:
		packet->bitfields2.queue_type =
			queue_type__mes_map_queues__debug_interface_queue_vi;
		break;
	case KFD_QUEUE_TYPE_SDMA:
		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
				engine_sel__mes_map_queues__sdma0_vi;
		use_static = false; /* no static queues under SDMA */
		break;
	default:
		WARN(1, "queue type %d", q->properties.type);
		return -EINVAL;
	}
	packet->bitfields3.doorbell_offset =
			q->properties.doorbell_off;

	packet->mqd_addr_lo =
			lower_32_bits(q->gart_mqd_addr);

	packet->mqd_addr_hi =
			upper_32_bits(q->gart_mqd_addr);

	packet->wptr_addr_lo =
			lower_32_bits((uint64_t)q->properties.write_ptr);

	packet->wptr_addr_hi =
			upper_32_bits((uint64_t)q->properties.write_ptr);

	return 0;
}

static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
			enum kfd_queue_type type,
			enum kfd_unmap_queues_filter filter,
			uint32_t filter_param, bool reset,
			unsigned int sdma_engine)
{
	struct pm4_mes_unmap_queues *packet;

	packet = (struct pm4_mes_unmap_queues *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));

	packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
					sizeof(struct pm4_mes_unmap_queues));
	switch (type) {
	case KFD_QUEUE_TYPE_COMPUTE:
	case KFD_QUEUE_TYPE_DIQ:
		packet->bitfields2.engine_sel =
			engine_sel__mes_unmap_queues__compute;
		break;
	case KFD_QUEUE_TYPE_SDMA:
		packet->bitfields2.engine_sel =
			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
		break;
	default:
		WARN(1, "queue type %d", type);
		return -EINVAL;
	}

	if (reset)
		packet->bitfields2.action =
			action__mes_unmap_queues__reset_queues;
	else
		packet->bitfields2.action =
			action__mes_unmap_queues__preempt_queues;

	switch (filter) {
	case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
		packet->bitfields2.num_queues = 1;
		packet->bitfields3b.doorbell_offset0 = filter_param;
		break;
	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
		packet->bitfields3a.pasid = filter_param;
		break;
	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__unmap_all_queues;
		break;
	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
		/* in this case, we do not preempt static queues */
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
		break;
	default:
		WARN(1, "filter %d", filter);
		return -EINVAL;
	}

	return 0;
}

static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
			uint64_t fence_address, uint32_t fence_value)
{
	struct pm4_mes_query_status *packet;

	packet = (struct pm4_mes_query_status *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_query_status));

	packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
					sizeof(struct pm4_mes_query_status));

	packet->bitfields2.context_id = 0;
	packet->bitfields2.interrupt_sel =
			interrupt_sel__mes_query_status__completion_status;
	packet->bitfields2.command =
			command__mes_query_status__fence_only_after_write_ack;

	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
	packet->data_hi = upper_32_bits((uint64_t)fence_value);
	packet->data_lo = lower_32_bits((uint64_t)fence_value);

	return 0;
}

static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
{
	struct pm4_mec_release_mem *packet;

	packet = (struct pm4_mec_release_mem *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mec_release_mem));

	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
					sizeof(struct pm4_mec_release_mem));

	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
	packet->bitfields2.tcl1_action_ena = 1;
	packet->bitfields2.tc_action_ena = 1;
	packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;

	packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
	packet->bitfields3.int_sel =
		int_sel__mec_release_mem__send_interrupt_after_write_confirm;

	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
	packet->address_hi = upper_32_bits(gpu_addr);

	packet->data_lo = 0;

	return 0;
}

const struct packet_manager_funcs kfd_v9_pm_funcs = {
	.map_process = pm_map_process_v9,
	.runlist = pm_runlist_v9,
	.set_resources = pm_set_resources_vi,
	.map_queues = pm_map_queues_v9,
	.unmap_queues = pm_unmap_queues_v9,
	.query_status = pm_query_status_v9,
	.release_mem = pm_release_mem_v9,
	.map_process_size = sizeof(struct pm4_mes_map_process),
	.runlist_size = sizeof(struct pm4_mes_runlist),
	.set_resources_size = sizeof(struct pm4_mes_set_resources),
	.map_queues_size = sizeof(struct pm4_mes_map_queues),
	.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
	.query_status_size = sizeof(struct pm4_mes_query_status),
	.release_mem_size = sizeof(struct pm4_mec_release_mem)
};

@ -22,15 +22,20 @@
 */

#include "kfd_kernel_queue.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_opcodes.h"

static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
			enum kfd_queue_type type, unsigned int queue_size);
static void uninitialize_vi(struct kernel_queue *kq);
static void submit_packet_vi(struct kernel_queue *kq);

void kernel_queue_init_vi(struct kernel_queue_ops *ops)
{
	ops->initialize = initialize_vi;
	ops->uninitialize = uninitialize_vi;
	ops->submit_packet = submit_packet_vi;
}

static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,

@ -54,3 +59,317 @@ static void uninitialize_vi(struct kernel_queue *kq)
{
	kfd_gtt_sa_free(kq->dev, kq->eop_mem);
}

static void submit_packet_vi(struct kernel_queue *kq)
{
	*kq->wptr_kernel = kq->pending_wptr;
	write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
				kq->pending_wptr);
}

unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
{
	union PM4_MES_TYPE_3_HEADER header;

	header.u32All = 0;
	header.opcode = opcode;
	header.count = packet_size / 4 - 2;
	header.type = PM4_TYPE_3;

	return header.u32All;
}
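A worked instance of the header arithmetic above: the PM4 type-3 count field is the total packet size in dwords minus the header dword, minus one. The 40-byte packet size is an arbitrary example:

    #include <stdio.h>

    int main(void)
    {
        unsigned packet_size = 40;            /* bytes; 10 dwords, illustrative */
        unsigned count = packet_size / 4 - 2; /* 10 total - 1 header - 1 = 8 */

        printf("PM4 count field = %u\n", count);
        return 0;
    }
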
static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
			struct qcm_process_device *qpd)
{
	struct pm4_mes_map_process *packet;

	packet = (struct pm4_mes_map_process *)buffer;

	memset(buffer, 0, sizeof(struct pm4_mes_map_process));

	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
					sizeof(struct pm4_mes_map_process));
	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
	packet->bitfields2.process_quantum = 1;
	packet->bitfields2.pasid = qpd->pqm->process->pasid;
	packet->bitfields3.page_table_base = qpd->page_table_base;
	packet->bitfields10.gds_size = qpd->gds_size;
	packet->bitfields10.num_gws = qpd->num_gws;
	packet->bitfields10.num_oac = qpd->num_oac;
	packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;

	packet->sh_mem_config = qpd->sh_mem_config;
	packet->sh_mem_bases = qpd->sh_mem_bases;
	packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
	packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;

	packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base;

	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);

	return 0;
}

static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer,
			uint64_t ib, size_t ib_size_in_dwords, bool chain)
{
	struct pm4_mes_runlist *packet;
	int concurrent_proc_cnt = 0;
	struct kfd_dev *kfd = pm->dqm->dev;

	if (WARN_ON(!ib))
		return -EFAULT;

	/* Determine the number of processes to map together to HW:
	 * it cannot exceed the number of VMIDs available to the
	 * scheduler, and it is determined by the smaller of the number
	 * of processes in the runlist and kfd module parameter
	 * hws_max_conc_proc.
	 * Note: the arbitration between the number of VMIDs and
	 * hws_max_conc_proc has been done in
	 * kgd2kfd_device_init().
	 */
	concurrent_proc_cnt = min(pm->dqm->processes_count,
			kfd->max_proc_per_quantum);

	packet = (struct pm4_mes_runlist *)buffer;

	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
	packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
					sizeof(struct pm4_mes_runlist));

	packet->bitfields4.ib_size = ib_size_in_dwords;
	packet->bitfields4.chain = chain ? 1 : 0;
	packet->bitfields4.offload_polling = 0;
	packet->bitfields4.valid = 1;
	packet->bitfields4.process_cnt = concurrent_proc_cnt;
	packet->ordinal2 = lower_32_bits(ib);
	packet->bitfields3.ib_base_hi = upper_32_bits(ib);

	return 0;
}

int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
			struct scheduling_resources *res)
{
	struct pm4_mes_set_resources *packet;

	packet = (struct pm4_mes_set_resources *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_set_resources));

	packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES,
					sizeof(struct pm4_mes_set_resources));

	packet->bitfields2.queue_type =
			queue_type__mes_set_resources__hsa_interface_queue_hiq;
	packet->bitfields2.vmid_mask = res->vmid_mask;
	packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
	packet->bitfields7.oac_mask = res->oac_mask;
	packet->bitfields8.gds_heap_base = res->gds_heap_base;
	packet->bitfields8.gds_heap_size = res->gds_heap_size;

	packet->gws_mask_lo = lower_32_bits(res->gws_mask);
	packet->gws_mask_hi = upper_32_bits(res->gws_mask);

	packet->queue_mask_lo = lower_32_bits(res->queue_mask);
	packet->queue_mask_hi = upper_32_bits(res->queue_mask);

	return 0;
}

static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
		struct queue *q, bool is_static)
{
	struct pm4_mes_map_queues *packet;
	bool use_static = is_static;

	packet = (struct pm4_mes_map_queues *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));

	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
					sizeof(struct pm4_mes_map_queues));
	packet->bitfields2.alloc_format =
		alloc_format__mes_map_queues__one_per_pipe_vi;
	packet->bitfields2.num_queues = 1;
	packet->bitfields2.queue_sel =
		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;

	packet->bitfields2.engine_sel =
		engine_sel__mes_map_queues__compute_vi;
	packet->bitfields2.queue_type =
		queue_type__mes_map_queues__normal_compute_vi;

	switch (q->properties.type) {
	case KFD_QUEUE_TYPE_COMPUTE:
		if (use_static)
			packet->bitfields2.queue_type =
		queue_type__mes_map_queues__normal_latency_static_queue_vi;
		break;
	case KFD_QUEUE_TYPE_DIQ:
		packet->bitfields2.queue_type =
			queue_type__mes_map_queues__debug_interface_queue_vi;
		break;
	case KFD_QUEUE_TYPE_SDMA:
		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
				engine_sel__mes_map_queues__sdma0_vi;
		use_static = false; /* no static queues under SDMA */
		break;
	default:
		WARN(1, "queue type %d", q->properties.type);
		return -EINVAL;
	}
	packet->bitfields3.doorbell_offset =
			q->properties.doorbell_off;

	packet->mqd_addr_lo =
			lower_32_bits(q->gart_mqd_addr);

	packet->mqd_addr_hi =
			upper_32_bits(q->gart_mqd_addr);

	packet->wptr_addr_lo =
			lower_32_bits((uint64_t)q->properties.write_ptr);

	packet->wptr_addr_hi =
			upper_32_bits((uint64_t)q->properties.write_ptr);

	return 0;
}

static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
			enum kfd_queue_type type,
			enum kfd_unmap_queues_filter filter,
			uint32_t filter_param, bool reset,
			unsigned int sdma_engine)
{
	struct pm4_mes_unmap_queues *packet;

	packet = (struct pm4_mes_unmap_queues *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));

	packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
					sizeof(struct pm4_mes_unmap_queues));
	switch (type) {
	case KFD_QUEUE_TYPE_COMPUTE:
	case KFD_QUEUE_TYPE_DIQ:
		packet->bitfields2.engine_sel =
			engine_sel__mes_unmap_queues__compute;
		break;
	case KFD_QUEUE_TYPE_SDMA:
		packet->bitfields2.engine_sel =
			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
		break;
	default:
		WARN(1, "queue type %d", type);
		return -EINVAL;
	}

	if (reset)
		packet->bitfields2.action =
			action__mes_unmap_queues__reset_queues;
	else
		packet->bitfields2.action =
			action__mes_unmap_queues__preempt_queues;

	switch (filter) {
	case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
		packet->bitfields2.num_queues = 1;
		packet->bitfields3b.doorbell_offset0 = filter_param;
		break;
	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
		packet->bitfields3a.pasid = filter_param;
		break;
	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__unmap_all_queues;
		break;
	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
		/* in this case, we do not preempt static queues */
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
		break;
	default:
		WARN(1, "filter %d", filter);
		return -EINVAL;
	}

	return 0;
}

static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer,
			uint64_t fence_address, uint32_t fence_value)
{
	struct pm4_mes_query_status *packet;

	packet = (struct pm4_mes_query_status *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_query_status));

	packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
					sizeof(struct pm4_mes_query_status));

	packet->bitfields2.context_id = 0;
	packet->bitfields2.interrupt_sel =
			interrupt_sel__mes_query_status__completion_status;
	packet->bitfields2.command =
			command__mes_query_status__fence_only_after_write_ack;

	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
	packet->data_hi = upper_32_bits((uint64_t)fence_value);
	packet->data_lo = lower_32_bits((uint64_t)fence_value);

	return 0;
}

static int pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer)
{
	struct pm4_mec_release_mem *packet;

	packet = (struct pm4_mec_release_mem *)buffer;
	memset(buffer, 0, sizeof(*packet));

	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
						    sizeof(*packet));

	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
	packet->bitfields2.tcl1_action_ena = 1;
	packet->bitfields2.tc_action_ena = 1;
	packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	packet->bitfields2.atc = 0;

	packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
	packet->bitfields3.int_sel =
		int_sel___release_mem__send_interrupt_after_write_confirm;

	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
	packet->address_hi = upper_32_bits(gpu_addr);

	packet->data_lo = 0;

	return 0;
}

const struct packet_manager_funcs kfd_vi_pm_funcs = {
	.map_process = pm_map_process_vi,
	.runlist = pm_runlist_vi,
	.set_resources = pm_set_resources_vi,
	.map_queues = pm_map_queues_vi,
	.unmap_queues = pm_unmap_queues_vi,
	.query_status = pm_query_status_vi,
	.release_mem = pm_release_mem_vi,
	.map_process_size = sizeof(struct pm4_mes_map_process),
	.runlist_size = sizeof(struct pm4_mes_runlist),
	.set_resources_size = sizeof(struct pm4_mes_set_resources),
	.map_queues_size = sizeof(struct pm4_mes_map_queues),
	.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
	.query_status_size = sizeof(struct pm4_mes_query_status),
	.release_mem_size = sizeof(struct pm4_mec_release_mem)
};

@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = {
	.interrupt = kgd2kfd_interrupt,
	.suspend = kgd2kfd_suspend,
	.resume = kgd2kfd_resume,
	.quiesce_mm = kgd2kfd_quiesce_mm,
	.resume_mm = kgd2kfd_resume_mm,
	.schedule_evict_and_restore_process =
			kgd2kfd_schedule_evict_and_restore_process,
};

@ -81,6 +83,11 @@ module_param(ignore_crat, int, 0444);
MODULE_PARM_DESC(ignore_crat,
	"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");

int vega10_noretry;
module_param_named(noretry, vega10_noretry, int, 0644);
MODULE_PARM_DESC(noretry,
	"Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)");

static int amdkfd_init_completed;

int kgd2kfd_init(unsigned int interface_version,

@ -38,6 +38,9 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		return mqd_manager_init_vi_tonga(type, dev);
	case CHIP_VEGA10:
	case CHIP_RAVEN:
		return mqd_manager_init_v9(type, dev);
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->device_info->asic_family);

@ -79,10 +79,6 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
	m->cp_mqd_base_addr_lo = lower_32_bits(addr);
	m->cp_mqd_base_addr_hi = upper_32_bits(addr);

	m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN;
	/* Although WinKFD writes this, I suspect it should not be necessary */
	m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE;

	m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
			    QUANTUM_DURATION(10);

@ -412,7 +408,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
	mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
	if (!mqd)
		return NULL;

443	drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c	Normal file

@ -0,0 +1,443 @@
/*
 * Copyright 2016-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include "kfd_priv.h"
#include "kfd_mqd_manager.h"
#include "v9_structs.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "sdma0/sdma0_4_0_sh_mask.h"

static inline struct v9_mqd *get_mqd(void *mqd)
{
	return (struct v9_mqd *)mqd;
}

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}

static int init_mqd(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	int retval;
	uint64_t addr;
	struct v9_mqd *m;
	struct kfd_dev *kfd = mm->dev;

	/* From V9, for CWSR, the control stack is located on the next page
	 * boundary after the mqd, we will use the gtt allocation function
	 * instead of sub-allocation function.
	 */
	if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
		*mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
		if (!*mqd_mem_obj)
			return -ENOMEM;
		retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd,
			ALIGN(q->ctl_stack_size, PAGE_SIZE) +
				ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
			&((*mqd_mem_obj)->gtt_mem),
			&((*mqd_mem_obj)->gpu_addr),
			(void *)&((*mqd_mem_obj)->cpu_ptr));
	} else
		retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd),
				mqd_mem_obj);
	if (retval != 0)
		return -ENOMEM;

	m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
	addr = (*mqd_mem_obj)->gpu_addr;

	memset(m, 0, sizeof(struct v9_mqd));

	m->header = 0xC0310800;
	m->compute_pipelinestat_enable = 1;
	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;

	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;

	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;

	m->cp_mqd_base_addr_lo = lower_32_bits(addr);
	m->cp_mqd_base_addr_hi = upper_32_bits(addr);

	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
			10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;

	m->cp_hqd_pipe_priority = 1;
	m->cp_hqd_queue_priority = 15;

	if (q->format == KFD_QUEUE_FORMAT_AQL) {
		m->cp_hqd_aql_control =
			1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
	}

	if (q->tba_addr) {
		m->compute_pgm_rsrc2 |=
			(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
	}

	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
		m->cp_hqd_persistent_state |=
			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
		m->cp_hqd_ctx_save_base_addr_lo =
			lower_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_base_addr_hi =
			upper_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
	}

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;
	retval = mm->update_mqd(mm, m, q);

	return retval;
}

static int load_mqd(struct mqd_manager *mm, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			struct queue_properties *p, struct mm_struct *mms)
{
	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);

	return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
					  (uint32_t __user *)p->write_ptr,
					  wptr_shift, 0, mms);
}

static int update_mqd(struct mqd_manager *mm, void *mqd,
		      struct queue_properties *q)
{
	struct v9_mqd *m;

	m = get_mqd(mqd);

	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);

	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);

	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);

	m->cp_hqd_pq_doorbell_control =
		q->doorbell_off <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
			m->cp_hqd_pq_doorbell_control);

	m->cp_hqd_ib_control =
		3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
		1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT;

	/*
	 * HW does not clamp this field correctly. Maximum EOP queue size
	 * is constrained by per-SE EOP done signal count, which is 8-bit.
	 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
	 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
	 * is safe, giving a maximum field value of 0xA.
	 */
	m->cp_hqd_eop_control = min(0xA,
		order_base_2(q->eop_ring_buffer_size / 4) - 1);
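	/* Worked numbers for the clamp above (derived from the comment): an
	 * EOP entry is 8 dwords, since 0xFF entries = 0x7F8 dwords. A queue
	 * of 0x800 dwords is 0x100 entries, of which CP uses at most 0xFF,
	 * and order_base_2(0x800) - 1 = 11 - 1 = 0xA, hence min(0xA, ...).
	 */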
m->cp_hqd_eop_base_addr_lo =
|
||||
lower_32_bits(q->eop_ring_buffer_address >> 8);
|
||||
m->cp_hqd_eop_base_addr_hi =
|
||||
upper_32_bits(q->eop_ring_buffer_address >> 8);
|
||||
|
||||
m->cp_hqd_iq_timer = 0;
|
||||
|
||||
m->cp_hqd_vmid = q->vmid;
|
||||
|
||||
if (q->format == KFD_QUEUE_FORMAT_AQL) {
|
||||
m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
|
||||
2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
|
||||
1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT |
|
||||
1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT;
|
||||
m->cp_hqd_pq_doorbell_control |= 1 <<
|
||||
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
|
||||
}
|
||||
if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
|
||||
m->cp_hqd_ctx_save_control = 0;
|
||||
|
||||
q->is_active = (q->queue_size > 0 &&
|
||||
q->queue_address != 0 &&
|
||||
q->queue_percent > 0 &&
|
||||
!q->is_evicted);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int destroy_mqd(struct mqd_manager *mm, void *mqd,
			enum kfd_preempt_type type,
			unsigned int timeout, uint32_t pipe_id,
			uint32_t queue_id)
{
	return mm->dev->kfd2kgd->hqd_destroy
		(mm->dev->kgd, mqd, type, timeout,
		pipe_id, queue_id);
}

static void uninit_mqd(struct mqd_manager *mm, void *mqd,
			struct kfd_mem_obj *mqd_mem_obj)
{
	struct kfd_dev *kfd = mm->dev;

	if (mqd_mem_obj->gtt_mem) {
		kfd->kfd2kgd->free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
		kfree(mqd_mem_obj);
	} else {
		kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
	}
}

static bool is_occupied(struct mqd_manager *mm, void *mqd,
			uint64_t queue_address, uint32_t pipe_id,
			uint32_t queue_id)
{
	return mm->dev->kfd2kgd->hqd_is_occupied(
		mm->dev->kgd, queue_address,
		pipe_id, queue_id);
}

static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	struct v9_mqd *m;
	int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);

	if (retval != 0)
		return retval;

	m = get_mqd(*mqd);

	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;

	return retval;
}

static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q)
{
	struct v9_mqd *m;
	int retval = update_mqd(mm, mqd, q);

	if (retval != 0)
		return retval;

	/* TODO: what's the point? update_mqd already does this. */
	m = get_mqd(mqd);
	m->cp_hqd_vmid = q->vmid;
	return retval;
}

static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
		struct queue_properties *q)
{
	int retval;
	struct v9_sdma_mqd *m;

	retval = kfd_gtt_sa_allocate(mm->dev,
			sizeof(struct v9_sdma_mqd),
			mqd_mem_obj);

	if (retval != 0)
		return -ENOMEM;

	m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;

	memset(m, 0, sizeof(struct v9_sdma_mqd));

	*mqd = m;
	if (gart_addr)
		*gart_addr = (*mqd_mem_obj)->gpu_addr;

	retval = mm->update_mqd(mm, m, q);

	return retval;
}

static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
		struct kfd_mem_obj *mqd_mem_obj)
{
	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}

static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
		uint32_t pipe_id, uint32_t queue_id,
		struct queue_properties *p, struct mm_struct *mms)
{
	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
			(uint32_t __user *)p->write_ptr,
			mms);
}

#define SDMA_RLC_DUMMY_DEFAULT 0xf

static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
		struct queue_properties *q)
{
	struct v9_sdma_mqd *m;

	m = get_sdma_mqd(mqd);
	m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;

	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_doorbell_offset =
		q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;

	m->sdma_engine_id = q->sdma_engine_id;
	m->sdma_queue_id = q->sdma_queue_id;
	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;

	q->is_active = (q->queue_size > 0 &&
			q->queue_address != 0 &&
			q->queue_percent > 0 &&
			!q->is_evicted);

	return 0;
}

/*
 * preempt type here is ignored because there is only one way
 * to preempt sdma queue
 */
static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
		enum kfd_preempt_type type,
		unsigned int timeout, uint32_t pipe_id,
		uint32_t queue_id)
{
	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
}

static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
		uint64_t queue_address, uint32_t pipe_id,
		uint32_t queue_id)
{
	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
}

#if defined(CONFIG_DEBUG_FS)

static int debugfs_show_mqd(struct seq_file *m, void *data)
{
	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct v9_mqd), false);
	return 0;
}

static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
{
	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct v9_sdma_mqd), false);
	return 0;
}

#endif

struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
	if (!mqd)
		return NULL;

	mqd->dev = dev;

	switch (type) {
	case KFD_MQD_TYPE_CP:
	case KFD_MQD_TYPE_COMPUTE:
		mqd->init_mqd = init_mqd;
		mqd->uninit_mqd = uninit_mqd;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = destroy_mqd;
		mqd->is_occupied = is_occupied;
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		break;
	case KFD_MQD_TYPE_HIQ:
		mqd->init_mqd = init_mqd_hiq;
		mqd->uninit_mqd = uninit_mqd;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd_hiq;
		mqd->destroy_mqd = destroy_mqd;
		mqd->is_occupied = is_occupied;
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		break;
	case KFD_MQD_TYPE_SDMA:
		mqd->init_mqd = init_mqd_sdma;
		mqd->uninit_mqd = uninit_mqd_sdma;
		mqd->load_mqd = load_mqd_sdma;
		mqd->update_mqd = update_mqd_sdma;
		mqd->destroy_mqd = destroy_mqd_sdma;
		mqd->is_occupied = is_occupied_sdma;
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
		break;
	default:
		kfree(mqd);
		return NULL;
	}

	return mqd;
}
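The returned ops table is what keeps callers ASIC-agnostic. An illustrative fragment, not standalone code: 'dev' and 'q' are assumed to be a valid kfd_dev and queue_properties prepared elsewhere.

struct mqd_manager *mgr = mqd_manager_init_v9(KFD_MQD_TYPE_SDMA, dev);
void *mqd;
struct kfd_mem_obj *mqd_mem_obj;
uint64_t gart_addr;

if (mgr && !mgr->init_mqd(mgr, &mqd, &mqd_mem_obj, &gart_addr, &q)) {
	/* ...use the queue, then tear it down through the same table */
	mgr->destroy_mqd(mgr, mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, 0, 0, 0);
	mgr->uninit_mqd(mgr, mqd, mqd_mem_obj);
}
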
@ -394,7 +394,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
	mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
	if (!mqd)
		return NULL;

@ -26,8 +26,6 @@
#include "kfd_device_queue_manager.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_opcodes.h"

static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
			unsigned int buffer_size_bytes)
@ -39,18 +37,6 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
	*wptr = temp;
}

static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
{
	union PM4_MES_TYPE_3_HEADER header;

	header.u32All = 0;
	header.opcode = opcode;
	header.count = packet_size / 4 - 2;
	header.type = PM4_TYPE_3;

	return header.u32All;
}

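For reference, the count encoding that build_pm4_header() (removed above in favor of the shared pm_build_pm4_header()) writes is "body dwords minus one"; with a one-dword header that works out to total packet dwords minus two. A tiny standalone sketch of the arithmetic:

#include <stdio.h>
#include <stddef.h>

/* PM4 type-3 header count: total packet dwords - 2 */
static unsigned int pm4_count(size_t packet_size_bytes)
{
	return packet_size_bytes / 4 - 2;
}

int main(void)
{
	printf("count for a 24-byte packet: %u\n", pm4_count(24)); /* 4 */
	return 0;
}
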
static void pm_calc_rlib_size(struct packet_manager *pm,
				unsigned int *rlib_size,
				bool *over_subscription)
@ -80,9 +66,9 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
		pr_debug("Over subscribed runlist\n");
	}

	map_queue_size = sizeof(struct pm4_mes_map_queues);
	map_queue_size = pm->pmf->map_queues_size;
	/* calculate run list ib allocation size */
	*rlib_size = process_count * sizeof(struct pm4_mes_map_process) +
	*rlib_size = process_count * pm->pmf->map_process_size +
		     queue_count * map_queue_size;

	/*
@ -90,7 +76,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
	 * when over subscription
	 */
	if (*over_subscription)
		*rlib_size += sizeof(struct pm4_mes_runlist);
		*rlib_size += pm->pmf->runlist_size;

	pr_debug("runlist ib size %d\n", *rlib_size);
}
@ -108,12 +94,14 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,

	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);

	mutex_lock(&pm->lock);

	retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
					&pm->ib_buffer_obj);

	if (retval) {
		pr_err("Failed to allocate runlist IB\n");
		return retval;
		goto out;
	}

	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
@ -121,140 +109,12 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,

	memset(*rl_buffer, 0, *rl_buffer_size);
	pm->allocated = true;

out:
	mutex_unlock(&pm->lock);
	return retval;
}

static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
			uint64_t ib, size_t ib_size_in_dwords, bool chain)
{
	struct pm4_mes_runlist *packet;
	int concurrent_proc_cnt = 0;
	struct kfd_dev *kfd = pm->dqm->dev;

	if (WARN_ON(!ib))
		return -EFAULT;

	/* Determine the number of processes to map together to HW:
	 * it can not exceed the number of VMIDs available to the
	 * scheduler, and it is determined by the smaller of the number
	 * of processes in the runlist and kfd module parameter
	 * hws_max_conc_proc.
	 * Note: the arbitration between the number of VMIDs and
	 * hws_max_conc_proc has been done in
	 * kgd2kfd_device_init().
	 */
	concurrent_proc_cnt = min(pm->dqm->processes_count,
			kfd->max_proc_per_quantum);

	packet = (struct pm4_mes_runlist *)buffer;

	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
	packet->header.u32All = build_pm4_header(IT_RUN_LIST,
						sizeof(struct pm4_mes_runlist));

	packet->bitfields4.ib_size = ib_size_in_dwords;
	packet->bitfields4.chain = chain ? 1 : 0;
	packet->bitfields4.offload_polling = 0;
	packet->bitfields4.valid = 1;
	packet->bitfields4.process_cnt = concurrent_proc_cnt;
	packet->ordinal2 = lower_32_bits(ib);
	packet->bitfields3.ib_base_hi = upper_32_bits(ib);

	return 0;
}

static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
				struct qcm_process_device *qpd)
{
	struct pm4_mes_map_process *packet;

	packet = (struct pm4_mes_map_process *)buffer;

	memset(buffer, 0, sizeof(struct pm4_mes_map_process));

	packet->header.u32All = build_pm4_header(IT_MAP_PROCESS,
					sizeof(struct pm4_mes_map_process));
	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
	packet->bitfields2.process_quantum = 1;
	packet->bitfields2.pasid = qpd->pqm->process->pasid;
	packet->bitfields3.page_table_base = qpd->page_table_base;
	packet->bitfields10.gds_size = qpd->gds_size;
	packet->bitfields10.num_gws = qpd->num_gws;
	packet->bitfields10.num_oac = qpd->num_oac;
	packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;

	packet->sh_mem_config = qpd->sh_mem_config;
	packet->sh_mem_bases = qpd->sh_mem_bases;
	packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
	packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;

	packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base;

	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);

	return 0;
}

static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
		struct queue *q, bool is_static)
{
	struct pm4_mes_map_queues *packet;
	bool use_static = is_static;

	packet = (struct pm4_mes_map_queues *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));

	packet->header.u32All = build_pm4_header(IT_MAP_QUEUES,
						sizeof(struct pm4_mes_map_queues));
	packet->bitfields2.alloc_format =
		alloc_format__mes_map_queues__one_per_pipe_vi;
	packet->bitfields2.num_queues = 1;
	packet->bitfields2.queue_sel =
		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;

	packet->bitfields2.engine_sel =
		engine_sel__mes_map_queues__compute_vi;
	packet->bitfields2.queue_type =
		queue_type__mes_map_queues__normal_compute_vi;

	switch (q->properties.type) {
	case KFD_QUEUE_TYPE_COMPUTE:
		if (use_static)
			packet->bitfields2.queue_type =
		queue_type__mes_map_queues__normal_latency_static_queue_vi;
		break;
	case KFD_QUEUE_TYPE_DIQ:
		packet->bitfields2.queue_type =
			queue_type__mes_map_queues__debug_interface_queue_vi;
		break;
	case KFD_QUEUE_TYPE_SDMA:
		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
				engine_sel__mes_map_queues__sdma0_vi;
		use_static = false; /* no static queues under SDMA */
		break;
	default:
		WARN(1, "queue type %d", q->properties.type);
		return -EINVAL;
	}
	packet->bitfields3.doorbell_offset =
			q->properties.doorbell_off;

	packet->mqd_addr_lo =
			lower_32_bits(q->gart_mqd_addr);

	packet->mqd_addr_hi =
			upper_32_bits(q->gart_mqd_addr);

	packet->wptr_addr_lo =
			lower_32_bits((uint64_t)q->properties.write_ptr);

	packet->wptr_addr_hi =
			upper_32_bits((uint64_t)q->properties.write_ptr);

	return 0;
}

static int pm_create_runlist_ib(struct packet_manager *pm,
				struct list_head *queues,
				uint64_t *rl_gpu_addr,
@ -292,12 +152,12 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
			return -ENOMEM;
		}

		retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
		retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
		if (retval)
			return retval;

		proccesses_mapped++;
		inc_wptr(&rl_wptr, sizeof(struct pm4_mes_map_process),
		inc_wptr(&rl_wptr, pm->pmf->map_process_size,
				alloc_size_bytes);

		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
@ -307,7 +167,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
			pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
				kq->queue->queue, qpd->is_debug);

			retval = pm_create_map_queue(pm,
			retval = pm->pmf->map_queues(pm,
						&rl_buffer[rl_wptr],
						kq->queue,
						qpd->is_debug);
@ -315,7 +175,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
				return retval;

			inc_wptr(&rl_wptr,
				sizeof(struct pm4_mes_map_queues),
				pm->pmf->map_queues_size,
				alloc_size_bytes);
		}

@ -326,7 +186,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
			pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
				q->queue, qpd->is_debug);

			retval = pm_create_map_queue(pm,
			retval = pm->pmf->map_queues(pm,
						&rl_buffer[rl_wptr],
						q,
						qpd->is_debug);
@ -335,7 +195,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
				return retval;

			inc_wptr(&rl_wptr,
				sizeof(struct pm4_mes_map_queues),
				pm->pmf->map_queues_size,
				alloc_size_bytes);
		}
	}
@ -343,7 +203,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
	pr_debug("Finished map process and queues to runlist\n");

	if (is_over_subscription)
		retval = pm_create_runlist(pm, &rl_buffer[rl_wptr],
		retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
				*rl_gpu_addr,
				alloc_size_bytes / sizeof(uint32_t),
				true);
@ -355,45 +215,29 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
	return retval;
}

/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
 *     of this packet
 *     @gpu_addr - GPU address of the packet. It's a virtual address.
 *     @buffer - buffer to fill up with the packet. It's a CPU kernel pointer
 *     Return - length of the packet
 */
uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
{
	struct pm4_mec_release_mem *packet;

	WARN_ON(!buffer);

	packet = (struct pm4_mec_release_mem *)buffer;
	memset(buffer, 0, sizeof(*packet));

	packet->header.u32All = build_pm4_header(IT_RELEASE_MEM,
						 sizeof(*packet));

	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
	packet->bitfields2.tcl1_action_ena = 1;
	packet->bitfields2.tc_action_ena = 1;
	packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	packet->bitfields2.atc = 0;

	packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
	packet->bitfields3.int_sel =
		int_sel___release_mem__send_interrupt_after_write_confirm;

	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
	packet->address_hi = upper_32_bits(gpu_addr);

	packet->data_lo = 0;

	return sizeof(*packet) / sizeof(unsigned int);
}

int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
	switch (dqm->dev->device_info->asic_family) {
	case CHIP_KAVERI:
	case CHIP_HAWAII:
		/* PM4 packet structures on CIK are the same as on VI */
	case CHIP_CARRIZO:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		pm->pmf = &kfd_vi_pm_funcs;
		break;
	case CHIP_VEGA10:
	case CHIP_RAVEN:
		pm->pmf = &kfd_v9_pm_funcs;
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dqm->dev->device_info->asic_family);
		return -EINVAL;
	}

	pm->dqm = dqm;
	mutex_init(&pm->lock);
	pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
@ -415,38 +259,25 @@ void pm_uninit(struct packet_manager *pm)
int pm_send_set_resources(struct packet_manager *pm,
				struct scheduling_resources *res)
{
	struct pm4_mes_set_resources *packet;
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->set_resources_size;
	mutex_lock(&pm->lock);
	pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
			sizeof(*packet) / sizeof(uint32_t),
			(unsigned int **)&packet);
	if (!packet) {
			size / sizeof(uint32_t),
			(unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	memset(packet, 0, sizeof(struct pm4_mes_set_resources));
	packet->header.u32All = build_pm4_header(IT_SET_RESOURCES,
					sizeof(struct pm4_mes_set_resources));

	packet->bitfields2.queue_type =
			queue_type__mes_set_resources__hsa_interface_queue_hiq;
	packet->bitfields2.vmid_mask = res->vmid_mask;
	packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
	packet->bitfields7.oac_mask = res->oac_mask;
	packet->bitfields8.gds_heap_base = res->gds_heap_base;
	packet->bitfields8.gds_heap_size = res->gds_heap_size;

	packet->gws_mask_lo = lower_32_bits(res->gws_mask);
	packet->gws_mask_hi = upper_32_bits(res->gws_mask);

	packet->queue_mask_lo = lower_32_bits(res->queue_mask);
	packet->queue_mask_hi = upper_32_bits(res->queue_mask);

	pm->priv_queue->ops.submit_packet(pm->priv_queue);
	retval = pm->pmf->set_resources(pm, buffer, res);
	if (!retval)
		pm->priv_queue->ops.submit_packet(pm->priv_queue);
	else
		pm->priv_queue->ops.rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);
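The senders below now all share one shape: reserve space on the HIQ, let the per-ASIC pmf hook build the packet, then publish or roll back the write pointer. A condensed, illustrative fragment of that idiom (not a verbatim copy of any one function; 'build' stands in for the pmf hook plus its extra arguments):

static int pm_send_generic(struct packet_manager *pm, uint32_t size,
			   int (*build)(struct packet_manager *pm,
					uint32_t *buffer))
{
	uint32_t *buffer;
	int retval = 0;

	mutex_lock(&pm->lock);
	pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
			size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		retval = -ENOMEM;
		goto out;
	}

	retval = build(pm, buffer);	/* e.g. pm->pmf->query_status() */
	if (!retval)
		pm->priv_queue->ops.submit_packet(pm->priv_queue);
	else
		pm->priv_queue->ops.rollback_packet(pm->priv_queue);
out:
	mutex_unlock(&pm->lock);
	return retval;
}
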
@ -468,7 +299,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)

	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);

	packet_size_dwords = sizeof(struct pm4_mes_runlist) / sizeof(uint32_t);
	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
	mutex_lock(&pm->lock);

	retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
@ -476,7 +307,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
	if (retval)
		goto fail_acquire_packet_buffer;

	retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr,
	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
					rl_ib_size / sizeof(uint32_t), false);
	if (retval)
		goto fail_create_runlist;
@ -499,37 +330,29 @@ fail_create_runlist_ib:
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
			uint32_t fence_value)
{
	int retval;
	struct pm4_mes_query_status *packet;
	uint32_t *buffer, size;
	int retval = 0;

	if (WARN_ON(!fence_address))
		return -EFAULT;

	size = pm->pmf->query_status_size;
	mutex_lock(&pm->lock);
	retval = pm->priv_queue->ops.acquire_packet_buffer(
			pm->priv_queue,
			sizeof(struct pm4_mes_query_status) / sizeof(uint32_t),
			(unsigned int **)&packet);
	if (retval)
		goto fail_acquire_packet_buffer;
	pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
			size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	packet->header.u32All = build_pm4_header(IT_QUERY_STATUS,
					sizeof(struct pm4_mes_query_status));
	retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
	if (!retval)
		pm->priv_queue->ops.submit_packet(pm->priv_queue);
	else
		pm->priv_queue->ops.rollback_packet(pm->priv_queue);

	packet->bitfields2.context_id = 0;
	packet->bitfields2.interrupt_sel =
			interrupt_sel__mes_query_status__completion_status;
	packet->bitfields2.command =
			command__mes_query_status__fence_only_after_write_ack;

	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
	packet->data_hi = upper_32_bits((uint64_t)fence_value);
	packet->data_lo = lower_32_bits((uint64_t)fence_value);

	pm->priv_queue->ops.submit_packet(pm->priv_queue);

fail_acquire_packet_buffer:
out:
	mutex_unlock(&pm->lock);
	return retval;
}
@ -539,82 +362,27 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
			uint32_t filter_param, bool reset,
			unsigned int sdma_engine)
{
	int retval;
	uint32_t *buffer;
	struct pm4_mes_unmap_queues *packet;
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->unmap_queues_size;
	mutex_lock(&pm->lock);
	retval = pm->priv_queue->ops.acquire_packet_buffer(
			pm->priv_queue,
			sizeof(struct pm4_mes_unmap_queues) / sizeof(uint32_t),
			&buffer);
	if (retval)
		goto err_acquire_packet_buffer;

	packet = (struct pm4_mes_unmap_queues *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
	pr_debug("static_queue: unmapping queues: filter is %d , reset is %d , type is %d\n",
		filter, reset, type);
	packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES,
					sizeof(struct pm4_mes_unmap_queues));
	switch (type) {
	case KFD_QUEUE_TYPE_COMPUTE:
	case KFD_QUEUE_TYPE_DIQ:
		packet->bitfields2.engine_sel =
			engine_sel__mes_unmap_queues__compute;
		break;
	case KFD_QUEUE_TYPE_SDMA:
		packet->bitfields2.engine_sel =
			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
		break;
	default:
		WARN(1, "queue type %d", type);
		retval = -EINVAL;
		goto err_invalid;
	pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
			size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	if (reset)
		packet->bitfields2.action =
				action__mes_unmap_queues__reset_queues;
	retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
				       reset, sdma_engine);
	if (!retval)
		pm->priv_queue->ops.submit_packet(pm->priv_queue);
	else
		packet->bitfields2.action =
				action__mes_unmap_queues__preempt_queues;
		pm->priv_queue->ops.rollback_packet(pm->priv_queue);

	switch (filter) {
	case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
		packet->bitfields2.num_queues = 1;
		packet->bitfields3b.doorbell_offset0 = filter_param;
		break;
	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
		packet->bitfields3a.pasid = filter_param;
		break;
	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__unmap_all_queues;
		break;
	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
		/* in this case, we do not preempt static queues */
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
		break;
	default:
		WARN(1, "filter %d", filter);
		retval = -EINVAL;
		goto err_invalid;
	}

	pm->priv_queue->ops.submit_packet(pm->priv_queue);

	mutex_unlock(&pm->lock);
	return 0;

err_invalid:
	pm->priv_queue->ops.rollback_packet(pm->priv_queue);
err_acquire_packet_buffer:
out:
	mutex_unlock(&pm->lock);
	return retval;
}
583 drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h Normal file
@ -0,0 +1,583 @@
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef F32_MES_PM4_PACKETS_H
#define F32_MES_PM4_PACKETS_H

#ifndef PM4_MES_HEADER_DEFINED
#define PM4_MES_HEADER_DEFINED
union PM4_MES_TYPE_3_HEADER {
	struct {
		uint32_t reserved1 : 8; /* < reserved */
		uint32_t opcode    : 8; /* < IT opcode */
		uint32_t count     : 14;/* < number of DWORDs - 1 in the
					 *   information body.
					 */
		uint32_t type      : 2; /* < packet identifier.
					 *   It should be 3 for type 3 packets
					 */
	};
	uint32_t u32All;
};
#endif /* PM4_MES_HEADER_DEFINED */

/*--------------------MES_SET_RESOURCES--------------------*/

#ifndef PM4_MES_SET_RESOURCES_DEFINED
#define PM4_MES_SET_RESOURCES_DEFINED
enum mes_set_resources_queue_type_enum {
	queue_type__mes_set_resources__kernel_interface_queue_kiq = 0,
	queue_type__mes_set_resources__hsa_interface_queue_hiq = 1,
	queue_type__mes_set_resources__hsa_debug_interface_queue = 4
};


struct pm4_mes_set_resources {
	union {
		union PM4_MES_TYPE_3_HEADER header;	/* header */
		uint32_t ordinal1;
	};

	union {
		struct {
			uint32_t vmid_mask:16;
			uint32_t unmap_latency:8;
			uint32_t reserved1:5;
			enum mes_set_resources_queue_type_enum queue_type:3;
		} bitfields2;
		uint32_t ordinal2;
	};

	uint32_t queue_mask_lo;
	uint32_t queue_mask_hi;
	uint32_t gws_mask_lo;
	uint32_t gws_mask_hi;

	union {
		struct {
			uint32_t oac_mask:16;
			uint32_t reserved2:16;
		} bitfields7;
		uint32_t ordinal7;
	};

	union {
		struct {
			uint32_t gds_heap_base:6;
			uint32_t reserved3:5;
			uint32_t gds_heap_size:6;
			uint32_t reserved4:15;
		} bitfields8;
		uint32_t ordinal8;
	};

};
#endif

/*--------------------MES_RUN_LIST--------------------*/

#ifndef PM4_MES_RUN_LIST_DEFINED
#define PM4_MES_RUN_LIST_DEFINED

struct pm4_mes_runlist {
	union {
		union PM4_MES_TYPE_3_HEADER header;	/* header */
		uint32_t ordinal1;
	};

	union {
		struct {
			uint32_t reserved1:2;
			uint32_t ib_base_lo:30;
		} bitfields2;
		uint32_t ordinal2;
	};

	uint32_t ib_base_hi;

	union {
		struct {
			uint32_t ib_size:20;
			uint32_t chain:1;
			uint32_t offload_polling:1;
			uint32_t reserved2:1;
			uint32_t valid:1;
			uint32_t process_cnt:4;
			uint32_t reserved3:4;
		} bitfields4;
		uint32_t ordinal4;
	};

};
#endif

/*--------------------MES_MAP_PROCESS--------------------*/

#ifndef PM4_MES_MAP_PROCESS_DEFINED
#define PM4_MES_MAP_PROCESS_DEFINED

struct pm4_mes_map_process {
	union {
		union PM4_MES_TYPE_3_HEADER header;	/* header */
		uint32_t ordinal1;
	};

	union {
		struct {
			uint32_t pasid:16;
			uint32_t reserved1:8;
			uint32_t diq_enable:1;
			uint32_t process_quantum:7;
		} bitfields2;
		uint32_t ordinal2;
	};

	uint32_t vm_context_page_table_base_addr_lo32;

	uint32_t vm_context_page_table_base_addr_hi32;

	uint32_t sh_mem_bases;

	uint32_t sh_mem_config;

	uint32_t sq_shader_tba_lo;

	uint32_t sq_shader_tba_hi;

	uint32_t sq_shader_tma_lo;

	uint32_t sq_shader_tma_hi;

	uint32_t reserved6;

	uint32_t gds_addr_lo;

	uint32_t gds_addr_hi;

	union {
		struct {
			uint32_t num_gws:6;
			uint32_t reserved7:1;
			uint32_t sdma_enable:1;
			uint32_t num_oac:4;
			uint32_t reserved8:4;
			uint32_t gds_size:6;
			uint32_t num_queues:10;
		} bitfields14;
		uint32_t ordinal14;
	};

	uint32_t completion_signal_lo;

	uint32_t completion_signal_hi;

};

#endif

/*--------------------MES_MAP_PROCESS_VM--------------------*/

#ifndef PM4_MES_MAP_PROCESS_VM_DEFINED
#define PM4_MES_MAP_PROCESS_VM_DEFINED

struct PM4_MES_MAP_PROCESS_VM {
	union {
		union PM4_MES_TYPE_3_HEADER header;	/* header */
		uint32_t ordinal1;
	};

	uint32_t reserved1;

	uint32_t vm_context_cntl;

	uint32_t reserved2;

	uint32_t vm_context_page_table_end_addr_lo32;

	uint32_t vm_context_page_table_end_addr_hi32;

	uint32_t vm_context_page_table_start_addr_lo32;

	uint32_t vm_context_page_table_start_addr_hi32;

	uint32_t reserved3;

	uint32_t reserved4;

	uint32_t reserved5;

	uint32_t reserved6;

	uint32_t reserved7;

	uint32_t reserved8;

	uint32_t completion_signal_lo32;

	uint32_t completion_signal_hi32;

};
#endif

/*--------------------MES_MAP_QUEUES--------------------*/

#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED
#define PM4_MES_MAP_QUEUES_VI_DEFINED
enum mes_map_queues_queue_sel_enum {
	queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0,
	queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1
};

enum mes_map_queues_queue_type_enum {
	queue_type__mes_map_queues__normal_compute_vi = 0,
	queue_type__mes_map_queues__debug_interface_queue_vi = 1,
	queue_type__mes_map_queues__normal_latency_static_queue_vi = 2,
	queue_type__mes_map_queues__low_latency_static_queue_vi = 3
};

enum mes_map_queues_alloc_format_enum {
	alloc_format__mes_map_queues__one_per_pipe_vi = 0,
	alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
};

enum mes_map_queues_engine_sel_enum {
	engine_sel__mes_map_queues__compute_vi = 0,
	engine_sel__mes_map_queues__sdma0_vi = 2,
	engine_sel__mes_map_queues__sdma1_vi = 3
};


struct pm4_mes_map_queues {
	union {
		union PM4_MES_TYPE_3_HEADER header;	/* header */
		uint32_t ordinal1;
	};

	union {
		struct {
			uint32_t reserved1:4;
			enum mes_map_queues_queue_sel_enum queue_sel:2;
			uint32_t reserved2:15;
			enum mes_map_queues_queue_type_enum queue_type:3;
			enum mes_map_queues_alloc_format_enum alloc_format:2;
			enum mes_map_queues_engine_sel_enum engine_sel:3;
			uint32_t num_queues:3;
		} bitfields2;
		uint32_t ordinal2;
	};

	union {
		struct {
			uint32_t reserved3:1;
			uint32_t check_disable:1;
			uint32_t doorbell_offset:26;
			uint32_t reserved4:4;
		} bitfields3;
		uint32_t ordinal3;
	};

	uint32_t mqd_addr_lo;
	uint32_t mqd_addr_hi;
	uint32_t wptr_addr_lo;
	uint32_t wptr_addr_hi;
};
#endif

/*--------------------MES_QUERY_STATUS--------------------*/

#ifndef PM4_MES_QUERY_STATUS_DEFINED
#define PM4_MES_QUERY_STATUS_DEFINED
enum mes_query_status_interrupt_sel_enum {
	interrupt_sel__mes_query_status__completion_status = 0,
	interrupt_sel__mes_query_status__process_status = 1,
	interrupt_sel__mes_query_status__queue_status = 2
};

enum mes_query_status_command_enum {
	command__mes_query_status__interrupt_only = 0,
	command__mes_query_status__fence_only_immediate = 1,
	command__mes_query_status__fence_only_after_write_ack = 2,
	command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3
};

enum mes_query_status_engine_sel_enum {
	engine_sel__mes_query_status__compute = 0,
	engine_sel__mes_query_status__sdma0_queue = 2,
	engine_sel__mes_query_status__sdma1_queue = 3
};

struct pm4_mes_query_status {
	union {
		union PM4_MES_TYPE_3_HEADER header;	/* header */
		uint32_t ordinal1;
	};

	union {
		struct {
			uint32_t context_id:28;
			enum mes_query_status_interrupt_sel_enum interrupt_sel:2;
			enum mes_query_status_command_enum command:2;
		} bitfields2;
		uint32_t ordinal2;
	};

	union {
		struct {
			uint32_t pasid:16;
			uint32_t reserved1:16;
		} bitfields3a;
		struct {
			uint32_t reserved2:2;
			uint32_t doorbell_offset:26;
			enum mes_query_status_engine_sel_enum engine_sel:3;
			uint32_t reserved3:1;
		} bitfields3b;
		uint32_t ordinal3;
	};

	uint32_t addr_lo;
	uint32_t addr_hi;
	uint32_t data_lo;
	uint32_t data_hi;
};
#endif

/*--------------------MES_UNMAP_QUEUES--------------------*/

#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
#define PM4_MES_UNMAP_QUEUES_DEFINED
enum mes_unmap_queues_action_enum {
	action__mes_unmap_queues__preempt_queues = 0,
	action__mes_unmap_queues__reset_queues = 1,
	action__mes_unmap_queues__disable_process_queues = 2,
	action__mes_unmap_queues__reserved = 3
};

enum mes_unmap_queues_queue_sel_enum {
	queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
	queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
	queue_sel__mes_unmap_queues__unmap_all_queues = 2,
	queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3
};

enum mes_unmap_queues_engine_sel_enum {
	engine_sel__mes_unmap_queues__compute = 0,
	engine_sel__mes_unmap_queues__sdma0 = 2,
	engine_sel__mes_unmap_queues__sdmal = 3
};

struct pm4_mes_unmap_queues {
	union {
		union PM4_MES_TYPE_3_HEADER header;	/* header */
		uint32_t ordinal1;
	};

	union {
		struct {
			enum mes_unmap_queues_action_enum action:2;
			uint32_t reserved1:2;
			enum mes_unmap_queues_queue_sel_enum queue_sel:2;
			uint32_t reserved2:20;
			enum mes_unmap_queues_engine_sel_enum engine_sel:3;
			uint32_t num_queues:3;
		} bitfields2;
		uint32_t ordinal2;
	};

	union {
		struct {
			uint32_t pasid:16;
			uint32_t reserved3:16;
		} bitfields3a;
		struct {
			uint32_t reserved4:2;
			uint32_t doorbell_offset0:26;
			int32_t reserved5:4;
		} bitfields3b;
		uint32_t ordinal3;
	};

	union {
		struct {
			uint32_t reserved6:2;
			uint32_t doorbell_offset1:26;
			uint32_t reserved7:4;
		} bitfields4;
		uint32_t ordinal4;
	};

	union {
		struct {
			uint32_t reserved8:2;
			uint32_t doorbell_offset2:26;
			uint32_t reserved9:4;
		} bitfields5;
		uint32_t ordinal5;
	};

	union {
		struct {
			uint32_t reserved10:2;
			uint32_t doorbell_offset3:26;
			uint32_t reserved11:4;
		} bitfields6;
		uint32_t ordinal6;
	};
};
#endif

#ifndef PM4_MEC_RELEASE_MEM_DEFINED
#define PM4_MEC_RELEASE_MEM_DEFINED

enum mec_release_mem_event_index_enum {
	event_index__mec_release_mem__end_of_pipe = 5,
	event_index__mec_release_mem__shader_done = 6
};

enum mec_release_mem_cache_policy_enum {
	cache_policy__mec_release_mem__lru = 0,
	cache_policy__mec_release_mem__stream = 1
};

enum mec_release_mem_pq_exe_status_enum {
	pq_exe_status__mec_release_mem__default = 0,
	pq_exe_status__mec_release_mem__phase_update = 1
};

enum mec_release_mem_dst_sel_enum {
	dst_sel__mec_release_mem__memory_controller = 0,
	dst_sel__mec_release_mem__tc_l2 = 1,
	dst_sel__mec_release_mem__queue_write_pointer_register = 2,
	dst_sel__mec_release_mem__queue_write_pointer_poll_mask_bit = 3
};

enum mec_release_mem_int_sel_enum {
	int_sel__mec_release_mem__none = 0,
	int_sel__mec_release_mem__send_interrupt_only = 1,
	int_sel__mec_release_mem__send_interrupt_after_write_confirm = 2,
	int_sel__mec_release_mem__send_data_after_write_confirm = 3,
	int_sel__mec_release_mem__unconditionally_send_int_ctxid = 4,
	int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_32_bit_compare = 5,
	int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_64_bit_compare = 6
};

enum mec_release_mem_data_sel_enum {
	data_sel__mec_release_mem__none = 0,
	data_sel__mec_release_mem__send_32_bit_low = 1,
	data_sel__mec_release_mem__send_64_bit_data = 2,
	data_sel__mec_release_mem__send_gpu_clock_counter = 3,
	data_sel__mec_release_mem__send_cp_perfcounter_hi_lo = 4,
	data_sel__mec_release_mem__store_gds_data_to_memory = 5
};

struct pm4_mec_release_mem {
	union {
		union PM4_MES_TYPE_3_HEADER header;	/*header */
		unsigned int ordinal1;
	};

	union {
		struct {
			unsigned int event_type:6;
			unsigned int reserved1:2;
			enum mec_release_mem_event_index_enum event_index:4;
			unsigned int tcl1_vol_action_ena:1;
			unsigned int tc_vol_action_ena:1;
			unsigned int reserved2:1;
			unsigned int tc_wb_action_ena:1;
			unsigned int tcl1_action_ena:1;
			unsigned int tc_action_ena:1;
			uint32_t reserved3:1;
			uint32_t tc_nc_action_ena:1;
			uint32_t tc_wc_action_ena:1;
			uint32_t tc_md_action_ena:1;
			uint32_t reserved4:3;
			enum mec_release_mem_cache_policy_enum cache_policy:2;
			uint32_t reserved5:2;
			enum mec_release_mem_pq_exe_status_enum pq_exe_status:1;
			uint32_t reserved6:2;
		} bitfields2;
		unsigned int ordinal2;
	};

	union {
		struct {
			uint32_t reserved7:16;
			enum mec_release_mem_dst_sel_enum dst_sel:2;
			uint32_t reserved8:6;
			enum mec_release_mem_int_sel_enum int_sel:3;
			uint32_t reserved9:2;
			enum mec_release_mem_data_sel_enum data_sel:3;
		} bitfields3;
		unsigned int ordinal3;
	};

	union {
		struct {
			uint32_t reserved10:2;
			unsigned int address_lo_32b:30;
		} bitfields4;
		struct {
			uint32_t reserved11:3;
			uint32_t address_lo_64b:29;
		} bitfields4b;
		uint32_t reserved12;
		unsigned int ordinal4;
	};

	union {
		uint32_t address_hi;
		uint32_t reserved13;
		uint32_t ordinal5;
	};

	union {
		uint32_t data_lo;
		uint32_t cmp_data_lo;
		struct {
			uint32_t dw_offset:16;
			uint32_t num_dwords:16;
		} bitfields6c;
		uint32_t reserved14;
		uint32_t ordinal6;
	};

	union {
		uint32_t data_hi;
		uint32_t cmp_data_hi;
		uint32_t reserved15;
		uint32_t reserved16;
		uint32_t ordinal7;
	};

	uint32_t int_ctxid;

};

#endif

enum {
	CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014
};
#endif
@ -39,11 +39,37 @@

#include "amd_shared.h"

#define KFD_MAX_RING_ENTRY_SIZE 8

#define KFD_SYSFS_FILE_MODE 0444

#define KFD_MMAP_DOORBELL_MASK 0x8000000000000ull
#define KFD_MMAP_EVENTS_MASK 0x4000000000000ull
#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000ull
/* GPU ID hash width in bits */
#define KFD_GPU_ID_HASH_WIDTH 16

/* Use upper bits of mmap offset to store KFD driver specific information.
 * BITS[63:62] - Encode MMAP type
 * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset belongs to
 * BITS[45:0]  - MMAP offset value
 *
 * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
 *  defines are w.r.t. PAGE_SIZE
 */
#define KFD_MMAP_TYPE_SHIFT	(62 - PAGE_SHIFT)
#define KFD_MMAP_TYPE_MASK	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_EVENTS	(0x2ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_RESERVED_MEM	(0x1ULL << KFD_MMAP_TYPE_SHIFT)

#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
				<< KFD_MMAP_GPU_ID_SHIFT)
#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
				& KFD_MMAP_GPU_ID_MASK)
#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \
				>> KFD_MMAP_GPU_ID_SHIFT)

#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT)
#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)

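A quick round-trip of this encoding, as a standalone user-space sketch (PAGE_SHIFT fixed at 12 and the gpu_id value are assumptions for illustration; in the kernel these macros operate on vm_pgoff, an offset in pages):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define KFD_GPU_ID_HASH_WIDTH 16

#define KFD_MMAP_TYPE_SHIFT	(62 - PAGE_SHIFT)
#define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_GPU_ID_SHIFT	(46 - PAGE_SHIFT)
#define KFD_MMAP_GPU_ID_MASK	(((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
				 << KFD_MMAP_GPU_ID_SHIFT)
#define KFD_MMAP_GPU_ID(gpu_id)	((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT) \
				 & KFD_MMAP_GPU_ID_MASK)
#define KFD_MMAP_GPU_ID_GET(offset) (((offset) & KFD_MMAP_GPU_ID_MASK) \
				     >> KFD_MMAP_GPU_ID_SHIFT)

int main(void)
{
	uint64_t gpu_id = 0xbeef;	/* hypothetical hashed GPU ID */
	uint64_t pgoff = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(gpu_id);

	/* both fields decode back out of the packed page offset */
	printf("type bits 0x%llx, gpu_id 0x%llx\n",
	       (unsigned long long)(pgoff >> KFD_MMAP_TYPE_SHIFT),
	       (unsigned long long)KFD_MMAP_GPU_ID_GET(pgoff));
	return 0;
}
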
/*
 * When working with cp scheduler we should assign the HIQ manually or via
@ -55,9 +81,6 @@
#define KFD_CIK_HIQ_PIPE 4
#define KFD_CIK_HIQ_QUEUE 0

/* GPU ID hash width in bits */
#define KFD_GPU_ID_HASH_WIDTH 16

/* Macro for allocating structures */
#define kfd_alloc_struct(ptr_to_struct)	\
	((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
@ -116,6 +139,11 @@ extern int debug_largebar;
 */
extern int ignore_crat;

/*
 * Set sh_mem_config.retry_disable on Vega10
 */
extern int vega10_noretry;

/**
 * enum kfd_sched_policy
 *
@ -148,6 +176,8 @@ enum cache_policy {
	cache_policy_noncoherent
};

#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10)

struct kfd_event_interrupt_class {
	bool (*interrupt_isr)(struct kfd_dev *dev,
			const uint32_t *ih_ring_entry);
@ -160,6 +190,7 @@ struct kfd_device_info {
	const struct kfd_event_interrupt_class *event_interrupt_class;
	unsigned int max_pasid_bits;
	unsigned int max_no_of_hqd;
	unsigned int doorbell_size;
	size_t ih_ring_entry_size;
	uint8_t num_of_watch_points;
	uint16_t mqd_size_aligned;
@ -173,6 +204,7 @@ struct kfd_mem_obj {
	uint32_t range_end;
	uint64_t gpu_addr;
	uint32_t *cpu_ptr;
	void *gtt_mem;
};

struct kfd_vmid_info {
@ -364,7 +396,7 @@ struct queue_properties {
	uint32_t queue_percent;
	uint32_t *read_ptr;
	uint32_t *write_ptr;
	uint32_t __iomem *doorbell_ptr;
	void __iomem *doorbell_ptr;
	uint32_t doorbell_off;
	bool is_interop;
	bool is_evicted;
@ -427,6 +459,7 @@ struct queue {
	uint32_t queue;

	unsigned int sdma_id;
	unsigned int doorbell_id;

	struct kfd_process *process;
	struct kfd_dev *device;
@ -501,6 +534,9 @@ struct qcm_process_device {
	/* IB memory */
	uint64_t ib_base;
	void *ib_kaddr;

	/* doorbell resources per process per device */
	unsigned long *doorbell_bitmap;
};

/* KFD Memory Eviction */
@ -512,6 +548,8 @@ struct qcm_process_device {
/* Approx. time before evicting the process again */
#define PROCESS_ACTIVE_TIME_MS 10

int kgd2kfd_quiesce_mm(struct mm_struct *mm);
int kgd2kfd_resume_mm(struct mm_struct *mm);
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
					       struct dma_fence *fence);

@ -681,6 +719,8 @@ struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
void kfd_unref_process(struct kfd_process *p);
int kfd_process_evict_queues(struct kfd_process *p);
int kfd_process_restore_queues(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);

@ -693,7 +733,7 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p);

int kfd_reserved_mem_mmap(struct kfd_process *process,
int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
			  struct vm_area_struct *vma);

/* KFD process API for creating and translating handles */
@ -721,17 +761,20 @@ unsigned int kfd_pasid_alloc(void);
void kfd_pasid_free(unsigned int pasid);

/* Doorbells */
size_t kfd_doorbell_process_slice(struct kfd_dev *kfd);
int kfd_doorbell_init(struct kfd_dev *kfd);
void kfd_doorbell_fini(struct kfd_dev *kfd);
int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
		      struct vm_area_struct *vma);
void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
					unsigned int *doorbell_off);
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
u32 read_kernel_doorbell(u32 __iomem *db);
void write_kernel_doorbell(u32 __iomem *db, u32 value);
unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
void write_kernel_doorbell(void __iomem *db, u32 value);
void write_kernel_doorbell64(void __iomem *db, u64 value);
unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
					struct kfd_process *process,
					unsigned int queue_id);
					unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
				      struct kfd_process *process);
int kfd_alloc_process_doorbells(struct kfd_process *process);
@ -788,6 +831,8 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
@ -832,8 +877,42 @@ struct packet_manager {
	bool allocated;
	struct kfd_mem_obj *ib_buffer_obj;
	unsigned int ib_size_bytes;

	const struct packet_manager_funcs *pmf;
};

struct packet_manager_funcs {
	/* Support ASIC-specific packet formats for PM4 packets */
	int (*map_process)(struct packet_manager *pm, uint32_t *buffer,
			struct qcm_process_device *qpd);
	int (*runlist)(struct packet_manager *pm, uint32_t *buffer,
			uint64_t ib, size_t ib_size_in_dwords, bool chain);
	int (*set_resources)(struct packet_manager *pm, uint32_t *buffer,
			struct scheduling_resources *res);
	int (*map_queues)(struct packet_manager *pm, uint32_t *buffer,
			struct queue *q, bool is_static);
	int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
			enum kfd_queue_type type,
			enum kfd_unmap_queues_filter mode,
			uint32_t filter_param, bool reset,
			unsigned int sdma_engine);
	int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
			uint64_t fence_address, uint32_t fence_value);
	int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);

	/* Packet sizes */
	int map_process_size;
	int runlist_size;
	int set_resources_size;
	int map_queues_size;
	int unmap_queues_size;
	int query_status_size;
	int release_mem_size;
};

extern const struct packet_manager_funcs kfd_vi_pm_funcs;
extern const struct packet_manager_funcs kfd_v9_pm_funcs;

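Each ASIC family fills one of these tables. An illustrative sketch of how a family could populate it (the pm_xx_* hook names are hypothetical; the real tables are kfd_vi_pm_funcs and kfd_v9_pm_funcs, and pm_set_resources_vi is the one hook declared below as shareable):

static const struct packet_manager_funcs pm_xx_funcs = {
	.map_process		= pm_xx_map_process,	/* hypothetical hooks */
	.runlist		= pm_xx_runlist,
	.set_resources		= pm_set_resources_vi,	/* shared VI/AI helper */
	.map_queues		= pm_xx_map_queues,
	.unmap_queues		= pm_xx_unmap_queues,
	.query_status		= pm_xx_query_status,
	.release_mem		= pm_xx_release_mem,

	/* sizes let common code size runlist IBs without knowing the format */
	.map_process_size	= sizeof(struct pm4_mes_map_process),
	.runlist_size		= sizeof(struct pm4_mes_runlist),
	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
	.query_status_size	= sizeof(struct pm4_mes_query_status),
	.release_mem_size	= sizeof(struct pm4_mec_release_mem),
};
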
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
void pm_uninit(struct packet_manager *pm);
int pm_send_set_resources(struct packet_manager *pm,
@ -849,12 +928,17 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,

void pm_release_ib(struct packet_manager *pm);

uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);
/* Following PM funcs can be shared among VI and AI */
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
		struct scheduling_resources *res);

uint64_t kfd_get_number_elems(struct kfd_dev *kfd);

/* Events */
extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
extern const struct kfd_event_interrupt_class event_interrupt_class_v9;

extern const struct kfd_device_global_init_class device_global_init_class_cik;

void kfd_event_init_process(struct kfd_process *p);
@ -332,6 +332,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
				get_order(KFD_CWSR_TBA_TMA_SIZE));

		kfree(pdd->qpd.doorbell_bitmap);
		idr_destroy(&pdd->alloc_idr);

		kfree(pdd);
@ -451,7 +452,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
			continue;

		offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
		offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
			<< PAGE_SHIFT;
		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
			MAP_SHARED, offset);
@ -585,6 +587,31 @@ err_alloc_process:
	return ERR_PTR(err);
}

static int init_doorbell_bitmap(struct qcm_process_device *qpd,
			struct kfd_dev *dev)
{
	unsigned int i;

	if (!KFD_IS_SOC15(dev->device_info->asic_family))
		return 0;

	qpd->doorbell_bitmap =
		kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
				     BITS_PER_BYTE), GFP_KERNEL);
	if (!qpd->doorbell_bitmap)
		return -ENOMEM;

	/* Mask out any reserved doorbells */
	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++)
		if ((dev->shared_resources.reserved_doorbell_mask & i) ==
		    dev->shared_resources.reserved_doorbell_val) {
			set_bit(i, qpd->doorbell_bitmap);
			pr_debug("reserved doorbell 0x%03x\n", i);
		}

	return 0;
}

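To see which per-process doorbells the loop above actually masks out, here is a standalone sketch; the mask/value pair (0x1f0/0x0f0) is an assumed example, since the real values arrive from the KGD via shared_resources:

#include <stdio.h>

int main(void)
{
	unsigned int mask = 0x1f0, val = 0x0f0;	/* assumed example values */
	unsigned int i;

	for (i = 0; i < 0x400; i++)	/* queue slots per process */
		if ((mask & i) == val)
			printf("reserved doorbell 0x%03x\n", i);
	/* prints 0x0f0-0x0ff and 0x2f0-0x2ff */
	return 0;
}
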
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
|
||||
struct kfd_process *p)
|
||||
{
|
||||
@ -606,6 +633,12 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
|
||||
if (!pdd)
|
||||
return NULL;
|
||||
|
||||
if (init_doorbell_bitmap(&pdd->qpd, dev)) {
|
||||
pr_err("Failed to init doorbell for process\n");
|
||||
kfree(pdd);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pdd->dev = dev;
|
||||
INIT_LIST_HEAD(&pdd->qpd.queues_list);
|
||||
INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
|
||||
@ -808,7 +841,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
 */
static int process_evict_queues(struct kfd_process *p)
int kfd_process_evict_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r = 0;
@ -844,7 +877,7 @@ fail:
}

/* process_restore_queues - Restore all user queues of a process */
static int process_restore_queues(struct kfd_process *p)
int kfd_process_restore_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r, ret = 0;
@ -886,7 +919,7 @@ static void evict_process_worker(struct work_struct *work)
	flush_delayed_work(&p->restore_work);

	pr_debug("Started evicting pasid %d\n", p->pasid);
	ret = process_evict_queues(p);
	ret = kfd_process_evict_queues(p);
	if (!ret) {
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
@ -946,7 +979,7 @@ static void restore_process_worker(struct work_struct *work)
		return;
	}

	ret = process_restore_queues(p);
	ret = kfd_process_restore_queues(p);
	if (!ret)
		pr_debug("Finished restoring pasid %d\n", p->pasid);
	else
@ -963,7 +996,7 @@ void kfd_suspend_all_processes(void)
	cancel_delayed_work_sync(&p->eviction_work);
	cancel_delayed_work_sync(&p->restore_work);

	if (process_evict_queues(p))
	if (kfd_process_evict_queues(p))
		pr_err("Failed to suspend process %d\n", p->pasid);
	dma_fence_signal(p->ef);
	dma_fence_put(p->ef);
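The "reference-counted per process-device" remark is what makes concurrent eviction triggers (MMU notifiers, suspend, buffer moves) safe to nest: only the transition from zero actually halts the queues, and only the final restore lets them run again. A toy sketch of that invariant, with all names invented for illustration:

#include <stdbool.h>
#include <stdio.h>

/* Toy model of a per-process-device eviction refcount. */
struct toy_pdd {
        unsigned int evicted;
        bool queues_active;
};

static void toy_evict(struct toy_pdd *pdd)
{
        if (pdd->evicted++ == 0) {
                pdd->queues_active = false; /* first eviction stops the queues */
                printf("queues stopped\n");
        }
}

static void toy_restore(struct toy_pdd *pdd)
{
        if (--pdd->evicted == 0) {
                pdd->queues_active = true;  /* last restore restarts them */
                printf("queues running\n");
        }
}

int main(void)
{
        struct toy_pdd pdd = { 0, true };

        toy_evict(&pdd);   /* e.g. MMU notifier */
        toy_evict(&pdd);   /* e.g. suspend, nested safely */
        toy_restore(&pdd); /* queues stay stopped */
        toy_restore(&pdd); /* now they run again */
        return 0;
}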
@ -989,15 +1022,12 @@ int kfd_resume_all_processes(void)
	return ret;
}

int kfd_reserved_mem_mmap(struct kfd_process *process,
int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
			  struct vm_area_struct *vma)
{
	struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff);
	struct kfd_process_device *pdd;
	struct qcm_process_device *qpd;

	if (!dev)
		return -EINVAL;
	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
		pr_err("Incorrect CWSR mapping size.\n");
		return -EINVAL;
@ -119,9 +119,6 @@ static int create_cp_queue(struct process_queue_manager *pqm,
	/* Doorbell initialized in user space*/
	q_properties->doorbell_ptr = NULL;

	q_properties->doorbell_off =
		kfd_queue_id_to_doorbell(dev, pqm->process, qid);

	/* let DQM handle it*/
	q_properties->vmid = 0;
	q_properties->queue_id = qid;
@ -244,10 +241,20 @@ int pqm_create_queue(struct process_queue_manager *pqm,
	}

	if (retval != 0) {
		pr_err("DQM create queue failed\n");
		pr_err("Pasid %d DQM create queue %d failed. ret %d\n",
			pqm->process->pasid, type, retval);
		goto err_create_queue;
	}

	if (q)
		/* Return the doorbell offset within the doorbell page
		 * to the caller so it can be passed up to user mode
		 * (in bytes).
		 */
		properties->doorbell_off =
			(q->properties.doorbell_off * sizeof(uint32_t)) &
			(kfd_doorbell_process_slice(dev) - 1);

	pr_debug("PQM After DQM create queue\n");

	list_add(&pqn->process_queue_list, &pqm->queues);
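The doorbell offset kept in the queue properties is a dword index into the whole doorbell aperture, while user mode wants a byte offset relative to its own doorbell page, so the code scales by sizeof(uint32_t) and masks with the per-process slice size, which the masking implies is a power of two. A small standalone illustration of that arithmetic, with a made-up slice size standing in for kfd_doorbell_process_slice():

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Assume each process owns an 8 KiB doorbell slice; the real value
         * comes from kfd_doorbell_process_slice(dev). */
        const uint32_t process_slice = 8192;

        /* Global doorbell index (in dwords) as tracked by the driver. */
        uint32_t doorbell_off = 0x2f37;

        /* Byte offset within this process's doorbell page, as reported
         * back to user mode. */
        uint32_t user_off = (doorbell_off * sizeof(uint32_t)) &
                            (process_slice - 1);

        printf("dword index 0x%x -> byte offset 0x%x in the process page\n",
               doorbell_off, user_off);
        return 0;
}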
@ -313,8 +320,11 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
	dqm = pqn->q->device->dqm;
	retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
	if (retval) {
		pr_debug("Destroy queue failed, returned %d\n", retval);
		goto err_destroy_queue;
		pr_err("Pasid %d destroy queue %d failed, ret %d\n",
			pqm->process->pasid,
			pqn->q->properties.queue_id, retval);
		if (retval != -ETIME)
			goto err_destroy_queue;
	}
	uninit_queue(pqn->q);
}

@ -36,8 +36,8 @@ void print_queue_properties(struct queue_properties *q)
	pr_debug("Queue Address: 0x%llX\n", q->queue_address);
	pr_debug("Queue Id: %u\n", q->queue_id);
	pr_debug("Queue Process Vmid: %u\n", q->vmid);
	pr_debug("Queue Read Pointer: 0x%p\n", q->read_ptr);
	pr_debug("Queue Write Pointer: 0x%p\n", q->write_ptr);
	pr_debug("Queue Read Pointer: 0x%px\n", q->read_ptr);
	pr_debug("Queue Write Pointer: 0x%px\n", q->write_ptr);
	pr_debug("Queue Doorbell Pointer: 0x%p\n", q->doorbell_ptr);
	pr_debug("Queue Doorbell Offset: %u\n", q->doorbell_off);
}
@ -53,8 +53,8 @@ void print_queue(struct queue *q)
	pr_debug("Queue Address: 0x%llX\n", q->properties.queue_address);
	pr_debug("Queue Id: %u\n", q->properties.queue_id);
	pr_debug("Queue Process Vmid: %u\n", q->properties.vmid);
	pr_debug("Queue Read Pointer: 0x%p\n", q->properties.read_ptr);
	pr_debug("Queue Write Pointer: 0x%p\n", q->properties.write_ptr);
	pr_debug("Queue Read Pointer: 0x%px\n", q->properties.read_ptr);
	pr_debug("Queue Write Pointer: 0x%px\n", q->properties.write_ptr);
	pr_debug("Queue Doorbell Pointer: 0x%p\n", q->properties.doorbell_ptr);
	pr_debug("Queue Doorbell Offset: %u\n", q->properties.doorbell_off);
	pr_debug("Queue MQD Address: 0x%p\n", q->mqd);

@ -1239,6 +1239,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
		break;
	case CHIP_VEGA10:
	case CHIP_RAVEN:
		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->gpu->device_info->asic_family);

@ -45,6 +45,7 @@
 #define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0
 #define HSA_CAP_DOORBELL_TYPE_1_0 0x1
 #define HSA_CAP_DOORBELL_TYPE_2_0 0x2
 #define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000

 struct kfd_node_properties {
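The topology code above advertises the doorbell generation to user mode by packing one of these type values into a bit-field of the node's capability word. A sketch of that packing; the shift and mask positions below are invented for illustration, and only the HSA_CAP_DOORBELL_TYPE_* values come from the header:

#include <stdint.h>
#include <stdio.h>

#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0
#define HSA_CAP_DOORBELL_TYPE_1_0     0x1
#define HSA_CAP_DOORBELL_TYPE_2_0     0x2

/* Hypothetical field position; the real TOTALBITS shift/mask live in
 * kfd_topology.h and may differ. */
#define DOORBELL_TYPE_SHIFT 12
#define DOORBELL_TYPE_MASK  (0x3u << DOORBELL_TYPE_SHIFT)

int main(void)
{
        uint32_t capability = 0;

        /* Pack the doorbell type the way kfd_topology_add_device() does. */
        capability |= (HSA_CAP_DOORBELL_TYPE_2_0 << DOORBELL_TYPE_SHIFT) &
                      DOORBELL_TYPE_MASK;

        /* User mode unpacks it with the same shift/mask pair. */
        printf("doorbell type = %u\n",
               (capability & DOORBELL_TYPE_MASK) >> DOORBELL_TYPE_SHIFT);
        return 0;
}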
drivers/gpu/drm/amd/amdkfd/soc15_int.h (new file, 47 lines)
@ -0,0 +1,47 @@
/*
 * Copyright 2016-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef HSA_SOC15_INT_H_INCLUDED
#define HSA_SOC15_INT_H_INCLUDED

#include "soc15_ih_clientid.h"

#define SOC15_INTSRC_CP_END_OF_PIPE	181
#define SOC15_INTSRC_CP_BAD_OPCODE	183
#define SOC15_INTSRC_SQ_INTERRUPT_MSG	239
#define SOC15_INTSRC_VMC_FAULT		0
#define SOC15_INTSRC_SDMA_TRAP		224


#define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff)
#define SOC15_SOURCE_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 8 & 0xff)
#define SOC15_RING_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 16 & 0xff)
#define SOC15_VMID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 24 & 0xf)
#define SOC15_VMID_TYPE_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 31 & 0x1)
#define SOC15_PASID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) & 0xffff)
#define SOC15_CONTEXT_ID0_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[4]))
#define SOC15_CONTEXT_ID1_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[5]))
#define SOC15_CONTEXT_ID2_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[6]))
#define SOC15_CONTEXT_ID3_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[7]))

#endif

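Each SOC15 IH ring entry is a run of little-endian dwords, and these macros just slice fields out of it. A userspace sketch of the same decoding, with le32_to_cpu reduced to an identity for a little-endian host and a fabricated entry, showing how a consumer pulls out the client, source, vmid and PASID:

#include <stdint.h>
#include <stdio.h>

/* On a little-endian host le32_to_cpu() is a no-op; the kernel macro
 * handles the big-endian case. */
#define le32_to_cpu(x) (x)

#define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff)
#define SOC15_SOURCE_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 8 & 0xff)
#define SOC15_VMID_FROM_IH_ENTRY(entry)      (le32_to_cpu(entry[0]) >> 24 & 0xf)
#define SOC15_PASID_FROM_IH_ENTRY(entry)     (le32_to_cpu(entry[3]) & 0xffff)

int main(void)
{
        /* A fabricated 8-dword IH entry: client 0x01, source 181
         * (CP end-of-pipe), vmid 8, pasid 0x1234. */
        uint32_t entry[8] = {
                0x01 | (181u << 8) | (8u << 24),
                0, 0,
                0x1234,
                0, 0, 0, 0,
        };

        printf("client %u source %u vmid %u pasid 0x%x\n",
               SOC15_CLIENT_ID_FROM_IH_ENTRY(entry),
               SOC15_SOURCE_ID_FROM_IH_ENTRY(entry),
               SOC15_VMID_FROM_IH_ENTRY(entry),
               SOC15_PASID_FROM_IH_ENTRY(entry));
        return 0;
}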
@ -100,6 +100,21 @@ struct kgd2kfd_shared_resources {
	/* Bit n == 1 means Queue n is available for KFD */
	DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);

	/* Doorbell assignments (SOC15 and later chips only). Only
	 * specific doorbells are routed to each SDMA engine. Others
	 * are routed to IH and VCN. They are not usable by the CP.
	 *
	 * Any doorbell number D that satisfies the following condition
	 * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val
	 *
	 * KFD currently uses 1024 doorbells per process (indices
	 * 0x000-0x3ff). If doorbells 0x0f0-0x0f7 and 0x2f0-0x2f7 are
	 * reserved, the mask would be set to 0x1f8 and val set to 0x0f0.
	 */
	unsigned int sdma_doorbell[2][2];
	unsigned int reserved_doorbell_mask;
	unsigned int reserved_doorbell_val;

	/* Base address of doorbell aperture. */
	phys_addr_t doorbell_physical_address;

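The mask/val pair is a wildcard pattern match over the 10-bit doorbell index: bits covered by the mask must equal val, bits outside it are don't-cares. A quick standalone check of the example from the comment (mask 0x1f8, val 0x0f0) that prints exactly which of the 1024 per-process doorbells come out reserved:

#include <stdio.h>

int main(void)
{
        const unsigned int mask = 0x1f8, val = 0x0f0;

        /* A doorbell D is reserved iff (D & mask) == val; here bits 0-2
         * and bit 9 are don't-cares. */
        for (unsigned int d = 0; d < 1024; d++)
                if ((d & mask) == val)
                        printf("reserved doorbell 0x%03x\n", d);
        /* Prints 0x0f0-0x0f7 and 0x2f0-0x2f7, matching the comment. */
        return 0;
}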
@ -173,8 +188,6 @@ struct tile_config {
 * @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp
 * scheduling mode. Only used for no cp scheduling mode.
 *
 * @init_pipeline: Initialized the compute pipelines.
 *
 * @hqd_load: Loads the mqd structure to a H/W hqd slot. Used only for no cp
 * scheduling mode.
 *
@ -274,9 +287,6 @@ struct kfd2kgd_calls {
	int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid,
			unsigned int vmid);

	int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id,
			uint32_t hpd_size, uint64_t hpd_gpu_addr);

	int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id);

	int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
@ -382,6 +392,10 @@ struct kfd2kgd_calls {
 *
 * @resume: Notifies amdkfd about a resume action done to a kgd device
 *
 * @quiesce_mm: Quiesce all user queue access to specified MM address space
 *
 * @resume_mm: Resume user queue access to specified MM address space
 *
 * @schedule_evict_and_restore_process: Schedules work queue that will prepare
 * for safe eviction of KFD BOs that belong to the specified process.
 *
@ -399,6 +413,8 @@ struct kgd2kfd_calls {
	void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry);
	void (*suspend)(struct kfd_dev *kfd);
	int (*resume)(struct kfd_dev *kfd);
	int (*quiesce_mm)(struct mm_struct *mm);
	int (*resume_mm)(struct mm_struct *mm);
	int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
			struct dma_fence *fence);
};

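kgd2kfd_calls is a plain function-pointer table: the GPU driver obtains it once and calls into KFD only through it, which keeps the two modules loosely coupled. A reduced userspace sketch of the pattern; the two-entry table and the stub callbacks below are invented for illustration:

#include <stdio.h>

/* Stand-in for struct mm_struct; only its identity matters here. */
struct mm { int id; };

/* Reduced version of the kgd2kfd_calls ops table. */
struct kfd_ops {
        int (*quiesce_mm)(struct mm *mm);
        int (*resume_mm)(struct mm *mm);
};

static int my_quiesce_mm(struct mm *mm)
{
        printf("quiescing user queues for mm %d\n", mm->id);
        return 0;
}

static int my_resume_mm(struct mm *mm)
{
        printf("resuming user queues for mm %d\n", mm->id);
        return 0;
}

/* What the KFD side would hand back from its get-functions entry point. */
static const struct kfd_ops ops = {
        .quiesce_mm = my_quiesce_mm,
        .resume_mm  = my_resume_mm,
};

int main(void)
{
        struct mm mm = { .id = 42 };

        /* The GPU driver side only ever calls through the table. */
        ops.quiesce_mm(&mm);
        ops.resume_mm(&mm);
        return 0;
}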
@ -29,10 +29,10 @@ struct v9_sdma_mqd {
	uint32_t sdmax_rlcx_rb_base;
	uint32_t sdmax_rlcx_rb_base_hi;
	uint32_t sdmax_rlcx_rb_rptr;
	uint32_t sdmax_rlcx_rb_rptr_hi;
	uint32_t sdmax_rlcx_rb_wptr;
	uint32_t sdmax_rlcx_rb_wptr_hi;
	uint32_t sdmax_rlcx_rb_wptr_poll_cntl;
	uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi;
	uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo;
	uint32_t sdmax_rlcx_rb_rptr_addr_hi;
	uint32_t sdmax_rlcx_rb_rptr_addr_lo;
	uint32_t sdmax_rlcx_ib_cntl;
@ -44,29 +44,29 @@ struct v9_sdma_mqd {
	uint32_t sdmax_rlcx_skip_cntl;
	uint32_t sdmax_rlcx_context_status;
	uint32_t sdmax_rlcx_doorbell;
	uint32_t sdmax_rlcx_virtual_addr;
	uint32_t sdmax_rlcx_ape1_cntl;
	uint32_t sdmax_rlcx_status;
	uint32_t sdmax_rlcx_doorbell_log;
	uint32_t reserved_22;
	uint32_t reserved_23;
	uint32_t reserved_24;
	uint32_t reserved_25;
	uint32_t reserved_26;
	uint32_t reserved_27;
	uint32_t reserved_28;
	uint32_t reserved_29;
	uint32_t reserved_30;
	uint32_t reserved_31;
	uint32_t reserved_32;
	uint32_t reserved_33;
	uint32_t reserved_34;
	uint32_t reserved_35;
	uint32_t reserved_36;
	uint32_t reserved_37;
	uint32_t reserved_38;
	uint32_t reserved_39;
	uint32_t reserved_40;
	uint32_t reserved_41;
	uint32_t sdmax_rlcx_watermark;
	uint32_t sdmax_rlcx_doorbell_offset;
	uint32_t sdmax_rlcx_csa_addr_lo;
	uint32_t sdmax_rlcx_csa_addr_hi;
	uint32_t sdmax_rlcx_ib_sub_remain;
	uint32_t sdmax_rlcx_preempt;
	uint32_t sdmax_rlcx_dummy_reg;
	uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi;
	uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo;
	uint32_t sdmax_rlcx_rb_aql_cntl;
	uint32_t sdmax_rlcx_minor_ptr_update;
	uint32_t sdmax_rlcx_midcmd_data0;
	uint32_t sdmax_rlcx_midcmd_data1;
	uint32_t sdmax_rlcx_midcmd_data2;
	uint32_t sdmax_rlcx_midcmd_data3;
	uint32_t sdmax_rlcx_midcmd_data4;
	uint32_t sdmax_rlcx_midcmd_data5;
	uint32_t sdmax_rlcx_midcmd_data6;
	uint32_t sdmax_rlcx_midcmd_data7;
	uint32_t sdmax_rlcx_midcmd_data8;
	uint32_t sdmax_rlcx_midcmd_cntl;
	uint32_t reserved_42;
	uint32_t reserved_43;
	uint32_t reserved_44;