Merge tag 'amd-drm-next-5.14-2021-05-19' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.14-2021-05-19:

amdgpu:
- Aldebaran updates
- More LTTPR display work
- Vangogh updates
- SDMA 5.x GCR fixes
- RAS fixes
- PCIe ASPM support
- Modifier fixes
- Enable TMZ on Renoir
- Buffer object code cleanup
- Display overlay fixes
- Initial support for multiple eDP panels
- Initial SR-IOV support for Aldebaran
- DP link training refactor
- Misc code cleanups and bug fixes
- SMU regression fixes for variable sized arrays
- MAINTAINERS fixes for amdgpu

amdkfd:
- Initial SR-IOV support for Aldebaran
- Topology fixes
- Initial HMM SVM support
- Misc code cleanups and bug fixes

radeon:
- Misc code cleanups and bug fixes
- SMU regression fixes for variable sized arrays
- Flickering fix for Oland with multiple 4K displays

UAPI:
- amdgpu: Drop AMDGPU_GEM_CREATE_SHADOW flag. This was always a kernel internal
  flag and userspace use of it has always been blocked. It's no longer needed so
  remove it.
- amdkfd: HMM SVM support
  Overview: https://patchwork.freedesktop.org/series/85562/
  Proposed userspace: https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210520031258.231896-1-alexander.deucher@amd.com
commit c99c4d0ca5
@@ -878,7 +878,7 @@ M: Harry Wentland <harry.wentland@amd.com>
M: Leo Li <sunpeng.li@amd.com>
L: amd-gfx@lists.freedesktop.org
S: Supported
T: git git://people.freedesktop.org/~agd5f/linux
T: git https://gitlab.freedesktop.org/agd5f/linux.git
F: drivers/gpu/drm/amd/display/

AMD FAM15H PROCESSOR POWER MONITORING DRIVER
@@ -954,7 +954,7 @@ AMD POWERPLAY
M: Evan Quan <evan.quan@amd.com>
L: amd-gfx@lists.freedesktop.org
S: Supported
T: git git://people.freedesktop.org/~agd5f/linux
T: git https://gitlab.freedesktop.org/agd5f/linux.git
F: drivers/gpu/drm/amd/pm/powerplay/

AMD SEATTLE DEVICE TREE SUPPORT
@@ -56,7 +56,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o
amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o

amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
@@ -1075,7 +1075,8 @@ struct amdgpu_device {

atomic_t throttling_logging_enabled;
struct ratelimit_state throttling_logging_rs;
uint32_t ras_features;
uint32_t ras_hw_enabled;
uint32_t ras_enabled;

bool in_pci_err_recovery;
struct pci_saved_state *pci_state;
@@ -76,7 +76,7 @@ struct amdgpu_atif {
/**
 * amdgpu_atif_call - call an ATIF method
 *
 * @handle: acpi handle
 * @atif: acpi handle
 * @function: the ATIF function to execute
 * @params: ATIF function params
 *
@@ -166,7 +166,6 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas
/**
 * amdgpu_atif_verify_interface - verify ATIF
 *
 * @handle: acpi handle
 * @atif: amdgpu atif struct
 *
 * Execute the ATIF_FUNCTION_VERIFY_INTERFACE ATIF function
@@ -240,8 +239,7 @@ out:
/**
 * amdgpu_atif_get_notification_params - determine notify configuration
 *
 * @handle: acpi handle
 * @n: atif notification configuration struct
 * @atif: acpi handle
 *
 * Execute the ATIF_FUNCTION_GET_SYSTEM_PARAMETERS ATIF function
 * to determine if a notifier is used and if so which one
@@ -304,7 +302,7 @@ out:
/**
 * amdgpu_atif_query_backlight_caps - get min and max backlight input signal
 *
 * @handle: acpi handle
 * @atif: acpi handle
 *
 * Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function
 * to determine the acceptable range of backlight values
@@ -363,7 +361,7 @@ out:
/**
 * amdgpu_atif_get_sbios_requests - get requested sbios event
 *
 * @handle: acpi handle
 * @atif: acpi handle
 * @req: atif sbios request struct
 *
 * Execute the ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS ATIF function
@@ -899,6 +897,8 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev)
/**
 * amdgpu_acpi_is_s0ix_supported
 *
 * @adev: amdgpu_device_pointer
 *
 * returns true if supported, false if not.
 */
bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
@@ -75,6 +75,7 @@ struct amdgpu_amdkfd_fence {
struct mm_struct *mm;
spinlock_t lock;
char timeline_name[TASK_COMM_LEN];
struct svm_range_bo *svm_bo;
};

struct amdgpu_kfd_dev {
@@ -148,7 +149,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
int queue_bit);

struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm);
struct mm_struct *mm,
struct svm_range_bo *svm_bo);
#if IS_ENABLED(CONFIG_HSA_AMD)
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
@@ -234,22 +236,27 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s
})

/* GPUVM API */
#define drm_priv_to_vm(drm_priv) \
(&((struct amdgpu_fpriv *) \
((struct drm_file *)(drm_priv))->driver_priv)->vm)

int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
struct file *filp, u32 pasid,
void **vm, void **process_info,
void **process_info,
struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm);
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv);
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct kgd_dev *kgd, uint64_t va, uint64_t size,
void *vm, struct kgd_mem **mem,
void *drm_priv, struct kgd_mem **mem,
uint64_t *offset, uint32_t flags);
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size);
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
uint64_t *size);
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_sync_memory(
struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
@@ -260,7 +267,7 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
struct kfd_vm_fault_info *info);
int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
struct dma_buf *dmabuf,
uint64_t va, void *vm,
uint64_t va, void *drm_priv,
struct kgd_mem **mem, uint64_t *size,
uint64_t *mmap_offset);
int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
@@ -270,6 +277,7 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
#else
static inline
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
@@ -25,6 +25,7 @@
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_arcturus.h"
#include "sdma0/sdma0_4_2_2_offset.h"
#include "sdma0/sdma0_4_2_2_sh_mask.h"
#include "sdma1/sdma1_4_2_2_offset.h"
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include "amdgpu_amdkfd.h"
#include "kfd_svm.h"

static const struct dma_fence_ops amdkfd_fence_ops;
static atomic_t fence_seq = ATOMIC_INIT(0);
@ -60,7 +61,8 @@ static atomic_t fence_seq = ATOMIC_INIT(0);
|
||||
*/
|
||||
|
||||
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
|
||||
struct mm_struct *mm)
|
||||
struct mm_struct *mm,
|
||||
struct svm_range_bo *svm_bo)
|
||||
{
|
||||
struct amdgpu_amdkfd_fence *fence;
|
||||
|
||||
@ -73,7 +75,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
|
||||
fence->mm = mm;
|
||||
get_task_comm(fence->timeline_name, current);
|
||||
spin_lock_init(&fence->lock);
|
||||
|
||||
fence->svm_bo = svm_bo;
|
||||
dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
|
||||
context, atomic_inc_return(&fence_seq));
|
||||
|
||||
@ -111,6 +113,8 @@ static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
|
||||
* a KFD BO and schedules a job to move the BO.
|
||||
* If fence is already signaled return true.
|
||||
* If fence is not signaled schedule a evict KFD process work item.
|
||||
*
|
||||
* @f: dma_fence
|
||||
*/
|
||||
static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
|
||||
{
|
||||
@ -122,16 +126,20 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
|
||||
if (dma_fence_is_signaled(f))
|
||||
return true;
|
||||
|
||||
if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
|
||||
return true;
|
||||
|
||||
if (!fence->svm_bo) {
|
||||
if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
|
||||
return true;
|
||||
} else {
|
||||
if (!svm_range_schedule_evict_svm_bo(fence))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdkfd_fence_release - callback that fence can be freed
|
||||
*
|
||||
* @fence: fence
|
||||
* @f: dma_fence
|
||||
*
|
||||
* This function is called when the reference count becomes zero.
|
||||
* Drops the mm_struct reference and RCU schedules freeing up the fence.
|
||||
|
@ -719,7 +719,7 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
/**
|
||||
* @get_wave_count: Read device registers to get number of waves in flight for
|
||||
* get_wave_count: Read device registers to get number of waves in flight for
|
||||
* a particular queue. The method also returns the VMID associated with the
|
||||
* queue.
|
||||
*
|
||||
@ -755,19 +755,19 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
|
||||
}
|
||||
|
||||
/**
|
||||
* @kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
|
||||
* kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
|
||||
* shader engine and aggregates the number of waves that are in flight for the
|
||||
* process whose pasid is provided as a parameter. The process could have ZERO
|
||||
* or more queues running and submitting waves to compute units.
|
||||
*
|
||||
* @kgd: Handle of device from which to get number of waves in flight
|
||||
* @pasid: Identifies the process for which this query call is invoked
|
||||
* @wave_cnt: Output parameter updated with number of waves in flight that
|
||||
* @pasid_wave_cnt: Output parameter updated with number of waves in flight that
|
||||
* belong to process with given pasid
|
||||
* @max_waves_per_cu: Output parameter updated with maximum number of waves
|
||||
* possible per Compute Unit
|
||||
*
|
||||
* @note: It's possible that the device has too many queues (oversubscription)
|
||||
* Note: It's possible that the device has too many queues (oversubscription)
|
||||
* in which case a VMID could be remapped to a different PASID. This could lead
|
||||
* to an iaccurate wave count. Following is a high-level sequence:
|
||||
* Time T1: vmid = getVmid(); vmid is associated with Pasid P1
|
||||
|
@ -33,9 +33,6 @@
|
||||
#include <uapi/linux/kfd_ioctl.h>
|
||||
#include "amdgpu_xgmi.h"
|
||||
|
||||
/* BO flag to indicate a KFD userptr BO */
|
||||
#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
|
||||
|
||||
/* Userptr restore delay, just long enough to allow consecutive VM
|
||||
* changes to accumulate
|
||||
*/
|
||||
@ -108,6 +105,11 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
|
||||
(kfd_mem_limit.max_ttm_mem_limit >> 20));
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
|
||||
{
|
||||
kfd_mem_limit.system_mem_used += size;
|
||||
}
|
||||
|
||||
/* Estimate page table size needed to represent a given memory size
|
||||
*
|
||||
* With 4KB pages, we need one 8 byte PTE for each 4KB of memory
|
||||
@ -217,7 +219,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
|
||||
u32 domain = bo->preferred_domains;
|
||||
bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
|
||||
|
||||
if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
|
||||
if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
|
||||
domain = AMDGPU_GEM_DOMAIN_CPU;
|
||||
sg = false;
|
||||
}
|
||||
@ -967,7 +969,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
|
||||
|
||||
info->eviction_fence =
|
||||
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
|
||||
current->mm);
|
||||
current->mm,
|
||||
NULL);
|
||||
if (!info->eviction_fence) {
|
||||
pr_err("Failed to create eviction fence\n");
|
||||
ret = -ENOMEM;
|
||||
@ -1036,15 +1039,19 @@ create_evict_fence_fail:
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
|
||||
struct file *filp, u32 pasid,
|
||||
void **vm, void **process_info,
|
||||
void **process_info,
|
||||
struct dma_fence **ef)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
struct drm_file *drm_priv = filp->private_data;
|
||||
struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
|
||||
struct amdgpu_vm *avm = &drv_priv->vm;
|
||||
struct amdgpu_fpriv *drv_priv;
|
||||
struct amdgpu_vm *avm;
|
||||
int ret;
|
||||
|
||||
ret = amdgpu_file_to_fpriv(filp, &drv_priv);
|
||||
if (ret)
|
||||
return ret;
|
||||
avm = &drv_priv->vm;
|
||||
|
||||
/* Already a compute VM? */
|
||||
if (avm->process_info)
|
||||
return -EINVAL;
|
||||
@ -1059,7 +1066,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
*vm = (void *)avm;
|
||||
amdgpu_vm_set_task_info(avm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1100,15 +1107,17 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
|
||||
}
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
|
||||
void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
|
||||
struct amdgpu_vm *avm;
|
||||
|
||||
if (WARN_ON(!kgd || !vm))
|
||||
if (WARN_ON(!kgd || !drm_priv))
|
||||
return;
|
||||
|
||||
pr_debug("Releasing process vm %p\n", vm);
|
||||
avm = drm_priv_to_vm(drm_priv);
|
||||
|
||||
pr_debug("Releasing process vm %p\n", avm);
|
||||
|
||||
/* The original pasid of amdgpu vm has already been
|
||||
* released during making a amdgpu vm to a compute vm
|
||||
@ -1119,9 +1128,9 @@ void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
|
||||
amdgpu_vm_release_compute(adev, avm);
|
||||
}
|
||||
|
||||
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
|
||||
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
|
||||
{
|
||||
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
|
||||
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
|
||||
struct amdgpu_bo *pd = avm->root.base.bo;
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
|
||||
|
||||
@ -1132,11 +1141,11 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
struct kgd_dev *kgd, uint64_t va, uint64_t size,
|
||||
void *vm, struct kgd_mem **mem,
|
||||
void *drm_priv, struct kgd_mem **mem,
|
||||
uint64_t *offset, uint32_t flags)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
|
||||
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
|
||||
enum ttm_bo_type bo_type = ttm_bo_type_device;
|
||||
struct sg_table *sg = NULL;
|
||||
uint64_t user_addr = 0;
|
||||
@ -1216,6 +1225,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
domain_string(alloc_domain), ret);
|
||||
goto err_bo_create;
|
||||
}
|
||||
ret = drm_vma_node_allow(&gobj->vma_node, drm_priv);
|
||||
if (ret) {
|
||||
pr_debug("Failed to allow vma node access. ret %d\n", ret);
|
||||
goto err_node_allow;
|
||||
}
|
||||
bo = gem_to_amdgpu_bo(gobj);
|
||||
if (bo_type == ttm_bo_type_sg) {
|
||||
bo->tbo.sg = sg;
|
||||
@ -1224,7 +1238,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
bo->kfd_bo = *mem;
|
||||
(*mem)->bo = bo;
|
||||
if (user_addr)
|
||||
bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
|
||||
bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO;
|
||||
|
||||
(*mem)->va = va;
|
||||
(*mem)->domain = domain;
|
||||
@ -1245,6 +1259,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
|
||||
allocate_init_user_pages_failed:
|
||||
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
|
||||
drm_vma_node_revoke(&gobj->vma_node, drm_priv);
|
||||
err_node_allow:
|
||||
amdgpu_bo_unref(&bo);
|
||||
/* Don't unreserve system mem limit twice */
|
||||
goto err_reserve_limit;
|
||||
@ -1262,7 +1278,8 @@ err:
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
|
||||
struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
|
||||
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
|
||||
uint64_t *size)
|
||||
{
|
||||
struct amdkfd_process_info *process_info = mem->process_info;
|
||||
unsigned long bo_size = mem->bo->tbo.base.size;
|
||||
@ -1339,6 +1356,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
|
||||
}
|
||||
|
||||
/* Free the BO*/
|
||||
drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
|
||||
drm_gem_object_put(&mem->bo->tbo.base);
|
||||
mutex_destroy(&mem->lock);
|
||||
kfree(mem);
|
||||
@ -1347,10 +1365,10 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
|
||||
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
|
||||
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
|
||||
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
|
||||
int ret;
|
||||
struct amdgpu_bo *bo;
|
||||
uint32_t domain;
|
||||
@ -1391,9 +1409,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
|
||||
pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
|
||||
mem->va,
|
||||
mem->va + bo_size * (1 + mem->aql_queue),
|
||||
vm, domain_string(domain));
|
||||
avm, domain_string(domain));
|
||||
|
||||
ret = reserve_bo_and_vm(mem, vm, &ctx);
|
||||
ret = reserve_bo_and_vm(mem, avm, &ctx);
|
||||
if (unlikely(ret))
|
||||
goto out;
|
||||
|
||||
@ -1437,7 +1455,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
|
||||
}
|
||||
|
||||
list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
|
||||
if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
|
||||
if (entry->bo_va->base.vm == avm && !entry->is_mapped) {
|
||||
pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
|
||||
entry->va, entry->va + bo_size,
|
||||
entry);
|
||||
@ -1449,7 +1467,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
|
||||
goto map_bo_to_gpuvm_failed;
|
||||
}
|
||||
|
||||
ret = vm_update_pds(vm, ctx.sync);
|
||||
ret = vm_update_pds(avm, ctx.sync);
|
||||
if (ret) {
|
||||
pr_err("Failed to update page directories\n");
|
||||
goto map_bo_to_gpuvm_failed;
|
||||
@ -1485,11 +1503,11 @@ out:
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
|
||||
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
|
||||
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
struct amdkfd_process_info *process_info =
|
||||
((struct amdgpu_vm *)vm)->process_info;
|
||||
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
|
||||
struct amdkfd_process_info *process_info = avm->process_info;
|
||||
unsigned long bo_size = mem->bo->tbo.base.size;
|
||||
struct kfd_bo_va_list *entry;
|
||||
struct bo_vm_reservation_context ctx;
|
||||
@ -1497,7 +1515,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
|
||||
|
||||
mutex_lock(&mem->lock);
|
||||
|
||||
ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
|
||||
ret = reserve_bo_and_cond_vms(mem, avm, BO_VM_MAPPED, &ctx);
|
||||
if (unlikely(ret))
|
||||
goto out;
|
||||
/* If no VMs were reserved, it means the BO wasn't actually mapped */
|
||||
@ -1506,17 +1524,17 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
|
||||
goto unreserve_out;
|
||||
}
|
||||
|
||||
ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm);
|
||||
ret = vm_validate_pt_pd_bos(avm);
|
||||
if (unlikely(ret))
|
||||
goto unreserve_out;
|
||||
|
||||
pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
|
||||
mem->va,
|
||||
mem->va + bo_size * (1 + mem->aql_queue),
|
||||
vm);
|
||||
avm);
|
||||
|
||||
list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
|
||||
if (entry->bo_va->base.vm == vm && entry->is_mapped) {
|
||||
if (entry->bo_va->base.vm == avm && entry->is_mapped) {
|
||||
pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
|
||||
entry->va,
|
||||
entry->va + bo_size,
|
||||
@ -1642,14 +1660,15 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
|
||||
struct dma_buf *dma_buf,
|
||||
uint64_t va, void *vm,
|
||||
uint64_t va, void *drm_priv,
|
||||
struct kgd_mem **mem, uint64_t *size,
|
||||
uint64_t *mmap_offset)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
|
||||
struct drm_gem_object *obj;
|
||||
struct amdgpu_bo *bo;
|
||||
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
|
||||
int ret;
|
||||
|
||||
if (dma_buf->ops != &amdgpu_dmabuf_ops)
|
||||
/* Can't handle non-graphics buffers */
|
||||
@ -1670,6 +1689,12 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
|
||||
if (!*mem)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
|
||||
if (ret) {
|
||||
kfree(mem);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (size)
|
||||
*size = amdgpu_bo_size(bo);
|
||||
|
||||
@ -2135,7 +2160,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
|
||||
*/
|
||||
new_fence = amdgpu_amdkfd_fence_create(
|
||||
process_info->eviction_fence->base.context,
|
||||
process_info->eviction_fence->mm);
|
||||
process_info->eviction_fence->mm,
|
||||
NULL);
|
||||
if (!new_fence) {
|
||||
pr_err("Failed to create eviction fence\n");
|
||||
ret = -ENOMEM;
|
||||
|
@ -672,7 +672,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
|
||||
}
|
||||
|
||||
/**
|
||||
* cs_parser_fini() - clean parser states
|
||||
* amdgpu_cs_parser_fini() - clean parser states
|
||||
* @parser: parser structure holding parsing context.
|
||||
* @error: error number
|
||||
* @backoff: indicator to backoff the reservation
|
||||
@ -1488,7 +1488,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_cs_wait_all_fence - wait on all fences to signal
|
||||
* amdgpu_cs_wait_all_fences - wait on all fences to signal
|
||||
*
|
||||
* @adev: amdgpu device
|
||||
* @filp: file private
|
||||
@ -1639,7 +1639,7 @@ err_free_fences:
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_cs_find_bo_va - find bo_va for VM address
|
||||
* amdgpu_cs_find_mapping - find bo_va for VM address
|
||||
*
|
||||
* @parser: command submission parser context
|
||||
* @addr: VM address
|
||||
|
@ -2856,7 +2856,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
|
||||
AMD_IP_BLOCK_TYPE_IH,
|
||||
};
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
|
||||
for (i = 0; i < adev->num_ip_blocks; i++) {
|
||||
int j;
|
||||
struct amdgpu_ip_block *block;
|
||||
|
||||
@ -3179,8 +3179,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* By default timeout for non compute jobs is 10000.
|
||||
* And there is no timeout enforced on compute jobs.
|
||||
* By default timeout for non compute jobs is 10000
|
||||
* and 60000 for compute jobs.
|
||||
* In SR-IOV or passthrough mode, timeout for compute
|
||||
* jobs are 60000 by default.
|
||||
*/
|
||||
@ -3189,10 +3189,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
|
||||
msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
|
||||
else if (amdgpu_passthrough(adev))
|
||||
adev->compute_timeout = msecs_to_jiffies(60000);
|
||||
else
|
||||
adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
|
||||
adev->compute_timeout = msecs_to_jiffies(60000);
|
||||
|
||||
if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
|
||||
while ((timeout_setting = strsep(&input, ",")) &&
|
||||
@ -3741,7 +3739,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
|
||||
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
|
||||
{
|
||||
struct amdgpu_device *adev = drm_to_adev(dev);
|
||||
int r;
|
||||
|
||||
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
|
||||
return 0;
|
||||
@ -3756,7 +3753,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
|
||||
|
||||
amdgpu_ras_suspend(adev);
|
||||
|
||||
r = amdgpu_device_ip_suspend_phase1(adev);
|
||||
amdgpu_device_ip_suspend_phase1(adev);
|
||||
|
||||
if (!adev->in_s0ix)
|
||||
amdgpu_amdkfd_suspend(adev, adev->in_runpm);
|
||||
@ -3766,7 +3763,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
|
||||
|
||||
amdgpu_fence_driver_suspend(adev);
|
||||
|
||||
r = amdgpu_device_ip_suspend_phase2(adev);
|
||||
amdgpu_device_ip_suspend_phase2(adev);
|
||||
/* evict remaining vram memory
|
||||
* This second call to evict vram is to evict the gart page table
|
||||
* using the CPU.
|
||||
@ -5124,7 +5121,8 @@ int amdgpu_device_baco_enter(struct drm_device *dev)
|
||||
if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
|
||||
return -ENOTSUPP;
|
||||
|
||||
if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
|
||||
if (ras && adev->ras_enabled &&
|
||||
adev->nbio.funcs->enable_doorbell_interrupt)
|
||||
adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
|
||||
|
||||
return amdgpu_dpm_baco_enter(adev);
|
||||
@ -5143,7 +5141,8 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
|
||||
if (ras && adev->ras_enabled &&
|
||||
adev->nbio.funcs->enable_doorbell_interrupt)
|
||||
adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
|
||||
|
||||
return 0;
|
||||
|
@ -288,9 +288,9 @@ module_param_named(msi, amdgpu_msi, int, 0444);
|
||||
* for SDMA and Video.
|
||||
*
|
||||
* By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
|
||||
* jobs is 10000. And there is no timeout enforced on compute jobs.
|
||||
* jobs is 10000. The timeout for compute is 60000.
|
||||
*/
|
||||
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; "
|
||||
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; "
|
||||
"for passthrough or sriov, 10000 for all jobs."
|
||||
" 0: keep default value. negative: infinity timeout), "
|
||||
"format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
|
||||
@ -641,7 +641,8 @@ module_param_named(mes, amdgpu_mes, int, 0444);
|
||||
|
||||
/**
|
||||
* DOC: noretry (int)
|
||||
* Disable retry faults in the GPU memory controller.
|
||||
* Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that
|
||||
* do not support per-process XNACK this also disables retry page faults.
|
||||
* (0 = retry enabled, 1 = retry disabled, -1 auto (default))
|
||||
*/
|
||||
MODULE_PARM_DESC(noretry,
|
||||
@ -1186,6 +1187,7 @@ static const struct pci_device_id pciidlist[] = {
|
||||
{0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
|
||||
{0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
|
||||
{0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
|
||||
{0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
|
||||
|
||||
{0, 0, 0}
|
||||
};
|
||||
@ -1598,17 +1600,15 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
|
||||
if (amdgpu_device_has_dc_support(adev)) {
|
||||
struct drm_crtc *crtc;
|
||||
|
||||
drm_modeset_lock_all(drm_dev);
|
||||
|
||||
drm_for_each_crtc(crtc, drm_dev) {
|
||||
if (crtc->state->active) {
|
||||
drm_modeset_lock(&crtc->mutex, NULL);
|
||||
if (crtc->state->active)
|
||||
ret = -EBUSY;
|
||||
drm_modeset_unlock(&crtc->mutex);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
drm_modeset_unlock_all(drm_dev);
|
||||
|
||||
} else {
|
||||
struct drm_connector *list_connector;
|
||||
struct drm_connector_list_iter iter;
|
||||
|
@ -434,6 +434,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
|
||||
*
|
||||
* @ring: ring to init the fence driver on
|
||||
* @num_hw_submission: number of entries on the hardware queue
|
||||
* @sched_score: optional score atomic shared with other schedulers
|
||||
*
|
||||
* Init the fence driver for the requested ring (all asics).
|
||||
* Helper function for amdgpu_fence_driver_init().
|
||||
|
@ -60,7 +60,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* amdgpu_dummy_page_init - init dummy page used by the driver
|
||||
* amdgpu_gart_dummy_page_init - init dummy page used by the driver
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
@ -86,7 +86,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_dummy_page_fini - free dummy page used by the driver
|
||||
* amdgpu_gart_dummy_page_fini - free dummy page used by the driver
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
|
@ -332,6 +332,17 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
|
||||
mc->agp_size >> 20, mc->agp_start, mc->agp_end);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gmc_fault_key - get hask key from vm fault address and pasid
|
||||
*
|
||||
* @addr: 48 bit physical address, page aligned (36 significant bits)
|
||||
* @pasid: 16 bit process address space identifier
|
||||
*/
|
||||
static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
|
||||
{
|
||||
return addr << 4 | pasid;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gmc_filter_faults - filter VM faults
|
||||
*
|
||||
@ -348,8 +359,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
|
||||
uint16_t pasid, uint64_t timestamp)
|
||||
{
|
||||
struct amdgpu_gmc *gmc = &adev->gmc;
|
||||
|
||||
uint64_t stamp, key = addr << 4 | pasid;
|
||||
uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid);
|
||||
struct amdgpu_gmc_fault *fault;
|
||||
uint32_t hash;
|
||||
|
||||
@ -365,7 +375,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
|
||||
while (fault->timestamp >= stamp) {
|
||||
uint64_t tmp;
|
||||
|
||||
if (fault->key == key)
|
||||
if (atomic64_read(&fault->key) == key)
|
||||
return true;
|
||||
|
||||
tmp = fault->timestamp;
|
||||
@ -378,7 +388,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
|
||||
|
||||
/* Add the fault to the ring */
|
||||
fault = &gmc->fault_ring[gmc->last_fault];
|
||||
fault->key = key;
|
||||
atomic64_set(&fault->key, key);
|
||||
fault->timestamp = timestamp;
|
||||
|
||||
/* And update the hash */
|
||||
@ -387,6 +397,36 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gmc_filter_faults_remove - remove address from VM faults filter
|
||||
*
|
||||
* @adev: amdgpu device structure
|
||||
* @addr: address of the VM fault
|
||||
* @pasid: PASID of the process causing the fault
|
||||
*
|
||||
* Remove the address from fault filter, then future vm fault on this address
|
||||
* will pass to retry fault handler to recover.
|
||||
*/
|
||||
void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
|
||||
uint16_t pasid)
|
||||
{
|
||||
struct amdgpu_gmc *gmc = &adev->gmc;
|
||||
uint64_t key = amdgpu_gmc_fault_key(addr, pasid);
|
||||
struct amdgpu_gmc_fault *fault;
|
||||
uint32_t hash;
|
||||
uint64_t tmp;
|
||||
|
||||
hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
|
||||
fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
|
||||
do {
|
||||
if (atomic64_cmpxchg(&fault->key, key, 0) == key)
|
||||
break;
|
||||
|
||||
tmp = fault->timestamp;
|
||||
fault = &gmc->fault_ring[fault->next];
|
||||
} while (fault->timestamp < tmp);
|
||||
}
|
||||
|
||||
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
|
||||
{
|
||||
int r;
|
||||
@ -415,6 +455,13 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (adev->hdp.ras_funcs &&
|
||||
adev->hdp.ras_funcs->ras_late_init) {
|
||||
r = adev->hdp.ras_funcs->ras_late_init(adev);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -426,11 +473,15 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
|
||||
|
||||
if (adev->mmhub.ras_funcs &&
|
||||
adev->mmhub.ras_funcs->ras_fini)
|
||||
amdgpu_mmhub_ras_fini(adev);
|
||||
adev->mmhub.ras_funcs->ras_fini(adev);
|
||||
|
||||
if (adev->gmc.xgmi.ras_funcs &&
|
||||
adev->gmc.xgmi.ras_funcs->ras_fini)
|
||||
adev->gmc.xgmi.ras_funcs->ras_fini(adev);
|
||||
|
||||
if (adev->hdp.ras_funcs &&
|
||||
adev->hdp.ras_funcs->ras_fini)
|
||||
adev->hdp.ras_funcs->ras_fini(adev);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -66,9 +66,9 @@ struct firmware;
|
||||
* GMC page fault information
|
||||
*/
|
||||
struct amdgpu_gmc_fault {
|
||||
uint64_t timestamp;
|
||||
uint64_t timestamp:48;
|
||||
uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER;
|
||||
uint64_t key:52;
|
||||
atomic64_t key;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -318,6 +318,8 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
|
||||
struct amdgpu_gmc *mc);
|
||||
bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
|
||||
uint16_t pasid, uint64_t timestamp);
|
||||
void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
|
||||
uint16_t pasid);
|
||||
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
|
||||
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
|
||||
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
|
||||
|
@ -24,7 +24,8 @@
|
||||
|
||||
#include "amdgpu.h"
|
||||
|
||||
static inline struct amdgpu_gtt_mgr *to_gtt_mgr(struct ttm_resource_manager *man)
|
||||
static inline struct amdgpu_gtt_mgr *
|
||||
to_gtt_mgr(struct ttm_resource_manager *man)
|
||||
{
|
||||
return container_of(man, struct amdgpu_gtt_mgr, manager);
|
||||
}
|
||||
@ -43,12 +44,14 @@ struct amdgpu_gtt_node {
|
||||
* the GTT block, in bytes
|
||||
*/
|
||||
static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
|
||||
struct ttm_resource_manager *man;
|
||||
|
||||
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
|
||||
return sysfs_emit(buf, "%llu\n", man->size * PAGE_SIZE);
|
||||
}
|
||||
|
||||
@ -61,12 +64,14 @@ static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev,
|
||||
* size of the GTT block, in bytes
|
||||
*/
|
||||
static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
|
||||
struct ttm_resource_manager *man;
|
||||
|
||||
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
|
||||
return sysfs_emit(buf, "%llu\n", amdgpu_gtt_mgr_usage(man));
|
||||
}
|
||||
|
||||
@ -75,80 +80,6 @@ static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO,
|
||||
static DEVICE_ATTR(mem_info_gtt_used, S_IRUGO,
|
||||
amdgpu_mem_info_gtt_used_show, NULL);
|
||||
|
||||
static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func;
|
||||
/**
|
||||
* amdgpu_gtt_mgr_init - init GTT manager and DRM MM
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @gtt_size: maximum size of GTT
|
||||
*
|
||||
* Allocate and initialize the GTT manager.
|
||||
*/
|
||||
int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
|
||||
{
|
||||
struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
|
||||
struct ttm_resource_manager *man = &mgr->manager;
|
||||
uint64_t start, size;
|
||||
int ret;
|
||||
|
||||
man->use_tt = true;
|
||||
man->func = &amdgpu_gtt_mgr_func;
|
||||
|
||||
ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT);
|
||||
|
||||
start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
|
||||
size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
|
||||
drm_mm_init(&mgr->mm, start, size);
|
||||
spin_lock_init(&mgr->lock);
|
||||
atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT);
|
||||
|
||||
ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to create device file mem_info_gtt_total\n");
|
||||
return ret;
|
||||
}
|
||||
ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to create device file mem_info_gtt_used\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
|
||||
ttm_resource_manager_set_used(man, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gtt_mgr_fini - free and destroy GTT manager
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Destroy and free the GTT manager, returns -EBUSY if ranges are still
|
||||
* allocated inside it.
|
||||
*/
|
||||
void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
|
||||
struct ttm_resource_manager *man = &mgr->manager;
|
||||
int ret;
|
||||
|
||||
ttm_resource_manager_set_used(man, false);
|
||||
|
||||
ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
spin_lock(&mgr->lock);
|
||||
drm_mm_takedown(&mgr->mm);
|
||||
spin_unlock(&mgr->lock);
|
||||
|
||||
device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total);
|
||||
device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used);
|
||||
|
||||
ttm_resource_manager_cleanup(man);
|
||||
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gtt_mgr_has_gart_addr - Check if mem has address space
|
||||
*
|
||||
@ -265,6 +196,13 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man)
|
||||
return (result > 0 ? result : 0) * PAGE_SIZE;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gtt_mgr_recover - re-init gart
|
||||
*
|
||||
* @man: TTM memory type manager
|
||||
*
|
||||
* Re-init the gart for each known BO in the GTT.
|
||||
*/
|
||||
int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
|
||||
{
|
||||
struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
|
||||
@ -311,3 +249,76 @@ static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = {
|
||||
.free = amdgpu_gtt_mgr_del,
|
||||
.debug = amdgpu_gtt_mgr_debug
|
||||
};
|
||||
|
||||
/**
|
||||
* amdgpu_gtt_mgr_init - init GTT manager and DRM MM
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @gtt_size: maximum size of GTT
|
||||
*
|
||||
* Allocate and initialize the GTT manager.
|
||||
*/
|
||||
int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
|
||||
{
|
||||
struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
|
||||
struct ttm_resource_manager *man = &mgr->manager;
|
||||
uint64_t start, size;
|
||||
int ret;
|
||||
|
||||
man->use_tt = true;
|
||||
man->func = &amdgpu_gtt_mgr_func;
|
||||
|
||||
ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT);
|
||||
|
||||
start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
|
||||
size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
|
||||
drm_mm_init(&mgr->mm, start, size);
|
||||
spin_lock_init(&mgr->lock);
|
||||
atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT);
|
||||
|
||||
ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to create device file mem_info_gtt_total\n");
|
||||
return ret;
|
||||
}
|
||||
ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to create device file mem_info_gtt_used\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
|
||||
ttm_resource_manager_set_used(man, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gtt_mgr_fini - free and destroy GTT manager
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Destroy and free the GTT manager, returns -EBUSY if ranges are still
|
||||
* allocated inside it.
|
||||
*/
|
||||
void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
|
||||
struct ttm_resource_manager *man = &mgr->manager;
|
||||
int ret;
|
||||
|
||||
ttm_resource_manager_set_used(man, false);
|
||||
|
||||
ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
spin_lock(&mgr->lock);
|
||||
drm_mm_takedown(&mgr->mm);
|
||||
spin_unlock(&mgr->lock);
|
||||
|
||||
device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total);
|
||||
device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used);
|
||||
|
||||
ttm_resource_manager_cleanup(man);
|
||||
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
|
||||
}
|
||||
|
drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c (new file, 69 lines)
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright 2021 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_ras.h"
|
||||
|
||||
int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev)
|
||||
{
|
||||
int r;
|
||||
struct ras_ih_if ih_info = {
|
||||
.cb = NULL,
|
||||
};
|
||||
struct ras_fs_if fs_info = {
|
||||
.sysfs_name = "hdp_err_count",
|
||||
};
|
||||
|
||||
if (!adev->hdp.ras_if) {
|
||||
adev->hdp.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
|
||||
if (!adev->hdp.ras_if)
|
||||
return -ENOMEM;
|
||||
adev->hdp.ras_if->block = AMDGPU_RAS_BLOCK__HDP;
|
||||
adev->hdp.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
|
||||
adev->hdp.ras_if->sub_block_index = 0;
|
||||
strcpy(adev->hdp.ras_if->name, "hdp");
|
||||
}
|
||||
ih_info.head = fs_info.head = *adev->hdp.ras_if;
|
||||
r = amdgpu_ras_late_init(adev, adev->hdp.ras_if,
|
||||
&fs_info, &ih_info);
|
||||
if (r || !amdgpu_ras_is_supported(adev, adev->hdp.ras_if->block)) {
|
||||
kfree(adev->hdp.ras_if);
|
||||
adev->hdp.ras_if = NULL;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
void amdgpu_hdp_ras_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
|
||||
adev->hdp.ras_if) {
|
||||
struct ras_common_if *ras_if = adev->hdp.ras_if;
|
||||
struct ras_ih_if ih_info = {
|
||||
.cb = NULL,
|
||||
};
|
||||
|
||||
amdgpu_ras_late_fini(adev, ras_if, &ih_info);
|
||||
kfree(ras_if);
|
||||
}
|
||||
}
|
@ -23,18 +23,29 @@
|
||||
#ifndef __AMDGPU_HDP_H__
|
||||
#define __AMDGPU_HDP_H__
|
||||
|
||||
struct amdgpu_hdp_ras_funcs {
|
||||
int (*ras_late_init)(struct amdgpu_device *adev);
|
||||
void (*ras_fini)(struct amdgpu_device *adev);
|
||||
void (*query_ras_error_count)(struct amdgpu_device *adev,
|
||||
void *ras_error_status);
|
||||
void (*reset_ras_error_count)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
struct amdgpu_hdp_funcs {
|
||||
void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
|
||||
void (*invalidate_hdp)(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring);
|
||||
void (*reset_ras_error_count)(struct amdgpu_device *adev);
|
||||
void (*update_clock_gating)(struct amdgpu_device *adev, bool enable);
|
||||
void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
|
||||
void (*init_registers)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
struct amdgpu_hdp {
|
||||
struct ras_common_if *ras_if;
|
||||
const struct amdgpu_hdp_funcs *funcs;
|
||||
const struct amdgpu_hdp_ras_funcs *ras_funcs;
|
||||
};
|
||||
|
||||
int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev);
|
||||
void amdgpu_hdp_ras_fini(struct amdgpu_device *adev);
|
||||
#endif /* __AMDGPU_HDP_H__ */
|
||||
|
@ -328,7 +328,7 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
|
||||
|
||||
for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
|
||||
if (i == AMDGPU_IB_POOL_DIRECT)
|
||||
size = PAGE_SIZE * 2;
|
||||
size = PAGE_SIZE * 6;
|
||||
else
|
||||
size = AMDGPU_IB_POOL_SIZE;
|
||||
|
||||
|
@ -175,7 +175,9 @@ static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev,
|
||||
cur_rptr += ih->ptr_mask + 1;
|
||||
*prev_rptr = cur_rptr;
|
||||
|
||||
return cur_rptr >= checkpoint_wptr;
|
||||
/* check ring is empty to workaround missing wptr overflow flag */
|
||||
return cur_rptr >= checkpoint_wptr ||
|
||||
(cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -986,7 +986,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
||||
|
||||
if (!ras)
|
||||
return -EINVAL;
|
||||
ras_mask = (uint64_t)ras->supported << 32 | ras->features;
|
||||
ras_mask = (uint64_t)adev->ras_enabled << 32 | ras->features;
|
||||
|
||||
return copy_to_user(out, &ras_mask,
|
||||
min_t(u64, size, sizeof(ras_mask))) ?
|
||||
@ -1114,7 +1114,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
|
||||
dev_warn(adev->dev, "No more PASIDs available!");
|
||||
pasid = 0;
|
||||
}
|
||||
r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid);
|
||||
|
||||
r = amdgpu_vm_init(adev, &fpriv->vm, pasid);
|
||||
if (r)
|
||||
goto error_pasid;
|
||||
|
||||
|
@ -28,6 +28,7 @@ struct amdgpu_mmhub_ras_funcs {
|
||||
void *ras_error_status);
|
||||
void (*query_ras_error_status)(struct amdgpu_device *adev);
|
||||
void (*reset_ras_error_count)(struct amdgpu_device *adev);
|
||||
void (*reset_ras_error_status)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
struct amdgpu_mmhub_funcs {
|
||||
|
@ -155,3 +155,89 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
|
||||
mmu_interval_notifier_remove(&bo->notifier);
|
||||
bo->notifier.mm = NULL;
|
||||
}
|
||||
|
||||
int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
|
||||
struct mm_struct *mm, struct page **pages,
|
||||
uint64_t start, uint64_t npages,
|
||||
struct hmm_range **phmm_range, bool readonly,
|
||||
bool mmap_locked)
|
||||
{
|
||||
struct hmm_range *hmm_range;
|
||||
unsigned long timeout;
|
||||
unsigned long i;
|
||||
unsigned long *pfns;
|
||||
int r = 0;
|
||||
|
||||
hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL);
|
||||
if (unlikely(!hmm_range))
|
||||
return -ENOMEM;
|
||||
|
||||
pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
|
||||
if (unlikely(!pfns)) {
|
||||
r = -ENOMEM;
|
||||
goto out_free_range;
|
||||
}
|
||||
|
||||
hmm_range->notifier = notifier;
|
||||
hmm_range->default_flags = HMM_PFN_REQ_FAULT;
|
||||
if (!readonly)
|
||||
hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
|
||||
hmm_range->hmm_pfns = pfns;
|
||||
hmm_range->start = start;
|
||||
hmm_range->end = start + npages * PAGE_SIZE;
|
||||
|
||||
/* Assuming 512MB takes maxmium 1 second to fault page address */
|
||||
timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT;
|
||||
timeout = jiffies + msecs_to_jiffies(timeout);
|
||||
|
||||
retry:
|
||||
hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
|
||||
|
||||
if (likely(!mmap_locked))
|
||||
mmap_read_lock(mm);
|
||||
|
||||
r = hmm_range_fault(hmm_range);
|
||||
|
||||
if (likely(!mmap_locked))
|
||||
mmap_read_unlock(mm);
|
||||
if (unlikely(r)) {
|
||||
/*
|
||||
* FIXME: This timeout should encompass the retry from
|
||||
* mmu_interval_read_retry() as well.
|
||||
*/
|
||||
if (r == -EBUSY && !time_after(jiffies, timeout))
|
||||
goto retry;
|
||||
goto out_free_pfns;
|
||||
}
|
||||
|
||||
/*
|
||||
* Due to default_flags, all pages are HMM_PFN_VALID or
|
||||
* hmm_range_fault() fails. FIXME: The pages cannot be touched outside
|
||||
* the notifier_lock, and mmu_interval_read_retry() must be done first.
|
||||
*/
|
||||
for (i = 0; pages && i < npages; i++)
|
||||
pages[i] = hmm_pfn_to_page(pfns[i]);
|
||||
|
||||
*phmm_range = hmm_range;
|
||||
|
||||
return 0;
|
||||
|
||||
out_free_pfns:
|
||||
kvfree(pfns);
|
||||
out_free_range:
|
||||
kfree(hmm_range);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)
|
||||
{
|
||||
int r;
|
||||
|
||||
r = mmu_interval_read_retry(hmm_range->notifier,
|
||||
hmm_range->notifier_seq);
|
||||
kvfree(hmm_range->hmm_pfns);
|
||||
kfree(hmm_range);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -30,6 +30,13 @@
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/interval_tree.h>
|
||||
|
||||
int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
|
||||
struct mm_struct *mm, struct page **pages,
|
||||
uint64_t start, uint64_t npages,
|
||||
struct hmm_range **phmm_range, bool readonly,
|
||||
bool mmap_locked);
|
||||
int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
|
||||
|
||||
#if defined(CONFIG_HMM_MIRROR)
|
||||
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
|
||||
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
|
||||
|
@ -491,7 +491,18 @@ bool amdgpu_bo_support_uswc(u64 bo_flags)
|
||||
#endif
|
||||
}
|
||||
|
||||
static int amdgpu_bo_do_create(struct amdgpu_device *adev,
|
||||
/**
|
||||
* amdgpu_bo_create - create an &amdgpu_bo buffer object
|
||||
* @adev: amdgpu device object
|
||||
* @bp: parameters to be used for the buffer object
|
||||
* @bo_ptr: pointer to the buffer object pointer
|
||||
*
|
||||
* Creates an &amdgpu_bo buffer object.
|
||||
*
|
||||
* Returns:
|
||||
* 0 for success or a negative error code on failure.
|
||||
*/
|
||||
int amdgpu_bo_create(struct amdgpu_device *adev,
|
||||
struct amdgpu_bo_param *bp,
|
||||
struct amdgpu_bo **bo_ptr)
|
||||
{
|
||||
@ -601,9 +612,9 @@ fail_unreserve:
|
||||
return r;
|
||||
}
|
||||
|
||||
static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
|
||||
unsigned long size,
|
||||
struct amdgpu_bo *bo)
|
||||
int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
|
||||
unsigned long size,
|
||||
struct amdgpu_bo *bo)
|
||||
{
|
||||
struct amdgpu_bo_param bp;
|
||||
int r;
|
||||
@ -614,13 +625,12 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
|
||||
memset(&bp, 0, sizeof(bp));
|
||||
bp.size = size;
|
||||
bp.domain = AMDGPU_GEM_DOMAIN_GTT;
|
||||
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
|
||||
AMDGPU_GEM_CREATE_SHADOW;
|
||||
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
|
||||
bp.type = ttm_bo_type_kernel;
|
||||
bp.resv = bo->tbo.base.resv;
|
||||
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
|
||||
|
||||
r = amdgpu_bo_do_create(adev, &bp, &bo->shadow);
|
||||
r = amdgpu_bo_create(adev, &bp, &bo->shadow);
|
||||
if (!r) {
|
||||
bo->shadow->parent = amdgpu_bo_ref(bo);
|
||||
mutex_lock(&adev->shadow_list_lock);
|
||||
@ -631,50 +641,6 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_bo_create - create an &amdgpu_bo buffer object
|
||||
* @adev: amdgpu device object
|
||||
* @bp: parameters to be used for the buffer object
|
||||
* @bo_ptr: pointer to the buffer object pointer
|
||||
*
|
||||
* Creates an &amdgpu_bo buffer object; and if requested, also creates a
|
||||
* shadow object.
|
||||
* Shadow object is used to backup the original buffer object, and is always
|
||||
* in GTT.
|
||||
*
|
||||
* Returns:
|
||||
* 0 for success or a negative error code on failure.
|
||||
*/
|
||||
int amdgpu_bo_create(struct amdgpu_device *adev,
|
||||
struct amdgpu_bo_param *bp,
|
||||
struct amdgpu_bo **bo_ptr)
|
||||
{
|
||||
u64 flags = bp->flags;
|
||||
int r;
|
||||
|
||||
bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
|
||||
|
||||
r = amdgpu_bo_do_create(adev, bp, bo_ptr);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) {
|
||||
if (!bp->resv)
|
||||
WARN_ON(dma_resv_lock((*bo_ptr)->tbo.base.resv,
|
||||
NULL));
|
||||
|
||||
r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr);
|
||||
|
||||
if (!bp->resv)
|
||||
dma_resv_unlock((*bo_ptr)->tbo.base.resv);
|
||||
|
||||
if (r)
|
||||
amdgpu_bo_unref(bo_ptr);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_bo_create_user - create an &amdgpu_bo_user buffer object
|
||||
* @adev: amdgpu device object
|
||||
@ -694,9 +660,8 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev,
|
||||
struct amdgpu_bo *bo_ptr;
|
||||
int r;
|
||||
|
||||
bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
|
||||
bp->bo_ptr_size = sizeof(struct amdgpu_bo_user);
|
||||
r = amdgpu_bo_do_create(adev, bp, &bo_ptr);
|
||||
r = amdgpu_bo_create(adev, bp, &bo_ptr);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
@ -1595,7 +1560,6 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
|
||||
amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS);
|
||||
amdgpu_bo_print_flag(m, bo, CPU_GTT_USWC);
|
||||
amdgpu_bo_print_flag(m, bo, VRAM_CLEARED);
|
||||
amdgpu_bo_print_flag(m, bo, SHADOW);
|
||||
amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
|
||||
amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID);
|
||||
amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC);
|
||||
|
@@ -37,6 +37,10 @@
#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
#define AMDGPU_BO_MAX_PLACEMENTS 3

/* BO flag to indicate a KFD userptr BO */
#define AMDGPU_AMDKFD_CREATE_USERPTR_BO (1ULL << 63)
#define AMDGPU_AMDKFD_CREATE_SVM_BO (1ULL << 62)

#define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)

struct amdgpu_bo_param {
@@ -267,6 +271,9 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev,
struct amdgpu_bo_user **ubo_ptr);
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr);
int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
unsigned long size,
struct amdgpu_bo *bo);
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
void amdgpu_bo_kunmap(struct amdgpu_bo *bo);

@ -417,31 +417,12 @@ static int psp_tmr_init(struct psp_context *psp)
return ret;
|
||||
}
|
||||
|
||||
static int psp_clear_vf_fw(struct psp_context *psp)
|
||||
{
|
||||
int ret;
|
||||
struct psp_gfx_cmd_resp *cmd;
|
||||
|
||||
if (!amdgpu_sriov_vf(psp->adev) || psp->adev->asic_type != CHIP_NAVI12)
|
||||
return 0;
|
||||
|
||||
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
|
||||
if (!cmd)
|
||||
return -ENOMEM;
|
||||
|
||||
cmd->cmd_id = GFX_CMD_ID_CLEAR_VF_FW;
|
||||
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
|
||||
kfree(cmd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool psp_skip_tmr(struct psp_context *psp)
|
||||
{
|
||||
switch (psp->adev->asic_type) {
|
||||
case CHIP_NAVI12:
|
||||
case CHIP_SIENNA_CICHLID:
|
||||
case CHIP_ALDEBARAN:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
@ -1037,6 +1018,13 @@ static int psp_ras_load(struct psp_context *psp)
|
||||
memset(psp->fw_pri_buf, 0, PSP_1_MEG);
|
||||
memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, psp->ta_ras_ucode_size);
|
||||
|
||||
ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
|
||||
|
||||
if (psp->adev->gmc.xgmi.connected_to_cpu)
|
||||
ras_cmd->ras_in_message.init_flags.poison_mode_en = 1;
|
||||
else
|
||||
ras_cmd->ras_in_message.init_flags.dgpu_mode = 1;
|
||||
|
||||
psp_prep_ta_load_cmd_buf(cmd,
|
||||
psp->fw_pri_mc_addr,
|
||||
psp->ta_ras_ucode_size,
|
||||
@ -1046,8 +1034,6 @@ static int psp_ras_load(struct psp_context *psp)
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd,
|
||||
psp->fence_buf_mc_addr);
|
||||
|
||||
ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
|
||||
|
||||
if (!ret) {
|
||||
psp->ras.session_id = cmd->resp.session_id;
|
||||
|
||||
@ -1128,6 +1114,31 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int psp_ras_status_to_errno(struct amdgpu_device *adev,
|
||||
enum ta_ras_status ras_status)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
|
||||
switch (ras_status) {
|
||||
case TA_RAS_STATUS__SUCCESS:
|
||||
ret = 0;
|
||||
break;
|
||||
case TA_RAS_STATUS__RESET_NEEDED:
|
||||
ret = -EAGAIN;
|
||||
break;
|
||||
case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE:
|
||||
dev_warn(adev->dev, "RAS WARN: ras function unavailable\n");
|
||||
break;
|
||||
case TA_RAS_STATUS__ERROR_ASD_READ_WRITE:
|
||||
dev_warn(adev->dev, "RAS WARN: asd read or write failed\n");
|
||||
break;
|
||||
default:
|
||||
dev_err(adev->dev, "RAS ERROR: ras function failed ret 0x%X\n", ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int psp_ras_enable_features(struct psp_context *psp,
|
||||
union ta_ras_cmd_input *info, bool enable)
|
||||
{
|
||||
@ -1151,7 +1162,7 @@ int psp_ras_enable_features(struct psp_context *psp,
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
return ras_cmd->ras_status;
|
||||
return psp_ras_status_to_errno(psp->adev, ras_cmd->ras_status);
|
||||
}
|
||||
|
||||
static int psp_ras_terminate(struct psp_context *psp)
|
||||
@ -1234,7 +1245,7 @@ int psp_ras_trigger_error(struct psp_context *psp,
|
||||
if (amdgpu_ras_intr_triggered())
|
||||
return 0;
|
||||
|
||||
return ras_cmd->ras_status;
|
||||
return psp_ras_status_to_errno(psp->adev, ras_cmd->ras_status);
|
||||
}
|
||||
// ras end
|
||||
|
||||
@ -1920,12 +1931,6 @@ static int psp_hw_start(struct psp_context *psp)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = psp_clear_vf_fw(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP clear vf fw!\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = psp_boot_config_set(adev);
|
||||
if (ret) {
|
||||
DRM_WARN("PSP set boot config@\n");
|
||||
@ -2166,7 +2171,7 @@ static int psp_load_smu_fw(struct psp_context *psp)
|
||||
return 0;
|
||||
|
||||
if ((amdgpu_in_reset(adev) &&
|
||||
ras && ras->supported &&
|
||||
ras && adev->ras_enabled &&
|
||||
(adev->asic_type == CHIP_ARCTURUS ||
|
||||
adev->asic_type == CHIP_VEGA20)) ||
|
||||
(adev->in_runpm &&
|
||||
@ -2434,7 +2439,6 @@ static int psp_hw_fini(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
struct psp_context *psp = &adev->psp;
|
||||
int ret;
|
||||
|
||||
if (psp->adev->psp.ta_fw) {
|
||||
psp_ras_terminate(psp);
|
||||
@ -2445,11 +2449,6 @@ static int psp_hw_fini(void *handle)
|
||||
}
|
||||
|
||||
psp_asd_unload(psp);
|
||||
ret = psp_clear_vf_fw(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP clear vf fw!\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
psp_tmr_terminate(psp);
|
||||
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include "amdgpu_atomfirmware.h"
|
||||
#include "amdgpu_xgmi.h"
|
||||
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
|
||||
#include "atom.h"
|
||||
|
||||
static const char *RAS_FS_NAME = "ras";
|
||||
|
||||
@@ -320,11 +321,14 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
* "disable" requires only the block.
* "enable" requires the block and error type.
* "inject" requires the block, error type, address, and value.
*
* The block is one of: umc, sdma, gfx, etc.
* see ras_block_string[] for details
*
* The error type is one of: ue, ce, where,
* ue is multi-uncorrectable
* ce is single-correctable
*
* The sub-block is the sub-block index, pass 0 if there is no sub-block.
* The address and value are hexadecimal numbers, leading 0x is optional.
*
@ -531,7 +535,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
struct ras_manager *obj;
|
||||
|
||||
if (!adev->ras_features || !con)
|
||||
if (!adev->ras_enabled || !con)
|
||||
return NULL;
|
||||
|
||||
if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
|
||||
@ -558,7 +562,7 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
|
||||
struct ras_manager *obj;
|
||||
int i;
|
||||
|
||||
if (!adev->ras_features || !con)
|
||||
if (!adev->ras_enabled || !con)
|
||||
return NULL;
|
||||
|
||||
if (head) {
|
||||
@ -585,36 +589,11 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
|
||||
}
|
||||
/* obj end */
|
||||
|
||||
static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev,
|
||||
const char* invoke_type,
|
||||
const char* block_name,
|
||||
enum ta_ras_status ret)
|
||||
{
|
||||
switch (ret) {
|
||||
case TA_RAS_STATUS__SUCCESS:
|
||||
return;
|
||||
case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE:
|
||||
dev_warn(adev->dev,
|
||||
"RAS WARN: %s %s currently unavailable\n",
|
||||
invoke_type,
|
||||
block_name);
|
||||
break;
|
||||
default:
|
||||
dev_err(adev->dev,
|
||||
"RAS ERROR: %s %s error failed ret 0x%X\n",
|
||||
invoke_type,
|
||||
block_name,
|
||||
ret);
|
||||
}
|
||||
}
|
||||
|
||||
/* feature ctl begin */
|
||||
static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
|
||||
struct ras_common_if *head)
|
||||
struct ras_common_if *head)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
|
||||
return con->hw_supported & BIT(head->block);
|
||||
return adev->ras_hw_enabled & BIT(head->block);
|
||||
}
|
||||
|
||||
static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev,
|
||||
@ -658,11 +637,7 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
|
||||
con->features |= BIT(head->block);
|
||||
} else {
|
||||
if (obj && amdgpu_ras_is_feature_enabled(adev, head)) {
|
||||
/* skip clean gfx ras context feature for VEGA20 Gaming.
|
||||
* will clean later
|
||||
*/
|
||||
if (!(!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)))
|
||||
con->features &= ~BIT(head->block);
|
||||
con->features &= ~BIT(head->block);
|
||||
put_obj(obj);
|
||||
}
|
||||
}
|
||||
@ -708,15 +683,10 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
|
||||
if (!amdgpu_ras_intr_triggered()) {
|
||||
ret = psp_ras_enable_features(&adev->psp, info, enable);
|
||||
if (ret) {
|
||||
amdgpu_ras_parse_status_code(adev,
|
||||
enable ? "enable":"disable",
|
||||
ras_block_str(head->block),
|
||||
(enum ta_ras_status)ret);
|
||||
if (ret == TA_RAS_STATUS__RESET_NEEDED)
|
||||
ret = -EAGAIN;
|
||||
else
|
||||
ret = -EINVAL;
|
||||
|
||||
dev_err(adev->dev, "ras %s %s failed %d\n",
|
||||
enable ? "enable":"disable",
|
||||
ras_block_str(head->block),
|
||||
ret);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
@ -770,6 +740,10 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
|
||||
con->features |= BIT(head->block);
|
||||
|
||||
ret = amdgpu_ras_feature_enable(adev, head, 0);
|
||||
|
||||
/* clean gfx block ras features flag */
|
||||
if (adev->ras_enabled && head->block == AMDGPU_RAS_BLOCK__GFX)
|
||||
con->features &= ~BIT(head->block);
|
||||
}
|
||||
} else
|
||||
ret = amdgpu_ras_feature_enable(adev, head, enable);
|
||||
@ -890,6 +864,11 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
|
||||
adev->gmc.xgmi.ras_funcs->query_ras_error_count)
|
||||
adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data);
|
||||
break;
|
||||
case AMDGPU_RAS_BLOCK__HDP:
|
||||
if (adev->hdp.ras_funcs &&
|
||||
adev->hdp.ras_funcs->query_ras_error_count)
|
||||
adev->hdp.ras_funcs->query_ras_error_count(adev, &err_data);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -901,17 +880,42 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
|
||||
info->ce_count = obj->err_data.ce_count;
|
||||
|
||||
if (err_data.ce_count) {
|
||||
dev_info(adev->dev, "%ld correctable hardware errors "
|
||||
if (adev->smuio.funcs &&
|
||||
adev->smuio.funcs->get_socket_id &&
|
||||
adev->smuio.funcs->get_die_id) {
|
||||
dev_info(adev->dev, "socket: %d, die: %d "
|
||||
"%ld correctable hardware errors "
|
||||
"detected in %s block, no user "
|
||||
"action is needed.\n",
|
||||
adev->smuio.funcs->get_socket_id(adev),
|
||||
adev->smuio.funcs->get_die_id(adev),
|
||||
obj->err_data.ce_count,
|
||||
ras_block_str(info->head.block));
|
||||
} else {
|
||||
dev_info(adev->dev, "%ld correctable hardware errors "
|
||||
"detected in %s block, no user "
|
||||
"action is needed.\n",
|
||||
obj->err_data.ce_count,
|
||||
ras_block_str(info->head.block));
|
||||
}
|
||||
}
|
||||
if (err_data.ue_count) {
|
||||
dev_info(adev->dev, "%ld uncorrectable hardware errors "
|
||||
if (adev->smuio.funcs &&
|
||||
adev->smuio.funcs->get_socket_id &&
|
||||
adev->smuio.funcs->get_die_id) {
|
||||
dev_info(adev->dev, "socket: %d, die: %d "
|
||||
"%ld uncorrectable hardware errors "
|
||||
"detected in %s block\n",
|
||||
adev->smuio.funcs->get_socket_id(adev),
|
||||
adev->smuio.funcs->get_die_id(adev),
|
||||
obj->err_data.ue_count,
|
||||
ras_block_str(info->head.block));
|
||||
} else {
|
||||
dev_info(adev->dev, "%ld uncorrectable hardware errors "
|
||||
"detected in %s block\n",
|
||||
obj->err_data.ue_count,
|
||||
ras_block_str(info->head.block));
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -937,11 +941,20 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
|
||||
if (adev->mmhub.ras_funcs &&
|
||||
adev->mmhub.ras_funcs->reset_ras_error_count)
|
||||
adev->mmhub.ras_funcs->reset_ras_error_count(adev);
|
||||
|
||||
if (adev->mmhub.ras_funcs &&
|
||||
adev->mmhub.ras_funcs->reset_ras_error_status)
|
||||
adev->mmhub.ras_funcs->reset_ras_error_status(adev);
|
||||
break;
|
||||
case AMDGPU_RAS_BLOCK__SDMA:
|
||||
if (adev->sdma.funcs->reset_ras_error_count)
|
||||
adev->sdma.funcs->reset_ras_error_count(adev);
|
||||
break;
|
||||
case AMDGPU_RAS_BLOCK__HDP:
|
||||
if (adev->hdp.ras_funcs &&
|
||||
adev->hdp.ras_funcs->reset_ras_error_count)
|
||||
adev->hdp.ras_funcs->reset_ras_error_count(adev);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -1022,10 +1035,9 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
amdgpu_ras_parse_status_code(adev,
|
||||
"inject",
|
||||
ras_block_str(info->head.block),
|
||||
(enum ta_ras_status)ret);
|
||||
if (ret)
|
||||
dev_err(adev->dev, "ras inject %s failed %d\n",
|
||||
ras_block_str(info->head.block), ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -1038,7 +1050,7 @@ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
|
||||
struct ras_manager *obj;
|
||||
struct ras_err_data data = {0, 0};
|
||||
|
||||
if (!adev->ras_features || !con)
|
||||
if (!adev->ras_enabled || !con)
|
||||
return 0;
|
||||
|
||||
list_for_each_entry(obj, &con->head, node) {
|
||||
@ -1265,8 +1277,8 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
|
||||
static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
struct dentry *dir;
|
||||
struct drm_minor *minor = adev_to_drm(adev)->primary;
|
||||
struct drm_minor *minor = adev_to_drm(adev)->primary;
|
||||
struct dentry *dir;
|
||||
|
||||
dir = debugfs_create_dir(RAS_FS_NAME, minor->debugfs_root);
|
||||
debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, dir, adev,
|
||||
@ -1275,6 +1287,8 @@ static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *
|
||||
&amdgpu_ras_debugfs_eeprom_ops);
|
||||
debugfs_create_u32("bad_page_cnt_threshold", 0444, dir,
|
||||
&con->bad_page_cnt_threshold);
|
||||
debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled);
|
||||
debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled);
|
||||
|
||||
/*
|
||||
* After one uncorrectable error happens, usually GPU recovery will
|
||||
@ -1561,7 +1575,7 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
struct ras_manager *obj;
|
||||
|
||||
if (!adev->ras_features || !con)
|
||||
if (!adev->ras_enabled || !con)
|
||||
return;
|
||||
|
||||
list_for_each_entry(obj, &con->head, node) {
|
||||
@ -1611,7 +1625,7 @@ static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
struct ras_manager *obj;
|
||||
|
||||
if (!adev->ras_features || !con)
|
||||
if (!adev->ras_enabled || !con)
|
||||
return;
|
||||
|
||||
list_for_each_entry(obj, &con->head, node) {
|
||||
@ -1925,7 +1939,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
|
||||
bool exc_err_limit = false;
|
||||
int ret;
|
||||
|
||||
if (adev->ras_features && con)
|
||||
if (adev->ras_enabled && con)
|
||||
data = &con->eh_data;
|
||||
else
|
||||
return 0;
|
||||
@ -2028,6 +2042,23 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
|
||||
adev->asic_type == CHIP_SIENNA_CICHLID;
|
||||
}
|
||||
|
||||
/*
|
||||
* this is workaround for vega20 workstation sku,
|
||||
* force enable gfx ras, ignore vbios gfx ras flag
|
||||
* due to GC EDC can not write
|
||||
*/
|
||||
static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
|
||||
{
|
||||
struct atom_context *ctx = adev->mode_info.atom_context;
|
||||
|
||||
if (!ctx)
|
||||
return;
|
||||
|
||||
if (strnstr(ctx->vbios_version, "D16406",
|
||||
sizeof(ctx->vbios_version)))
|
||||
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
|
||||
}
|
||||
|
||||
/*
* check hardware's ras ability which will be saved in hw_supported.
* if hardware does not support ras, we can skip some ras initialization and
@@ -2037,11 +2068,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
* we have to initialize ras as normal. but need check if operation is
* allowed or not in each function.
*/
static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
uint32_t *hw_supported, uint32_t *supported)
static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
{
*hw_supported = 0;
*supported = 0;
adev->ras_hw_enabled = adev->ras_enabled = 0;

if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
|
||||
!amdgpu_ras_asic_supported(adev))
|
||||
@ -2050,33 +2079,34 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
|
||||
if (!adev->gmc.xgmi.connected_to_cpu) {
|
||||
if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
|
||||
dev_info(adev->dev, "MEM ECC is active.\n");
|
||||
*hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC |
|
||||
1 << AMDGPU_RAS_BLOCK__DF);
|
||||
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
|
||||
1 << AMDGPU_RAS_BLOCK__DF);
|
||||
} else {
|
||||
dev_info(adev->dev, "MEM ECC is not presented.\n");
|
||||
}
|
||||
|
||||
if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
|
||||
dev_info(adev->dev, "SRAM ECC is active.\n");
|
||||
*hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
|
||||
1 << AMDGPU_RAS_BLOCK__DF);
|
||||
adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
|
||||
1 << AMDGPU_RAS_BLOCK__DF);
|
||||
} else {
|
||||
dev_info(adev->dev, "SRAM ECC is not presented.\n");
|
||||
}
|
||||
} else {
|
||||
/* driver only manages a few IP blocks RAS feature
|
||||
* when GPU is connected cpu through XGMI */
|
||||
*hw_supported |= (1 << AMDGPU_RAS_BLOCK__GFX |
|
||||
1 << AMDGPU_RAS_BLOCK__SDMA |
|
||||
1 << AMDGPU_RAS_BLOCK__MMHUB);
|
||||
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX |
|
||||
1 << AMDGPU_RAS_BLOCK__SDMA |
|
||||
1 << AMDGPU_RAS_BLOCK__MMHUB);
|
||||
}
|
||||
|
||||
/* hw_supported needs to be aligned with RAS block mask. */
|
||||
*hw_supported &= AMDGPU_RAS_BLOCK_MASK;
|
||||
amdgpu_ras_get_quirks(adev);
|
||||
|
||||
*supported = amdgpu_ras_enable == 0 ?
|
||||
0 : *hw_supported & amdgpu_ras_mask;
|
||||
adev->ras_features = *supported;
|
||||
/* hw_supported needs to be aligned with RAS block mask. */
|
||||
adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;
|
||||
|
||||
adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
|
||||
adev->ras_hw_enabled & amdgpu_ras_mask;
|
||||
}
|
||||
|
||||
int amdgpu_ras_init(struct amdgpu_device *adev)
|
||||
@ -2097,13 +2127,13 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
|
||||
|
||||
amdgpu_ras_set_context(adev, con);
|
||||
|
||||
amdgpu_ras_check_supported(adev, &con->hw_supported,
|
||||
&con->supported);
|
||||
if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) {
|
||||
amdgpu_ras_check_supported(adev);
|
||||
|
||||
if (!adev->ras_enabled || adev->asic_type == CHIP_VEGA10) {
|
||||
/* set gfx block ras context feature for VEGA20 Gaming
|
||||
* send ras disable cmd to ras ta during ras late init.
|
||||
*/
|
||||
if (!adev->ras_features && adev->asic_type == CHIP_VEGA20) {
|
||||
if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) {
|
||||
con->features |= BIT(AMDGPU_RAS_BLOCK__GFX);
|
||||
|
||||
return 0;
|
||||
@ -2153,8 +2183,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
|
||||
"hardware ability[%x] ras_mask[%x]\n",
|
||||
con->hw_supported, con->supported);
|
||||
"hardware ability[%x] ras_mask[%x]\n",
|
||||
adev->ras_hw_enabled, adev->ras_enabled);
|
||||
|
||||
return 0;
|
||||
release_con:
|
||||
amdgpu_ras_set_context(adev, NULL);
|
||||
@ -2268,7 +2299,7 @@ void amdgpu_ras_resume(struct amdgpu_device *adev)
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
struct ras_manager *obj, *tmp;
|
||||
|
||||
if (!adev->ras_features || !con) {
|
||||
if (!adev->ras_enabled || !con) {
|
||||
/* clean ras context for VEGA20 Gaming after send ras disable cmd */
|
||||
amdgpu_release_ras_context(adev);
|
||||
|
||||
@ -2314,7 +2345,7 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
|
||||
if (!adev->ras_features || !con)
|
||||
if (!adev->ras_enabled || !con)
|
||||
return;
|
||||
|
||||
amdgpu_ras_disable_all_features(adev, 0);
|
||||
@ -2328,7 +2359,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
|
||||
if (!adev->ras_features || !con)
|
||||
if (!adev->ras_enabled || !con)
|
||||
return 0;
|
||||
|
||||
/* Need disable ras on all IPs here before ip [hw/sw]fini */
|
||||
@ -2341,7 +2372,7 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
|
||||
if (!adev->ras_features || !con)
|
||||
if (!adev->ras_enabled || !con)
|
||||
return 0;
|
||||
|
||||
amdgpu_ras_fs_fini(adev);
|
||||
@ -2360,10 +2391,8 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
|
||||
|
||||
void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
|
||||
{
|
||||
uint32_t hw_supported, supported;
|
||||
|
||||
amdgpu_ras_check_supported(adev, &hw_supported, &supported);
|
||||
if (!hw_supported)
|
||||
amdgpu_ras_check_supported(adev);
|
||||
if (!adev->ras_hw_enabled)
|
||||
return;
|
||||
|
||||
if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
|
||||
@ -2392,7 +2421,7 @@ void amdgpu_release_ras_context(struct amdgpu_device *adev)
|
||||
if (!con)
|
||||
return;
|
||||
|
||||
if (!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
|
||||
if (!adev->ras_enabled && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
|
||||
con->features &= ~BIT(AMDGPU_RAS_BLOCK__GFX);
|
||||
amdgpu_ras_set_context(adev, NULL);
|
||||
kfree(con);
|
||||
|
@ -313,9 +313,6 @@ struct ras_common_if {
|
||||
struct amdgpu_ras {
|
||||
/* ras infrastructure */
|
||||
/* for ras itself. */
|
||||
uint32_t hw_supported;
|
||||
/* for IP to check its ras ability. */
|
||||
uint32_t supported;
|
||||
uint32_t features;
|
||||
struct list_head head;
|
||||
/* sysfs */
|
||||
@ -478,7 +475,7 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
|
||||
|
||||
if (block >= AMDGPU_RAS_BLOCK_COUNT)
|
||||
return 0;
|
||||
return ras && (ras->supported & (1 << block));
|
||||
return ras && (adev->ras_enabled & (1 << block));
|
||||
}
|
||||
|
||||
int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
|
||||
|
@@ -158,6 +158,7 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
* @irq_src: interrupt source to use for this ring
* @irq_type: interrupt type to use for this ring
* @hw_prio: ring priority (NORMAL/HIGH)
* @sched_score: optional score atomic shared with other schedulers
*
* Initialize the driver information for the selected ring (all asics).
* Returns 0 on success, error on failure.

@@ -29,6 +29,7 @@ struct amdgpu_smuio_funcs {
void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable);
void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
u32 (*get_die_id)(struct amdgpu_device *adev);
u32 (*get_socket_id)(struct amdgpu_device *adev);
bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev);
};

@ -32,7 +32,6 @@
|
||||
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/hmm.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/sched/task.h>
|
||||
#include <linux/sched/mm.h>
|
||||
@ -112,6 +111,20 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
|
||||
}
|
||||
|
||||
abo = ttm_to_amdgpu_bo(bo);
|
||||
if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) {
|
||||
struct dma_fence *fence;
|
||||
struct dma_resv *resv = &bo->base._resv;
|
||||
|
||||
rcu_read_lock();
|
||||
fence = rcu_dereference(resv->fence_excl);
|
||||
if (fence && !fence->ops->signaled)
|
||||
dma_fence_enable_sw_signaling(fence);
|
||||
|
||||
placement->num_placement = 0;
|
||||
placement->num_busy_placement = 0;
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
switch (bo->mem.mem_type) {
|
||||
case AMDGPU_PL_GDS:
|
||||
case AMDGPU_PL_GWS:
|
||||
@ -165,13 +178,6 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
|
||||
{
|
||||
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
|
||||
|
||||
/*
|
||||
* Don't verify access for KFD BOs. They don't have a GEM
|
||||
* object associated with them.
|
||||
*/
|
||||
if (abo->kfd_bo)
|
||||
return 0;
|
||||
|
||||
if (amdgpu_ttm_tt_get_usermm(bo->ttm))
|
||||
return -EPERM;
|
||||
return drm_vma_node_verify_access(&abo->tbo.base.vma_node,
|
||||
@ -288,7 +294,7 @@ error_free:
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_copy_ttm_mem_to_mem - Helper function for copy
|
||||
* amdgpu_ttm_copy_mem_to_mem - Helper function for copy
|
||||
* @adev: amdgpu device
|
||||
* @src: buffer/address where to read from
|
||||
* @dst: buffer/address where to write to
|
||||
@ -670,10 +676,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
|
||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
||||
unsigned long start = gtt->userptr;
|
||||
struct vm_area_struct *vma;
|
||||
struct hmm_range *range;
|
||||
unsigned long timeout;
|
||||
struct mm_struct *mm;
|
||||
unsigned long i;
|
||||
bool readonly;
|
||||
int r = 0;
|
||||
|
||||
mm = bo->notifier.mm;
|
||||
@ -689,76 +693,26 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
|
||||
if (!mmget_not_zero(mm)) /* Happens during process shutdown */
|
||||
return -ESRCH;
|
||||
|
||||
range = kzalloc(sizeof(*range), GFP_KERNEL);
|
||||
if (unlikely(!range)) {
|
||||
r = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
range->notifier = &bo->notifier;
|
||||
range->start = bo->notifier.interval_tree.start;
|
||||
range->end = bo->notifier.interval_tree.last + 1;
|
||||
range->default_flags = HMM_PFN_REQ_FAULT;
|
||||
if (!amdgpu_ttm_tt_is_readonly(ttm))
|
||||
range->default_flags |= HMM_PFN_REQ_WRITE;
|
||||
|
||||
range->hmm_pfns = kvmalloc_array(ttm->num_pages,
|
||||
sizeof(*range->hmm_pfns), GFP_KERNEL);
|
||||
if (unlikely(!range->hmm_pfns)) {
|
||||
r = -ENOMEM;
|
||||
goto out_free_ranges;
|
||||
}
|
||||
|
||||
mmap_read_lock(mm);
|
||||
vma = find_vma(mm, start);
|
||||
mmap_read_unlock(mm);
|
||||
if (unlikely(!vma || start < vma->vm_start)) {
|
||||
r = -EFAULT;
|
||||
goto out_unlock;
|
||||
goto out_putmm;
|
||||
}
|
||||
if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
|
||||
vma->vm_file)) {
|
||||
r = -EPERM;
|
||||
goto out_unlock;
|
||||
}
|
||||
mmap_read_unlock(mm);
|
||||
timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
|
||||
|
||||
retry:
|
||||
range->notifier_seq = mmu_interval_read_begin(&bo->notifier);
|
||||
|
||||
mmap_read_lock(mm);
|
||||
r = hmm_range_fault(range);
|
||||
mmap_read_unlock(mm);
|
||||
if (unlikely(r)) {
|
||||
/*
|
||||
* FIXME: This timeout should encompass the retry from
|
||||
* mmu_interval_read_retry() as well.
|
||||
*/
|
||||
if (r == -EBUSY && !time_after(jiffies, timeout))
|
||||
goto retry;
|
||||
goto out_free_pfns;
|
||||
goto out_putmm;
|
||||
}
|
||||
|
||||
/*
|
||||
* Due to default_flags, all pages are HMM_PFN_VALID or
|
||||
* hmm_range_fault() fails. FIXME: The pages cannot be touched outside
|
||||
* the notifier_lock, and mmu_interval_read_retry() must be done first.
|
||||
*/
|
||||
for (i = 0; i < ttm->num_pages; i++)
|
||||
pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
|
||||
|
||||
gtt->range = range;
|
||||
readonly = amdgpu_ttm_tt_is_readonly(ttm);
|
||||
r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
|
||||
ttm->num_pages, >t->range, readonly,
|
||||
false);
|
||||
out_putmm:
|
||||
mmput(mm);
|
||||
|
||||
return 0;
|
||||
|
||||
out_unlock:
|
||||
mmap_read_unlock(mm);
|
||||
out_free_pfns:
|
||||
kvfree(range->hmm_pfns);
|
||||
out_free_ranges:
|
||||
kfree(range);
|
||||
out:
|
||||
mmput(mm);
|
||||
return r;
|
||||
}
|
||||
|
||||
@ -787,10 +741,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
|
||||
* FIXME: Must always hold notifier_lock for this, and must
|
||||
* not ignore the return code.
|
||||
*/
|
||||
r = mmu_interval_read_retry(gtt->range->notifier,
|
||||
gtt->range->notifier_seq);
|
||||
kvfree(gtt->range->hmm_pfns);
|
||||
kfree(gtt->range);
|
||||
r = amdgpu_hmm_range_get_pages_done(gtt->range);
|
||||
gtt->range = NULL;
|
||||
}
|
||||
|
||||
|
@ -50,9 +50,12 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
|
||||
struct drm_device *ddev = adev_to_drm(adev);
|
||||
|
||||
/* enable virtual display */
|
||||
if (adev->mode_info.num_crtc == 0)
|
||||
adev->mode_info.num_crtc = 1;
|
||||
adev->enable_virtual_display = true;
|
||||
if (adev->asic_type != CHIP_ALDEBARAN &&
|
||||
adev->asic_type != CHIP_ARCTURUS) {
|
||||
if (adev->mode_info.num_crtc == 0)
|
||||
adev->mode_info.num_crtc = 1;
|
||||
adev->enable_virtual_display = true;
|
||||
}
|
||||
ddev->driver_features &= ~DRIVER_ATOMIC;
|
||||
adev->cg_flags = 0;
|
||||
adev->pg_flags = 0;
|
||||
@ -679,6 +682,7 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_ARCTURUS:
|
||||
case CHIP_ALDEBARAN:
|
||||
soc15_set_virt_ops(adev);
|
||||
break;
|
||||
case CHIP_NAVI10:
|
||||
|
@ -38,6 +38,7 @@
|
||||
#include "amdgpu_gmc.h"
|
||||
#include "amdgpu_xgmi.h"
|
||||
#include "amdgpu_dma_buf.h"
|
||||
#include "kfd_svm.h"
|
||||
|
||||
/**
|
||||
* DOC: GPUVM
|
||||
@ -850,35 +851,60 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_vm_bo_param - fill in parameters for PD/PT allocation
|
||||
* amdgpu_vm_pt_create - create bo for PD/PT
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @vm: requesting vm
|
||||
* @level: the page table level
|
||||
* @immediate: use a immediate update
|
||||
* @bp: resulting BO allocation parameters
|
||||
* @bo: pointer to the buffer object pointer
|
||||
*/
|
||||
static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
|
||||
struct amdgpu_vm *vm,
|
||||
int level, bool immediate,
|
||||
struct amdgpu_bo_param *bp)
|
||||
struct amdgpu_bo **bo)
|
||||
{
|
||||
memset(bp, 0, sizeof(*bp));
|
||||
struct amdgpu_bo_param bp;
|
||||
int r;
|
||||
|
||||
bp->size = amdgpu_vm_bo_size(adev, level);
|
||||
bp->byte_align = AMDGPU_GPU_PAGE_SIZE;
|
||||
bp->domain = AMDGPU_GEM_DOMAIN_VRAM;
|
||||
bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain);
|
||||
bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
|
||||
memset(&bp, 0, sizeof(bp));
|
||||
|
||||
bp.size = amdgpu_vm_bo_size(adev, level);
|
||||
bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
|
||||
bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
|
||||
bp.domain = amdgpu_bo_get_preferred_pin_domain(adev, bp.domain);
|
||||
bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
|
||||
AMDGPU_GEM_CREATE_CPU_GTT_USWC;
|
||||
bp->bo_ptr_size = sizeof(struct amdgpu_bo);
|
||||
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
|
||||
if (vm->use_cpu_for_update)
|
||||
bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
|
||||
else if (!vm->root.base.bo || vm->root.base.bo->shadow)
|
||||
bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
|
||||
bp->type = ttm_bo_type_kernel;
|
||||
bp->no_wait_gpu = immediate;
|
||||
bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
|
||||
|
||||
bp.type = ttm_bo_type_kernel;
|
||||
bp.no_wait_gpu = immediate;
|
||||
if (vm->root.base.bo)
|
||||
bp->resv = vm->root.base.bo->tbo.base.resv;
|
||||
bp.resv = vm->root.base.bo->tbo.base.resv;
|
||||
|
||||
r = amdgpu_bo_create(adev, &bp, bo);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (vm->is_compute_context && (adev->flags & AMD_IS_APU))
|
||||
return 0;
|
||||
|
||||
if (!bp.resv)
|
||||
WARN_ON(dma_resv_lock((*bo)->tbo.base.resv,
|
||||
NULL));
|
||||
r = amdgpu_bo_create_shadow(adev, bp.size, *bo);
|
||||
|
||||
if (!bp.resv)
|
||||
dma_resv_unlock((*bo)->tbo.base.resv);
|
||||
|
||||
if (r) {
|
||||
amdgpu_bo_unref(bo);
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -901,7 +927,6 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
|
||||
bool immediate)
|
||||
{
|
||||
struct amdgpu_vm_pt *entry = cursor->entry;
|
||||
struct amdgpu_bo_param bp;
|
||||
struct amdgpu_bo *pt;
|
||||
int r;
|
||||
|
||||
@ -919,9 +944,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
|
||||
if (entry->base.bo)
|
||||
return 0;
|
||||
|
||||
amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp);
|
||||
|
||||
r = amdgpu_bo_create(adev, &bp, &pt);
|
||||
r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
@ -1593,15 +1616,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
|
||||
* Returns:
|
||||
* 0 for success, -EINVAL for failure.
|
||||
*/
|
||||
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
|
||||
struct amdgpu_device *bo_adev,
|
||||
struct amdgpu_vm *vm, bool immediate,
|
||||
bool unlocked, struct dma_resv *resv,
|
||||
uint64_t start, uint64_t last,
|
||||
uint64_t flags, uint64_t offset,
|
||||
struct drm_mm_node *nodes,
|
||||
dma_addr_t *pages_addr,
|
||||
struct dma_fence **fence)
|
||||
int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
|
||||
struct amdgpu_device *bo_adev,
|
||||
struct amdgpu_vm *vm, bool immediate,
|
||||
bool unlocked, struct dma_resv *resv,
|
||||
uint64_t start, uint64_t last,
|
||||
uint64_t flags, uint64_t offset,
|
||||
struct drm_mm_node *nodes,
|
||||
dma_addr_t *pages_addr,
|
||||
struct dma_fence **fence)
|
||||
{
|
||||
struct amdgpu_vm_update_params params;
|
||||
enum amdgpu_sync_mode sync_mode;
|
||||
@ -2818,7 +2841,6 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @vm: requested vm
|
||||
* @vm_context: Indicates if it GFX or Compute context
|
||||
* @pasid: Process address space identifier
|
||||
*
|
||||
* Init @vm fields.
|
||||
@ -2826,10 +2848,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
|
||||
* Returns:
|
||||
* 0 for success, error for failure.
|
||||
*/
|
||||
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
int vm_context, u32 pasid)
|
||||
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid)
|
||||
{
|
||||
struct amdgpu_bo_param bp;
|
||||
struct amdgpu_bo *root;
|
||||
int r, i;
|
||||
|
||||
@ -2861,16 +2881,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
vm->pte_support_ats = false;
|
||||
vm->is_compute_context = false;
|
||||
|
||||
if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
|
||||
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
|
||||
AMDGPU_VM_USE_CPU_FOR_COMPUTE);
|
||||
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
|
||||
AMDGPU_VM_USE_CPU_FOR_GFX);
|
||||
|
||||
if (adev->asic_type == CHIP_RAVEN)
|
||||
vm->pte_support_ats = true;
|
||||
} else {
|
||||
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
|
||||
AMDGPU_VM_USE_CPU_FOR_GFX);
|
||||
}
|
||||
DRM_DEBUG_DRIVER("VM update mode is %s\n",
|
||||
vm->use_cpu_for_update ? "CPU" : "SDMA");
|
||||
WARN_ONCE((vm->use_cpu_for_update &&
|
||||
@ -2887,10 +2900,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
mutex_init(&vm->eviction_lock);
|
||||
vm->evicting = false;
|
||||
|
||||
amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp);
|
||||
if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
|
||||
bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
|
||||
r = amdgpu_bo_create(adev, &bp, &root);
|
||||
r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
|
||||
false, &root);
|
||||
if (r)
|
||||
goto error_free_delayed;
|
||||
|
||||
@ -3349,6 +3360,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
|
||||
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
|
||||
uint64_t addr)
|
||||
{
|
||||
bool is_compute_context = false;
|
||||
struct amdgpu_bo *root;
|
||||
uint64_t value, flags;
|
||||
struct amdgpu_vm *vm;
|
||||
@ -3356,15 +3368,25 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
|
||||
|
||||
spin_lock(&adev->vm_manager.pasid_lock);
|
||||
vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
|
||||
if (vm)
|
||||
if (vm) {
|
||||
root = amdgpu_bo_ref(vm->root.base.bo);
|
||||
else
|
||||
is_compute_context = vm->is_compute_context;
|
||||
} else {
|
||||
root = NULL;
|
||||
}
|
||||
spin_unlock(&adev->vm_manager.pasid_lock);
|
||||
|
||||
if (!root)
|
||||
return false;
|
||||
|
||||
addr /= AMDGPU_GPU_PAGE_SIZE;
|
||||
|
||||
if (is_compute_context &&
|
||||
!svm_range_restore_pages(adev, pasid, addr)) {
|
||||
amdgpu_bo_unref(&root);
|
||||
return true;
|
||||
}
|
||||
|
||||
r = amdgpu_bo_reserve(root, true);
|
||||
if (r)
|
||||
goto error_unref;
|
||||
@ -3378,18 +3400,16 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
|
||||
if (!vm)
|
||||
goto error_unlock;
|
||||
|
||||
addr /= AMDGPU_GPU_PAGE_SIZE;
|
||||
flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
|
||||
AMDGPU_PTE_SYSTEM;
|
||||
|
||||
if (vm->is_compute_context) {
|
||||
if (is_compute_context) {
|
||||
/* Intentionally setting invalid PTE flag
|
||||
* combination to force a no-retry-fault
|
||||
*/
|
||||
flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
|
||||
AMDGPU_PTE_TF;
|
||||
value = 0;
|
||||
|
||||
} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
|
||||
/* Redirect the access to the dummy page */
|
||||
value = adev->dummy_page_addr;
|
||||
|
@ -121,9 +121,6 @@ struct amdgpu_bo_list_entry;
|
||||
/* max vmids dedicated for process */
|
||||
#define AMDGPU_VM_MAX_RESERVED_VMID 1
|
||||
|
||||
#define AMDGPU_VM_CONTEXT_GFX 0
|
||||
#define AMDGPU_VM_CONTEXT_COMPUTE 1
|
||||
|
||||
/* See vm_update_mode */
|
||||
#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
|
||||
#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
|
||||
@ -367,6 +364,8 @@ struct amdgpu_vm_manager {
|
||||
spinlock_t pasid_lock;
|
||||
};
|
||||
|
||||
struct amdgpu_bo_va_mapping;
|
||||
|
||||
#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
|
||||
#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
|
||||
#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
|
||||
@ -378,8 +377,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev);
|
||||
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
|
||||
|
||||
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
|
||||
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
int vm_context, u32 pasid);
|
||||
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid);
|
||||
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid);
|
||||
void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
|
||||
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
|
||||
@ -398,6 +396,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
|
||||
struct dma_fence **fence);
|
||||
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
|
||||
struct amdgpu_vm *vm);
|
||||
int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
|
||||
struct amdgpu_device *bo_adev,
|
||||
struct amdgpu_vm *vm, bool immediate,
|
||||
bool unlocked, struct dma_resv *resv,
|
||||
uint64_t start, uint64_t last,
|
||||
uint64_t flags, uint64_t offset,
|
||||
struct drm_mm_node *nodes,
|
||||
dma_addr_t *pages_addr,
|
||||
struct dma_fence **fence);
|
||||
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
|
||||
struct amdgpu_bo_va *bo_va,
|
||||
bool clear);
|
||||
|
@ -29,12 +29,14 @@
|
||||
#include "amdgpu_atomfirmware.h"
|
||||
#include "atom.h"
|
||||
|
||||
static inline struct amdgpu_vram_mgr *to_vram_mgr(struct ttm_resource_manager *man)
|
||||
static inline struct amdgpu_vram_mgr *
|
||||
to_vram_mgr(struct ttm_resource_manager *man)
|
||||
{
|
||||
return container_of(man, struct amdgpu_vram_mgr, manager);
|
||||
}
|
||||
|
||||
static inline struct amdgpu_device *to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
|
||||
static inline struct amdgpu_device *
|
||||
to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
|
||||
{
|
||||
return container_of(mgr, struct amdgpu_device, mman.vram_mgr);
|
||||
}
|
||||
@ -82,12 +84,14 @@ static ssize_t amdgpu_mem_info_vis_vram_total_show(struct device *dev,
|
||||
* amount of currently used VRAM in bytes
|
||||
*/
|
||||
static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
|
||||
struct ttm_resource_manager *man;
|
||||
|
||||
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
|
||||
return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_usage(man));
|
||||
}
|
||||
|
||||
@ -100,18 +104,28 @@ static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
|
||||
* amount of currently used visible VRAM in bytes
|
||||
*/
|
||||
static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
|
||||
struct ttm_resource_manager *man;
|
||||
|
||||
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
|
||||
return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_vis_usage(man));
|
||||
}
|
||||
|
||||
/**
|
||||
* DOC: mem_info_vram_vendor
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for reporting the vendor of the
|
||||
* installed VRAM
|
||||
* The file mem_info_vram_vendor is used for this and returns the name of the
|
||||
* vendor.
|
||||
*/
|
||||
static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
@ -162,78 +176,6 @@ static const struct attribute *amdgpu_vram_mgr_attributes[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
static const struct ttm_resource_manager_func amdgpu_vram_mgr_func;
|
||||
|
||||
/**
|
||||
* amdgpu_vram_mgr_init - init VRAM manager and DRM MM
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Allocate and initialize the VRAM manager.
|
||||
*/
|
||||
int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
|
||||
struct ttm_resource_manager *man = &mgr->manager;
|
||||
int ret;
|
||||
|
||||
ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT);
|
||||
|
||||
man->func = &amdgpu_vram_mgr_func;
|
||||
|
||||
drm_mm_init(&mgr->mm, 0, man->size);
|
||||
spin_lock_init(&mgr->lock);
|
||||
INIT_LIST_HEAD(&mgr->reservations_pending);
|
||||
INIT_LIST_HEAD(&mgr->reserved_pages);
|
||||
|
||||
/* Add the two VRAM-related sysfs files */
|
||||
ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
|
||||
if (ret)
|
||||
DRM_ERROR("Failed to register sysfs\n");
|
||||
|
||||
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
|
||||
ttm_resource_manager_set_used(man, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_vram_mgr_fini - free and destroy VRAM manager
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Destroy and free the VRAM manager, returns -EBUSY if ranges are still
|
||||
* allocated inside it.
|
||||
*/
|
||||
void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
|
||||
struct ttm_resource_manager *man = &mgr->manager;
|
||||
int ret;
|
||||
struct amdgpu_vram_reservation *rsv, *temp;
|
||||
|
||||
ttm_resource_manager_set_used(man, false);
|
||||
|
||||
ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
spin_lock(&mgr->lock);
|
||||
list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
|
||||
kfree(rsv);
|
||||
|
||||
list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
|
||||
drm_mm_remove_node(&rsv->mm_node);
|
||||
kfree(rsv);
|
||||
}
|
||||
drm_mm_takedown(&mgr->mm);
|
||||
spin_unlock(&mgr->lock);
|
||||
|
||||
sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
|
||||
|
||||
ttm_resource_manager_cleanup(man);
|
||||
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_vram_mgr_vis_size - Calculate visible node size
|
||||
*
|
||||
@ -283,6 +225,7 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
|
||||
return usage;
|
||||
}
|
||||
|
||||
/* Commit the reservation of VRAM pages */
static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
|
||||
{
|
||||
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
|
||||
@ -415,13 +358,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
|
||||
const struct ttm_place *place,
|
||||
struct ttm_resource *mem)
|
||||
{
|
||||
unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
|
||||
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
|
||||
struct amdgpu_device *adev = to_amdgpu_device(mgr);
|
||||
struct drm_mm *mm = &mgr->mm;
|
||||
struct drm_mm_node *nodes;
|
||||
enum drm_mm_insert_mode mode;
|
||||
unsigned long lpfn, num_nodes, pages_per_node, pages_left;
|
||||
uint64_t vis_usage = 0, mem_bytes, max_bytes;
|
||||
struct drm_mm *mm = &mgr->mm;
|
||||
enum drm_mm_insert_mode mode;
|
||||
struct drm_mm_node *nodes;
|
||||
unsigned i;
|
||||
int r;
|
||||
|
||||
@ -448,10 +391,10 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
|
||||
pages_per_node = HPAGE_PMD_NR;
|
||||
#else
|
||||
/* default to 2MB */
|
||||
pages_per_node = (2UL << (20UL - PAGE_SHIFT));
|
||||
pages_per_node = 2UL << (20UL - PAGE_SHIFT);
|
||||
#endif
|
||||
pages_per_node = max((uint32_t)pages_per_node,
|
||||
tbo->page_alignment);
|
||||
pages_per_node = max_t(uint32_t, pages_per_node,
|
||||
tbo->page_alignment);
|
||||
num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
|
||||
}
|
||||
|
||||
@ -469,42 +412,37 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
|
||||
mem->start = 0;
|
||||
pages_left = mem->num_pages;
|
||||
|
||||
/* Limit maximum size to 2GB due to SG table limitations */
|
||||
pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
|
||||
|
||||
i = 0;
|
||||
spin_lock(&mgr->lock);
|
||||
for (i = 0; pages_left >= pages_per_node; ++i) {
|
||||
unsigned long pages = rounddown_pow_of_two(pages_left);
|
||||
|
||||
/* Limit maximum size to 2GB due to SG table limitations */
|
||||
pages = min(pages, (2UL << (30 - PAGE_SHIFT)));
|
||||
|
||||
r = drm_mm_insert_node_in_range(mm, &nodes[i], pages,
|
||||
pages_per_node, 0,
|
||||
place->fpfn, lpfn,
|
||||
mode);
|
||||
if (unlikely(r))
|
||||
break;
|
||||
|
||||
vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
|
||||
amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
|
||||
pages_left -= pages;
|
||||
}
|
||||
|
||||
for (; pages_left; ++i) {
|
||||
unsigned long pages = min(pages_left, pages_per_node);
|
||||
while (pages_left) {
|
||||
uint32_t alignment = tbo->page_alignment;
|
||||
|
||||
if (pages == pages_per_node)
|
||||
if (pages >= pages_per_node)
|
||||
alignment = pages_per_node;
|
||||
|
||||
r = drm_mm_insert_node_in_range(mm, &nodes[i],
|
||||
pages, alignment, 0,
|
||||
place->fpfn, lpfn,
|
||||
mode);
|
||||
if (unlikely(r))
|
||||
r = drm_mm_insert_node_in_range(mm, &nodes[i], pages, alignment,
|
||||
0, place->fpfn, lpfn, mode);
|
||||
if (unlikely(r)) {
|
||||
if (pages > pages_per_node) {
|
||||
if (is_power_of_2(pages))
|
||||
pages = pages / 2;
|
||||
else
|
||||
pages = rounddown_pow_of_two(pages);
|
||||
continue;
|
||||
}
|
||||
goto error;
|
||||
}
|
||||
|
||||
vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
|
||||
amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
|
||||
pages_left -= pages;
|
||||
++i;
|
||||
|
||||
if (pages > pages_left)
|
||||
pages = pages_left;
|
||||
}
|
||||
spin_unlock(&mgr->lock);
|
||||
|
||||
@ -728,3 +666,73 @@ static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
|
||||
.free = amdgpu_vram_mgr_del,
|
||||
.debug = amdgpu_vram_mgr_debug
|
||||
};
|
||||
|
||||
/**
|
||||
* amdgpu_vram_mgr_init - init VRAM manager and DRM MM
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Allocate and initialize the VRAM manager.
|
||||
*/
|
||||
int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
|
||||
struct ttm_resource_manager *man = &mgr->manager;
|
||||
int ret;
|
||||
|
||||
ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT);
|
||||
|
||||
man->func = &amdgpu_vram_mgr_func;
|
||||
|
||||
drm_mm_init(&mgr->mm, 0, man->size);
|
||||
spin_lock_init(&mgr->lock);
|
||||
INIT_LIST_HEAD(&mgr->reservations_pending);
|
||||
INIT_LIST_HEAD(&mgr->reserved_pages);
|
||||
|
||||
/* Add the two VRAM-related sysfs files */
|
||||
ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
|
||||
if (ret)
|
||||
DRM_ERROR("Failed to register sysfs\n");
|
||||
|
||||
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
|
||||
ttm_resource_manager_set_used(man, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_vram_mgr_fini - free and destroy VRAM manager
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Destroy and free the VRAM manager, returns -EBUSY if ranges are still
|
||||
* allocated inside it.
|
||||
*/
|
||||
void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
|
||||
struct ttm_resource_manager *man = &mgr->manager;
|
||||
int ret;
|
||||
struct amdgpu_vram_reservation *rsv, *temp;
|
||||
|
||||
ttm_resource_manager_set_used(man, false);
|
||||
|
||||
ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
spin_lock(&mgr->lock);
|
||||
list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
|
||||
kfree(rsv);
|
||||
|
||||
list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
|
||||
drm_mm_remove_node(&rsv->mm_node);
|
||||
kfree(rsv);
|
||||
}
|
||||
drm_mm_takedown(&mgr->mm);
|
||||
spin_unlock(&mgr->lock);
|
||||
|
||||
sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
|
||||
|
||||
ttm_resource_manager_cleanup(man);
|
||||
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
|
||||
}
|
||||
|
@@ -98,9 +98,9 @@ union amd_sriov_msg_feature_flags {

union amd_sriov_reg_access_flags {
struct {
uint32_t vf_reg_access_ih : 1;
uint32_t vf_reg_access_mmhub : 1;
uint32_t vf_reg_access_gc : 1;
uint32_t vf_reg_psp_access_ih : 1;
uint32_t vf_reg_rlc_access_mmhub : 1;
uint32_t vf_reg_rlc_access_gc : 1;
uint32_t reserved : 29;
} flags;
uint32_t all;

@ -421,6 +421,11 @@ static int dce_virtual_sw_init(void *handle)
|
||||
static int dce_virtual_sw_fini(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
int i = 0;
|
||||
|
||||
for (i = 0; i < adev->mode_info.num_crtc; i++)
|
||||
if (adev->mode_info.crtcs[i])
|
||||
hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer);
|
||||
|
||||
kfree(adev->mode_info.bios_hardcoded_edid);
|
||||
|
||||
@ -480,13 +485,6 @@ static int dce_virtual_hw_init(void *handle)
|
||||
|
||||
static int dce_virtual_hw_fini(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
int i = 0;
|
||||
|
||||
for (i = 0; i<adev->mode_info.num_crtc; i++)
|
||||
if (adev->mode_info.crtcs[i])
|
||||
hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -219,11 +219,11 @@ static void df_v3_6_query_hashes(struct amdgpu_device *adev)
|
||||
adev->df.hash_status.hash_2m = false;
|
||||
adev->df.hash_status.hash_1g = false;
|
||||
|
||||
if (adev->asic_type != CHIP_ARCTURUS)
|
||||
return;
|
||||
|
||||
/* encoding for hash-enabled on Arcturus */
|
||||
if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) {
|
||||
/* encoding for hash-enabled on Arcturus and Aldebaran */
|
||||
if ((adev->asic_type == CHIP_ARCTURUS &&
|
||||
adev->df.funcs->get_fb_channel_number(adev) == 0xe) ||
|
||||
(adev->asic_type == CHIP_ALDEBARAN &&
|
||||
adev->df.funcs->get_fb_channel_number(adev) == 0x1e)) {
|
||||
tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
|
||||
adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
|
||||
DF_CS_UMC_AON0_DfGlobalCtrl,
|
||||
@ -278,7 +278,12 @@ static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
|
||||
u32 tmp;
|
||||
|
||||
tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
|
||||
tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
|
||||
if (adev->asic_type == CHIP_ALDEBARAN)
|
||||
tmp &=
|
||||
ALDEBARAN_DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
|
||||
else
|
||||
tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
|
||||
|
||||
tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
|
||||
|
||||
return tmp;
|
||||
|
@ -3937,7 +3937,8 @@ static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 tmp;
|
||||
|
||||
if (adev->asic_type != CHIP_ARCTURUS)
|
||||
if (adev->asic_type != CHIP_ARCTURUS &&
|
||||
adev->asic_type != CHIP_ALDEBARAN)
|
||||
return;
|
||||
|
||||
tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
|
||||
@ -4559,8 +4560,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
|
||||
if (!ring->sched.ready)
|
||||
return 0;
|
||||
|
||||
if (adev->asic_type == CHIP_ARCTURUS ||
|
||||
adev->asic_type == CHIP_ALDEBARAN) {
|
||||
if (adev->asic_type == CHIP_ARCTURUS) {
|
||||
vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
|
||||
vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
|
||||
vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
|
||||
@ -4745,7 +4745,11 @@ static int gfx_v9_0_ecc_late_init(void *handle)
|
||||
}
|
||||
|
||||
/* requires IBs so do in late init after IB pool is initialized */
|
||||
r = gfx_v9_0_do_edc_gpr_workarounds(adev);
|
||||
if (adev->asic_type == CHIP_ALDEBARAN)
|
||||
r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
|
||||
else
|
||||
r = gfx_v9_0_do_edc_gpr_workarounds(adev);
|
||||
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
|
@ -22,6 +22,7 @@
|
||||
*/
|
||||
#include "amdgpu.h"
|
||||
#include "soc15.h"
|
||||
#include "soc15d.h"
|
||||
|
||||
#include "gc/gc_9_4_2_offset.h"
|
||||
#include "gc/gc_9_4_2_sh_mask.h"
|
||||
@ -31,6 +32,11 @@
|
||||
#include "amdgpu_ras.h"
|
||||
#include "amdgpu_gfx.h"
|
||||
|
||||
#define SE_ID_MAX 8
|
||||
#define CU_ID_MAX 16
|
||||
#define SIMD_ID_MAX 4
|
||||
#define WAVE_ID_MAX 10
|
||||
|
||||
enum gfx_v9_4_2_utc_type {
|
||||
VML2_MEM,
|
||||
VML2_WALKER_MEM,
|
||||
@ -79,6 +85,634 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_2_alde[] = {
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20),
|
||||
};
|
||||
|
||||
/**
* This shader is used to clear VGPRS and LDS, and also write the input
* pattern into the write back buffer, which will be used by driver to
* check whether all SIMDs have been covered.
*/
static const u32 vgpr_init_compute_shader_aldebaran[] = {
|
||||
0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
|
||||
0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
|
||||
0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xd3d94000,
|
||||
0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 0xd3d94003,
|
||||
0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 0xd3d94006,
|
||||
0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 0xd3d94009,
|
||||
0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 0xd3d9400c,
|
||||
0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 0xd3d9400f,
|
||||
0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 0xd3d94012,
|
||||
0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 0xd3d94015,
|
||||
0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 0xd3d94018,
|
||||
0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 0xd3d9401b,
|
||||
0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 0xd3d9401e,
|
||||
0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 0xd3d94021,
|
||||
0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 0xd3d94024,
|
||||
0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 0xd3d94027,
|
||||
0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 0xd3d9402a,
|
||||
0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 0xd3d9402d,
|
||||
0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 0xd3d94030,
|
||||
0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 0xd3d94033,
|
||||
0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 0xd3d94036,
|
||||
0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 0xd3d94039,
|
||||
0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 0xd3d9403c,
|
||||
0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 0xd3d9403f,
|
||||
0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 0xd3d94042,
|
||||
0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 0xd3d94045,
|
||||
0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 0xd3d94048,
|
||||
0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 0xd3d9404b,
|
||||
0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 0xd3d9404e,
|
||||
0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 0xd3d94051,
|
||||
0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 0xd3d94054,
|
||||
0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 0xd3d94057,
|
||||
0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 0xd3d9405a,
|
||||
0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 0xd3d9405d,
|
||||
0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 0xd3d94060,
|
||||
0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 0xd3d94063,
|
||||
0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 0xd3d94066,
|
||||
0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 0xd3d94069,
|
||||
0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 0xd3d9406c,
|
||||
0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 0xd3d9406f,
|
||||
0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080, 0xd3d94072,
|
||||
0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 0xd3d94075,
|
||||
0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 0xd3d94078,
|
||||
0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 0xd3d9407b,
|
||||
0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 0xd3d9407e,
|
||||
0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 0xd3d94081,
|
||||
0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 0xd3d94084,
|
||||
0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 0xd3d94087,
|
||||
0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 0xd3d9408a,
|
||||
0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 0xd3d9408d,
|
||||
0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 0xd3d94090,
|
||||
0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 0xd3d94093,
|
||||
0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 0xd3d94096,
|
||||
0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 0xd3d94099,
|
||||
0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 0xd3d9409c,
|
||||
0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 0xd3d9409f,
|
||||
0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 0xd3d940a2,
|
||||
0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 0xd3d940a5,
|
||||
0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 0xd3d940a8,
|
||||
0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 0xd3d940ab,
|
||||
0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 0xd3d940ae,
|
||||
0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 0xd3d940b1,
|
||||
0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 0xd3d940b4,
|
||||
0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 0xd3d940b7,
|
||||
0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 0xd3d940ba,
|
||||
0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 0xd3d940bd,
|
||||
0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 0xd3d940c0,
|
||||
0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 0xd3d940c3,
|
||||
0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 0xd3d940c6,
|
||||
0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 0xd3d940c9,
|
||||
0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 0xd3d940cc,
|
||||
0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 0xd3d940cf,
|
||||
0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 0xd3d940d2,
|
||||
0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 0xd3d940d5,
|
||||
0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 0xd3d940d8,
|
||||
0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 0xd3d940db,
|
||||
0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 0xd3d940de,
|
||||
0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 0xd3d940e1,
|
||||
0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 0xd3d940e4,
|
||||
0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 0xd3d940e7,
|
||||
0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 0xd3d940ea,
|
||||
0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 0xd3d940ed,
|
||||
0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 0xd3d940f0,
|
||||
0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 0xd3d940f3,
|
||||
0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 0xd3d940f6,
|
||||
0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 0xd3d940f9,
|
||||
0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 0xd3d940fc,
|
||||
0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 0xd3d940ff,
|
||||
0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 0x7e000280,
|
||||
0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 0x7e0c0280,
|
||||
0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 0xd28c0001,
|
||||
0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xbe8b0004, 0xb78b4000,
|
||||
0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 0x00020201,
|
||||
0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 0xbf84fff8,
|
||||
0xbf810000,
|
||||
};
|
||||
|
||||
const struct soc15_reg_entry vgpr_init_regs_aldebaran[] = {
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 4 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0xbf },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x400006 }, /* 64KB LDS */
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /* 63 - accum-offset = 256 */
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
|
||||
};
|
||||
|
||||
/**
* The shaders below are used to clear SGPRs, and also write the input
* pattern into the write back buffer. The first two dispatches should be
* scheduled simultaneously so that all SGPRs can be allocated. Dispatch 1
* therefore needs to check the write back buffer before it is scheduled, to
* make sure the waves of dispatch 0 have been spread evenly across all SIMDs.
* Both dispatch 0 and dispatch 1 should be halted until all waves are
* dispatched, and then the driver writes a pattern to the shared memory to
* make all waves continue.
*/
static const u32 sgpr112_init_compute_shader_aldebaran[] = {
|
||||
0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
|
||||
0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
|
||||
0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
|
||||
0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
|
||||
0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
|
||||
0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
|
||||
0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
|
||||
0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
|
||||
0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
|
||||
0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
|
||||
0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
|
||||
0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
|
||||
0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
|
||||
0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
|
||||
0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080,
|
||||
0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080,
|
||||
0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080,
|
||||
0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080,
|
||||
0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080,
|
||||
0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080,
|
||||
0xbeda0080, 0xbedb0080, 0xbedc0080, 0xbedd0080, 0xbede0080, 0xbedf0080,
|
||||
0xbee00080, 0xbee10080, 0xbee20080, 0xbee30080, 0xbee40080, 0xbee50080,
|
||||
0xbf810000
|
||||
};
|
||||
|
||||
const struct soc15_reg_entry sgpr112_init_regs_aldebaran[] = {
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 8 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x340 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
|
||||
};
|
||||
|
||||
static const u32 sgpr96_init_compute_shader_aldebaran[] = {
|
||||
0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
|
||||
0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
|
||||
0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
|
||||
0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
|
||||
0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
|
||||
0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
|
||||
0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
|
||||
0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
|
||||
0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
|
||||
0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
|
||||
0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
|
||||
0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
|
||||
0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
|
||||
0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
|
||||
0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080,
|
||||
0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080,
|
||||
0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080,
|
||||
0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080,
|
||||
0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080,
|
||||
0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080,
|
||||
0xbf810000,
|
||||
};
|
||||
|
||||
const struct soc15_reg_entry sgpr96_init_regs_aldebaran[] = {
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0xc },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x2c0 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
|
||||
};
|
||||
|
||||
/**
* This shader is used to clear the uninitialized SGPRs after the above
* two dispatches; because of a hardware limitation, dispatch 0 can't clear
* the top-hole SGPRs, so 4 waves per SIMD are needed to cover them.
*/
static const u32 sgpr64_init_compute_shader_aldebaran[] = {
|
||||
0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
|
||||
0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
|
||||
0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
|
||||
0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
|
||||
0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
|
||||
0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
|
||||
0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
|
||||
0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
|
||||
0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
|
||||
0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
|
||||
0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
|
||||
0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
|
||||
0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
|
||||
0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
|
||||
0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbf810000,
|
||||
};
|
||||
|
||||
const struct soc15_reg_entry sgpr64_init_regs_aldebaran[] = {
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0x10 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x1c0 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
|
||||
};
|
||||
|
||||
static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring,
|
||||
struct amdgpu_ib *ib,
|
||||
const u32 *shader_ptr, u32 shader_size,
|
||||
const struct soc15_reg_entry *init_regs, u32 regs_size,
|
||||
u32 compute_dim_x, u64 wb_gpu_addr, u32 pattern,
|
||||
struct dma_fence **fence_ptr)
|
||||
{
|
||||
int r, i;
|
||||
uint32_t total_size, shader_offset;
|
||||
u64 gpu_addr;
|
||||
|
||||
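/* IB size: 3 dwords per SET_SH_REG register write, 4 dwords for the shader
* address, 5 for the write back buffer and pattern, and 5 for the dispatch
* packet; multiplied by 4 to convert dwords to bytes.
*/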
total_size = (regs_size * 3 + 4 + 5 + 5) * 4;
|
||||
total_size = ALIGN(total_size, 256);
|
||||
shader_offset = total_size;
|
||||
total_size += ALIGN(shader_size, 256);
|
||||
|
||||
/* allocate an indirect buffer to put the commands in */
|
||||
memset(ib, 0, sizeof(*ib));
|
||||
r = amdgpu_ib_get(adev, NULL, total_size,
|
||||
AMDGPU_IB_POOL_DIRECT, ib);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "failed to get ib (%d).\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
/* load the compute shaders */
|
||||
for (i = 0; i < shader_size/sizeof(u32); i++)
|
||||
ib->ptr[i + (shader_offset / 4)] = shader_ptr[i];
|
||||
|
||||
/* init the ib length to 0 */
|
||||
ib->length_dw = 0;
|
||||
|
||||
/* write the register state for the compute dispatch */
|
||||
for (i = 0; i < regs_size; i++) {
|
||||
ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
|
||||
ib->ptr[ib->length_dw++] = SOC15_REG_ENTRY_OFFSET(init_regs[i])
|
||||
- PACKET3_SET_SH_REG_START;
|
||||
ib->ptr[ib->length_dw++] = init_regs[i].reg_value;
|
||||
}
|
||||
|
||||
/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
|
||||
gpu_addr = (ib->gpu_addr + (u64)shader_offset) >> 8;
|
||||
ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
|
||||
ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_PGM_LO)
|
||||
- PACKET3_SET_SH_REG_START;
|
||||
ib->ptr[ib->length_dw++] = lower_32_bits(gpu_addr);
|
||||
ib->ptr[ib->length_dw++] = upper_32_bits(gpu_addr);
|
||||
|
||||
/* write the wb buffer address */
|
||||
ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 3);
|
||||
ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_USER_DATA_0)
|
||||
- PACKET3_SET_SH_REG_START;
|
||||
ib->ptr[ib->length_dw++] = lower_32_bits(wb_gpu_addr);
|
||||
ib->ptr[ib->length_dw++] = upper_32_bits(wb_gpu_addr);
|
||||
ib->ptr[ib->length_dw++] = pattern;
|
||||
|
||||
/* write dispatch packet */
|
||||
ib->ptr[ib->length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
|
||||
ib->ptr[ib->length_dw++] = compute_dim_x; /* x */
|
||||
ib->ptr[ib->length_dw++] = 1; /* y */
|
||||
ib->ptr[ib->length_dw++] = 1; /* z */
|
||||
ib->ptr[ib->length_dw++] =
|
||||
REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
|
||||
|
||||
/* schedule the ib on the ring */
r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr);
if (r) {
dev_err(adev->dev, "ib submit failed (%d).\n", r);
amdgpu_ib_free(adev, ib, NULL);
}
return r;
}

static void gfx_v9_4_2_log_wave_assignment(struct amdgpu_device *adev, uint32_t *wb_ptr)
|
||||
{
|
||||
uint32_t se, cu, simd, wave;
|
||||
uint32_t offset = 0;
|
||||
char *str;
|
||||
int size;
|
||||
|
||||
str = kmalloc(256, GFP_KERNEL);
|
||||
if (!str)
|
||||
return;
|
||||
|
||||
dev_dbg(adev->dev, "wave assignment:\n");
|
||||
|
||||
for (se = 0; se < adev->gfx.config.max_shader_engines; se++) {
|
||||
for (cu = 0; cu < CU_ID_MAX; cu++) {
|
||||
memset(str, 0, 256);
|
||||
size = sprintf(str, "SE[%02d]CU[%02d]: ", se, cu);
|
||||
for (simd = 0; simd < SIMD_ID_MAX; simd++) {
|
||||
size += sprintf(str + size, "[");
|
||||
for (wave = 0; wave < WAVE_ID_MAX; wave++) {
|
||||
size += sprintf(str + size, "%x", wb_ptr[offset]);
|
||||
offset++;
|
||||
}
|
||||
size += sprintf(str + size, "] ");
|
||||
}
|
||||
dev_dbg(adev->dev, "%s\n", str);
|
||||
}
|
||||
}
|
||||
|
||||
kfree(str);
|
||||
}
|
||||
|
||||
static int gfx_v9_4_2_wait_for_waves_assigned(struct amdgpu_device *adev,
|
||||
uint32_t *wb_ptr, uint32_t mask,
|
||||
uint32_t pattern, uint32_t num_wave, bool wait)
|
||||
{
|
||||
uint32_t se, cu, simd, wave;
|
||||
uint32_t loop = 0;
|
||||
uint32_t wave_cnt;
|
||||
uint32_t offset;
|
||||
|
||||
do {
|
||||
wave_cnt = 0;
|
||||
offset = 0;
|
||||
|
||||
for (se = 0; se < adev->gfx.config.max_shader_engines; se++)
|
||||
for (cu = 0; cu < CU_ID_MAX; cu++)
|
||||
for (simd = 0; simd < SIMD_ID_MAX; simd++)
|
||||
for (wave = 0; wave < WAVE_ID_MAX; wave++) {
|
||||
if (((1 << wave) & mask) &&
|
||||
(wb_ptr[offset] == pattern))
|
||||
wave_cnt++;
|
||||
|
||||
offset++;
|
||||
}
|
||||
|
||||
if (wave_cnt == num_wave)
|
||||
return 0;
|
||||
|
||||
mdelay(1);
|
||||
} while (++loop < 2000 && wait);
|
||||
|
||||
dev_err(adev->dev, "actual wave num: %d, expected wave num: %d\n",
|
||||
wave_cnt, num_wave);
|
||||
|
||||
gfx_v9_4_2_log_wave_assignment(adev, wb_ptr);
|
||||
|
||||
return -EBADSLT;
|
||||
}
|
||||
|
||||
static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev)
|
||||
{
|
||||
int r;
|
||||
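/* one write back dword per wave slot: SE * CU * SIMD * WAVE */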
int wb_size = adev->gfx.config.max_shader_engines *
|
||||
CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX;
|
||||
struct amdgpu_ib wb_ib;
|
||||
struct amdgpu_ib disp_ibs[3];
|
||||
struct dma_fence *fences[3];
|
||||
u32 pattern[3] = { 0x1, 0x5, 0xa };
|
||||
|
||||
/* bail if the compute ring is not ready */
|
||||
if (!adev->gfx.compute_ring[0].sched.ready ||
|
||||
!adev->gfx.compute_ring[1].sched.ready)
|
||||
return 0;
|
||||
|
||||
/* allocate the write-back buffer from IB */
|
||||
memset(&wb_ib, 0, sizeof(wb_ib));
|
||||
r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t),
|
||||
AMDGPU_IB_POOL_DIRECT, &wb_ib);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "failed to get ib (%d) for wb\n", r);
|
||||
return r;
|
||||
}
|
||||
memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
|
||||
|
||||
r = gfx_v9_4_2_run_shader(adev,
|
||||
&adev->gfx.compute_ring[0],
|
||||
&disp_ibs[0],
|
||||
sgpr112_init_compute_shader_aldebaran,
|
||||
sizeof(sgpr112_init_compute_shader_aldebaran),
|
||||
sgpr112_init_regs_aldebaran,
|
||||
ARRAY_SIZE(sgpr112_init_regs_aldebaran),
|
||||
adev->gfx.cu_info.number,
|
||||
wb_ib.gpu_addr, pattern[0], &fences[0]);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "failed to clear first 224 sgprs\n");
|
||||
goto pro_end;
|
||||
}
|
||||
|
||||
r = gfx_v9_4_2_wait_for_waves_assigned(adev,
|
||||
&wb_ib.ptr[1], 0b11,
|
||||
pattern[0],
|
||||
adev->gfx.cu_info.number * SIMD_ID_MAX * 2,
|
||||
true);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "wave coverage failed when clear first 224 sgprs\n");
|
||||
wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
|
||||
goto disp0_failed;
|
||||
}
|
||||
|
||||
r = gfx_v9_4_2_run_shader(adev,
|
||||
&adev->gfx.compute_ring[1],
|
||||
&disp_ibs[1],
|
||||
sgpr96_init_compute_shader_aldebaran,
|
||||
sizeof(sgpr96_init_compute_shader_aldebaran),
|
||||
sgpr96_init_regs_aldebaran,
|
||||
ARRAY_SIZE(sgpr96_init_regs_aldebaran),
|
||||
adev->gfx.cu_info.number * 2,
|
||||
wb_ib.gpu_addr, pattern[1], &fences[1]);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "failed to clear next 576 sgprs\n");
|
||||
goto disp0_failed;
|
||||
}
|
||||
|
||||
r = gfx_v9_4_2_wait_for_waves_assigned(adev,
|
||||
&wb_ib.ptr[1], 0b11111100,
|
||||
pattern[1], adev->gfx.cu_info.number * SIMD_ID_MAX * 6,
|
||||
true);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "wave coverage failed when clear first 576 sgprs\n");
|
||||
wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
|
||||
goto disp1_failed;
|
||||
}
|
||||
|
||||
wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
|
||||
|
||||
/* wait for the GPU to finish processing the IB */
|
||||
r = dma_fence_wait(fences[0], false);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "timeout to clear first 224 sgprs\n");
|
||||
goto disp1_failed;
|
||||
}
|
||||
|
||||
r = dma_fence_wait(fences[1], false);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "timeout to clear first 576 sgprs\n");
|
||||
goto disp1_failed;
|
||||
}
|
||||
|
||||
memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
|
||||
r = gfx_v9_4_2_run_shader(adev,
|
||||
&adev->gfx.compute_ring[0],
|
||||
&disp_ibs[2],
|
||||
sgpr64_init_compute_shader_aldebaran,
|
||||
sizeof(sgpr64_init_compute_shader_aldebaran),
|
||||
sgpr64_init_regs_aldebaran,
|
||||
ARRAY_SIZE(sgpr64_init_regs_aldebaran),
|
||||
adev->gfx.cu_info.number,
|
||||
wb_ib.gpu_addr, pattern[2], &fences[2]);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "failed to clear first 256 sgprs\n");
|
||||
goto disp1_failed;
|
||||
}
|
||||
|
||||
r = gfx_v9_4_2_wait_for_waves_assigned(adev,
|
||||
&wb_ib.ptr[1], 0b1111,
|
||||
pattern[2],
|
||||
adev->gfx.cu_info.number * SIMD_ID_MAX * 4,
|
||||
true);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "wave coverage failed when clear first 256 sgprs\n");
|
||||
wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
|
||||
goto disp2_failed;
|
||||
}
|
||||
|
||||
wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
|
||||
|
||||
r = dma_fence_wait(fences[2], false);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "timeout to clear first 256 sgprs\n");
|
||||
goto disp2_failed;
|
||||
}
|
||||
|
||||
disp2_failed:
|
||||
amdgpu_ib_free(adev, &disp_ibs[2], NULL);
|
||||
dma_fence_put(fences[2]);
|
||||
disp1_failed:
|
||||
amdgpu_ib_free(adev, &disp_ibs[1], NULL);
|
||||
dma_fence_put(fences[1]);
|
||||
disp0_failed:
|
||||
amdgpu_ib_free(adev, &disp_ibs[0], NULL);
|
||||
dma_fence_put(fences[0]);
|
||||
pro_end:
|
||||
amdgpu_ib_free(adev, &wb_ib, NULL);
|
||||
|
||||
if (r)
|
||||
dev_info(adev->dev, "Init SGPRS Failed\n");
|
||||
else
|
||||
dev_info(adev->dev, "Init SGPRS Successfully\n");
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev)
|
||||
{
|
||||
int r;
|
||||
/* CU_ID: 0~15, SIMD_ID: 0~3, WAVE_ID: 0 ~ 9 */
|
||||
int wb_size = adev->gfx.config.max_shader_engines *
|
||||
CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX;
|
||||
struct amdgpu_ib wb_ib;
|
||||
struct amdgpu_ib disp_ib;
|
||||
struct dma_fence *fence;
|
||||
u32 pattern = 0xa;
|
||||
|
||||
/* bail if the compute ring is not ready */
|
||||
if (!adev->gfx.compute_ring[0].sched.ready)
|
||||
return 0;
|
||||
|
||||
/* allocate the write-back buffer from IB */
|
||||
memset(&wb_ib, 0, sizeof(wb_ib));
|
||||
r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t),
|
||||
AMDGPU_IB_POOL_DIRECT, &wb_ib);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "failed to get ib (%d) for wb.\n", r);
|
||||
return r;
|
||||
}
|
||||
memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
|
||||
|
||||
r = gfx_v9_4_2_run_shader(adev,
|
||||
&adev->gfx.compute_ring[0],
|
||||
&disp_ib,
|
||||
vgpr_init_compute_shader_aldebaran,
|
||||
sizeof(vgpr_init_compute_shader_aldebaran),
|
||||
vgpr_init_regs_aldebaran,
|
||||
ARRAY_SIZE(vgpr_init_regs_aldebaran),
|
||||
adev->gfx.cu_info.number,
|
||||
wb_ib.gpu_addr, pattern, &fence);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "failed to clear vgprs\n");
|
||||
goto pro_end;
|
||||
}
|
||||
|
||||
/* wait for the GPU to finish processing the IB */
|
||||
r = dma_fence_wait(fence, false);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "timeout to clear vgprs\n");
|
||||
goto disp_failed;
|
||||
}
|
||||
|
||||
r = gfx_v9_4_2_wait_for_waves_assigned(adev,
|
||||
&wb_ib.ptr[1], 0b1,
|
||||
pattern,
|
||||
adev->gfx.cu_info.number * SIMD_ID_MAX,
|
||||
false);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "failed to cover all simds when clearing vgprs\n");
|
||||
goto disp_failed;
|
||||
}
|
||||
|
||||
disp_failed:
|
||||
amdgpu_ib_free(adev, &disp_ib, NULL);
|
||||
dma_fence_put(fence);
|
||||
pro_end:
|
||||
amdgpu_ib_free(adev, &wb_ib, NULL);
|
||||
|
||||
if (r)
|
||||
dev_info(adev->dev, "Init VGPRS Failed\n");
|
||||
else
|
||||
dev_info(adev->dev, "Init VGPRS Successfully\n");
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
/* only support when RAS is enabled */
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
return 0;

gfx_v9_4_2_do_sgprs_init(adev);

gfx_v9_4_2_do_vgprs_init(adev);

return 0;
}

static void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev);
static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev);

@ -808,8 +1442,9 @@ static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = {
REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) },
};

static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs =
{ SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 };
static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = {
SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16
};

static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev,
const struct soc15_reg_entry *reg,

@ -1039,13 +1674,16 @@ static void gfx_v9_4_2_reset_utc_err_status(struct amdgpu_device *adev)
|
||||
static void gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev)
|
||||
{
|
||||
uint32_t i, j;
|
||||
uint32_t value;
|
||||
|
||||
value = REG_SET_FIELD(0, GCEA_ERR_STATUS, CLEAR_ERROR_STATUS, 0x1);
|
||||
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
|
||||
for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
|
||||
j++) {
|
||||
gfx_v9_4_2_select_se_sh(adev, i, 0, j);
|
||||
WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10);
|
||||
WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), value);
|
||||
}
|
||||
}
|
||||
gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
|
||||
|
@ -29,6 +29,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev,
uint32_t die_id);
void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev);
int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev);

extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs;

@ -283,10 +283,14 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
block_size);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
/* Send no-retry XNACK on fault to suppress VM fault storm.
* On Aldebaran, XNACK can be enabled in the SQ per-process.
* Retry faults need to be enabled for that to work.
*/
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!adev->gmc.noretry);
!adev->gmc.noretry ||
adev->asic_type == CHIP_ALDEBARAN);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,

@ -53,6 +53,7 @@
|
||||
#include "mmhub_v1_7.h"
|
||||
#include "umc_v6_1.h"
|
||||
#include "umc_v6_0.h"
|
||||
#include "hdp_v4_0.h"
|
||||
|
||||
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
|
||||
|
||||
@ -1210,6 +1211,11 @@ static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
|
||||
adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
|
||||
}
|
||||
|
||||
static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->hdp.ras_funcs = &hdp_v4_0_ras_funcs;
|
||||
}
|
||||
|
||||
static int gmc_v9_0_early_init(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
@ -1230,6 +1236,7 @@ static int gmc_v9_0_early_init(void *handle)
|
||||
gmc_v9_0_set_mmhub_funcs(adev);
|
||||
gmc_v9_0_set_mmhub_ras_funcs(adev);
|
||||
gmc_v9_0_set_gfxhub_funcs(adev);
|
||||
gmc_v9_0_set_hdp_ras_funcs(adev);
|
||||
|
||||
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
|
||||
adev->gmc.shared_aperture_end =
|
||||
@ -1255,7 +1262,7 @@ static int gmc_v9_0_late_init(void *handle)
|
||||
* writes, while disables HBM ECC for vega10.
|
||||
*/
|
||||
if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
|
||||
if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) {
|
||||
if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) {
|
||||
if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
|
||||
adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
|
||||
}
|
||||
@ -1265,6 +1272,10 @@ static int gmc_v9_0_late_init(void *handle)
|
||||
adev->mmhub.ras_funcs->reset_ras_error_count)
|
||||
adev->mmhub.ras_funcs->reset_ras_error_count(adev);
|
||||
|
||||
if (adev->hdp.ras_funcs &&
|
||||
adev->hdp.ras_funcs->reset_ras_error_count)
|
||||
adev->hdp.ras_funcs->reset_ras_error_count(adev);
|
||||
|
||||
r = amdgpu_gmc_ras_late_init(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
@ -59,12 +59,31 @@ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
|
||||
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
|
||||
}
|
||||
|
||||
static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev,
|
||||
void *ras_error_status)
|
||||
{
|
||||
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
|
||||
|
||||
err_data->ue_count = 0;
|
||||
err_data->ce_count = 0;
|
||||
|
||||
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
|
||||
return;
|
||||
|
||||
/* HDP SRAM errors are uncorrectable ones (i.e. fatal errors) */
|
||||
err_data->ue_count += RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
|
||||
};
|
||||
|
||||
static void hdp_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
{
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
return;
/* read back hdp ras counter to reset it to 0 */
RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);

if (adev->asic_type >= CHIP_ALDEBARAN)
WREG32_SOC15(HDP, 0, mmHDP_EDC_CNT, 0);
else
/* read back hdp ras counter to reset it to 0 */
RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
}

static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev,
|
||||
@ -130,10 +149,16 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev)
|
||||
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
|
||||
}
|
||||
|
||||
const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs = {
|
||||
.ras_late_init = amdgpu_hdp_ras_late_init,
|
||||
.ras_fini = amdgpu_hdp_ras_fini,
|
||||
.query_ras_error_count = hdp_v4_0_query_ras_error_count,
|
||||
.reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
|
||||
};
|
||||
|
||||
const struct amdgpu_hdp_funcs hdp_v4_0_funcs = {
|
||||
.flush_hdp = hdp_v4_0_flush_hdp,
|
||||
.invalidate_hdp = hdp_v4_0_invalidate_hdp,
|
||||
.reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
|
||||
.update_clock_gating = hdp_v4_0_update_clock_gating,
|
||||
.get_clock_gating_state = hdp_v4_0_get_clockgating_state,
|
||||
.init_registers = hdp_v4_0_init_registers,
|
||||
|
@ -27,5 +27,6 @@
|
||||
#include "soc15_common.h"
|
||||
|
||||
extern const struct amdgpu_hdp_funcs hdp_v4_0_funcs;
|
||||
extern const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs;
|
||||
|
||||
#endif
|
||||
|
@ -296,10 +296,12 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev)
|
||||
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
|
||||
PAGE_TABLE_BLOCK_SIZE,
|
||||
block_size);
|
||||
/* Send no-retry XNACK on fault to suppress VM fault storm. */
|
||||
/* On Aldebaran, XNACK can be enabled in the SQ per-process.
|
||||
* Retry faults need to be enabled for that to work.
|
||||
*/
|
||||
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
|
||||
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
|
||||
!adev->gmc.noretry);
|
||||
1);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL,
|
||||
i * hub->ctx_distance, tmp);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
|
||||
@ -1313,12 +1315,31 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev)
|
||||
}
|
||||
}
|
||||
|
||||
static void mmhub_v1_7_reset_ras_error_status(struct amdgpu_device *adev)
|
||||
{
|
||||
int i;
|
||||
uint32_t reg_value;
|
||||
|
||||
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
|
||||
return;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
|
||||
reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
|
||||
mmhub_v1_7_ea_err_status_regs[i]));
|
||||
reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
|
||||
CLEAR_ERROR_STATUS, 0x01);
|
||||
WREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]),
|
||||
reg_value);
|
||||
}
|
||||
}
|
||||
|
||||
const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = {
|
||||
.ras_late_init = amdgpu_mmhub_ras_late_init,
|
||||
.ras_fini = amdgpu_mmhub_ras_fini,
|
||||
.query_ras_error_count = mmhub_v1_7_query_ras_error_count,
|
||||
.reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
|
||||
.query_ras_error_status = mmhub_v1_7_query_ras_error_status,
|
||||
.reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
|
||||
};
|
||||
|
||||
const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = {
|
||||
|
@ -569,9 +569,9 @@ static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev,
return 0;

mmhub_v2_3_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
state == AMD_CG_STATE_GATE);
mmhub_v2_3_update_medium_grain_light_sleep(adev,
state == AMD_CG_STATE_GATE ? true : false);
state == AMD_CG_STATE_GATE);

return 0;
}

@ -598,7 +598,7 @@ static void nv_pcie_gen3_enable(struct amdgpu_device *adev)

static void nv_program_aspm(struct amdgpu_device *adev)
{
if (amdgpu_aspm != 1)
if (!amdgpu_aspm)
return;

if (!(adev->flags & AMD_IS_APU) &&

@ -1068,6 +1068,7 @@ static int nv_common_early_init(void *handle)
|
||||
case CHIP_SIENNA_CICHLID:
|
||||
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
|
||||
AMD_CG_SUPPORT_GFX_CGCG |
|
||||
AMD_CG_SUPPORT_GFX_CGLS |
|
||||
AMD_CG_SUPPORT_GFX_3D_CGCG |
|
||||
AMD_CG_SUPPORT_MC_MGCG |
|
||||
AMD_CG_SUPPORT_VCN_MGCG |
|
||||
@ -1091,6 +1092,7 @@ static int nv_common_early_init(void *handle)
|
||||
case CHIP_NAVY_FLOUNDER:
|
||||
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
|
||||
AMD_CG_SUPPORT_GFX_CGCG |
|
||||
AMD_CG_SUPPORT_GFX_CGLS |
|
||||
AMD_CG_SUPPORT_GFX_3D_CGCG |
|
||||
AMD_CG_SUPPORT_VCN_MGCG |
|
||||
AMD_CG_SUPPORT_JPEG_MGCG |
|
||||
@ -1121,6 +1123,8 @@ static int nv_common_early_init(void *handle)
|
||||
AMD_CG_SUPPORT_MC_LS |
|
||||
AMD_CG_SUPPORT_GFX_FGCG |
|
||||
AMD_CG_SUPPORT_VCN_MGCG |
|
||||
AMD_CG_SUPPORT_SDMA_MGCG |
|
||||
AMD_CG_SUPPORT_SDMA_LS |
|
||||
AMD_CG_SUPPORT_JPEG_MGCG;
|
||||
adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
|
||||
AMD_PG_SUPPORT_VCN |
|
||||
@ -1132,6 +1136,7 @@ static int nv_common_early_init(void *handle)
|
||||
case CHIP_DIMGREY_CAVEFISH:
|
||||
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
|
||||
AMD_CG_SUPPORT_GFX_CGCG |
|
||||
AMD_CG_SUPPORT_GFX_CGLS |
|
||||
AMD_CG_SUPPORT_GFX_3D_CGCG |
|
||||
AMD_CG_SUPPORT_VCN_MGCG |
|
||||
AMD_CG_SUPPORT_JPEG_MGCG |
|
||||
|
@ -97,7 +97,6 @@ enum psp_gfx_cmd_id
|
||||
GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */
|
||||
GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */
|
||||
GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */
|
||||
GFX_CMD_ID_CLEAR_VF_FW = 0x0000000D, /* Clear VF FW, to be used on VF shutdown. */
|
||||
GFX_CMD_ID_GET_FW_ATTESTATION = 0x0000000F, /* Query GPUVA of the Fw Attestation DB */
|
||||
/* IDs up to 0x1F are reserved for older programs (Raven, Vega 10/12/20) */
GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */
|
||||
|
@ -1109,6 +1109,8 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
|
||||
if (adev->asic_type == CHIP_ARCTURUS &&
|
||||
adev->sdma.instance[i].fw_version >= 14)
|
||||
WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable);
|
||||
/* Extend page fault timeout to avoid interrupt storm */
|
||||
WREG32_SDMA(i, mmSDMA0_UTCL1_TIMEOUT, 0x00800080);
|
||||
}
|
||||
|
||||
}
|
||||
@ -2227,7 +2229,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev,
|
||||
memset(&task_info, 0, sizeof(struct amdgpu_task_info));
|
||||
amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
|
||||
|
||||
dev_info(adev->dev,
|
||||
dev_dbg_ratelimited(adev->dev,
|
||||
"[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u "
|
||||
"pasid:%u, for process %s pid %d thread %s pid %d\n",
|
||||
instance, addr, entry->src_id, entry->ring_id, entry->vmid,
|
||||
@ -2240,7 +2242,7 @@ static int sdma_v4_0_process_vm_hole_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
dev_err(adev->dev, "MC or SEM address in VM hole\n");
|
||||
dev_dbg_ratelimited(adev->dev, "MC or SEM address in VM hole\n");
|
||||
sdma_v4_0_print_iv_entry(adev, entry);
|
||||
return 0;
|
||||
}
|
||||
@ -2249,7 +2251,7 @@ static int sdma_v4_0_process_doorbell_invalid_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
dev_err(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n");
|
||||
dev_dbg_ratelimited(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n");
|
||||
sdma_v4_0_print_iv_entry(adev, entry);
|
||||
return 0;
|
||||
}
|
||||
@ -2258,7 +2260,7 @@ static int sdma_v4_0_process_pool_timeout_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
dev_err(adev->dev,
|
||||
dev_dbg_ratelimited(adev->dev,
|
||||
"Polling register/memory timeout executing POLL_REG/MEM with finite timer\n");
|
||||
sdma_v4_0_print_iv_entry(adev, entry);
|
||||
return 0;
|
||||
@ -2268,7 +2270,7 @@ static int sdma_v4_0_process_srbm_write_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
dev_err(adev->dev,
|
||||
dev_dbg_ratelimited(adev->dev,
|
||||
"SDMA gets an Register Write SRBM_WRITE command in non-privilege command buffer\n");
|
||||
sdma_v4_0_print_iv_entry(adev, entry);
|
||||
return 0;
|
||||
@ -2597,27 +2599,18 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_srbm_write_irq_funcs = {
|
||||
|
||||
static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->sdma.trap_irq.num_types = adev->sdma.num_instances;
|
||||
adev->sdma.ecc_irq.num_types = adev->sdma.num_instances;
|
||||
/* For Arcturus and Aldebaran, add another 4 irq handlers */
switch (adev->sdma.num_instances) {
|
||||
case 1:
|
||||
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
|
||||
adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
|
||||
break;
|
||||
case 5:
|
||||
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
|
||||
adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
|
||||
break;
|
||||
case 8:
|
||||
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
|
||||
adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
|
||||
adev->sdma.vm_hole_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
|
||||
adev->sdma.doorbell_invalid_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
|
||||
adev->sdma.pool_timeout_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
|
||||
adev->sdma.srbm_write_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
|
||||
adev->sdma.vm_hole_irq.num_types = adev->sdma.num_instances;
|
||||
adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances;
|
||||
adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances;
|
||||
adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances;
|
||||
break;
|
||||
case 2:
|
||||
default:
|
||||
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
|
||||
adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
|
||||
break;
|
||||
}
|
||||
adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
|
||||
|
@ -405,18 +405,6 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
|
||||
|
||||
/* Invalidate L2, because if we don't do it, we might get stale cache
|
||||
* lines from previous IBs.
|
||||
*/
|
||||
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
|
||||
amdgpu_ring_write(ring, 0);
|
||||
amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV |
|
||||
SDMA_GCR_GL2_WB |
|
||||
SDMA_GCR_GLM_INV |
|
||||
SDMA_GCR_GLM_WB) << 16);
|
||||
amdgpu_ring_write(ring, 0xffffff80);
|
||||
amdgpu_ring_write(ring, 0xffff);
|
||||
|
||||
/* An IB packet must end on a 8 DW boundary--the next dword
|
||||
* must be on a 8-dword boundary. Our IB packet below is 6
|
||||
* dwords long, thus add x number of NOPs, such that, in
|
||||
@ -437,6 +425,33 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
|
||||
}
|
||||
|
||||
/**
* sdma_v5_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
*
* @ring: amdgpu ring pointer
*
* flush the IB by graphics cache rinse.
*/
static void sdma_v5_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
{
uint32_t gcr_cntl =
SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
SDMA_GCR_GLI_INV(1);

/* flush the entire L0/L1/L2 cache; this can be tuned to match performance requirements */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
}

/**
|
||||
* sdma_v5_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
|
||||
*
|
||||
@ -1643,6 +1658,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
|
||||
10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
|
||||
.emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */
|
||||
.emit_ib = sdma_v5_0_ring_emit_ib,
|
||||
.emit_mem_sync = sdma_v5_0_ring_emit_mem_sync,
|
||||
.emit_fence = sdma_v5_0_ring_emit_fence,
|
||||
.emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,
|
||||
.emit_vm_flush = sdma_v5_0_ring_emit_vm_flush,
|
||||
|
@ -1556,6 +1556,10 @@ static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *ade
|
||||
int i;
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
|
||||
if (adev->sdma.instance[i].fw_version < 70 && adev->asic_type == CHIP_VANGOGH)
|
||||
adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_MGCG;
|
||||
|
||||
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
|
||||
/* Enable sdma clock gating */
|
||||
def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
|
||||
@ -1589,6 +1593,10 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
|
||||
int i;
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
|
||||
if (adev->sdma.instance[i].fw_version < 70 && adev->asic_type == CHIP_VANGOGH)
|
||||
adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_LS;
|
||||
|
||||
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
|
||||
/* Enable sdma mem light sleep */
|
||||
def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
|
||||
|
@ -88,6 +88,23 @@ static u32 smuio_v13_0_get_die_id(struct amdgpu_device *adev)
|
||||
return die_id;
|
||||
}
|
||||
|
||||
/**
|
||||
* smuio_v13_0_get_socket_id - query socket id from FCH
|
||||
*
|
||||
* @adev: amdgpu device pointer
|
||||
*
|
||||
* Returns socket id
|
||||
*/
|
||||
static u32 smuio_v13_0_get_socket_id(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 data, socket_id;
|
||||
|
||||
data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
|
||||
socket_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, SOCKET_ID);
|
||||
|
||||
return socket_id;
|
||||
}
|
||||
|
||||
/**
|
||||
* smuio_v13_0_supports_host_gpu_xgmi - detect xgmi interface between cpu and gpu/s.
|
||||
*
|
||||
@ -115,6 +132,7 @@ const struct amdgpu_smuio_funcs smuio_v13_0_funcs = {
|
||||
.get_rom_index_offset = smuio_v13_0_get_rom_index_offset,
|
||||
.get_rom_data_offset = smuio_v13_0_get_rom_data_offset,
|
||||
.get_die_id = smuio_v13_0_get_die_id,
|
||||
.get_socket_id = smuio_v13_0_get_socket_id,
|
||||
.is_host_gpu_xgmi_supported = smuio_v13_0_is_host_gpu_xgmi_supported,
|
||||
.update_rom_clock_gating = smuio_v13_0_update_rom_clock_gating,
|
||||
.get_clock_gating_state = smuio_v13_0_get_clock_gating_state,
|
||||
|
@ -655,7 +655,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
|
||||
int ret = 0;
|
||||
|
||||
/* avoid NBIF got stuck when do RAS recovery in BACO reset */
|
||||
if (ras && ras->supported)
|
||||
if (ras && adev->ras_enabled)
|
||||
adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
|
||||
|
||||
ret = amdgpu_dpm_baco_reset(adev);
|
||||
@ -663,7 +663,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
|
||||
return ret;
|
||||
|
||||
/* re-enable doorbell interrupt after BACO exit */
|
||||
if (ras && ras->supported)
|
||||
if (ras && adev->ras_enabled)
|
||||
adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
|
||||
|
||||
return 0;
|
||||
@ -710,7 +710,8 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
|
||||
* 1. PMFW version > 0x284300: all cases use baco
|
||||
* 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco
|
||||
*/
|
||||
if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400)
|
||||
if (ras && adev->ras_enabled &&
|
||||
adev->pm.fw_version <= 0x283400)
|
||||
baco_reset = false;
|
||||
break;
|
||||
case CHIP_ALDEBARAN:
|
||||
@ -816,7 +817,7 @@ static void soc15_pcie_gen3_enable(struct amdgpu_device *adev)
|
||||
|
||||
static void soc15_program_aspm(struct amdgpu_device *adev)
|
||||
{
|
||||
if (amdgpu_aspm != 1)
|
||||
if (!amdgpu_aspm)
|
||||
return;
|
||||
|
||||
if (!(adev->flags & AMD_IS_APU) &&
|
||||
@ -1522,9 +1523,6 @@ static int soc15_common_late_init(void *handle)
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
xgpu_ai_mailbox_get_irq(adev);
|
||||
|
||||
if (adev->hdp.funcs->reset_ras_error_count)
|
||||
adev->hdp.funcs->reset_ras_error_count(adev);
|
||||
|
||||
if (adev->nbio.ras_funcs &&
|
||||
adev->nbio.ras_funcs->ras_late_init)
|
||||
r = adev->nbio.ras_funcs->ras_late_init(adev);
|
||||
|
@ -105,6 +105,12 @@ struct ta_ras_trigger_error_input {
|
||||
uint64_t value; // method of error injection, i.e. persistent, coherent, etc.
};
|
||||
|
||||
struct ta_ras_init_flags
|
||||
{
|
||||
uint8_t poison_mode_en;
|
||||
uint8_t dgpu_mode;
|
||||
};
|
||||
|
||||
struct ta_ras_output_flags
|
||||
{
|
||||
uint8_t ras_init_success_flag;
|
||||
@ -115,6 +121,7 @@ struct ta_ras_output_flags
|
||||
/* Common input structure for RAS callbacks */
|
||||
/**********************************************************/
|
||||
union ta_ras_cmd_input {
|
||||
struct ta_ras_init_flags init_flags;
|
||||
struct ta_ras_enable_features_input enable_features;
|
||||
struct ta_ras_disable_features_input disable_features;
|
||||
struct ta_ras_trigger_error_input trigger_error;
|
||||
|
@ -104,6 +104,7 @@ static int vega10_ih_toggle_ring_interrupts(struct amdgpu_device *adev,
|
||||
|
||||
tmp = RREG32(ih_regs->ih_rb_cntl);
|
||||
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
|
||||
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
|
||||
/* enable_intr field is only valid in ring0 */
|
||||
if (ih == &adev->irq.ih)
|
||||
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
|
||||
|
@ -1136,7 +1136,7 @@ static void vi_program_aspm(struct amdgpu_device *adev)
|
||||
bool bL1SS = false;
|
||||
bool bClkReqSupport = true;
|
||||
|
||||
if (amdgpu_aspm != 1)
|
||||
if (!amdgpu_aspm)
|
||||
return;
|
||||
|
||||
if (adev->flags & AMD_IS_APU ||
|
||||
|
@ -12,3 +12,16 @@ config HSA_AMD
select DRM_AMDGPU_USERPTR
help
Enable this if you want to use HSA features on AMD GPU devices.

config HSA_AMD_SVM
bool "Enable HMM-based shared virtual memory manager"
depends on HSA_AMD && DEVICE_PRIVATE
default y
select HMM_MIRROR
select MMU_NOTIFIER
help
Enable this to use unified memory and managed memory in HIP. This
memory manager supports two modes of operation. One based on
preemptions and one based on page faults. To enable page fault
based memory management on most GFXv9 GPUs, set the module
parameter amdgpu.noretry=0.

@ -63,3 +63,8 @@ endif
|
||||
ifneq ($(CONFIG_DEBUG_FS),)
|
||||
AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o
|
||||
endif
|
||||
|
||||
ifneq ($(CONFIG_HSA_AMD_SVM),)
|
||||
AMDKFD_FILES += $(AMDKFD_PATH)/kfd_svm.o \
|
||||
$(AMDKFD_PATH)/kfd_migrate.o
|
||||
endif
|
||||
|
@ -38,6 +38,7 @@
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_device_queue_manager.h"
|
||||
#include "kfd_dbgmgr.h"
|
||||
#include "kfd_svm.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
#include "kfd_smi_events.h"
|
||||
|
||||
@ -1297,7 +1298,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
|
||||
|
||||
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
dev->kgd, args->va_addr, args->size,
|
||||
pdd->vm, (struct kgd_mem **) &mem, &offset,
|
||||
pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
|
||||
flags);
|
||||
|
||||
if (err)
|
||||
@ -1328,7 +1329,8 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
	return 0;

err_free:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
					       pdd->drm_priv, NULL);
err_unlock:
	mutex_unlock(&p->mutex);
	return err;

@ -1365,7 +1367,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
	}

	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
						(struct kgd_mem *)mem, &size);
						(struct kgd_mem *)mem, pdd->drm_priv, &size);

	/* If freeing the buffer failed, leave the handle in place for
	 * clean-up during process tear-down.

@ -1448,7 +1450,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
			goto get_mem_obj_from_handle_failed;
		}
		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
		if (err) {
			pr_err("Failed to map to gpu %d/%d\n",
			       i, args->n_devices);

@ -1555,7 +1557,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
			goto get_mem_obj_from_handle_failed;
		}
		err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
		if (err) {
			pr_err("Failed to unmap from gpu %d/%d\n",
			       i, args->n_devices);

@ -1701,7 +1703,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
	}

	r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
					      args->va_addr, pdd->vm,
					      args->va_addr, pdd->drm_priv,
					      (struct kgd_mem **)&mem, &size,
					      NULL);
	if (r)

@ -1721,7 +1723,8 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
	return 0;

err_free:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
					       pdd->drm_priv, NULL);
err_unlock:
	mutex_unlock(&p->mutex);
	dma_buf_put(dmabuf);
@ -1742,6 +1745,64 @@ static int kfd_ioctl_smi_events(struct file *filep,
	return kfd_smi_event_open(dev, &args->anon_fd);
}

static int kfd_ioctl_set_xnack_mode(struct file *filep,
				    struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_xnack_mode_args *args = data;
	int r = 0;

	mutex_lock(&p->mutex);
	if (args->xnack_enabled >= 0) {
		if (!list_empty(&p->pqm.queues)) {
			pr_debug("Process has user queues running\n");
			mutex_unlock(&p->mutex);
			return -EBUSY;
		}
		if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
			r = -EPERM;
		else
			p->xnack_enabled = args->xnack_enabled;
	} else {
		args->xnack_enabled = p->xnack_enabled;
	}
	mutex_unlock(&p->mutex);

	return r;
}

#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
{
	struct kfd_ioctl_svm_args *args = data;
	int r = 0;

	if (p->svm_disabled)
		return -EPERM;

	pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
		 args->start_addr, args->size, args->op, args->nattr);

	if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))
		return -EINVAL;
	if (!args->start_addr || !args->size)
		return -EINVAL;

	mutex_lock(&p->mutex);

	r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
		      args->attrs);

	mutex_unlock(&p->mutex);

	return r;
}
#else
static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
{
	return -EPERM;
}
#endif

#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}

@ -1840,6 +1901,11 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
			kfd_ioctl_smi_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
			kfd_ioctl_set_xnack_mode, 0),
};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)

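To illustrate how the new AMDKFD_IOC_SET_XNACK_MODE entry above is meant to be driven from userspace, here is a hypothetical minimal sketch (not part of this commit). It assumes the updated include/uapi/linux/kfd_ioctl.h, where struct kfd_ioctl_set_xnack_mode_args carries the single signed xnack_enabled field used by the handler above (a negative value queries the current mode, 0/1 sets it, and setting must happen before any user queues exist):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

int main(void)
{
	struct kfd_ioctl_set_xnack_mode_args args = { .xnack_enabled = -1 };
	int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);

	if (fd < 0)
		return 1;

	/* Negative xnack_enabled queries the current per-process XNACK mode. */
	if (ioctl(fd, AMDKFD_IOC_SET_XNACK_MODE, &args) == 0)
		printf("xnack currently %s\n", args.xnack_enabled ? "enabled" : "disabled");

	/* Enable XNACK; per the handler above this fails with EBUSY if user
	 * queues already exist and EPERM if the GPUs cannot support it.
	 */
	args.xnack_enabled = 1;
	if (ioctl(fd, AMDKFD_IOC_SET_XNACK_MODE, &args) < 0)
		perror("AMDKFD_IOC_SET_XNACK_MODE");

	close(fd);
	return 0;
}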
@ -55,7 +55,7 @@ struct kfd_gpu_cache_info {
	uint32_t	cache_level;
	uint32_t	flags;
	/* Indicates how many Compute Units share this cache
	 * Value = 1 indicates the cache is not shared
	 * within a SA. Value = 1 indicates the cache is not shared
	 */
	uint32_t	num_cu_shared;
};

@ -69,7 +69,6 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
			CRAT_CACHE_FLAGS_DATA_CACHE |
			CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,

	},
	{
		/* Scalar L1 Instruction Cache (in SQC module) per bank */

@ -126,9 +125,6 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
	/* TODO: Add L2 Cache information */
};

/* NOTE: In future if more information is added to struct kfd_gpu_cache_info
 * the following ASICs may need a separate table.
 */
#define hawaii_cache_info kaveri_cache_info
#define tonga_cache_info carrizo_cache_info
#define fiji_cache_info carrizo_cache_info

@ -136,13 +132,562 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
#define polaris11_cache_info carrizo_cache_info
#define polaris12_cache_info carrizo_cache_info
#define vegam_cache_info carrizo_cache_info
/* TODO - check & update Vega10 cache details */
#define vega10_cache_info carrizo_cache_info
#define raven_cache_info carrizo_cache_info
#define renoir_cache_info carrizo_cache_info
/* TODO - check & update Navi10 cache details */
#define navi10_cache_info carrizo_cache_info
#define vangogh_cache_info carrizo_cache_info

/* NOTE: L1 cache information has been updated and L2/L3
 * cache information has been added for Vega10 and
 * newer ASICs. The unit for cache_size is KiB.
 * In future, check & update cache details
 * for every new ASIC is required.
 */

static struct kfd_gpu_cache_info vega10_cache_info[] = {
|
||||
{
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 1,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 3,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 3,
|
||||
},
|
||||
{
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 4096,
|
||||
.cache_level = 2,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 16,
|
||||
},
|
||||
};
|
||||
|
||||
static struct kfd_gpu_cache_info raven_cache_info[] = {
|
||||
{
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 1,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 3,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 3,
|
||||
},
|
||||
{
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 1024,
|
||||
.cache_level = 2,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 11,
|
||||
},
|
||||
};
|
||||
|
||||
static struct kfd_gpu_cache_info renoir_cache_info[] = {
|
||||
{
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 1,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 3,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 3,
|
||||
},
|
||||
{
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 1024,
|
||||
.cache_level = 2,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 8,
|
||||
},
|
||||
};
|
||||
|
||||
static struct kfd_gpu_cache_info vega12_cache_info[] = {
|
||||
{
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 1,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 3,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 3,
|
||||
},
|
||||
{
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 2048,
|
||||
.cache_level = 2,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 5,
|
||||
},
|
||||
};
|
||||
|
||||
static struct kfd_gpu_cache_info vega20_cache_info[] = {
|
||||
{
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 1,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 3,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 3,
|
||||
},
|
||||
{
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 8192,
|
||||
.cache_level = 2,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 16,
|
||||
},
|
||||
};
|
||||
|
||||
static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
|
||||
{
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 1,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 2,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 2,
|
||||
},
|
||||
{
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 8192,
|
||||
.cache_level = 2,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 14,
|
||||
},
|
||||
};
|
||||
|
||||
static struct kfd_gpu_cache_info navi10_cache_info[] = {
|
||||
{
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 1,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 2,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 2,
|
||||
},
|
||||
{
|
||||
/* GL1 Data Cache per SA */
|
||||
.cache_size = 128,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 10,
|
||||
},
|
||||
{
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 4096,
|
||||
.cache_level = 2,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 10,
|
||||
},
|
||||
};
|
||||
|
||||
static struct kfd_gpu_cache_info vangogh_cache_info[] = {
|
||||
{
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 1,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 2,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 2,
|
||||
},
|
||||
{
|
||||
/* GL1 Data Cache per SA */
|
||||
.cache_size = 128,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 8,
|
||||
},
|
||||
{
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 1024,
|
||||
.cache_level = 2,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 8,
|
||||
},
|
||||
};
|
||||
|
||||
static struct kfd_gpu_cache_info navi14_cache_info[] = {
|
||||
{
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 1,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 2,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 2,
|
||||
},
|
||||
{
|
||||
/* GL1 Data Cache per SA */
|
||||
.cache_size = 128,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 12,
|
||||
},
|
||||
{
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 2048,
|
||||
.cache_level = 2,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 12,
|
||||
},
|
||||
};
|
||||
|
||||
static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
|
||||
{
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 1,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 2,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 2,
|
||||
},
|
||||
{
|
||||
/* GL1 Data Cache per SA */
|
||||
.cache_size = 128,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 10,
|
||||
},
|
||||
{
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 4096,
|
||||
.cache_level = 2,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 10,
|
||||
},
|
||||
{
|
||||
/* L3 Data Cache per GPU */
|
||||
.cache_size = 128*1024,
|
||||
.cache_level = 3,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 10,
|
||||
},
|
||||
};
|
||||
|
||||
static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
|
||||
{
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 1,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 2,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 2,
|
||||
},
|
||||
{
|
||||
/* GL1 Data Cache per SA */
|
||||
.cache_size = 128,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 10,
|
||||
},
|
||||
{
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 3072,
|
||||
.cache_level = 2,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 10,
|
||||
},
|
||||
{
|
||||
/* L3 Data Cache per GPU */
|
||||
.cache_size = 96*1024,
|
||||
.cache_level = 3,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 10,
|
||||
},
|
||||
};
|
||||
|
||||
static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
|
||||
{
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 1,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 2,
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 2,
|
||||
},
|
||||
{
|
||||
/* GL1 Data Cache per SA */
|
||||
.cache_size = 128,
|
||||
.cache_level = 1,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 8,
|
||||
},
|
||||
{
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 2048,
|
||||
.cache_level = 2,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 8,
|
||||
},
|
||||
{
|
||||
/* L3 Data Cache per GPU */
|
||||
.cache_size = 32*1024,
|
||||
.cache_level = 3,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
.num_cu_shared = 8,
|
||||
},
|
||||
};
|
||||
|
||||
static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
|
||||
struct crat_subtype_computeunit *cu)
|
||||
@ -544,7 +1089,7 @@ err:
|
||||
}
|
||||
|
||||
/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
|
||||
static int fill_in_pcache(struct crat_subtype_cache *pcache,
|
||||
static int fill_in_l1_pcache(struct crat_subtype_cache *pcache,
|
||||
struct kfd_gpu_cache_info *pcache_info,
|
||||
struct kfd_cu_info *cu_info,
|
||||
int mem_available,
|
||||
@ -597,6 +1142,70 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache,
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
|
||||
static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
|
||||
struct kfd_gpu_cache_info *pcache_info,
|
||||
struct kfd_cu_info *cu_info,
|
||||
int mem_available,
|
||||
int cache_type, unsigned int cu_processor_id)
|
||||
{
|
||||
unsigned int cu_sibling_map_mask;
|
||||
int first_active_cu;
|
||||
int i, j, k;
|
||||
|
||||
/* First check if enough memory is available */
|
||||
if (sizeof(struct crat_subtype_cache) > mem_available)
|
||||
return -ENOMEM;
|
||||
|
||||
cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
|
||||
cu_sibling_map_mask &=
|
||||
((1 << pcache_info[cache_type].num_cu_shared) - 1);
|
||||
first_active_cu = ffs(cu_sibling_map_mask);
|
||||
|
||||
	/* CU could be inactive. In case of shared cache find the first active
	 * CU, and in case of non-shared cache check whether the CU is inactive.
	 * If inactive, skip it.
	 */
|
||||
if (first_active_cu) {
|
||||
memset(pcache, 0, sizeof(struct crat_subtype_cache));
|
||||
pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
|
||||
pcache->length = sizeof(struct crat_subtype_cache);
|
||||
pcache->flags = pcache_info[cache_type].flags;
|
||||
pcache->processor_id_low = cu_processor_id
|
||||
+ (first_active_cu - 1);
|
||||
pcache->cache_level = pcache_info[cache_type].cache_level;
|
||||
pcache->cache_size = pcache_info[cache_type].cache_size;
|
||||
|
||||
/* Sibling map is w.r.t processor_id_low, so shift out
|
||||
* inactive CU
|
||||
*/
|
||||
cu_sibling_map_mask =
|
||||
cu_sibling_map_mask >> (first_active_cu - 1);
|
||||
k = 0;
|
||||
for (i = 0; i < cu_info->num_shader_engines; i++) {
|
||||
for (j = 0; j < cu_info->num_shader_arrays_per_engine;
|
||||
j++) {
|
||||
pcache->sibling_map[k] =
|
||||
(uint8_t)(cu_sibling_map_mask & 0xFF);
|
||||
pcache->sibling_map[k+1] =
|
||||
(uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
|
||||
pcache->sibling_map[k+2] =
|
||||
(uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
|
||||
pcache->sibling_map[k+3] =
|
||||
(uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
|
||||
k += 4;
|
||||
cu_sibling_map_mask =
|
||||
cu_info->cu_bitmap[i % 4][j + i / 4];
|
||||
cu_sibling_map_mask &= (
|
||||
(1 << pcache_info[cache_type].num_cu_shared)
|
||||
- 1);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
|
||||
* tables
|
||||
*
|
||||
@ -624,6 +1233,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
|
||||
int mem_available = available_size;
|
||||
unsigned int cu_processor_id;
|
||||
int ret;
|
||||
unsigned int num_cu_shared;
|
||||
|
||||
switch (kdev->device_info->asic_family) {
|
||||
case CHIP_KAVERI:
|
||||
@ -663,13 +1273,22 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
|
||||
num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
|
||||
break;
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_ARCTURUS:
|
||||
case CHIP_ALDEBARAN:
|
||||
pcache_info = vega10_cache_info;
|
||||
num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
|
||||
break;
|
||||
case CHIP_VEGA12:
|
||||
pcache_info = vega12_cache_info;
|
||||
num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
|
||||
break;
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_ARCTURUS:
|
||||
pcache_info = vega20_cache_info;
|
||||
num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
|
||||
break;
|
||||
case CHIP_ALDEBARAN:
|
||||
pcache_info = aldebaran_cache_info;
|
||||
num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
|
||||
break;
|
||||
case CHIP_RAVEN:
|
||||
pcache_info = raven_cache_info;
|
||||
num_of_cache_types = ARRAY_SIZE(raven_cache_info);
|
||||
@ -680,13 +1299,25 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
|
||||
break;
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI12:
|
||||
case CHIP_NAVI14:
|
||||
case CHIP_SIENNA_CICHLID:
|
||||
case CHIP_NAVY_FLOUNDER:
|
||||
case CHIP_DIMGREY_CAVEFISH:
|
||||
pcache_info = navi10_cache_info;
|
||||
num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
|
||||
break;
|
||||
case CHIP_NAVI14:
|
||||
pcache_info = navi14_cache_info;
|
||||
num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
|
||||
break;
|
||||
case CHIP_SIENNA_CICHLID:
|
||||
pcache_info = sienna_cichlid_cache_info;
|
||||
num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
|
||||
break;
|
||||
case CHIP_NAVY_FLOUNDER:
|
||||
pcache_info = navy_flounder_cache_info;
|
||||
num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
|
||||
break;
|
||||
case CHIP_DIMGREY_CAVEFISH:
|
||||
pcache_info = dimgrey_cavefish_cache_info;
|
||||
num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
|
||||
break;
|
||||
case CHIP_VANGOGH:
|
||||
pcache_info = vangogh_cache_info;
|
||||
num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
|
||||
@ -709,40 +1340,58 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
|
||||
*/
|
||||
|
||||
for (ct = 0; ct < num_of_cache_types; ct++) {
|
||||
cu_processor_id = gpu_processor_id;
|
||||
for (i = 0; i < cu_info->num_shader_engines; i++) {
|
||||
for (j = 0; j < cu_info->num_shader_arrays_per_engine;
|
||||
j++) {
|
||||
for (k = 0; k < cu_info->num_cu_per_sh;
|
||||
k += pcache_info[ct].num_cu_shared) {
|
||||
cu_processor_id = gpu_processor_id;
|
||||
if (pcache_info[ct].cache_level == 1) {
|
||||
for (i = 0; i < cu_info->num_shader_engines; i++) {
|
||||
for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
|
||||
for (k = 0; k < cu_info->num_cu_per_sh;
|
||||
k += pcache_info[ct].num_cu_shared) {
|
||||
ret = fill_in_l1_pcache(pcache,
|
||||
pcache_info,
|
||||
cu_info,
|
||||
mem_available,
|
||||
cu_info->cu_bitmap[i % 4][j + i / 4],
|
||||
ct,
|
||||
cu_processor_id,
|
||||
k);
|
||||
|
||||
ret = fill_in_pcache(pcache,
|
||||
pcache_info,
|
||||
cu_info,
|
||||
mem_available,
|
||||
cu_info->cu_bitmap[i % 4][j + i / 4],
|
||||
ct,
|
||||
cu_processor_id,
|
||||
k);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
if (ret < 0)
|
||||
break;
|
||||
if (!ret) {
|
||||
pcache++;
|
||||
(*num_of_entries)++;
|
||||
mem_available -= sizeof(*pcache);
|
||||
(*size_filled) += sizeof(*pcache);
|
||||
}
|
||||
|
||||
if (!ret) {
|
||||
pcache++;
|
||||
(*num_of_entries)++;
|
||||
mem_available -=
|
||||
sizeof(*pcache);
|
||||
(*size_filled) +=
|
||||
sizeof(*pcache);
|
||||
}
|
||||
|
||||
/* Move to next CU block */
|
||||
cu_processor_id +=
|
||||
pcache_info[ct].num_cu_shared;
|
||||
}
|
||||
}
|
||||
/* Move to next CU block */
|
||||
num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
|
||||
cu_info->num_cu_per_sh) ?
|
||||
pcache_info[ct].num_cu_shared :
|
||||
(cu_info->num_cu_per_sh - k);
|
||||
cu_processor_id += num_cu_shared;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ret = fill_in_l2_l3_pcache(pcache,
|
||||
pcache_info,
|
||||
cu_info,
|
||||
mem_available,
|
||||
ct,
|
||||
cu_processor_id);
|
||||
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
if (!ret) {
|
||||
pcache++;
|
||||
(*num_of_entries)++;
|
||||
mem_available -= sizeof(*pcache);
|
||||
(*size_filled) += sizeof(*pcache);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
|
||||
@ -1100,6 +1749,92 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ACPI_NUMA
|
||||
static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
|
||||
{
|
||||
struct acpi_table_header *table_header = NULL;
|
||||
struct acpi_subtable_header *sub_header = NULL;
|
||||
unsigned long table_end, subtable_len;
|
||||
u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
|
||||
pci_dev_id(kdev->pdev);
|
||||
u32 bdf;
|
||||
acpi_status status;
|
||||
struct acpi_srat_cpu_affinity *cpu;
|
||||
struct acpi_srat_generic_affinity *gpu;
|
||||
int pxm = 0, max_pxm = 0;
|
||||
int numa_node = NUMA_NO_NODE;
|
||||
bool found = false;
|
||||
|
||||
/* Fetch the SRAT table from ACPI */
|
||||
status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
|
||||
if (status == AE_NOT_FOUND) {
|
||||
pr_warn("SRAT table not found\n");
|
||||
return;
|
||||
} else if (ACPI_FAILURE(status)) {
|
||||
const char *err = acpi_format_exception(status);
|
||||
pr_err("SRAT table error: %s\n", err);
|
||||
return;
|
||||
}
|
||||
|
||||
table_end = (unsigned long)table_header + table_header->length;
|
||||
|
||||
/* Parse all entries looking for a match. */
|
||||
sub_header = (struct acpi_subtable_header *)
|
||||
((unsigned long)table_header +
|
||||
sizeof(struct acpi_table_srat));
|
||||
subtable_len = sub_header->length;
|
||||
|
||||
while (((unsigned long)sub_header) + subtable_len < table_end) {
|
||||
/*
|
||||
* If length is 0, break from this loop to avoid
|
||||
* infinite loop.
|
||||
*/
|
||||
if (subtable_len == 0) {
|
||||
pr_err("SRAT invalid zero length\n");
|
||||
break;
|
||||
}
|
||||
|
||||
switch (sub_header->type) {
|
||||
case ACPI_SRAT_TYPE_CPU_AFFINITY:
|
||||
cpu = (struct acpi_srat_cpu_affinity *)sub_header;
|
||||
pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
|
||||
cpu->proximity_domain_lo;
|
||||
if (pxm > max_pxm)
|
||||
max_pxm = pxm;
|
||||
break;
|
||||
case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
|
||||
gpu = (struct acpi_srat_generic_affinity *)sub_header;
|
||||
bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
|
||||
*((u16 *)(&gpu->device_handle[2]));
|
||||
if (bdf == pci_id) {
|
||||
found = true;
|
||||
numa_node = pxm_to_node(gpu->proximity_domain);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (found)
|
||||
break;
|
||||
|
||||
sub_header = (struct acpi_subtable_header *)
|
||||
((unsigned long)sub_header + subtable_len);
|
||||
subtable_len = sub_header->length;
|
||||
}
|
||||
|
||||
acpi_put_table(table_header);
|
||||
|
||||
/* Workaround bad cpu-gpu binding case */
|
||||
if (found && (numa_node < 0 ||
|
||||
numa_node > pxm_to_node(max_pxm)))
|
||||
numa_node = 0;
|
||||
|
||||
if (numa_node != NUMA_NO_NODE)
|
||||
set_dev_node(&kdev->pdev->dev, numa_node);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
|
||||
* to its NUMA node
|
||||
* @avail_size: Available size in the memory
|
||||
@ -1140,11 +1875,17 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
|
||||
*/
|
||||
sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
|
||||
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
|
||||
sub_type_hdr->num_hops_xgmi = 1;
|
||||
} else {
|
||||
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
|
||||
}
|
||||
|
||||
sub_type_hdr->proximity_domain_from = proximity_domain;
|
||||
|
||||
#ifdef CONFIG_ACPI_NUMA
|
||||
if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
|
||||
kfd_find_numa_node_in_srat(kdev);
|
||||
#endif
|
||||
#ifdef CONFIG_NUMA
|
||||
if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
|
||||
sub_type_hdr->proximity_domain_to = 0;
|
||||
|
@ -26,10 +26,12 @@
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_device_queue_manager.h"
|
||||
#include "kfd_pm4_headers_vi.h"
|
||||
#include "kfd_pm4_headers_aldebaran.h"
|
||||
#include "cwsr_trap_handler.h"
|
||||
#include "kfd_iommu.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
#include "kfd_smi_events.h"
|
||||
#include "kfd_migrate.h"
|
||||
|
||||
#define MQD_SIZE_ALIGNED 768
|
||||
|
||||
@ -576,7 +578,7 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = {
|
||||
[CHIP_VEGA20] = {&vega20_device_info, NULL},
|
||||
[CHIP_RENOIR] = {&renoir_device_info, NULL},
|
||||
[CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info},
|
||||
[CHIP_ALDEBARAN] = {&aldebaran_device_info, NULL},
|
||||
[CHIP_ALDEBARAN] = {&aldebaran_device_info, &aldebaran_device_info},
|
||||
[CHIP_NAVI10] = {&navi10_device_info, NULL},
|
||||
[CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
|
||||
[CHIP_NAVI14] = {&navi14_device_info, NULL},
|
||||
@ -697,7 +699,9 @@ static int kfd_gws_init(struct kfd_dev *kfd)
|
||||
&& kfd->device_info->asic_family <= CHIP_RAVEN
|
||||
&& kfd->mec2_fw_version >= 0x1b3)
|
||||
|| (kfd->device_info->asic_family == CHIP_ARCTURUS
|
||||
&& kfd->mec2_fw_version >= 0x30))
|
||||
&& kfd->mec2_fw_version >= 0x30)
|
||||
|| (kfd->device_info->asic_family == CHIP_ALDEBARAN
|
||||
&& kfd->mec2_fw_version >= 0x28))
|
||||
ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
|
||||
amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);
|
||||
|
||||
@ -713,7 +717,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
||||
struct drm_device *ddev,
|
||||
const struct kgd2kfd_shared_resources *gpu_resources)
|
||||
{
|
||||
unsigned int size;
|
||||
unsigned int size, map_process_packet_size;
|
||||
|
||||
kfd->ddev = ddev;
|
||||
kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
|
||||
@ -748,7 +752,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
||||
* calculate max size of runlist packet.
|
||||
* There can be only 2 packets at once
|
||||
*/
|
||||
size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) +
|
||||
map_process_packet_size =
|
||||
kfd->device_info->asic_family == CHIP_ALDEBARAN ?
|
||||
sizeof(struct pm4_mes_map_process_aldebaran) :
|
||||
sizeof(struct pm4_mes_map_process);
|
||||
size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
|
||||
max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
|
||||
+ sizeof(struct pm4_mes_runlist)) * 2;
|
||||
|
||||
@ -814,6 +822,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
||||
|
||||
kfd_cwsr_init(kfd);
|
||||
|
||||
svm_migrate_init((struct amdgpu_device *)kfd->kgd);
|
||||
|
||||
if (kfd_resume(kfd))
|
||||
goto kfd_resume_error;
|
||||
|
||||
@ -862,6 +872,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
|
||||
{
|
||||
if (kfd->init_complete) {
|
||||
kgd2kfd_suspend(kfd, false);
|
||||
svm_migrate_fini((struct amdgpu_device *)kfd->kgd);
|
||||
device_queue_manager_uninit(kfd->dqm);
|
||||
kfd_interrupt_exit(kfd);
|
||||
kfd_topology_remove_device(kfd);
|
||||
|
@ -738,7 +738,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
|
||||
pdd = qpd_to_pdd(qpd);
|
||||
/* Retrieve PD base */
|
||||
pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
|
||||
pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
|
||||
|
||||
dqm_lock(dqm);
|
||||
if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
|
||||
@ -821,7 +821,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
|
||||
|
||||
pdd = qpd_to_pdd(qpd);
|
||||
/* Retrieve PD base */
|
||||
pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
|
||||
pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
|
||||
|
||||
dqm_lock(dqm);
|
||||
if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
|
||||
@ -873,7 +873,7 @@ static int register_process(struct device_queue_manager *dqm,
|
||||
|
||||
pdd = qpd_to_pdd(qpd);
|
||||
/* Retrieve PD base */
|
||||
pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
|
||||
pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
|
||||
|
||||
dqm_lock(dqm);
|
||||
list_add(&n->list, &dqm->queues);
|
||||
|
@ -61,10 +61,19 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
|
||||
qpd->sh_mem_config =
|
||||
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
|
||||
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
|
||||
if (dqm->dev->noretry &&
|
||||
!dqm->dev->use_iommu_v2)
|
||||
|
||||
if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN) {
|
||||
/* Aldebaran can safely support different XNACK modes
|
||||
* per process
|
||||
*/
|
||||
if (!pdd->process->xnack_enabled)
|
||||
qpd->sh_mem_config |=
|
||||
1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
|
||||
} else if (dqm->dev->noretry &&
|
||||
!dqm->dev->use_iommu_v2) {
|
||||
qpd->sh_mem_config |=
|
||||
1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
|
||||
}
|
||||
|
||||
qpd->sh_mem_ape1_limit = 0;
|
||||
qpd->sh_mem_ape1_base = 0;
|
||||
|
@ -405,6 +405,10 @@ int kfd_init_apertures(struct kfd_process *process)
|
||||
case CHIP_POLARIS12:
|
||||
case CHIP_VEGAM:
|
||||
kfd_init_apertures_vi(pdd, id);
|
||||
/* VI GPUs cannot support SVM with only
|
||||
* 40 bits of virtual address space.
|
||||
*/
|
||||
process->svm_disabled = true;
|
||||
break;
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA12:
|
||||
|
@ -25,7 +25,6 @@
|
||||
#include "soc15_int.h"
|
||||
#include "kfd_device_queue_manager.h"
|
||||
#include "kfd_smi_events.h"
|
||||
#include "amdgpu.h"
|
||||
|
||||
enum SQ_INTERRUPT_WORD_ENCODING {
|
||||
SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0,
|
||||
|
937
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
Normal file
@ -0,0 +1,937 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright 2020-2021 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/hmm.h>
|
||||
#include <linux/dma-direction.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include "amdgpu_sync.h"
|
||||
#include "amdgpu_object.h"
|
||||
#include "amdgpu_vm.h"
|
||||
#include "amdgpu_mn.h"
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_svm.h"
|
||||
#include "kfd_migrate.h"
|
||||
|
||||
static uint64_t
|
||||
svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
|
||||
{
|
||||
return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
|
||||
}
|
||||
|
||||
static int
|
||||
svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
|
||||
dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
struct amdgpu_job *job;
|
||||
unsigned int num_dw, num_bytes;
|
||||
struct dma_fence *fence;
|
||||
uint64_t src_addr, dst_addr;
|
||||
uint64_t pte_flags;
|
||||
void *cpu_addr;
|
||||
int r;
|
||||
|
||||
/* use gart window 0 */
|
||||
*gart_addr = adev->gmc.gart_start;
|
||||
|
||||
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
|
||||
num_bytes = npages * 8;
|
||||
|
||||
r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
|
||||
AMDGPU_IB_POOL_DELAYED, &job);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
src_addr = num_dw * 4;
|
||||
src_addr += job->ibs[0].gpu_addr;
|
||||
|
||||
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
|
||||
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
|
||||
dst_addr, num_bytes, false);
|
||||
|
||||
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
|
||||
WARN_ON(job->ibs[0].length_dw > num_dw);
|
||||
|
||||
pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
|
||||
pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
|
||||
if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
|
||||
pte_flags |= AMDGPU_PTE_WRITEABLE;
|
||||
pte_flags |= adev->gart.gart_pte_flags;
|
||||
|
||||
cpu_addr = &job->ibs[0].ptr[num_dw];
|
||||
|
||||
r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
|
||||
if (r)
|
||||
goto error_free;
|
||||
|
||||
r = amdgpu_job_submit(job, &adev->mman.entity,
|
||||
AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
|
||||
if (r)
|
||||
goto error_free;
|
||||
|
||||
dma_fence_put(fence);
|
||||
|
||||
return r;
|
||||
|
||||
error_free:
|
||||
amdgpu_job_free(job);
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
 * svm_migrate_copy_memory_gart - sdma copy data between ram and vram
 *
 * @adev: amdgpu device the sdma ring is running on
 * @src: source page address array
 * @dst: destination page address array
 * @npages: number of pages to copy
 * @direction: enum MIGRATION_COPY_DIR
 * @mfence: output, sdma fence to signal after sdma is done
 *
 * ram address uses GART table continuous entries mapping to ram pages,
 * vram address uses direct mapping of vram pages, which must have npages
 * number of continuous pages.
 * GART update and sdma use the same buffer copy function ring; the copy is
 * split into multiple GTT_MAX_PAGES transfers, all sdma operations are
 * serialized, and the fence of the last sdma transfer is returned so the
 * caller can check that the memory copy is done.
 *
 * Context: Process context, takes and releases gtt_window_lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
|
||||
|
||||
static int
|
||||
svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
|
||||
uint64_t *vram, uint64_t npages,
|
||||
enum MIGRATION_COPY_DIR direction,
|
||||
struct dma_fence **mfence)
|
||||
{
|
||||
const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
|
||||
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
|
||||
uint64_t gart_s, gart_d;
|
||||
struct dma_fence *next;
|
||||
uint64_t size;
|
||||
int r;
|
||||
|
||||
mutex_lock(&adev->mman.gtt_window_lock);
|
||||
|
||||
while (npages) {
|
||||
size = min(GTT_MAX_PAGES, npages);
|
||||
|
||||
if (direction == FROM_VRAM_TO_RAM) {
|
||||
gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
|
||||
r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);
|
||||
|
||||
} else if (direction == FROM_RAM_TO_VRAM) {
|
||||
r = svm_migrate_gart_map(ring, size, sys, &gart_s,
|
||||
KFD_IOCTL_SVM_FLAG_GPU_RO);
|
||||
gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
|
||||
}
|
||||
if (r) {
|
||||
pr_debug("failed %d to create gart mapping\n", r);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
|
||||
NULL, &next, false, true, false);
|
||||
if (r) {
|
||||
pr_debug("failed %d to copy memory\n", r);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
dma_fence_put(*mfence);
|
||||
*mfence = next;
|
||||
npages -= size;
|
||||
if (npages) {
|
||||
sys += size;
|
||||
vram += size;
|
||||
}
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&adev->mman.gtt_window_lock);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
 * svm_migrate_copy_done - wait for the sdma memory copy to finish
 *
 * @adev: amdgpu device the sdma memory copy is executing on
 * @mfence: migrate fence
 *
 * Wait for the dma fence to be signaled; if the copy was split into multiple
 * sdma operations, this is the fence of the last sdma operation.
 *
 * Context: called after svm_migrate_copy_memory
 *
 * Return:
 * 0 - success
 * otherwise - error code from dma fence signal
 */
|
||||
static int
|
||||
svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
if (mfence) {
|
||||
r = dma_fence_wait(mfence, false);
|
||||
dma_fence_put(mfence);
|
||||
pr_debug("sdma copy memory fence done\n");
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
svm_migrate_node_physical_addr(struct amdgpu_device *adev,
|
||||
struct drm_mm_node **mm_node, uint64_t *offset)
|
||||
{
|
||||
struct drm_mm_node *node = *mm_node;
|
||||
uint64_t pos = *offset;
|
||||
|
||||
if (node->start == AMDGPU_BO_INVALID_OFFSET) {
|
||||
pr_debug("drm node is not validated\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
pr_debug("vram node start 0x%llx npages 0x%llx\n", node->start,
|
||||
node->size);
|
||||
|
||||
if (pos >= node->size) {
|
||||
do {
|
||||
pos -= node->size;
|
||||
node++;
|
||||
} while (pos >= node->size);
|
||||
|
||||
*mm_node = node;
|
||||
*offset = pos;
|
||||
}
|
||||
|
||||
return (node->start + pos) << PAGE_SHIFT;
|
||||
}
|
||||
|
||||
unsigned long
|
||||
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
|
||||
{
|
||||
return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
static void
|
||||
svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
page = pfn_to_page(pfn);
|
||||
page->zone_device_data = prange;
|
||||
get_page(page);
|
||||
lock_page(page);
|
||||
}
|
||||
|
||||
static void
|
||||
svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr));
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
static unsigned long
|
||||
svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
|
||||
{
|
||||
unsigned long addr;
|
||||
|
||||
addr = page_to_pfn(page) << PAGE_SHIFT;
|
||||
return (addr - adev->kfd.dev->pgmap.range.start);
|
||||
}
|
||||
|
||||
static struct page *
|
||||
svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
|
||||
if (page)
|
||||
lock_page(page);
|
||||
|
||||
return page;
|
||||
}
|
||||
|
||||
static void svm_migrate_put_sys_page(unsigned long addr)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
page = pfn_to_page(addr >> PAGE_SHIFT);
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
static int
|
||||
svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
|
||||
struct migrate_vma *migrate, struct dma_fence **mfence,
|
||||
dma_addr_t *scratch)
|
||||
{
|
||||
uint64_t npages = migrate->cpages;
|
||||
struct device *dev = adev->dev;
|
||||
struct drm_mm_node *node;
|
||||
dma_addr_t *src;
|
||||
uint64_t *dst;
|
||||
uint64_t vram_addr;
|
||||
uint64_t offset;
|
||||
uint64_t i, j;
|
||||
int r;
|
||||
|
||||
pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
|
||||
prange->last);
|
||||
|
||||
src = scratch;
|
||||
dst = (uint64_t *)(scratch + npages);
|
||||
|
||||
r = svm_range_vram_node_new(adev, prange, true);
|
||||
if (r) {
|
||||
pr_debug("failed %d get 0x%llx pages from vram\n", r, npages);
|
||||
goto out;
|
||||
}
|
||||
|
||||
node = prange->ttm_res->mm_node;
|
||||
offset = prange->offset;
|
||||
vram_addr = svm_migrate_node_physical_addr(adev, &node, &offset);
|
||||
if (!vram_addr) {
|
||||
WARN_ONCE(1, "vram node address is 0\n");
|
||||
r = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = j = 0; i < npages; i++) {
|
||||
struct page *spage;
|
||||
|
||||
dst[i] = vram_addr + (j << PAGE_SHIFT);
|
||||
migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
|
||||
svm_migrate_get_vram_page(prange, migrate->dst[i]);
|
||||
|
||||
migrate->dst[i] = migrate_pfn(migrate->dst[i]);
|
||||
migrate->dst[i] |= MIGRATE_PFN_LOCKED;
|
||||
|
||||
if (migrate->src[i] & MIGRATE_PFN_VALID) {
|
||||
spage = migrate_pfn_to_page(migrate->src[i]);
|
||||
src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
|
||||
DMA_TO_DEVICE);
|
||||
r = dma_mapping_error(dev, src[i]);
|
||||
if (r) {
|
||||
pr_debug("failed %d dma_map_page\n", r);
|
||||
goto out_free_vram_pages;
|
||||
}
|
||||
} else {
|
||||
if (j) {
|
||||
r = svm_migrate_copy_memory_gart(
|
||||
adev, src + i - j,
|
||||
dst + i - j, j,
|
||||
FROM_RAM_TO_VRAM,
|
||||
mfence);
|
||||
if (r)
|
||||
goto out_free_vram_pages;
|
||||
offset += j;
|
||||
vram_addr = (node->start + offset) << PAGE_SHIFT;
|
||||
j = 0;
|
||||
} else {
|
||||
offset++;
|
||||
vram_addr += PAGE_SIZE;
|
||||
}
|
||||
if (offset >= node->size) {
|
||||
node++;
|
||||
pr_debug("next node size 0x%llx\n", node->size);
|
||||
vram_addr = node->start << PAGE_SHIFT;
|
||||
offset = 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n",
|
||||
src[i] >> PAGE_SHIFT, page_to_pfn(spage));
|
||||
|
||||
if (j + offset >= node->size - 1 && i < npages - 1) {
|
||||
r = svm_migrate_copy_memory_gart(adev, src + i - j,
|
||||
dst + i - j, j + 1,
|
||||
FROM_RAM_TO_VRAM,
|
||||
mfence);
|
||||
if (r)
|
||||
goto out_free_vram_pages;
|
||||
|
||||
node++;
|
||||
pr_debug("next node size 0x%llx\n", node->size);
|
||||
vram_addr = node->start << PAGE_SHIFT;
|
||||
offset = 0;
|
||||
j = 0;
|
||||
} else {
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j,
|
||||
FROM_RAM_TO_VRAM, mfence);
|
||||
|
||||
out_free_vram_pages:
|
||||
if (r) {
|
||||
pr_debug("failed %d to copy memory to vram\n", r);
|
||||
while (i--) {
|
||||
svm_migrate_put_vram_page(adev, dst[i]);
|
||||
migrate->dst[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
|
||||
struct vm_area_struct *vma, uint64_t start,
|
||||
uint64_t end)
|
||||
{
|
||||
uint64_t npages = (end - start) >> PAGE_SHIFT;
|
||||
struct dma_fence *mfence = NULL;
|
||||
struct migrate_vma migrate;
|
||||
dma_addr_t *scratch;
|
||||
size_t size;
|
||||
void *buf;
|
||||
int r = -ENOMEM;
|
||||
int retry = 0;
|
||||
|
||||
memset(&migrate, 0, sizeof(migrate));
|
||||
migrate.vma = vma;
|
||||
migrate.start = start;
|
||||
migrate.end = end;
|
||||
migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
|
||||
migrate.pgmap_owner = adev;
|
||||
|
||||
size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
|
||||
size *= npages;
|
||||
buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
|
||||
if (!buf)
|
||||
goto out;
|
||||
|
||||
migrate.src = buf;
|
||||
migrate.dst = migrate.src + npages;
|
||||
scratch = (dma_addr_t *)(migrate.dst + npages);
|
||||
|
||||
retry:
|
||||
r = migrate_vma_setup(&migrate);
|
||||
if (r) {
|
||||
pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
|
||||
r, prange->svms, prange->start, prange->last);
|
||||
goto out_free;
|
||||
}
|
||||
if (migrate.cpages != npages) {
|
||||
pr_debug("collect 0x%lx/0x%llx pages, retry\n", migrate.cpages,
|
||||
npages);
|
||||
migrate_vma_finalize(&migrate);
|
||||
if (retry++ >= 3) {
|
||||
r = -ENOMEM;
|
||||
pr_debug("failed %d migrate svms 0x%p [0x%lx 0x%lx]\n",
|
||||
r, prange->svms, prange->start, prange->last);
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (migrate.cpages) {
|
||||
r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence,
|
||||
scratch);
|
||||
migrate_vma_pages(&migrate);
|
||||
svm_migrate_copy_done(adev, mfence);
|
||||
migrate_vma_finalize(&migrate);
|
||||
}
|
||||
|
||||
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
|
||||
svm_range_free_dma_mappings(prange);
|
||||
|
||||
out_free:
|
||||
kvfree(buf);
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* svm_migrate_ram_to_vram - migrate svm range from system to device
|
||||
* @prange: range structure
|
||||
* @best_loc: the device to migrate to
|
||||
* @mm: the process mm structure
|
||||
*
|
||||
 * Context: Process context, caller holds mmap read lock, svms lock, prange lock
|
||||
*
|
||||
* Return:
|
||||
* 0 - OK, otherwise error code
|
||||
*/
|
||||
static int
|
||||
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
unsigned long addr, start, end;
|
||||
struct vm_area_struct *vma;
|
||||
struct amdgpu_device *adev;
|
||||
int r = 0;
|
||||
|
||||
if (prange->actual_loc == best_loc) {
|
||||
pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
|
||||
prange->svms, prange->start, prange->last, best_loc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
adev = svm_range_get_adev_by_id(prange, best_loc);
|
||||
if (!adev) {
|
||||
pr_debug("failed to get device by id 0x%x\n", best_loc);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
|
||||
prange->start, prange->last, best_loc);
|
||||
|
||||
/* FIXME: workaround for page locking bug with invalid pages */
|
||||
svm_range_prefault(prange, mm);
|
||||
|
||||
start = prange->start << PAGE_SHIFT;
|
||||
end = (prange->last + 1) << PAGE_SHIFT;
|
||||
|
||||
for (addr = start; addr < end;) {
|
||||
unsigned long next;
|
||||
|
||||
vma = find_vma(mm, addr);
|
||||
if (!vma || addr < vma->vm_start)
|
||||
break;
|
||||
|
||||
next = min(vma->vm_end, end);
|
||||
r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
|
||||
if (r) {
|
||||
pr_debug("failed to migrate\n");
|
||||
break;
|
||||
}
|
||||
addr = next;
|
||||
}
|
||||
|
||||
if (!r)
|
||||
prange->actual_loc = best_loc;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static void svm_migrate_page_free(struct page *page)
|
||||
{
|
||||
/* Keep this function to avoid warning */
|
||||
}
|
||||
|
||||
static int
|
||||
svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
|
||||
struct migrate_vma *migrate, struct dma_fence **mfence,
|
||||
dma_addr_t *scratch)
|
||||
{
|
||||
uint64_t npages = migrate->cpages;
|
||||
struct device *dev = adev->dev;
|
||||
uint64_t *src;
|
||||
dma_addr_t *dst;
|
||||
struct page *dpage;
|
||||
uint64_t i = 0, j;
|
||||
uint64_t addr;
|
||||
int r = 0;
|
||||
|
||||
pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
|
||||
prange->last);
|
||||
|
||||
addr = prange->start << PAGE_SHIFT;
|
||||
|
||||
src = (uint64_t *)(scratch + npages);
|
||||
dst = scratch;
|
||||
|
||||
for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) {
|
||||
struct page *spage;
|
||||
|
||||
spage = migrate_pfn_to_page(migrate->src[i]);
|
||||
if (!spage) {
|
||||
pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n",
|
||||
prange->svms, prange->start, prange->last);
|
||||
r = -ENOMEM;
|
||||
goto out_oom;
|
||||
}
|
||||
src[i] = svm_migrate_addr(adev, spage);
|
||||
if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
|
||||
r = svm_migrate_copy_memory_gart(adev, dst + i - j,
|
||||
src + i - j, j,
|
||||
FROM_VRAM_TO_RAM,
|
||||
mfence);
|
||||
if (r)
|
||||
goto out_oom;
|
||||
j = 0;
|
||||
}
|
||||
|
||||
dpage = svm_migrate_get_sys_page(migrate->vma, addr);
|
||||
if (!dpage) {
|
||||
pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
|
||||
prange->svms, prange->start, prange->last);
|
||||
r = -ENOMEM;
|
||||
goto out_oom;
|
||||
}
|
||||
|
||||
dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
|
||||
r = dma_mapping_error(dev, dst[i]);
|
||||
if (r) {
|
||||
pr_debug("failed %d dma_map_page\n", r);
|
||||
goto out_oom;
|
||||
}
|
||||
|
||||
pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n",
|
||||
dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
|
||||
|
||||
migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
|
||||
migrate->dst[i] |= MIGRATE_PFN_LOCKED;
|
||||
}
|
||||
|
||||
r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j,
|
||||
FROM_VRAM_TO_RAM, mfence);
|
||||
|
||||
out_oom:
|
||||
if (r) {
|
||||
pr_debug("failed %d copy to ram\n", r);
|
||||
while (i--) {
|
||||
svm_migrate_put_sys_page(dst[i]);
|
||||
migrate->dst[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
|
||||
struct vm_area_struct *vma, uint64_t start, uint64_t end)
|
||||
{
|
||||
uint64_t npages = (end - start) >> PAGE_SHIFT;
|
||||
struct dma_fence *mfence = NULL;
|
||||
struct migrate_vma migrate;
|
||||
dma_addr_t *scratch;
|
||||
size_t size;
|
||||
void *buf;
|
||||
int r = -ENOMEM;
|
||||
|
||||
memset(&migrate, 0, sizeof(migrate));
|
||||
migrate.vma = vma;
|
||||
migrate.start = start;
|
||||
migrate.end = end;
|
||||
migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
|
||||
migrate.pgmap_owner = adev;
|
||||
|
||||
size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
|
||||
size *= npages;
|
||||
buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
|
||||
if (!buf)
|
||||
goto out;
|
||||
|
||||
migrate.src = buf;
|
||||
migrate.dst = migrate.src + npages;
|
||||
scratch = (dma_addr_t *)(migrate.dst + npages);
|
||||
|
||||
r = migrate_vma_setup(&migrate);
|
||||
if (r) {
|
||||
pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
|
||||
r, prange->svms, prange->start, prange->last);
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
pr_debug("cpages %ld\n", migrate.cpages);
|
||||
|
||||
if (migrate.cpages) {
|
||||
r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
|
||||
scratch);
|
||||
migrate_vma_pages(&migrate);
|
||||
svm_migrate_copy_done(adev, mfence);
|
||||
migrate_vma_finalize(&migrate);
|
||||
} else {
|
||||
pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
|
||||
prange->start, prange->last);
|
||||
}
|
||||
|
||||
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
|
||||
|
||||
out_free:
|
||||
kvfree(buf);
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
 * svm_migrate_vram_to_ram - migrate svm range from device to system
 * @prange: range structure
 * @mm: process mm, use current->mm if NULL
 *
 * Context: Process context, caller hold mmap read lock, svms lock, prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
{
        struct amdgpu_device *adev;
        struct vm_area_struct *vma;
        unsigned long addr;
        unsigned long start;
        unsigned long end;
        int r = 0;

        if (!prange->actual_loc) {
                pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
                         prange->start, prange->last);
                return 0;
        }

        adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
        if (!adev) {
                pr_debug("failed to get device by id 0x%x\n",
                         prange->actual_loc);
                return -ENODEV;
        }

        pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
                 prange->svms, prange, prange->start, prange->last,
                 prange->actual_loc);

        start = prange->start << PAGE_SHIFT;
        end = (prange->last + 1) << PAGE_SHIFT;

        for (addr = start; addr < end;) {
                unsigned long next;

                vma = find_vma(mm, addr);
                if (!vma || addr < vma->vm_start)
                        break;

                next = min(vma->vm_end, end);
                r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
                if (r) {
                        pr_debug("failed %d to migrate\n", r);
                        break;
                }
                addr = next;
        }

        if (!r) {
                svm_range_vram_node_free(prange);
                prange->actual_loc = 0;
        }
        return r;
}

/**
 * svm_migrate_vram_to_vram - migrate svm range from device to device
 * @prange: range structure
 * @best_loc: the device to migrate to
 * @mm: process mm, use current->mm if NULL
 *
 * Context: Process context, caller hold mmap read lock, svms lock, prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
                         struct mm_struct *mm)
{
        int r;

        /*
         * TODO: for both devices with PCIe large bar or on same xgmi hive, skip
         * system memory as migration bridge
         */

        pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);

        r = svm_migrate_vram_to_ram(prange, mm);
        if (r)
                return r;

        return svm_migrate_ram_to_vram(prange, best_loc, mm);
}

int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
                    struct mm_struct *mm)
{
        if (!prange->actual_loc)
                return svm_migrate_ram_to_vram(prange, best_loc, mm);
        else
                return svm_migrate_vram_to_vram(prange, best_loc, mm);

}

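For orientation, here is a minimal sketch of how a caller might drive these entry points; it assumes the locks named in the kernel-doc above (mmap read lock, svms lock, prange lock) are already held, and the function and parameter names (demo_prefetch, target_gpu_id) are illustrative, not part of this patch:

/* Illustrative only: direction follows prange->actual_loc, which is 0 when
 * the range lives in system memory and otherwise holds the owning GPU id.
 */
static int demo_prefetch(struct svm_range *prange, struct mm_struct *mm,
                         uint32_t target_gpu_id)
{
        if (target_gpu_id == 0)
                /* prefetch to CPU: a no-op unless data currently sits in VRAM */
                return svm_migrate_vram_to_ram(prange, mm);

        /* prefetch to a GPU: ram->vram directly, or vram->vram, which
         * svm_migrate_vram_to_vram() bridges through system memory
         */
        return svm_migrate_to_vram(prange, target_gpu_id, mm);
}
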
|
||||
|
||||
/**
|
||||
* svm_migrate_to_ram - CPU page fault handler
|
||||
* @vmf: CPU vm fault vma, address
|
||||
*
|
||||
* Context: vm fault handler, caller holds the mmap read lock
|
||||
*
|
||||
* Return:
|
||||
* 0 - OK
|
||||
* VM_FAULT_SIGBUS - notice application to have SIGBUS page fault
|
||||
*/
|
||||
static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
|
||||
{
|
||||
unsigned long addr = vmf->address;
|
||||
struct vm_area_struct *vma;
|
||||
enum svm_work_list_ops op;
|
||||
struct svm_range *parent;
|
||||
struct svm_range *prange;
|
||||
struct kfd_process *p;
|
||||
struct mm_struct *mm;
|
||||
int r = 0;
|
||||
|
||||
vma = vmf->vma;
|
||||
mm = vma->vm_mm;
|
||||
|
||||
p = kfd_lookup_process_by_mm(vma->vm_mm);
|
||||
if (!p) {
|
||||
pr_debug("failed find process at fault address 0x%lx\n", addr);
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
addr >>= PAGE_SHIFT;
|
||||
pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);
|
||||
|
||||
mutex_lock(&p->svms.lock);
|
||||
|
||||
prange = svm_range_from_addr(&p->svms, addr, &parent);
|
||||
if (!prange) {
|
||||
pr_debug("cannot find svm range at 0x%lx\n", addr);
|
||||
r = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
mutex_lock(&parent->migrate_mutex);
|
||||
if (prange != parent)
|
||||
mutex_lock_nested(&prange->migrate_mutex, 1);
|
||||
|
||||
if (!prange->actual_loc)
|
||||
goto out_unlock_prange;
|
||||
|
||||
svm_range_lock(parent);
|
||||
if (prange != parent)
|
||||
mutex_lock_nested(&prange->lock, 1);
|
||||
r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
|
||||
if (prange != parent)
|
||||
mutex_unlock(&prange->lock);
|
||||
svm_range_unlock(parent);
|
||||
if (r) {
|
||||
pr_debug("failed %d to split range by granularity\n", r);
|
||||
goto out_unlock_prange;
|
||||
}
|
||||
|
||||
r = svm_migrate_vram_to_ram(prange, mm);
|
||||
if (r)
|
||||
pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
|
||||
prange, prange->start, prange->last);
|
||||
|
||||
/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
|
||||
if (p->xnack_enabled && parent == prange)
|
||||
op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
|
||||
else
|
||||
op = SVM_OP_UPDATE_RANGE_NOTIFIER;
|
||||
svm_range_add_list_work(&p->svms, parent, mm, op);
|
||||
schedule_deferred_list_work(&p->svms);
|
||||
|
||||
out_unlock_prange:
|
||||
if (prange != parent)
|
||||
mutex_unlock(&prange->migrate_mutex);
|
||||
mutex_unlock(&parent->migrate_mutex);
|
||||
out:
|
||||
mutex_unlock(&p->svms.lock);
|
||||
kfd_unref_process(p);
|
||||
|
||||
pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);
|
||||
|
||||
return r ? VM_FAULT_SIGBUS : 0;
|
||||
}
|
||||
|
||||
static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
        .page_free              = svm_migrate_page_free,
        .migrate_to_ram         = svm_migrate_to_ram,
};

/* Each VRAM page uses sizeof(struct page) on system memory */
#define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))

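As a quick worked example of what SVM_HMM_PAGE_STRUCT_SIZE() costs, assuming 4 KiB pages and a 64-byte struct page (both typical but configuration-dependent, so treat the numbers as illustrative):

/*
 * For a 16 GiB VRAM aperture:
 *   16 GiB / 4 KiB pages       = 4,194,304 struct page entries
 *   4,194,304 * 64 bytes each  = 256 MiB of system memory
 * i.e. roughly size/64 of system RAM, which is what
 * amdgpu_amdkfd_reserve_system_mem() accounts for in svm_migrate_init() below.
 */
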
int svm_migrate_init(struct amdgpu_device *adev)
{
        struct kfd_dev *kfddev = adev->kfd.dev;
        struct dev_pagemap *pgmap;
        struct resource *res;
        unsigned long size;
        void *r;

        /* Page migration works on Vega10 or newer */
        if (kfddev->device_info->asic_family < CHIP_VEGA10)
                return -EINVAL;

        pgmap = &kfddev->pgmap;
        memset(pgmap, 0, sizeof(*pgmap));

        /* TODO: register all vram to HMM for now.
         * should remove reserved size
         */
        size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
        res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
        if (IS_ERR(res))
                return -ENOMEM;

        pgmap->type = MEMORY_DEVICE_PRIVATE;
        pgmap->nr_range = 1;
        pgmap->range.start = res->start;
        pgmap->range.end = res->end;
        pgmap->ops = &svm_migrate_pgmap_ops;
        pgmap->owner = adev;
        pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
        r = devm_memremap_pages(adev->dev, pgmap);
        if (IS_ERR(r)) {
                pr_err("failed to register HMM device memory\n");
                devm_release_mem_region(adev->dev, res->start,
                                        res->end - res->start + 1);
                return PTR_ERR(r);
        }

        pr_debug("reserve %ldMB system memory for VRAM pages struct\n",
                 SVM_HMM_PAGE_STRUCT_SIZE(size) >> 20);

        amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));

        pr_info("HMM registered %ldMB device memory\n", size >> 20);

        return 0;
}

void svm_migrate_fini(struct amdgpu_device *adev)
{
        struct dev_pagemap *pgmap = &adev->kfd.dev->pgmap;

        devm_memunmap_pages(adev->dev, pgmap);
        devm_release_mem_region(adev->dev, pgmap->range.start,
                                pgmap->range.end - pgmap->range.start + 1);
}

drivers/gpu/drm/amd/amdkfd/kfd_migrate.h (new file, 65 lines)
@ -0,0 +1,65 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright 2020-2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef KFD_MIGRATE_H_
#define KFD_MIGRATE_H_

#if IS_ENABLED(CONFIG_HSA_AMD_SVM)

#include <linux/rwsem.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/sched/mm.h>
#include <linux/hmm.h>
#include "kfd_priv.h"
#include "kfd_svm.h"

enum MIGRATION_COPY_DIR {
        FROM_RAM_TO_VRAM = 0,
        FROM_VRAM_TO_RAM
};

int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
                        struct mm_struct *mm);
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);

int svm_migrate_init(struct amdgpu_device *adev);
void svm_migrate_fini(struct amdgpu_device *adev);

#else

static inline int svm_migrate_init(struct amdgpu_device *adev)
{
        return 0;
}
static inline void svm_migrate_fini(struct amdgpu_device *adev)
{
        /* empty */
}

#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */

#endif /* KFD_MIGRATE_H_ */

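Because the !CONFIG_HSA_AMD_SVM stubs above succeed and do nothing, callers can invoke the init/fini pair unconditionally; a minimal sketch under that assumption (the surrounding probe function is hypothetical, not part of this patch):

/* Illustrative probe-time hookup; error handling trimmed. */
static int demo_kfd_device_init(struct amdgpu_device *adev)
{
        int r;

        r = svm_migrate_init(adev);     /* no-op stub when SVM is compiled out */
        if (r)
                return r;
        /* ... rest of device bring-up ... */
        return 0;
}
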
@ -124,14 +124,14 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
|
||||
{
|
||||
unsigned int alloc_size_bytes;
|
||||
unsigned int *rl_buffer, rl_wptr, i;
|
||||
int retval, proccesses_mapped;
|
||||
int retval, processes_mapped;
|
||||
struct device_process_node *cur;
|
||||
struct qcm_process_device *qpd;
|
||||
struct queue *q;
|
||||
struct kernel_queue *kq;
|
||||
bool is_over_subscription;
|
||||
|
||||
rl_wptr = retval = proccesses_mapped = 0;
|
||||
rl_wptr = retval = processes_mapped = 0;
|
||||
|
||||
retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
|
||||
&alloc_size_bytes, &is_over_subscription);
|
||||
@ -148,7 +148,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
|
||||
list_for_each_entry(cur, queues, list) {
|
||||
qpd = cur->qpd;
|
||||
/* build map process packet */
|
||||
if (proccesses_mapped >= pm->dqm->processes_count) {
|
||||
if (processes_mapped >= pm->dqm->processes_count) {
|
||||
pr_debug("Not enough space left in runlist IB\n");
|
||||
pm_release_ib(pm);
|
||||
return -ENOMEM;
|
||||
@ -158,7 +158,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
|
||||
if (retval)
|
||||
return retval;
|
||||
|
||||
proccesses_mapped++;
|
||||
processes_mapped++;
|
||||
inc_wptr(&rl_wptr, pm->pmf->map_process_size,
|
||||
alloc_size_bytes);
|
||||
|
||||
@ -242,7 +242,6 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
|
||||
case CHIP_RAVEN:
|
||||
case CHIP_RENOIR:
|
||||
case CHIP_ARCTURUS:
|
||||
case CHIP_ALDEBARAN:
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI12:
|
||||
case CHIP_NAVI14:
|
||||
@ -252,6 +251,9 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
|
||||
case CHIP_DIMGREY_CAVEFISH:
|
||||
pm->pmf = &kfd_v9_pm_funcs;
|
||||
break;
|
||||
case CHIP_ALDEBARAN:
|
||||
pm->pmf = &kfd_aldebaran_pm_funcs;
|
||||
break;
|
||||
default:
|
||||
WARN(1, "Unexpected ASIC family %u",
|
||||
dqm->dev->device_info->asic_family);
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "kfd_kernel_queue.h"
|
||||
#include "kfd_device_queue_manager.h"
|
||||
#include "kfd_pm4_headers_ai.h"
|
||||
#include "kfd_pm4_headers_aldebaran.h"
|
||||
#include "kfd_pm4_opcodes.h"
|
||||
#include "gc/gc_10_1_0_sh_mask.h"
|
||||
|
||||
@ -35,7 +36,6 @@ static int pm_map_process_v9(struct packet_manager *pm,
|
||||
|
||||
packet = (struct pm4_mes_map_process *)buffer;
|
||||
memset(buffer, 0, sizeof(struct pm4_mes_map_process));
|
||||
|
||||
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
|
||||
sizeof(struct pm4_mes_map_process));
|
||||
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
|
||||
@ -73,6 +73,45 @@ static int pm_map_process_v9(struct packet_manager *pm,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pm_map_process_aldebaran(struct packet_manager *pm,
|
||||
uint32_t *buffer, struct qcm_process_device *qpd)
|
||||
{
|
||||
struct pm4_mes_map_process_aldebaran *packet;
|
||||
uint64_t vm_page_table_base_addr = qpd->page_table_base;
|
||||
|
||||
packet = (struct pm4_mes_map_process_aldebaran *)buffer;
|
||||
memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran));
|
||||
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
|
||||
sizeof(struct pm4_mes_map_process_aldebaran));
|
||||
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
|
||||
packet->bitfields2.process_quantum = 10;
|
||||
packet->bitfields2.pasid = qpd->pqm->process->pasid;
|
||||
packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
|
||||
packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
|
||||
packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
|
||||
packet->bitfields14.num_oac = qpd->num_oac;
|
||||
packet->bitfields14.sdma_enable = 1;
|
||||
packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
|
||||
|
||||
packet->sh_mem_config = qpd->sh_mem_config;
|
||||
packet->sh_mem_bases = qpd->sh_mem_bases;
|
||||
if (qpd->tba_addr) {
|
||||
packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
|
||||
packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
|
||||
packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
|
||||
}
|
||||
|
||||
packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
|
||||
packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
|
||||
|
||||
packet->vm_context_page_table_base_addr_lo32 =
|
||||
lower_32_bits(vm_page_table_base_addr);
|
||||
packet->vm_context_page_table_base_addr_hi32 =
|
||||
upper_32_bits(vm_page_table_base_addr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
|
||||
uint64_t ib, size_t ib_size_in_dwords, bool chain)
|
||||
{
|
||||
@ -324,3 +363,20 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
|
||||
.query_status_size = sizeof(struct pm4_mes_query_status),
|
||||
.release_mem_size = 0,
|
||||
};
|
||||
|
||||
const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
|
||||
.map_process = pm_map_process_aldebaran,
|
||||
.runlist = pm_runlist_v9,
|
||||
.set_resources = pm_set_resources_v9,
|
||||
.map_queues = pm_map_queues_v9,
|
||||
.unmap_queues = pm_unmap_queues_v9,
|
||||
.query_status = pm_query_status_v9,
|
||||
.release_mem = NULL,
|
||||
.map_process_size = sizeof(struct pm4_mes_map_process_aldebaran),
|
||||
.runlist_size = sizeof(struct pm4_mes_runlist),
|
||||
.set_resources_size = sizeof(struct pm4_mes_set_resources),
|
||||
.map_queues_size = sizeof(struct pm4_mes_map_queues),
|
||||
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
|
||||
.query_status_size = sizeof(struct pm4_mes_query_status),
|
||||
.release_mem_size = 0,
|
||||
};
|
||||
|
drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h (new file, 93 lines)
@ -0,0 +1,93 @@
|
||||
/*
|
||||
* Copyright 2020 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
/*--------------------MES_MAP_PROCESS (PER DEBUG VMID)--------------------*/
|
||||
|
||||
#ifndef PM4_MES_MAP_PROCESS_PER_DEBUG_VMID_DEFINED
|
||||
#define PM4_MES_MAP_PROCESS_PER_DEBUG_VMID_DEFINED
|
||||
|
||||
struct pm4_mes_map_process_aldebaran {
|
||||
union {
|
||||
union PM4_MES_TYPE_3_HEADER header; /* header */
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t pasid:16; /* 0 - 15 */
|
||||
uint32_t single_memops:1; /* 16 */
|
||||
uint32_t reserved1:1; /* 17 */
|
||||
uint32_t debug_vmid:4; /* 18 - 21 */
|
||||
uint32_t new_debug:1; /* 22 */
|
||||
uint32_t tmz:1; /* 23 */
|
||||
uint32_t diq_enable:1; /* 24 */
|
||||
uint32_t process_quantum:7; /* 25 - 31 */
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
uint32_t vm_context_page_table_base_addr_lo32;
|
||||
|
||||
uint32_t vm_context_page_table_base_addr_hi32;
|
||||
|
||||
uint32_t sh_mem_bases;
|
||||
|
||||
uint32_t sh_mem_config;
|
||||
|
||||
uint32_t sq_shader_tba_lo;
|
||||
|
||||
uint32_t sq_shader_tba_hi;
|
||||
|
||||
uint32_t sq_shader_tma_lo;
|
||||
|
||||
uint32_t sq_shader_tma_hi;
|
||||
|
||||
uint32_t reserved6;
|
||||
|
||||
uint32_t gds_addr_lo;
|
||||
|
||||
uint32_t gds_addr_hi;
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t num_gws:7;
|
||||
uint32_t sdma_enable:1;
|
||||
uint32_t num_oac:4;
|
||||
uint32_t gds_size_hi:4;
|
||||
uint32_t gds_size:6;
|
||||
uint32_t num_queues:10;
|
||||
} bitfields14;
|
||||
uint32_t ordinal14;
|
||||
};
|
||||
|
||||
uint32_t spi_gdbg_per_vmid_cntl;
|
||||
|
||||
uint32_t tcp_watch_cntl[4];
|
||||
|
||||
uint32_t completion_signal_lo;
|
||||
|
||||
uint32_t completion_signal_hi;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
@ -322,6 +322,9 @@ struct kfd_dev {
|
||||
unsigned int max_doorbell_slices;
|
||||
|
||||
int noretry;
|
||||
|
||||
/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
|
||||
struct dev_pagemap pgmap;
|
||||
};
|
||||
|
||||
enum kfd_mempool {
|
||||
@ -669,7 +672,7 @@ struct kfd_process_device {
|
||||
|
||||
/* VM context for GPUVM allocations */
|
||||
struct file *drm_file;
|
||||
void *vm;
|
||||
void *drm_priv;
|
||||
|
||||
/* GPUVM allocations storage */
|
||||
struct idr alloc_idr;
|
||||
@ -731,6 +734,17 @@ struct kfd_process_device {
|
||||
|
||||
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
|
||||
|
||||
struct svm_range_list {
|
||||
struct mutex lock;
|
||||
struct rb_root_cached objects;
|
||||
struct list_head list;
|
||||
struct work_struct deferred_list_work;
|
||||
struct list_head deferred_range_list;
|
||||
spinlock_t deferred_list_lock;
|
||||
atomic_t evicted_ranges;
|
||||
struct delayed_work restore_work;
|
||||
};
|
||||
|
||||
/* Process data */
|
||||
struct kfd_process {
|
||||
/*
|
||||
@ -809,6 +823,12 @@ struct kfd_process {
|
||||
struct kobject *kobj;
|
||||
struct kobject *kobj_queues;
|
||||
struct attribute attr_pasid;
|
||||
|
||||
/* shared virtual memory registered by this process */
|
||||
struct svm_range_list svms;
|
||||
bool svm_disabled;
|
||||
|
||||
bool xnack_enabled;
|
||||
};
|
||||
|
||||
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
|
||||
@ -842,6 +862,20 @@ struct kfd_process *kfd_create_process(struct file *filep);
|
||||
struct kfd_process *kfd_get_process(const struct task_struct *);
|
||||
struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
|
||||
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
|
||||
|
||||
int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
|
||||
int kfd_process_gpuid_from_kgd(struct kfd_process *p,
|
||||
struct amdgpu_device *adev, uint32_t *gpuid,
|
||||
uint32_t *gpuidx);
|
||||
static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
|
||||
uint32_t gpuidx, uint32_t *gpuid) {
|
||||
return gpuidx < p->n_pdds ? p->pdds[gpuidx]->dev->id : -EINVAL;
|
||||
}
|
||||
static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
|
||||
struct kfd_process *p, uint32_t gpuidx) {
|
||||
return gpuidx < p->n_pdds ? p->pdds[gpuidx] : NULL;
|
||||
}
|
||||
|
||||
void kfd_unref_process(struct kfd_process *p);
|
||||
int kfd_process_evict_queues(struct kfd_process *p);
|
||||
int kfd_process_restore_queues(struct kfd_process *p);
|
||||
@ -857,6 +891,8 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
|
||||
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
|
||||
struct kfd_process *p);
|
||||
|
||||
bool kfd_process_xnack_mode(struct kfd_process *p, bool supported);
|
||||
|
||||
int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
|
||||
struct vm_area_struct *vma);
|
||||
|
||||
@ -1052,6 +1088,7 @@ struct packet_manager_funcs {
|
||||
|
||||
extern const struct packet_manager_funcs kfd_vi_pm_funcs;
|
||||
extern const struct packet_manager_funcs kfd_v9_pm_funcs;
|
||||
extern const struct packet_manager_funcs kfd_aldebaran_pm_funcs;
|
||||
|
||||
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
|
||||
void pm_uninit(struct packet_manager *pm, bool hanging);
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include <linux/pm_runtime.h>
|
||||
#include "amdgpu_amdkfd.h"
|
||||
#include "amdgpu.h"
|
||||
#include "kfd_svm.h"
|
||||
|
||||
struct mm_struct;
|
||||
|
||||
@ -42,6 +43,7 @@ struct mm_struct;
|
||||
#include "kfd_device_queue_manager.h"
|
||||
#include "kfd_dbgmgr.h"
|
||||
#include "kfd_iommu.h"
|
||||
#include "kfd_svm.h"
|
||||
|
||||
/*
|
||||
* List of struct kfd_process (field kfd_process).
|
||||
@ -250,7 +252,7 @@ cleanup:
|
||||
}
|
||||
|
||||
/**
|
||||
* @kfd_get_cu_occupancy() - Collect number of waves in-flight on this device
|
||||
* @kfd_get_cu_occupancy - Collect number of waves in-flight on this device
|
||||
* by current process. Translates acquired wave count into number of compute units
|
||||
* that are occupied.
|
||||
*
|
||||
@ -647,8 +649,9 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
|
||||
{
|
||||
struct kfd_dev *dev = pdd->dev;
|
||||
|
||||
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
|
||||
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL);
|
||||
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv);
|
||||
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv,
|
||||
NULL);
|
||||
}
|
||||
|
||||
/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
|
||||
@ -667,11 +670,11 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
|
||||
int err;
|
||||
|
||||
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
|
||||
pdd->vm, &mem, NULL, flags);
|
||||
pdd->drm_priv, &mem, NULL, flags);
|
||||
if (err)
|
||||
goto err_alloc_mem;
|
||||
|
||||
err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
|
||||
err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv);
|
||||
if (err)
|
||||
goto err_map_mem;
|
||||
|
||||
@ -712,7 +715,8 @@ sync_memory_failed:
|
||||
return err;
|
||||
|
||||
err_map_mem:
|
||||
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL);
|
||||
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, pdd->drm_priv,
|
||||
NULL);
|
||||
err_alloc_mem:
|
||||
*kptr = NULL;
|
||||
return err;
|
||||
@ -901,13 +905,14 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
|
||||
for (i = 0; i < p->n_pdds; i++) {
|
||||
struct kfd_process_device *peer_pdd = p->pdds[i];
|
||||
|
||||
if (!peer_pdd->vm)
|
||||
if (!peer_pdd->drm_priv)
|
||||
continue;
|
||||
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
|
||||
peer_pdd->dev->kgd, mem, peer_pdd->vm);
|
||||
peer_pdd->dev->kgd, mem, peer_pdd->drm_priv);
|
||||
}
|
||||
|
||||
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL);
|
||||
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem,
|
||||
pdd->drm_priv, NULL);
|
||||
kfd_process_device_remove_obj_handle(pdd, id);
|
||||
}
|
||||
}
|
||||
@ -932,7 +937,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
|
||||
|
||||
if (pdd->drm_file) {
|
||||
amdgpu_amdkfd_gpuvm_release_process_vm(
|
||||
pdd->dev->kgd, pdd->vm);
|
||||
pdd->dev->kgd, pdd->drm_priv);
|
||||
fput(pdd->drm_file);
|
||||
}
|
||||
|
||||
@ -1000,6 +1005,7 @@ static void kfd_process_wq_release(struct work_struct *work)
|
||||
kfd_iommu_unbind_process(p);
|
||||
|
||||
kfd_process_free_outstanding_kfd_bos(p);
|
||||
svm_range_list_fini(p);
|
||||
|
||||
kfd_process_destroy_pdds(p);
|
||||
dma_fence_put(p->ef);
|
||||
@ -1058,6 +1064,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
|
||||
|
||||
cancel_delayed_work_sync(&p->eviction_work);
|
||||
cancel_delayed_work_sync(&p->restore_work);
|
||||
cancel_delayed_work_sync(&p->svms.restore_work);
|
||||
|
||||
mutex_lock(&p->mutex);
|
||||
|
||||
@ -1186,6 +1193,56 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
|
||||
}
|
||||
}
|
||||
|
||||
bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* On most GFXv9 GPUs, the retry mode in the SQ must match the
|
||||
* boot time retry setting. Mixing processes with different
|
||||
* XNACK/retry settings can hang the GPU.
|
||||
*
|
||||
* Different GPUs can have different noretry settings depending
|
||||
* on HW bugs or limitations. We need to find at least one
|
||||
* XNACK mode for this process that's compatible with all GPUs.
|
||||
* Fortunately GPUs with retry enabled (noretry=0) can run code
|
||||
* built for XNACK-off. On GFXv9 it may perform slower.
|
||||
*
|
||||
* Therefore applications built for XNACK-off can always be
|
||||
* supported and will be our fallback if any GPU does not
|
||||
* support retry.
|
||||
*/
|
||||
for (i = 0; i < p->n_pdds; i++) {
|
||||
struct kfd_dev *dev = p->pdds[i]->dev;
|
||||
|
||||
/* Only consider GFXv9 and higher GPUs. Older GPUs don't
|
||||
* support the SVM APIs and don't need to be considered
|
||||
* for the XNACK mode selection.
|
||||
*/
|
||||
if (dev->device_info->asic_family < CHIP_VEGA10)
|
||||
continue;
|
||||
/* Aldebaran can always support XNACK because it can support
|
||||
* per-process XNACK mode selection. But let the dev->noretry
|
||||
* setting still influence the default XNACK mode.
|
||||
*/
|
||||
if (supported &&
|
||||
dev->device_info->asic_family == CHIP_ALDEBARAN)
|
||||
continue;
|
||||
|
||||
/* GFXv10 and later GPUs do not support shader preemption
|
||||
* during page faults. This can lead to poor QoS for queue
|
||||
* management and memory-manager-related preemptions or
|
||||
* even deadlocks.
|
||||
*/
|
||||
if (dev->device_info->asic_family >= CHIP_NAVI10)
|
||||
return false;
|
||||
|
||||
if (dev->noretry)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* On return the kfd_process is fully operational and will be freed when the
|
||||
* mm is released
|
||||
@ -1205,6 +1262,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
|
||||
process->mm = thread->mm;
|
||||
process->lead_thread = thread->group_leader;
|
||||
process->n_pdds = 0;
|
||||
process->svm_disabled = false;
|
||||
INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
|
||||
INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
|
||||
process->last_restore_timestamp = get_jiffies_64();
|
||||
@ -1224,6 +1282,13 @@ static struct kfd_process *create_process(const struct task_struct *thread)
|
||||
if (err != 0)
|
||||
goto err_init_apertures;
|
||||
|
||||
/* Check XNACK support after PDDs are created in kfd_init_apertures */
|
||||
process->xnack_enabled = kfd_process_xnack_mode(process, false);
|
||||
|
||||
err = svm_range_list_init(process);
|
||||
if (err)
|
||||
goto err_init_svm_range_list;
|
||||
|
||||
/* alloc_notifier needs to find the process in the hash table */
|
||||
hash_add_rcu(kfd_processes_table, &process->kfd_processes,
|
||||
(uintptr_t)process->mm);
|
||||
@ -1246,6 +1311,8 @@ static struct kfd_process *create_process(const struct task_struct *thread)
|
||||
|
||||
err_register_notifier:
|
||||
hash_del_rcu(&process->kfd_processes);
|
||||
svm_range_list_fini(process);
|
||||
err_init_svm_range_list:
|
||||
kfd_process_free_outstanding_kfd_bos(process);
|
||||
kfd_process_destroy_pdds(process);
|
||||
err_init_apertures:
|
||||
@ -1375,7 +1442,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
|
||||
if (!drm_file)
|
||||
return -EINVAL;
|
||||
|
||||
if (pdd->vm)
|
||||
if (pdd->drm_priv)
|
||||
return -EBUSY;
|
||||
|
||||
p = pdd->process;
|
||||
@ -1383,13 +1450,12 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
|
||||
|
||||
ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
|
||||
dev->kgd, drm_file, p->pasid,
|
||||
&pdd->vm, &p->kgd_process_info, &p->ef);
|
||||
&p->kgd_process_info, &p->ef);
|
||||
if (ret) {
|
||||
pr_err("Failed to create process VM object\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
amdgpu_vm_set_task_info(pdd->vm);
|
||||
pdd->drm_priv = drm_file->private_data;
|
||||
|
||||
ret = kfd_process_device_reserve_ib_mem(pdd);
|
||||
if (ret)
|
||||
@ -1405,7 +1471,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
|
||||
err_init_cwsr:
|
||||
err_reserve_ib_mem:
|
||||
kfd_process_device_free_bos(pdd);
|
||||
pdd->vm = NULL;
|
||||
pdd->drm_priv = NULL;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -1429,7 +1495,7 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
if (!pdd->vm)
|
||||
if (!pdd->drm_priv)
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
/*
|
||||
@ -1600,6 +1666,32 @@ int kfd_process_restore_queues(struct kfd_process *p)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < p->n_pdds; i++)
|
||||
if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id)
|
||||
return i;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int
|
||||
kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev,
|
||||
uint32_t *gpuid, uint32_t *gpuidx)
|
||||
{
|
||||
struct kgd_dev *kgd = (struct kgd_dev *)adev;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < p->n_pdds; i++)
|
||||
if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) {
|
||||
*gpuid = p->pdds[i]->dev->id;
|
||||
*gpuidx = i;
|
||||
return 0;
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static void evict_process_worker(struct work_struct *work)
|
||||
{
|
||||
int ret;
|
||||
|
drivers/gpu/drm/amd/amdkfd/kfd_svm.c (new file, 3085 lines; diff suppressed because it is too large)
drivers/gpu/drm/amd/amdkfd/kfd_svm.h (new file, 206 lines)
@ -0,0 +1,206 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* Copyright 2020-2021 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef KFD_SVM_H_
|
||||
#define KFD_SVM_H_
|
||||
|
||||
#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
|
||||
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/hmm.h>
|
||||
#include "amdgpu.h"
|
||||
#include "kfd_priv.h"
|
||||
|
||||
struct svm_range_bo {
|
||||
struct amdgpu_bo *bo;
|
||||
struct kref kref;
|
||||
struct list_head range_list; /* all svm ranges shared this bo */
|
||||
spinlock_t list_lock;
|
||||
struct amdgpu_amdkfd_fence *eviction_fence;
|
||||
struct work_struct eviction_work;
|
||||
struct svm_range_list *svms;
|
||||
uint32_t evicting;
|
||||
};
|
||||
|
||||
enum svm_work_list_ops {
|
||||
SVM_OP_NULL,
|
||||
SVM_OP_UNMAP_RANGE,
|
||||
SVM_OP_UPDATE_RANGE_NOTIFIER,
|
||||
SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP,
|
||||
SVM_OP_ADD_RANGE,
|
||||
SVM_OP_ADD_RANGE_AND_MAP
|
||||
};
|
||||
|
||||
struct svm_work_list_item {
|
||||
enum svm_work_list_ops op;
|
||||
struct mm_struct *mm;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct svm_range - shared virtual memory range
|
||||
*
|
||||
* @svms: list of svm ranges, structure defined in kfd_process
|
||||
* @migrate_mutex: to serialize range migration, validation and mapping update
|
||||
* @start: range start address in pages
|
||||
* @last: range last address in pages
|
||||
* @it_node: node [start, last] stored in interval tree, start, last are page
|
||||
* aligned, page size is (last - start + 1)
|
||||
* @list: link list node, used to scan all ranges of svms
|
||||
* @update_list:link list node used to add to update_list
|
||||
* @remove_list:link list node used to add to remove list
|
||||
* @insert_list:link list node used to add to insert list
|
||||
* @mapping: bo_va mapping structure to create and update GPU page table
|
||||
* @npages: number of pages
|
||||
* @dma_addr: dma mapping address on each GPU for system memory physical page
|
||||
* @ttm_res: vram ttm resource map
|
||||
* @offset: range start offset within mm_nodes
|
||||
* @svm_bo: struct to manage splited amdgpu_bo
|
||||
* @svm_bo_list:link list node, to scan all ranges which share same svm_bo
|
||||
* @lock: protect prange start, last, child_list, svm_bo_list
|
||||
* @saved_flags:save/restore current PF_MEMALLOC flags
|
||||
* @flags: flags defined as KFD_IOCTL_SVM_FLAG_*
|
||||
* @perferred_loc: perferred location, 0 for CPU, or GPU id
|
||||
* @perfetch_loc: last prefetch location, 0 for CPU, or GPU id
|
||||
* @actual_loc: the actual location, 0 for CPU, or GPU id
|
||||
* @granularity:migration granularity, log2 num pages
|
||||
* @invalid: not 0 means cpu page table is invalidated
|
||||
* @validate_timestamp: system timestamp when range is validated
|
||||
* @notifier: register mmu interval notifier
|
||||
* @work_item: deferred work item information
|
||||
* @deferred_list: list header used to add range to deferred list
|
||||
* @child_list: list header for split ranges which are not added to svms yet
|
||||
* @bitmap_access: index bitmap of GPUs which can access the range
|
||||
* @bitmap_aip: index bitmap of GPUs which can access the range in place
|
||||
*
|
||||
* Data structure for virtual memory range shared by CPU and GPUs, it can be
|
||||
* allocated from system memory ram or device vram, and migrate from ram to vram
|
||||
* or from vram to ram.
|
||||
*/
|
||||
struct svm_range {
|
||||
struct svm_range_list *svms;
|
||||
struct mutex migrate_mutex;
|
||||
unsigned long start;
|
||||
unsigned long last;
|
||||
struct interval_tree_node it_node;
|
||||
struct list_head list;
|
||||
struct list_head update_list;
|
||||
struct list_head remove_list;
|
||||
struct list_head insert_list;
|
||||
struct amdgpu_bo_va_mapping mapping;
|
||||
uint64_t npages;
|
||||
dma_addr_t *dma_addr[MAX_GPU_INSTANCE];
|
||||
struct ttm_resource *ttm_res;
|
||||
uint64_t offset;
|
||||
struct svm_range_bo *svm_bo;
|
||||
struct list_head svm_bo_list;
|
||||
struct mutex lock;
|
||||
unsigned int saved_flags;
|
||||
uint32_t flags;
|
||||
uint32_t preferred_loc;
|
||||
uint32_t prefetch_loc;
|
||||
uint32_t actual_loc;
|
||||
uint8_t granularity;
|
||||
atomic_t invalid;
|
||||
uint64_t validate_timestamp;
|
||||
struct mmu_interval_notifier notifier;
|
||||
struct svm_work_list_item work_item;
|
||||
struct list_head deferred_list;
|
||||
struct list_head child_list;
|
||||
DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
|
||||
DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
|
||||
bool validated_once;
|
||||
};
|
||||
|
||||
static inline void svm_range_lock(struct svm_range *prange)
|
||||
{
|
||||
mutex_lock(&prange->lock);
|
||||
prange->saved_flags = memalloc_noreclaim_save();
|
||||
|
||||
}
|
||||
static inline void svm_range_unlock(struct svm_range *prange)
|
||||
{
|
||||
memalloc_noreclaim_restore(prange->saved_flags);
|
||||
mutex_unlock(&prange->lock);
|
||||
}
|
||||
|
||||
int svm_range_list_init(struct kfd_process *p);
|
||||
void svm_range_list_fini(struct kfd_process *p);
|
||||
int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
|
||||
uint64_t size, uint32_t nattrs,
|
||||
struct kfd_ioctl_svm_attribute *attrs);
|
||||
struct svm_range *svm_range_from_addr(struct svm_range_list *svms,
|
||||
unsigned long addr,
|
||||
struct svm_range **parent);
|
||||
struct amdgpu_device *svm_range_get_adev_by_id(struct svm_range *prange,
|
||||
uint32_t id);
|
||||
int svm_range_vram_node_new(struct amdgpu_device *adev,
|
||||
struct svm_range *prange, bool clear);
|
||||
void svm_range_vram_node_free(struct svm_range *prange);
|
||||
int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
|
||||
unsigned long addr, struct svm_range *parent,
|
||||
struct svm_range *prange);
|
||||
int svm_range_restore_pages(struct amdgpu_device *adev,
|
||||
unsigned int pasid, uint64_t addr);
|
||||
int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
|
||||
void svm_range_add_list_work(struct svm_range_list *svms,
|
||||
struct svm_range *prange, struct mm_struct *mm,
|
||||
enum svm_work_list_ops op);
|
||||
void schedule_deferred_list_work(struct svm_range_list *svms);
|
||||
void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
|
||||
unsigned long offset, unsigned long npages);
|
||||
void svm_range_free_dma_mappings(struct svm_range *prange);
|
||||
void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm);
|
||||
|
||||
#else
|
||||
|
||||
struct kfd_process;
|
||||
|
||||
static inline int svm_range_list_init(struct kfd_process *p)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void svm_range_list_fini(struct kfd_process *p)
|
||||
{
|
||||
/* empty */
|
||||
}
|
||||
|
||||
static inline int svm_range_restore_pages(struct amdgpu_device *adev,
|
||||
unsigned int pasid, uint64_t addr)
|
||||
{
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
static inline int svm_range_schedule_evict_svm_bo(
|
||||
struct amdgpu_amdkfd_fence *fence)
|
||||
{
|
||||
WARN_ONCE(1, "SVM eviction fence triggered, but SVM is disabled");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
|
||||
|
||||
#endif /* KFD_SVM_H_ */
|
@ -1192,40 +1192,60 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
|
||||
mem->mem_clk_max = local_mem_info.mem_clk_max;
|
||||
}
|
||||
|
||||
static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev,
|
||||
struct kfd_topology_device *target_gpu_dev,
|
||||
struct kfd_iolink_properties *link)
|
||||
{
|
||||
/* xgmi always supports atomics between links. */
|
||||
if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI)
|
||||
return;
|
||||
|
||||
/* check pcie support to set cpu(dev) flags for target_gpu_dev link. */
|
||||
if (target_gpu_dev) {
|
||||
uint32_t cap;
|
||||
|
||||
pcie_capability_read_dword(target_gpu_dev->gpu->pdev,
|
||||
PCI_EXP_DEVCAP2, &cap);
|
||||
|
||||
if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
|
||||
PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
|
||||
link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
|
||||
CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
|
||||
/* set gpu (dev) flags. */
|
||||
} else {
|
||||
if (!dev->gpu->pci_atomic_requested ||
|
||||
dev->gpu->device_info->asic_family ==
|
||||
CHIP_HAWAII)
|
||||
link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
|
||||
CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
|
||||
}
|
||||
}
|
||||
|
||||
static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
|
||||
{
|
||||
struct kfd_iolink_properties *link, *cpu_link;
|
||||
struct kfd_topology_device *cpu_dev;
|
||||
uint32_t cap;
|
||||
uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED;
|
||||
uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED;
|
||||
struct kfd_iolink_properties *link, *inbound_link;
|
||||
struct kfd_topology_device *peer_dev;
|
||||
|
||||
if (!dev || !dev->gpu)
|
||||
return;
|
||||
|
||||
pcie_capability_read_dword(dev->gpu->pdev,
|
||||
PCI_EXP_DEVCAP2, &cap);
|
||||
|
||||
if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
|
||||
PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
|
||||
cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
|
||||
CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
|
||||
|
||||
if (!dev->gpu->pci_atomic_requested ||
|
||||
dev->gpu->device_info->asic_family == CHIP_HAWAII)
|
||||
flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
|
||||
CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
|
||||
|
||||
/* GPU only creates direct links so apply flags setting to all */
|
||||
list_for_each_entry(link, &dev->io_link_props, list) {
|
||||
link->flags = flag;
|
||||
cpu_dev = kfd_topology_device_by_proximity_domain(
|
||||
link->flags = CRAT_IOLINK_FLAGS_ENABLED;
|
||||
kfd_set_iolink_no_atomics(dev, NULL, link);
|
||||
peer_dev = kfd_topology_device_by_proximity_domain(
|
||||
link->node_to);
|
||||
if (cpu_dev) {
|
||||
list_for_each_entry(cpu_link,
|
||||
&cpu_dev->io_link_props, list)
|
||||
if (cpu_link->node_to == link->node_from)
|
||||
cpu_link->flags = cpu_flag;
|
||||
|
||||
if (!peer_dev)
|
||||
continue;
|
||||
|
||||
list_for_each_entry(inbound_link, &peer_dev->io_link_props,
|
||||
list) {
|
||||
if (inbound_link->node_to != link->node_from)
|
||||
continue;
|
||||
|
||||
inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
|
||||
kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1410,15 +1430,21 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
|
||||
adev = (struct amdgpu_device *)(dev->gpu->kgd);
|
||||
/* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
|
||||
dev->node_props.capability |=
|
||||
((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
|
||||
((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
|
||||
HSA_CAP_SRAM_EDCSUPPORTED : 0;
|
||||
dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
|
||||
dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
|
||||
HSA_CAP_MEM_EDCSUPPORTED : 0;
|
||||
|
||||
if (adev->asic_type != CHIP_VEGA10)
|
||||
dev->node_props.capability |= (adev->ras_features != 0) ?
|
||||
dev->node_props.capability |= (adev->ras_enabled != 0) ?
|
||||
HSA_CAP_RASEVENTNOTIFY : 0;
|
||||
|
||||
/* SVM API and HMM page migration work together, device memory type
|
||||
* is initialized to not 0 when page migration register device memory.
|
||||
*/
|
||||
if (adev->kfd.dev->pgmap.type != 0)
|
||||
dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;
|
||||
|
||||
kfd_debug_print_topology();
|
||||
|
||||
if (!res)
|
||||
|
@ -53,8 +53,9 @@
|
||||
#define HSA_CAP_ASIC_REVISION_MASK 0x03c00000
|
||||
#define HSA_CAP_ASIC_REVISION_SHIFT 22
|
||||
#define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000
|
||||
#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000
|
||||
|
||||
#define HSA_CAP_RESERVED 0xf80f8000
|
||||
#define HSA_CAP_RESERVED 0xf00f8000
|
||||
|
||||
struct kfd_node_properties {
|
||||
uint64_t hive_id;
|
||||
@ -98,9 +99,10 @@ struct kfd_node_properties {
|
||||
#define HSA_MEM_HEAP_TYPE_GPU_LDS 4
|
||||
#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH 5
|
||||
|
||||
#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001
|
||||
#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002
|
||||
#define HSA_MEM_FLAGS_RESERVED 0xfffffffc
|
||||
#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001
|
||||
#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002
|
||||
#define HSA_MEM_FLAGS_COHERENTHOSTACCESS 0x00000004
|
||||
#define HSA_MEM_FLAGS_RESERVED 0xfffffff8
|
||||
|
||||
struct kfd_mem_properties {
|
||||
struct list_head list;
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "dc/inc/hw/abm.h"
|
||||
#include "dc/dc_dmub_srv.h"
|
||||
#include "dc/dc_edid_parser.h"
|
||||
#include "dc/dc_stat.h"
|
||||
#include "amdgpu_dm_trace.h"
|
||||
|
||||
#include "vid.h"
|
||||
@ -59,6 +60,7 @@
|
||||
|
||||
#include "ivsrcid/ivsrcid_vislands30.h"
|
||||
|
||||
#include "i2caux_interface.h"
|
||||
#include <linux/module.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/types.h>
|
||||
@ -618,6 +620,58 @@ static void dm_dcn_vertical_interrupt0_high_irq(void *interrupt_params)
|
||||
amdgpu_dm_crtc_handle_crc_window_irq(&acrtc->base);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* dm_dmub_outbox1_low_irq() - Handles Outbox interrupt
|
||||
* @interrupt_params: used for determining the Outbox instance
|
||||
*
|
||||
* Handles the Outbox Interrupt
|
||||
* event handler.
|
||||
*/
|
||||
#define DMUB_TRACE_MAX_READ 64
|
||||
static void dm_dmub_outbox1_low_irq(void *interrupt_params)
|
||||
{
|
||||
struct dmub_notification notify;
|
||||
struct common_irq_params *irq_params = interrupt_params;
|
||||
struct amdgpu_device *adev = irq_params->adev;
|
||||
struct amdgpu_display_manager *dm = &adev->dm;
|
||||
struct dmcub_trace_buf_entry entry = { 0 };
|
||||
uint32_t count = 0;
|
||||
|
||||
if (dc_enable_dmub_notifications(adev->dm.dc)) {
|
||||
if (irq_params->irq_src == DC_IRQ_SOURCE_DMCUB_OUTBOX) {
|
||||
do {
|
||||
dc_stat_get_dmub_notification(adev->dm.dc, ¬ify);
|
||||
} while (notify.pending_notification);
|
||||
|
||||
if (adev->dm.dmub_notify)
|
||||
memcpy(adev->dm.dmub_notify, ¬ify, sizeof(struct dmub_notification));
|
||||
if (notify.type == DMUB_NOTIFICATION_AUX_REPLY)
|
||||
complete(&adev->dm.dmub_aux_transfer_done);
|
||||
// TODO : HPD Implementation
|
||||
|
||||
} else {
|
||||
DRM_ERROR("DM: Failed to receive correct outbox IRQ !");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
do {
|
||||
if (dc_dmub_srv_get_dmub_outbox0_msg(dm->dc, &entry)) {
|
||||
trace_amdgpu_dmub_trace_high_irq(entry.trace_code, entry.tick_count,
|
||||
entry.param0, entry.param1);
|
||||
|
||||
DRM_DEBUG_DRIVER("trace_code:%u, tick_count:%u, param0:%u, param1:%u\n",
|
||||
entry.trace_code, entry.tick_count, entry.param0, entry.param1);
|
||||
} else
|
||||
break;
|
||||
|
||||
count++;
|
||||
|
||||
} while (count <= DMUB_TRACE_MAX_READ);
|
||||
|
||||
ASSERT(count <= DMUB_TRACE_MAX_READ);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int dm_set_clockgating_state(void *handle,
|
||||
@ -938,32 +992,6 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
#if defined(CONFIG_DRM_AMD_DC_DCN)
|
||||
#define DMUB_TRACE_MAX_READ 64
|
||||
static void dm_dmub_trace_high_irq(void *interrupt_params)
|
||||
{
|
||||
struct common_irq_params *irq_params = interrupt_params;
|
||||
struct amdgpu_device *adev = irq_params->adev;
|
||||
struct amdgpu_display_manager *dm = &adev->dm;
|
||||
struct dmcub_trace_buf_entry entry = { 0 };
|
||||
uint32_t count = 0;
|
||||
|
||||
do {
|
||||
if (dc_dmub_srv_get_dmub_outbox0_msg(dm->dc, &entry)) {
|
||||
trace_amdgpu_dmub_trace_high_irq(entry.trace_code, entry.tick_count,
|
||||
entry.param0, entry.param1);
|
||||
|
||||
DRM_DEBUG_DRIVER("trace_code:%u, tick_count:%u, param0:%u, param1:%u\n",
|
||||
entry.trace_code, entry.tick_count, entry.param0, entry.param1);
|
||||
} else
|
||||
break;
|
||||
|
||||
count++;
|
||||
|
||||
} while (count <= DMUB_TRACE_MAX_READ);
|
||||
|
||||
ASSERT(count <= DMUB_TRACE_MAX_READ);
|
||||
}
|
||||
|
||||
static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_addr_space_config *pa_config)
|
||||
{
|
||||
uint64_t pt_base;
|
||||
@ -1220,6 +1248,16 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
|
||||
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
|
||||
adev->dm.crc_rd_wrk = amdgpu_dm_crtc_secure_display_create_work();
|
||||
#endif
|
||||
if (dc_enable_dmub_notifications(adev->dm.dc)) {
|
||||
init_completion(&adev->dm.dmub_aux_transfer_done);
|
||||
adev->dm.dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_KERNEL);
|
||||
if (!adev->dm.dmub_notify) {
|
||||
DRM_INFO("amdgpu: fail to allocate adev->dm.dmub_notify");
|
||||
goto error;
|
||||
}
|
||||
amdgpu_dm_outbox_init(adev);
|
||||
}
|
||||
|
||||
if (amdgpu_dm_initialize_drm_device(adev)) {
|
||||
DRM_ERROR(
|
||||
"amdgpu: failed to initialize sw for display support.\n");
|
||||
@ -1293,6 +1331,11 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
|
||||
adev->dm.dc->ctx->dmub_srv = NULL;
|
||||
}
|
||||
|
||||
if (dc_enable_dmub_notifications(adev->dm.dc)) {
|
||||
kfree(adev->dm.dmub_notify);
|
||||
adev->dm.dmub_notify = NULL;
|
||||
}
|
||||
|
||||
if (adev->dm.dmub_bo)
|
||||
amdgpu_bo_free_kernel(&adev->dm.dmub_bo,
|
||||
&adev->dm.dmub_bo_gpu_addr,
|
||||
@ -2708,8 +2751,7 @@ static void handle_hpd_rx_irq(void *param)
|
||||
* conflict, after implement i2c helper, this mutex should be
|
||||
* retired.
|
||||
*/
|
||||
if (dc_link->type != dc_connection_mst_branch)
|
||||
mutex_lock(&aconnector->hpd_lock);
|
||||
mutex_lock(&aconnector->hpd_lock);
|
||||
|
||||
read_hpd_rx_irq_data(dc_link, &hpd_irq_data);
|
||||
|
||||
@ -2726,13 +2768,15 @@ static void handle_hpd_rx_irq(void *param)
|
||||
}
|
||||
}
|
||||
|
||||
mutex_lock(&adev->dm.dc_lock);
|
||||
if (!amdgpu_in_reset(adev)) {
|
||||
mutex_lock(&adev->dm.dc_lock);
|
||||
#ifdef CONFIG_DRM_AMD_DC_HDCP
|
||||
result = dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data, NULL);
|
||||
#else
|
||||
result = dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL);
|
||||
#endif
|
||||
mutex_unlock(&adev->dm.dc_lock);
|
||||
mutex_unlock(&adev->dm.dc_lock);
|
||||
}
|
||||
|
||||
out:
|
||||
if (result && !is_mst_root_connector) {
|
||||
@ -2776,10 +2820,10 @@ out:
|
||||
}
|
||||
#endif
|
||||
|
||||
if (dc_link->type != dc_connection_mst_branch) {
|
||||
if (dc_link->type != dc_connection_mst_branch)
|
||||
drm_dp_cec_irq(&aconnector->dm_dp_aux.aux);
|
||||
mutex_unlock(&aconnector->hpd_lock);
|
||||
}
|
||||
|
||||
mutex_unlock(&aconnector->hpd_lock);
|
||||
}
|
||||
|
||||
static void register_hpd_handlers(struct amdgpu_device *adev)
|
||||
@ -3151,28 +3195,6 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
|
||||
|
||||
}
|
||||
|
||||
if (dc->ctx->dmub_srv) {
|
||||
i = DCN_1_0__SRCID__DMCUB_OUTBOX_HIGH_PRIORITY_READY_INT;
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->dmub_trace_irq);
|
||||
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to add dmub trace irq id!\n");
|
||||
return r;
|
||||
}
|
||||
|
||||
int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT;
|
||||
int_params.irq_source =
|
||||
dc_interrupt_to_irq_source(dc, i, 0);
|
||||
|
||||
c_irq_params = &adev->dm.dmub_trace_params[0];
|
||||
|
||||
c_irq_params->adev = adev;
|
||||
c_irq_params->irq_src = int_params.irq_source;
|
||||
|
||||
amdgpu_dm_irq_register_interrupt(adev, &int_params,
|
||||
dm_dmub_trace_high_irq, c_irq_params);
|
||||
}
|
||||
|
||||
/* HPD */
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DC_HPD1_INT,
|
||||
&adev->hpd_irq);
|
||||
@ -3185,6 +3207,41 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
|
||||
|
||||
return 0;
|
||||
}
|
||||
/* Register Outbox IRQ sources and initialize IRQ callbacks */
|
||||
static int register_outbox_irq_handlers(struct amdgpu_device *adev)
|
||||
{
|
||||
struct dc *dc = adev->dm.dc;
|
||||
struct common_irq_params *c_irq_params;
|
||||
struct dc_interrupt_params int_params = {0};
|
||||
int r, i;
|
||||
|
||||
int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT;
|
||||
int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT;
|
||||
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT,
|
||||
&adev->dmub_outbox_irq);
if (r) {
DRM_ERROR("Failed to add outbox irq id!\n");
return r;
}
if (dc->ctx->dmub_srv) {
i = DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT;
int_params.int_context = INTERRUPT_LOW_IRQ_CONTEXT;
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
c_irq_params = &adev->dm.dmub_outbox_params[0];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
amdgpu_dm_irq_register_interrupt(adev, &int_params,
dm_dmub_outbox1_low_irq, c_irq_params);
}
return 0;
}
#endif
/*
|
||||
@ -3414,22 +3471,37 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
|
||||
{
|
||||
struct amdgpu_display_manager *dm = bl_get_data(bd);
|
||||
struct amdgpu_dm_backlight_caps caps;
|
||||
struct dc_link *link = NULL;
|
||||
struct dc_link *link[AMDGPU_DM_MAX_NUM_EDP];
|
||||
u32 brightness;
|
||||
bool rc;
|
||||
int i;
|
||||
|
||||
amdgpu_dm_update_backlight_caps(dm);
|
||||
caps = dm->backlight_caps;
|
||||
|
||||
link = (struct dc_link *)dm->backlight_link;
|
||||
for (i = 0; i < dm->num_of_edps; i++)
|
||||
link[i] = (struct dc_link *)dm->backlight_link[i];
|
||||
|
||||
brightness = convert_brightness_from_user(&caps, bd->props.brightness);
|
||||
// Change brightness based on AUX property
|
||||
if (caps.aux_support)
|
||||
rc = dc_link_set_backlight_level_nits(link, true, brightness,
|
||||
AUX_BL_DEFAULT_TRANSITION_TIME_MS);
|
||||
else
|
||||
rc = dc_link_set_backlight_level(dm->backlight_link, brightness, 0);
|
||||
if (caps.aux_support) {
|
||||
for (i = 0; i < dm->num_of_edps; i++) {
|
||||
rc = dc_link_set_backlight_level_nits(link[i], true, brightness,
|
||||
AUX_BL_DEFAULT_TRANSITION_TIME_MS);
|
||||
if (!rc) {
|
||||
DRM_ERROR("DM: Failed to update backlight via AUX on eDP[%d]\n", i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < dm->num_of_edps; i++) {
|
||||
rc = dc_link_set_backlight_level(dm->backlight_link[i], brightness, 0);
|
||||
if (!rc) {
|
||||
DRM_ERROR("DM: Failed to update backlight on eDP[%d]\n", i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rc ? 0 : 1;
|
||||
}
|
||||
@ -3443,7 +3515,7 @@ static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd)
|
||||
caps = dm->backlight_caps;
|
||||
|
||||
if (caps.aux_support) {
|
||||
struct dc_link *link = (struct dc_link *)dm->backlight_link;
|
||||
struct dc_link *link = (struct dc_link *)dm->backlight_link[0];
|
||||
u32 avg, peak;
|
||||
bool rc;
|
||||
|
||||
@ -3452,7 +3524,7 @@ static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd)
|
||||
return bd->props.brightness;
|
||||
return convert_brightness_to_user(&caps, avg);
|
||||
} else {
|
||||
int ret = dc_link_get_backlight_level(dm->backlight_link);
|
||||
int ret = dc_link_get_backlight_level(dm->backlight_link[0]);
|
||||
|
||||
if (ret == DC_ERROR_UNEXPECTED)
|
||||
return bd->props.brightness;
|
||||
@ -3549,10 +3621,13 @@ static void register_backlight_device(struct amdgpu_display_manager *dm,
|
||||
* DM initialization because not having a backlight control
|
||||
* is better than a black screen.
|
||||
*/
|
||||
amdgpu_dm_register_backlight_device(dm);
|
||||
if (!dm->backlight_dev)
|
||||
amdgpu_dm_register_backlight_device(dm);
|
||||
|
||||
if (dm->backlight_dev)
|
||||
dm->backlight_link = link;
|
||||
if (dm->backlight_dev) {
|
||||
dm->backlight_link[dm->num_of_edps] = link;
|
||||
dm->num_of_edps++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -3643,6 +3718,22 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_DRM_AMD_DC_DCN)
|
||||
/* Use Outbox interrupt */
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_SIENNA_CICHLID:
|
||||
case CHIP_NAVY_FLOUNDER:
|
||||
case CHIP_RENOIR:
|
||||
if (register_outbox_irq_handlers(dm->adev)) {
|
||||
DRM_ERROR("DM: Failed to initialize IRQ\n");
|
||||
goto fail;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
DRM_DEBUG_KMS("Unsupported ASIC type for outbox: 0x%X\n", adev->asic_type);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* loops over all connectors on the board */
|
||||
for (i = 0; i < link_cnt; i++) {
|
||||
struct dc_link *link = NULL;
|
||||
@ -6560,13 +6651,13 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
|
||||
{
|
||||
struct dc_stream_state *stream = NULL;
|
||||
struct drm_connector *connector;
|
||||
struct drm_connector_state *new_con_state, *old_con_state;
|
||||
struct drm_connector_state *new_con_state;
|
||||
struct amdgpu_dm_connector *aconnector;
|
||||
struct dm_connector_state *dm_conn_state;
|
||||
int i, j, clock, bpp;
|
||||
int vcpi, pbn_div, pbn = 0;
|
||||
|
||||
for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
|
||||
for_each_new_connector_in_state(state, connector, new_con_state, i) {
|
||||
|
||||
aconnector = to_amdgpu_dm_connector(connector);
|
||||
|
||||
@ -8164,15 +8255,14 @@ static void amdgpu_dm_handle_vrr_transition(struct dm_crtc_state *old_state,
|
||||
static void amdgpu_dm_commit_cursors(struct drm_atomic_state *state)
|
||||
{
|
||||
struct drm_plane *plane;
|
||||
struct drm_plane_state *old_plane_state, *new_plane_state;
|
||||
struct drm_plane_state *old_plane_state;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* TODO: Make this per-stream so we don't issue redundant updates for
|
||||
* commits with multiple streams.
|
||||
*/
|
||||
for_each_oldnew_plane_in_state(state, plane, old_plane_state,
|
||||
new_plane_state, i)
|
||||
for_each_old_plane_in_state(state, plane, old_plane_state, i)
|
||||
if (plane->type == DRM_PLANE_TYPE_CURSOR)
|
||||
handle_cursor_update(plane, old_plane_state);
|
||||
}
|
||||
@ -10668,3 +10758,30 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address,
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int linkIndex,
|
||||
struct aux_payload *payload, enum aux_return_code_type *operation_result)
|
||||
{
|
||||
struct amdgpu_device *adev = ctx->driver_context;
|
||||
int ret = 0;
|
||||
|
||||
dc_process_dmub_aux_transfer_async(ctx->dc, linkIndex, payload);
|
||||
ret = wait_for_completion_interruptible_timeout(&adev->dm.dmub_aux_transfer_done, 10*HZ);
|
||||
if (ret == 0) {
|
||||
*operation_result = AUX_RET_ERROR_TIMEOUT;
|
||||
return -1;
|
||||
}
|
||||
*operation_result = (enum aux_return_code_type)adev->dm.dmub_notify->result;
|
||||
|
||||
if (adev->dm.dmub_notify->result == AUX_RET_SUCCESS) {
|
||||
(*payload->reply) = adev->dm.dmub_notify->aux_reply.command;
|
||||
|
||||
// For read case, Copy data to payload
|
||||
if (!payload->write && adev->dm.dmub_notify->aux_reply.length &&
|
||||
(*payload->reply == AUX_TRANSACTION_REPLY_AUX_ACK))
|
||||
memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data,
|
||||
adev->dm.dmub_notify->aux_reply.length);
|
||||
}
|
||||
|
||||
return adev->dm.dmub_notify->aux_reply.length;
|
||||
}
|
||||
|
@ -46,6 +46,7 @@
|
||||
|
||||
#define AMDGPU_DM_MAX_CRTC 6
|
||||
|
||||
#define AMDGPU_DM_MAX_NUM_EDP 2
|
||||
/*
|
||||
#include "include/amdgpu_dal_power_if.h"
|
||||
#include "amdgpu_dm_irq.h"
|
||||
@ -54,6 +55,8 @@
|
||||
#include "irq_types.h"
|
||||
#include "signal_types.h"
|
||||
#include "amdgpu_dm_crc.h"
|
||||
struct aux_payload;
|
||||
enum aux_return_code_type;
|
||||
|
||||
/* Forward declarations */
|
||||
struct amdgpu_device;
|
||||
@ -62,6 +65,7 @@ struct dc;
|
||||
struct amdgpu_bo;
|
||||
struct dmub_srv;
|
||||
struct dc_plane_state;
|
||||
struct dmub_notification;
|
||||
|
||||
struct common_irq_params {
|
||||
struct amdgpu_device *adev;
|
||||
@ -135,6 +139,10 @@ struct amdgpu_dm_backlight_caps {
|
||||
|
||||
/**
|
||||
* struct dal_allocation - Tracks mapped FB memory for SMU communication
|
||||
* @list: list of dal allocations
|
||||
* @bo: GPU buffer object
|
||||
* @cpu_ptr: CPU virtual address of the GPU buffer object
|
||||
* @gpu_addr: GPU virtual address of the GPU buffer object
|
||||
*/
|
||||
struct dal_allocation {
|
||||
struct list_head list;
|
||||
@ -164,6 +172,7 @@ struct dal_allocation {
|
||||
* @compressor: Frame buffer compression buffer. See &struct dm_compressor_info
|
||||
* @force_timing_sync: set via debugfs. When set, indicates that all connected
|
||||
* displays will be forced to synchronize.
|
||||
* @dmcub_trace_event_en: enable dmcub trace events
|
||||
*/
|
||||
struct amdgpu_display_manager {
|
||||
|
||||
@ -178,6 +187,8 @@ struct amdgpu_display_manager {
|
||||
*/
|
||||
struct dmub_srv *dmub_srv;
|
||||
|
||||
struct dmub_notification *dmub_notify;
|
||||
|
||||
/**
|
||||
* @dmub_fb_info:
|
||||
*
|
||||
@ -349,11 +360,17 @@ struct amdgpu_display_manager {
|
||||
struct common_irq_params
|
||||
dmub_trace_params[1];
|
||||
|
||||
struct common_irq_params
|
||||
dmub_outbox_params[1];
|
||||
|
||||
spinlock_t irq_handler_list_table_lock;
|
||||
|
||||
struct backlight_device *backlight_dev;
|
||||
|
||||
const struct dc_link *backlight_link;
|
||||
const struct dc_link *backlight_link[AMDGPU_DM_MAX_NUM_EDP];
|
||||
|
||||
uint8_t num_of_edps;
|
||||
|
||||
struct amdgpu_dm_backlight_caps backlight_caps;
|
||||
|
||||
struct mod_freesync *freesync_module;
|
||||
@ -418,6 +435,7 @@ struct amdgpu_display_manager {
|
||||
* DAL fb memory allocation list, for communication with SMU.
|
||||
*/
|
||||
struct list_head da_list;
|
||||
struct completion dmub_aux_transfer_done;
|
||||
};
|
||||
|
||||
enum dsc_clock_force_state {
|
||||
@ -600,4 +618,6 @@ void amdgpu_dm_update_connector_after_detect(
|
||||
|
||||
extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs;
|
||||
|
||||
int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int linkIndex,
|
||||
struct aux_payload *payload, enum aux_return_code_type *operation_result);
|
||||
#endif /* __AMDGPU_DM_H__ */
|
||||
|
@ -925,6 +925,22 @@ static int hdcp_sink_capability_show(struct seq_file *m, void *data)
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Returns whether the connected display is internal and not hotpluggable.
|
||||
* Example usage: cat /sys/kernel/debug/dri/0/DP-1/internal_display
|
||||
*/
|
||||
static int internal_display_show(struct seq_file *m, void *data)
|
||||
{
|
||||
struct drm_connector *connector = m->private;
|
||||
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
|
||||
struct dc_link *link = aconnector->dc_link;
|
||||
|
||||
seq_printf(m, "Internal: %u\n", link->is_internal_display);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* function description
|
||||
*
|
||||
* generic SDP message access for testing
|
||||
@ -2361,6 +2377,44 @@ unlock:
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Backlight at this moment. Read only.
|
||||
* As written to display, taking ABM and backlight lut into account.
|
||||
* Ranges from 0x0 to 0x10000 (= 100% PWM)
|
||||
*
|
||||
* Example usage: cat /sys/kernel/debug/dri/0/eDP-1/current_backlight
|
||||
*/
|
||||
static int current_backlight_show(struct seq_file *m, void *unused)
|
||||
{
|
||||
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(m->private);
|
||||
struct dc_link *link = aconnector->dc_link;
|
||||
unsigned int backlight;
|
||||
|
||||
backlight = dc_link_get_backlight_level(link);
|
||||
seq_printf(m, "0x%x\n", backlight);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Backlight value that is being approached. Read only.
|
||||
* As written to display, taking ABM and backlight lut into account.
|
||||
* Ranges from 0x0 to 0x10000 (= 100% PWM)
|
||||
*
|
||||
* Example usage: cat /sys/kernel/debug/dri/0/eDP-1/target_backlight
|
||||
*/
|
||||
static int target_backlight_show(struct seq_file *m, void *unused)
|
||||
{
|
||||
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(m->private);
|
||||
struct dc_link *link = aconnector->dc_link;
|
||||
unsigned int backlight;
|
||||
|
||||
backlight = dc_link_get_target_backlight_pwm(link);
|
||||
seq_printf(m, "0x%x\n", backlight);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support);
|
||||
DEFINE_SHOW_ATTRIBUTE(dmub_fw_state);
|
||||
DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer);
|
||||
@ -2369,6 +2423,7 @@ DEFINE_SHOW_ATTRIBUTE(dp_lttpr_status);
|
||||
#ifdef CONFIG_DRM_AMD_DC_HDCP
|
||||
DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability);
|
||||
#endif
|
||||
DEFINE_SHOW_ATTRIBUTE(internal_display);
|
||||
|
||||
static const struct file_operations dp_dsc_clock_en_debugfs_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
@ -2594,13 +2649,17 @@ DEFINE_DEBUGFS_ATTRIBUTE(dmcub_trace_event_state_fops, dmcub_trace_event_state_g
|
||||
|
||||
DEFINE_DEBUGFS_ATTRIBUTE(psr_fops, psr_get, NULL, "%llu\n");
|
||||
|
||||
DEFINE_SHOW_ATTRIBUTE(current_backlight);
|
||||
DEFINE_SHOW_ATTRIBUTE(target_backlight);
|
||||
|
||||
static const struct {
|
||||
char *name;
|
||||
const struct file_operations *fops;
|
||||
} connector_debugfs_entries[] = {
|
||||
{"force_yuv420_output", &force_yuv420_output_fops},
|
||||
{"output_bpc", &output_bpc_fops},
|
||||
{"trigger_hotplug", &trigger_hotplug_debugfs_fops}
|
||||
{"trigger_hotplug", &trigger_hotplug_debugfs_fops},
|
||||
{"internal_display", &internal_display_fops}
|
||||
};
|
||||
|
||||
void connector_debugfs_init(struct amdgpu_dm_connector *connector)
|
||||
@ -2616,8 +2675,13 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector)
|
||||
dp_debugfs_entries[i].fops);
|
||||
}
|
||||
}
|
||||
if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP)
|
||||
if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) {
|
||||
debugfs_create_file_unsafe("psr_state", 0444, dir, connector, &psr_fops);
|
||||
debugfs_create_file("amdgpu_current_backlight_pwm", 0444, dir, connector,
|
||||
&current_backlight_fops);
|
||||
debugfs_create_file("amdgpu_target_backlight_pwm", 0444, dir, connector,
|
||||
&target_backlight_fops);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(connector_debugfs_entries); i++) {
|
||||
debugfs_create_file(connector_debugfs_entries[i].name,
|
||||
@ -2920,38 +2984,6 @@ static ssize_t dtn_log_write(
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Backlight at this moment. Read only.
|
||||
* As written to display, taking ABM and backlight lut into account.
|
||||
* Ranges from 0x0 to 0x10000 (= 100% PWM)
|
||||
*/
|
||||
static int current_backlight_show(struct seq_file *m, void *unused)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
|
||||
struct amdgpu_display_manager *dm = &adev->dm;
|
||||
|
||||
unsigned int backlight = dc_link_get_backlight_level(dm->backlight_link);
|
||||
|
||||
seq_printf(m, "0x%x\n", backlight);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Backlight value that is being approached. Read only.
|
||||
* As written to display, taking ABM and backlight lut into account.
|
||||
* Ranges from 0x0 to 0x10000 (= 100% PWM)
|
||||
*/
|
||||
static int target_backlight_show(struct seq_file *m, void *unused)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
|
||||
struct amdgpu_display_manager *dm = &adev->dm;
|
||||
|
||||
unsigned int backlight = dc_link_get_target_backlight_pwm(dm->backlight_link);
|
||||
|
||||
seq_printf(m, "0x%x\n", backlight);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mst_topo_show(struct seq_file *m, void *unused)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
|
||||
@ -3134,8 +3166,6 @@ static int visual_confirm_get(void *data, u64 *val)
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEFINE_SHOW_ATTRIBUTE(current_backlight);
|
||||
DEFINE_SHOW_ATTRIBUTE(target_backlight);
|
||||
DEFINE_SHOW_ATTRIBUTE(mst_topo);
|
||||
DEFINE_DEBUGFS_ATTRIBUTE(visual_confirm_fops, visual_confirm_get,
|
||||
visual_confirm_set, "%llu\n");
|
||||
@ -3215,10 +3245,6 @@ void dtn_debugfs_init(struct amdgpu_device *adev)
|
||||
struct drm_minor *minor = adev_to_drm(adev)->primary;
|
||||
struct dentry *root = minor->debugfs_root;
|
||||
|
||||
debugfs_create_file("amdgpu_current_backlight_pwm", 0444,
|
||||
root, adev, &current_backlight_fops);
|
||||
debugfs_create_file("amdgpu_target_backlight_pwm", 0444,
|
||||
root, adev, &target_backlight_fops);
|
||||
debugfs_create_file("amdgpu_mst_topology", 0444, root,
|
||||
adev, &mst_topo_fops);
|
||||
debugfs_create_file("amdgpu_dm_dtn_log", 0644, root, adev,
|
||||
|
@ -544,8 +544,10 @@ bool dm_helpers_dp_write_dsc_enable(
|
||||
ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1);
|
||||
}
|
||||
|
||||
if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT)
|
||||
return dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1);
|
||||
if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT) {
|
||||
ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1);
|
||||
DC_LOG_DC("Send DSC %s to sst display\n", enable_dsc ? "enable" : "disable");
|
||||
}
|
||||
|
||||
return (ret > 0);
|
||||
}
|
||||
@ -640,7 +642,14 @@ enum dc_edid_status dm_helpers_read_local_edid(
|
||||
|
||||
return edid_status;
|
||||
}
|
||||
|
||||
int dm_helper_dmub_aux_transfer_sync(
|
||||
struct dc_context *ctx,
|
||||
const struct dc_link *link,
|
||||
struct aux_payload *payload,
|
||||
enum aux_return_code_type *operation_result)
|
||||
{
|
||||
return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload, operation_result);
|
||||
}
|
||||
void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)
|
||||
{
|
||||
/* TODO: something */
|
||||
@ -698,12 +707,12 @@ void dm_helpers_free_gpu_mem(
|
||||
}
|
||||
}
|
||||
|
||||
bool dm_helpers_dmub_outbox0_interrupt_control(struct dc_context *ctx, bool enable)
|
||||
bool dm_helpers_dmub_outbox_interrupt_control(struct dc_context *ctx, bool enable)
|
||||
{
|
||||
enum dc_irq_source irq_source;
|
||||
bool ret;
|
||||
|
||||
irq_source = DC_IRQ_SOURCE_DMCUB_OUTBOX0;
|
||||
irq_source = DC_IRQ_SOURCE_DMCUB_OUTBOX;
|
||||
|
||||
ret = dc_interrupt_set(ctx->dc, irq_source, enable);
|
||||
|
||||
|
@ -769,6 +769,18 @@ static int amdgpu_dm_set_vline0_irq_state(struct amdgpu_device *adev,
|
||||
__func__);
|
||||
}
|
||||
|
||||
static int amdgpu_dm_set_dmub_outbox_irq_state(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
unsigned int crtc_id,
|
||||
enum amdgpu_interrupt_state state)
|
||||
{
|
||||
enum dc_irq_source irq_source = DC_IRQ_SOURCE_DMCUB_OUTBOX;
|
||||
bool st = (state == AMDGPU_IRQ_STATE_ENABLE);
|
||||
|
||||
dc_interrupt_set(adev->dm.dc, irq_source, st);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int amdgpu_dm_set_vupdate_irq_state(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
unsigned int crtc_id,
|
||||
@ -805,6 +817,11 @@ static const struct amdgpu_irq_src_funcs dm_vline0_irq_funcs = {
|
||||
.process = amdgpu_dm_irq_handler,
|
||||
};
|
||||
|
||||
static const struct amdgpu_irq_src_funcs dm_dmub_outbox_irq_funcs = {
|
||||
.set = amdgpu_dm_set_dmub_outbox_irq_state,
|
||||
.process = amdgpu_dm_irq_handler,
|
||||
};
|
||||
|
||||
static const struct amdgpu_irq_src_funcs dm_vupdate_irq_funcs = {
|
||||
.set = amdgpu_dm_set_vupdate_irq_state,
|
||||
.process = amdgpu_dm_irq_handler,
|
||||
@ -827,13 +844,15 @@ static const struct amdgpu_irq_src_funcs dm_hpd_irq_funcs = {
|
||||
|
||||
void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev)
|
||||
{
|
||||
|
||||
adev->crtc_irq.num_types = adev->mode_info.num_crtc;
|
||||
adev->crtc_irq.funcs = &dm_crtc_irq_funcs;
|
||||
|
||||
adev->vline0_irq.num_types = adev->mode_info.num_crtc;
|
||||
adev->vline0_irq.funcs = &dm_vline0_irq_funcs;
|
||||
|
||||
adev->dmub_outbox_irq.num_types = 1;
|
||||
adev->dmub_outbox_irq.funcs = &dm_dmub_outbox_irq_funcs;
|
||||
|
||||
adev->vupdate_irq.num_types = adev->mode_info.num_crtc;
|
||||
adev->vupdate_irq.funcs = &dm_vupdate_irq_funcs;
|
||||
|
||||
@ -846,6 +865,12 @@ void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev)
|
||||
adev->hpd_irq.num_types = adev->mode_info.num_hpd;
|
||||
adev->hpd_irq.funcs = &dm_hpd_irq_funcs;
|
||||
}
|
||||
void amdgpu_dm_outbox_init(struct amdgpu_device *adev)
|
||||
{
|
||||
dc_interrupt_set(adev->dm.dc,
|
||||
DC_IRQ_SOURCE_DMCUB_OUTBOX,
|
||||
true);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_dm_hpd_init - hpd setup callback.
|
||||
|
@ -82,6 +82,7 @@ void amdgpu_dm_irq_unregister_interrupt(struct amdgpu_device *adev,
|
||||
|
||||
void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev);
|
||||
|
||||
void amdgpu_dm_outbox_init(struct amdgpu_device *adev);
|
||||
void amdgpu_dm_hpd_init(struct amdgpu_device *adev);
|
||||
void amdgpu_dm_hpd_fini(struct amdgpu_device *adev);
|
||||
|
||||
|
@ -278,6 +278,9 @@ dm_dp_mst_detect(struct drm_connector *connector,
|
||||
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
|
||||
struct amdgpu_dm_connector *master = aconnector->mst_port;
|
||||
|
||||
if (drm_connector_is_unregistered(connector))
|
||||
return connector_status_disconnected;
|
||||
|
||||
return drm_dp_mst_detect_port(connector, ctx, &master->mst_mgr,
|
||||
aconnector->port);
|
||||
}
|
||||
|
@ -54,7 +54,7 @@ AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LI
|
||||
|
||||
include $(AMD_DC)
|
||||
|
||||
DISPLAY_CORE = dc.o dc_link.o dc_resource.o dc_hw_sequencer.o dc_sink.o \
|
||||
DISPLAY_CORE = dc.o dc_stat.o dc_link.o dc_resource.o dc_hw_sequencer.o dc_sink.o \
|
||||
dc_surface.o dc_link_hwss.o dc_link_dp.o dc_link_ddc.o dc_debug.o dc_stream.o \
|
||||
dc_link_enc_cfg.o
|
||||
|
||||
|
@ -836,8 +836,10 @@ static enum bp_result bios_parser_get_spread_spectrum_info(
|
||||
return get_ss_info_v4_1(bp, signal, index, ss_info);
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
return get_ss_info_v4_2(bp, signal, index, ss_info);
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
@ -106,10 +106,10 @@ static void rn_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
|
||||
for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
|
||||
int dpp_inst, dppclk_khz, prev_dppclk_khz;
|
||||
|
||||
/* Loop index will match dpp->inst if resource exists,
|
||||
* and we want to avoid dependency on dpp object
|
||||
/* Loop index may not match dpp->inst if some pipes disabled,
|
||||
* so select correct inst from res_pool
|
||||
*/
|
||||
dpp_inst = i;
|
||||
dpp_inst = clk_mgr->base.ctx->dc->res_pool->dpps[i]->inst;
|
||||
dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz;
|
||||
|
||||
prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[i];
|
||||
@ -128,7 +128,7 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
|
||||
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
|
||||
struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
|
||||
struct dc *dc = clk_mgr_base->ctx->dc;
|
||||
int display_count, i;
|
||||
int display_count;
|
||||
bool update_dppclk = false;
|
||||
bool update_dispclk = false;
|
||||
bool dpp_clock_lowered = false;
|
||||
@ -210,14 +210,6 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
|
||||
clk_mgr_base->clks.dppclk_khz,
|
||||
safe_to_lower);
|
||||
|
||||
for (i = 0; i < context->stream_count; i++) {
|
||||
if (context->streams[i]->signal == SIGNAL_TYPE_EDP &&
|
||||
context->streams[i]->apply_seamless_boot_optimization) {
|
||||
dc_wait_for_vblank(dc, context->streams[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
clk_mgr_base->clks.actual_dppclk_khz =
|
||||
rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
|
||||
|
||||
@ -769,43 +761,6 @@ static struct wm_table ddr4_wm_table_rn = {
|
||||
}
|
||||
};
|
||||
|
||||
static struct wm_table ddr4_1R_wm_table_rn = {
|
||||
.entries = {
|
||||
{
|
||||
.wm_inst = WM_A,
|
||||
.wm_type = WM_TYPE_PSTATE_CHG,
|
||||
.pstate_latency_us = 11.72,
|
||||
.sr_exit_time_us = 13.90,
|
||||
.sr_enter_plus_exit_time_us = 14.80,
|
||||
.valid = true,
|
||||
},
|
||||
{
|
||||
.wm_inst = WM_B,
|
||||
.wm_type = WM_TYPE_PSTATE_CHG,
|
||||
.pstate_latency_us = 11.72,
|
||||
.sr_exit_time_us = 13.90,
|
||||
.sr_enter_plus_exit_time_us = 14.80,
|
||||
.valid = true,
|
||||
},
|
||||
{
|
||||
.wm_inst = WM_C,
|
||||
.wm_type = WM_TYPE_PSTATE_CHG,
|
||||
.pstate_latency_us = 11.72,
|
||||
.sr_exit_time_us = 13.90,
|
||||
.sr_enter_plus_exit_time_us = 14.80,
|
||||
.valid = true,
|
||||
},
|
||||
{
|
||||
.wm_inst = WM_D,
|
||||
.wm_type = WM_TYPE_PSTATE_CHG,
|
||||
.pstate_latency_us = 11.72,
|
||||
.sr_exit_time_us = 13.90,
|
||||
.sr_enter_plus_exit_time_us = 14.80,
|
||||
.valid = true,
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
static struct wm_table lpddr4_wm_table_rn = {
|
||||
.entries = {
|
||||
{
|
||||
@ -842,46 +797,67 @@ static struct wm_table lpddr4_wm_table_rn = {
|
||||
},
|
||||
}
|
||||
};
|
||||
static unsigned int find_socclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage)
|
||||
|
||||
static unsigned int find_max_fclk_for_voltage(struct dpm_clocks *clock_table,
|
||||
unsigned int voltage)
|
||||
{
|
||||
int i;
|
||||
uint32_t max_clk = 0;
|
||||
|
||||
for (i = 0; i < PP_SMU_NUM_FCLK_DPM_LEVELS; i++) {
|
||||
if (clock_table->FClocks[i].Vol <= voltage) {
|
||||
max_clk = clock_table->FClocks[i].Freq > max_clk ?
|
||||
clock_table->FClocks[i].Freq : max_clk;
|
||||
}
|
||||
}
|
||||
|
||||
return max_clk;
|
||||
}
|
||||
|
||||
static unsigned int find_max_memclk_for_voltage(struct dpm_clocks *clock_table,
|
||||
unsigned int voltage)
|
||||
{
|
||||
int i;
|
||||
uint32_t max_clk = 0;
|
||||
|
||||
for (i = 0; i < PP_SMU_NUM_MEMCLK_DPM_LEVELS; i++) {
|
||||
if (clock_table->MemClocks[i].Vol <= voltage) {
|
||||
max_clk = clock_table->MemClocks[i].Freq > max_clk ?
|
||||
clock_table->MemClocks[i].Freq : max_clk;
|
||||
}
|
||||
}
|
||||
|
||||
return max_clk;
|
||||
}
|
||||
|
||||
static unsigned int find_max_socclk_for_voltage(struct dpm_clocks *clock_table,
|
||||
unsigned int voltage)
|
||||
{
|
||||
int i;
|
||||
uint32_t max_clk = 0;
|
||||
|
||||
for (i = 0; i < PP_SMU_NUM_SOCCLK_DPM_LEVELS; i++) {
|
||||
if (clock_table->SocClocks[i].Vol == voltage)
|
||||
return clock_table->SocClocks[i].Freq;
|
||||
if (clock_table->SocClocks[i].Vol <= voltage) {
|
||||
max_clk = clock_table->SocClocks[i].Freq > max_clk ?
|
||||
clock_table->SocClocks[i].Freq : max_clk;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT(0);
|
||||
return 0;
|
||||
}
|
||||
static unsigned int find_dcfclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < PP_SMU_NUM_DCFCLK_DPM_LEVELS; i++) {
|
||||
if (clock_table->DcfClocks[i].Vol == voltage)
|
||||
return clock_table->DcfClocks[i].Freq;
|
||||
}
|
||||
|
||||
ASSERT(0);
|
||||
return 0;
|
||||
return max_clk;
|
||||
}
|
||||
|
||||
static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params, struct dpm_clocks *clock_table, struct integrated_info *bios_info)
|
||||
{
|
||||
int i, j = 0;
|
||||
unsigned int volt;
|
||||
|
||||
j = -1;
|
||||
|
||||
ASSERT(PP_SMU_NUM_FCLK_DPM_LEVELS <= MAX_NUM_DPM_LVL);
|
||||
|
||||
/* Find lowest DPM, FCLK is filled in reverse order*/
|
||||
|
||||
for (i = PP_SMU_NUM_FCLK_DPM_LEVELS - 1; i >= 0; i--) {
|
||||
if (clock_table->FClocks[i].Freq != 0 && clock_table->FClocks[i].Vol != 0) {
|
||||
/* Find max DPM */
|
||||
for (i = 0; i < PP_SMU_NUM_DCFCLK_DPM_LEVELS; ++i) {
|
||||
if (clock_table->DcfClocks[i].Freq != 0 &&
|
||||
clock_table->DcfClocks[i].Vol != 0)
|
||||
j = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (j == -1) {
|
||||
@ -892,13 +868,18 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params
|
||||
|
||||
bw_params->clk_table.num_entries = j + 1;
|
||||
|
||||
for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) {
|
||||
bw_params->clk_table.entries[i].fclk_mhz = clock_table->FClocks[j].Freq;
|
||||
bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemClocks[j].Freq;
|
||||
bw_params->clk_table.entries[i].voltage = clock_table->FClocks[j].Vol;
|
||||
bw_params->clk_table.entries[i].dcfclk_mhz = find_dcfclk_for_voltage(clock_table, clock_table->FClocks[j].Vol);
|
||||
bw_params->clk_table.entries[i].socclk_mhz = find_socclk_for_voltage(clock_table,
|
||||
bw_params->clk_table.entries[i].voltage);
|
||||
for (i = 0; i < bw_params->clk_table.num_entries; i++) {
|
||||
volt = clock_table->DcfClocks[i].Vol;
|
||||
|
||||
bw_params->clk_table.entries[i].voltage = volt;
|
||||
bw_params->clk_table.entries[i].dcfclk_mhz =
|
||||
clock_table->DcfClocks[i].Freq;
|
||||
bw_params->clk_table.entries[i].fclk_mhz =
|
||||
find_max_fclk_for_voltage(clock_table, volt);
|
||||
bw_params->clk_table.entries[i].memclk_mhz =
|
||||
find_max_memclk_for_voltage(clock_table, volt);
|
||||
bw_params->clk_table.entries[i].socclk_mhz =
|
||||
find_max_socclk_for_voltage(clock_table, volt);
|
||||
}
|
||||
|
||||
bw_params->vram_type = bios_info->memory_type;
|
||||
@ -990,12 +971,8 @@ void rn_clk_mgr_construct(
|
||||
} else {
|
||||
if (is_green_sardine)
|
||||
rn_bw_params.wm_table = ddr4_wm_table_gs;
|
||||
else {
|
||||
if (ctx->dc->config.is_single_rank_dimm)
|
||||
rn_bw_params.wm_table = ddr4_1R_wm_table_rn;
|
||||
else
|
||||
rn_bw_params.wm_table = ddr4_wm_table_rn;
|
||||
}
|
||||
else
|
||||
rn_bw_params.wm_table = ddr4_wm_table_rn;
|
||||
}
|
||||
/* Saved clocks configured at boot for debug purposes */
|
||||
rn_dump_clk_registers(&clk_mgr->base.boot_snapshot, &clk_mgr->base, &log_info);
|
||||
@ -1013,9 +990,6 @@ void rn_clk_mgr_construct(
|
||||
if (status == PP_SMU_RESULT_OK &&
|
||||
ctx->dc_bios && ctx->dc_bios->integrated_info) {
|
||||
rn_clk_mgr_helper_populate_bw_params (clk_mgr->base.bw_params, &clock_table, ctx->dc_bios->integrated_info);
|
||||
/* treat memory config as single channel if memory is asymmetrics. */
|
||||
if (ctx->dc->config.is_asymmetric_memory)
|
||||
clk_mgr->base.bw_params->num_channels = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -59,7 +59,6 @@
|
||||
#include "dc_link_ddc.h"
|
||||
#include "dm_helpers.h"
|
||||
#include "mem_input.h"
|
||||
#include "hubp.h"
|
||||
|
||||
#include "dc_link_dp.h"
|
||||
#include "dc_dmub_srv.h"
|
||||
@ -3219,19 +3218,6 @@ void dc_link_remove_remote_sink(struct dc_link *link, struct dc_sink *sink)
|
||||
}
|
||||
}
|
||||
|
||||
void dc_wait_for_vblank(struct dc *dc, struct dc_stream_state *stream)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < dc->res_pool->pipe_count; i++)
|
||||
if (dc->current_state->res_ctx.pipe_ctx[i].stream == stream) {
|
||||
struct timing_generator *tg =
|
||||
dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg;
|
||||
tg->funcs->wait_for_state(tg, CRTC_STATE_VBLANK);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx *info)
|
||||
{
|
||||
info->displayClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dispclk_khz;
|
||||
@ -3287,7 +3273,7 @@ void dc_allow_idle_optimizations(struct dc *dc, bool allow)
|
||||
if (dc->debug.disable_idle_power_optimizations)
|
||||
return;
|
||||
|
||||
if (dc->clk_mgr->funcs->is_smu_present)
|
||||
if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->is_smu_present)
|
||||
if (!dc->clk_mgr->funcs->is_smu_present(dc->clk_mgr))
|
||||
return;
|
||||
|
||||
|
@ -48,6 +48,7 @@
|
||||
#include "dce/dmub_psr.h"
|
||||
#include "dmub/dmub_srv.h"
|
||||
#include "inc/hw/panel_cntl.h"
|
||||
#include "inc/link_enc_cfg.h"
|
||||
|
||||
#define DC_LOGGER_INIT(logger)
|
||||
|
||||
@ -247,6 +248,16 @@ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type)
|
||||
link->dc->hwss.edp_wait_for_hpd_ready(link, true);
|
||||
}
|
||||
|
||||
/* Link may not have physical HPD pin. */
|
||||
if (link->ep_type != DISPLAY_ENDPOINT_PHY) {
|
||||
if (link->hpd_status)
|
||||
*type = dc_connection_single;
|
||||
else
|
||||
*type = dc_connection_none;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* todo: may need to lock gpio access */
|
||||
hpd_pin = get_hpd_gpio(link->ctx->dc_bios, link->link_id,
|
||||
link->ctx->gpio_service);
|
||||
@ -432,8 +443,18 @@ bool dc_link_is_dp_sink_present(struct dc_link *link)
|
||||
static enum signal_type link_detect_sink(struct dc_link *link,
|
||||
enum dc_detect_reason reason)
|
||||
{
|
||||
enum signal_type result = get_basic_signal_type(link->link_enc->id,
|
||||
link->link_id);
|
||||
enum signal_type result;
|
||||
struct graphics_object_id enc_id;
|
||||
|
||||
if (link->is_dig_mapping_flexible)
|
||||
enc_id = (struct graphics_object_id){.id = ENCODER_ID_UNKNOWN};
|
||||
else
|
||||
enc_id = link->link_enc->id;
|
||||
result = get_basic_signal_type(enc_id, link->link_id);
|
||||
|
||||
/* Use basic signal type for link without physical connector. */
|
||||
if (link->ep_type != DISPLAY_ENDPOINT_PHY)
|
||||
return result;
|
||||
|
||||
/* Internal digital encoder will detect only dongles
|
||||
* that require digital signal
|
||||
@ -762,19 +783,20 @@ static bool detect_dp(struct dc_link *link,
|
||||
}
|
||||
|
||||
if (link->type != dc_connection_mst_branch &&
|
||||
is_dp_active_dongle(link)) {
|
||||
/* DP active dongles */
|
||||
link->type = dc_connection_active_dongle;
|
||||
is_dp_branch_device(link)) {
|
||||
/* DP SST branch */
|
||||
link->type = dc_connection_sst_branch;
|
||||
if (!link->dpcd_caps.sink_count.bits.SINK_COUNT) {
|
||||
/*
|
||||
* active dongle unplug processing for short irq
|
||||
* SST branch unplug processing for short irq
|
||||
*/
|
||||
link_disconnect_sink(link);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (link->dpcd_caps.dongle_type !=
|
||||
DISPLAY_DONGLE_DP_HDMI_CONVERTER)
|
||||
if (is_dp_active_dongle(link) &&
|
||||
(link->dpcd_caps.dongle_type !=
|
||||
DISPLAY_DONGLE_DP_HDMI_CONVERTER))
|
||||
*converter_disable_audio = true;
|
||||
}
|
||||
} else {
|
||||
@ -954,7 +976,8 @@ static bool dc_link_detect_helper(struct dc_link *link,
|
||||
|
||||
case SIGNAL_TYPE_DISPLAY_PORT: {
|
||||
/* wa HPD high coming too early*/
|
||||
if (link->link_enc->features.flags.bits.DP_IS_USB_C == 1) {
|
||||
if (link->ep_type == DISPLAY_ENDPOINT_PHY &&
|
||||
link->link_enc->features.flags.bits.DP_IS_USB_C == 1) {
|
||||
/* if alt mode times out, return false */
|
||||
if (!wait_for_entering_dp_alt_mode(link))
|
||||
return false;
|
||||
@ -974,8 +997,8 @@ static bool dc_link_detect_helper(struct dc_link *link,
|
||||
sizeof(struct dpcd_caps)))
|
||||
same_dpcd = false;
|
||||
}
|
||||
/* Active dongle downstream unplug*/
|
||||
if (link->type == dc_connection_active_dongle &&
|
||||
/* Active SST downstream branch device unplug*/
|
||||
if (link->type == dc_connection_sst_branch &&
|
||||
link->dpcd_caps.sink_count.bits.SINK_COUNT == 0) {
|
||||
if (prev_sink)
|
||||
/* Downstream unplug */
|
||||
@ -1206,14 +1229,25 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
|
||||
{
|
||||
const struct dc *dc = link->dc;
|
||||
bool ret;
|
||||
bool can_apply_seamless_boot = false;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < dc->current_state->stream_count; i++) {
|
||||
if (dc->current_state->streams[i]->apply_seamless_boot_optimization) {
|
||||
can_apply_seamless_boot = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* get out of low power state */
|
||||
clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
|
||||
if (!can_apply_seamless_boot && reason != DETECT_REASON_BOOT)
|
||||
clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
|
||||
|
||||
ret = dc_link_detect_helper(link, reason);
|
||||
|
||||
/* Go back to power optimized state */
|
||||
clk_mgr_optimize_pwr_state(dc, dc->clk_mgr);
|
||||
if (!can_apply_seamless_boot && reason != DETECT_REASON_BOOT)
|
||||
clk_mgr_optimize_pwr_state(dc, dc->clk_mgr);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -1716,6 +1750,8 @@ static enum dc_status enable_link_dp(struct dc_state *state,
|
||||
bool apply_seamless_boot_optimization = false;
|
||||
uint32_t bl_oled_enable_delay = 50; // in ms
|
||||
const uint32_t post_oui_delay = 30; // 30ms
|
||||
/* Reduce link bandwidth between failed link training attempts. */
|
||||
bool do_fallback = false;
|
||||
|
||||
// check for seamless boot
|
||||
for (i = 0; i < state->stream_count; i++) {
|
||||
@ -1754,7 +1790,8 @@ static enum dc_status enable_link_dp(struct dc_state *state,
|
||||
skip_video_pattern,
|
||||
LINK_TRAINING_ATTEMPTS,
|
||||
pipe_ctx,
|
||||
pipe_ctx->stream->signal)) {
|
||||
pipe_ctx->stream->signal,
|
||||
do_fallback)) {
|
||||
link->cur_link_settings = link_settings;
|
||||
status = DC_OK;
|
||||
} else {
|
||||
@ -3475,9 +3512,11 @@ uint32_t dc_bandwidth_in_kbps_from_timing(
|
||||
uint32_t kbps;
|
||||
|
||||
#if defined(CONFIG_DRM_AMD_DC_DCN)
|
||||
if (timing->flags.DSC) {
|
||||
return dc_dsc_stream_bandwidth_in_kbps(timing->pix_clk_100hz, timing->dsc_cfg.bits_per_pixel);
|
||||
}
|
||||
if (timing->flags.DSC)
|
||||
return dc_dsc_stream_bandwidth_in_kbps(timing,
|
||||
timing->dsc_cfg.bits_per_pixel,
|
||||
timing->dsc_cfg.num_slices_h,
|
||||
timing->dsc_cfg.is_dp);
|
||||
#endif
|
||||
|
||||
switch (timing->display_color_depth) {
|
||||
@ -3539,19 +3578,6 @@ void dc_link_set_drive_settings(struct dc *dc,
|
||||
dc_link_dp_set_drive_settings(dc->links[i], lt_settings);
|
||||
}
|
||||
|
||||
void dc_link_perform_link_training(struct dc *dc,
|
||||
struct dc_link_settings *link_setting,
|
||||
bool skip_video_pattern)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < dc->link_count; i++)
|
||||
dc_link_dp_perform_link_training(
|
||||
dc->links[i],
|
||||
link_setting,
|
||||
skip_video_pattern);
|
||||
}
|
||||
|
||||
void dc_link_set_preferred_link_settings(struct dc *dc,
|
||||
struct dc_link_settings *link_setting,
|
||||
struct dc_link *link)
|
||||
@ -3702,8 +3728,22 @@ void dc_link_overwrite_extended_receiver_cap(
|
||||
|
||||
bool dc_link_is_fec_supported(const struct dc_link *link)
|
||||
{
|
||||
struct link_encoder *link_enc = NULL;
|
||||
|
||||
/* Links supporting dynamically assigned link encoder will be assigned next
|
||||
* available encoder if one not already assigned.
|
||||
*/
|
||||
if (link->is_dig_mapping_flexible &&
|
||||
link->dc->res_pool->funcs->link_encs_assign) {
|
||||
link_enc = link_enc_cfg_get_link_enc_used_by_link(link->dc->current_state, link);
|
||||
if (link_enc == NULL)
|
||||
link_enc = link_enc_cfg_get_next_avail_link_enc(link->dc, link->dc->current_state);
|
||||
} else
|
||||
link_enc = link->link_enc;
|
||||
ASSERT(link_enc);
|
||||
|
||||
return (dc_is_dp_signal(link->connector_signal) &&
|
||||
link->link_enc->features.fec_supported &&
|
||||
link_enc->features.fec_supported &&
|
||||
link->dpcd_caps.fec_cap.bits.FEC_CAPABLE &&
|
||||
!IS_FPGA_MAXIMUS_DC(link->ctx->dce_environment));
|
||||
}
|
||||
|
@ -658,7 +658,10 @@ int dc_link_aux_transfer_raw(struct ddc_service *ddc,
|
||||
struct aux_payload *payload,
|
||||
enum aux_return_code_type *operation_result)
|
||||
{
|
||||
return dce_aux_transfer_raw(ddc, payload, operation_result);
|
||||
if (dc_enable_dmub_notifications(ddc->ctx->dc))
|
||||
return dce_aux_transfer_dmub_raw(ddc, payload, operation_result);
|
||||
else
|
||||
return dce_aux_transfer_raw(ddc, payload, operation_result);
|
||||
}
|
||||
|
||||
/* dc_link_aux_transfer_with_retries() - Attempt to submit an
|
||||
@ -682,6 +685,10 @@ bool dc_link_aux_try_to_configure_timeout(struct ddc_service *ddc,
|
||||
bool result = false;
|
||||
struct ddc *ddc_pin = ddc->ddc_pin;
|
||||
|
||||
/* Do not try to access nonexistent DDC pin. */
|
||||
if (ddc->link->ep_type != DISPLAY_ENDPOINT_PHY)
|
||||
return true;
|
||||
|
||||
if (ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]->funcs->configure_timeout) {
|
||||
ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]->funcs->configure_timeout(ddc, timeout);
|
||||
result = true;
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "dpcd_defs.h"
|
||||
#include "dc_dmub_srv.h"
|
||||
#include "dce/dmub_hw_lock_mgr.h"
|
||||
#include "inc/link_enc_cfg.h"
|
||||
|
||||
/*Travis*/
|
||||
static const uint8_t DP_VGA_LVDS_CONVERTER_ID_2[] = "sivarT";
|
||||
@ -107,10 +108,50 @@ static void wait_for_training_aux_rd_interval(
|
||||
wait_in_micro_secs);
|
||||
}
|
||||
|
||||
static enum dpcd_training_patterns
|
||||
dc_dp_training_pattern_to_dpcd_training_pattern(
|
||||
struct dc_link *link,
|
||||
enum dc_dp_training_pattern pattern)
|
||||
{
|
||||
enum dpcd_training_patterns dpcd_tr_pattern =
|
||||
DPCD_TRAINING_PATTERN_VIDEOIDLE;
|
||||
|
||||
switch (pattern) {
|
||||
case DP_TRAINING_PATTERN_SEQUENCE_1:
|
||||
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_1;
|
||||
break;
|
||||
case DP_TRAINING_PATTERN_SEQUENCE_2:
|
||||
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_2;
|
||||
break;
|
||||
case DP_TRAINING_PATTERN_SEQUENCE_3:
|
||||
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_3;
|
||||
break;
|
||||
case DP_TRAINING_PATTERN_SEQUENCE_4:
|
||||
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_4;
|
||||
break;
|
||||
case DP_TRAINING_PATTERN_VIDEOIDLE:
|
||||
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_VIDEOIDLE;
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
DC_LOG_HW_LINK_TRAINING("%s: Invalid HW Training pattern: %d\n",
|
||||
__func__, pattern);
|
||||
break;
|
||||
}
|
||||
|
||||
return dpcd_tr_pattern;
|
||||
}
|
||||
|
||||
static void dpcd_set_training_pattern(
|
||||
struct dc_link *link,
|
||||
union dpcd_training_pattern dpcd_pattern)
|
||||
enum dc_dp_training_pattern training_pattern)
|
||||
{
|
||||
union dpcd_training_pattern dpcd_pattern = { {0} };
|
||||
|
||||
dpcd_pattern.v1_4.TRAINING_PATTERN_SET =
|
||||
dc_dp_training_pattern_to_dpcd_training_pattern(
|
||||
link, training_pattern);
|
||||
|
||||
core_link_write_dpcd(
|
||||
link,
|
||||
DP_TRAINING_PATTERN_SET,
|
||||
@ -132,10 +173,22 @@ static enum dc_dp_training_pattern decide_cr_training_pattern(
|
||||
static enum dc_dp_training_pattern decide_eq_training_pattern(struct dc_link *link,
|
||||
const struct dc_link_settings *link_settings)
|
||||
{
|
||||
struct link_encoder *link_enc;
|
||||
enum dc_dp_training_pattern highest_tp = DP_TRAINING_PATTERN_SEQUENCE_2;
|
||||
struct encoder_feature_support *features = &link->link_enc->features;
|
||||
struct encoder_feature_support *features;
|
||||
struct dpcd_caps *dpcd_caps = &link->dpcd_caps;
|
||||
|
||||
/* Access link encoder capability based on whether it is statically
|
||||
* or dynamically assigned to a link.
|
||||
*/
|
||||
if (link->is_dig_mapping_flexible &&
|
||||
link->dc->res_pool->funcs->link_encs_assign)
|
||||
link_enc = link_enc_cfg_get_link_enc_used_by_link(link->dc->current_state, link);
|
||||
else
|
||||
link_enc = link->link_enc;
|
||||
ASSERT(link_enc);
|
||||
features = &link_enc->features;
|
||||
|
||||
if (features->flags.bits.IS_TPS3_CAPABLE)
|
||||
highest_tp = DP_TRAINING_PATTERN_SEQUENCE_3;
|
||||
|
||||
@ -227,37 +280,6 @@ static void dpcd_set_link_settings(
|
||||
}
|
||||
}
|
||||
|
||||
static enum dpcd_training_patterns
|
||||
dc_dp_training_pattern_to_dpcd_training_pattern(
|
||||
struct dc_link *link,
|
||||
enum dc_dp_training_pattern pattern)
|
||||
{
|
||||
enum dpcd_training_patterns dpcd_tr_pattern =
|
||||
DPCD_TRAINING_PATTERN_VIDEOIDLE;
|
||||
|
||||
switch (pattern) {
|
||||
case DP_TRAINING_PATTERN_SEQUENCE_1:
|
||||
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_1;
|
||||
break;
|
||||
case DP_TRAINING_PATTERN_SEQUENCE_2:
|
||||
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_2;
|
||||
break;
|
||||
case DP_TRAINING_PATTERN_SEQUENCE_3:
|
||||
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_3;
|
||||
break;
|
||||
case DP_TRAINING_PATTERN_SEQUENCE_4:
|
||||
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_4;
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
DC_LOG_HW_LINK_TRAINING("%s: Invalid HW Training pattern: %d\n",
|
||||
__func__, pattern);
|
||||
break;
|
||||
}
|
||||
|
||||
return dpcd_tr_pattern;
|
||||
}
|
||||
|
||||
static uint8_t dc_dp_initialize_scrambling_data_symbols(
|
||||
struct dc_link *link,
|
||||
enum dc_dp_training_pattern pattern)
|
||||
@ -420,20 +442,30 @@ static bool is_cr_done(enum dc_lane_count ln_count,
|
||||
}
|
||||
|
||||
static bool is_ch_eq_done(enum dc_lane_count ln_count,
|
||||
union lane_status *dpcd_lane_status,
|
||||
union lane_align_status_updated *lane_status_updated)
|
||||
union lane_status *dpcd_lane_status)
|
||||
{
|
||||
bool done = true;
|
||||
uint32_t lane;
|
||||
if (!lane_status_updated->bits.INTERLANE_ALIGN_DONE)
|
||||
return false;
|
||||
else {
|
||||
for (lane = 0; lane < (uint32_t)(ln_count); lane++) {
|
||||
if (!dpcd_lane_status[lane].bits.SYMBOL_LOCKED_0 ||
|
||||
!dpcd_lane_status[lane].bits.CHANNEL_EQ_DONE_0)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
for (lane = 0; lane < (uint32_t)(ln_count); lane++)
|
||||
if (!dpcd_lane_status[lane].bits.CHANNEL_EQ_DONE_0)
|
||||
done = false;
|
||||
return done;
|
||||
}
|
||||
|
||||
static bool is_symbol_locked(enum dc_lane_count ln_count,
|
||||
union lane_status *dpcd_lane_status)
|
||||
{
|
||||
bool locked = true;
|
||||
uint32_t lane;
|
||||
for (lane = 0; lane < (uint32_t)(ln_count); lane++)
|
||||
if (!dpcd_lane_status[lane].bits.SYMBOL_LOCKED_0)
|
||||
locked = false;
|
||||
return locked;
|
||||
}
|
||||
|
||||
static inline bool is_interlane_aligned(union lane_align_status_updated align_status)
|
||||
{
|
||||
return align_status.bits.INTERLANE_ALIGN_DONE == 1;
|
||||
}
|
||||
|
||||
static void update_drive_settings(
|
||||
@ -835,10 +867,9 @@ static bool perform_post_lt_adj_req_sequence(
|
||||
if (!is_cr_done(lane_count, dpcd_lane_status))
|
||||
return false;
|
||||
|
||||
if (!is_ch_eq_done(
|
||||
lane_count,
|
||||
dpcd_lane_status,
|
||||
&dpcd_lane_status_updated))
|
||||
if (!is_ch_eq_done(lane_count, dpcd_lane_status) ||
|
||||
!is_symbol_locked(lane_count, dpcd_lane_status) ||
|
||||
!is_interlane_aligned(dpcd_lane_status_updated))
|
||||
return false;
|
||||
|
||||
for (lane = 0; lane < (uint32_t)(lane_count); lane++) {
|
||||
@ -992,9 +1023,9 @@ static enum link_training_result perform_channel_equalization_sequence(
|
||||
return LINK_TRAINING_EQ_FAIL_CR;
|
||||
|
||||
/* 6. check CHEQ done*/
|
||||
if (is_ch_eq_done(lane_count,
|
||||
dpcd_lane_status,
|
||||
&dpcd_lane_status_updated))
|
||||
if (is_ch_eq_done(lane_count, dpcd_lane_status) &&
|
||||
is_symbol_locked(lane_count, dpcd_lane_status) &&
|
||||
is_interlane_aligned(dpcd_lane_status_updated))
|
||||
return LINK_TRAINING_SUCCESS;
|
||||
|
||||
/* 7. update VS/PE/PC2 in lt_settings*/
|
||||
@ -1162,7 +1193,7 @@ static inline enum link_training_result perform_link_training_int(
|
||||
return status;
|
||||
}
|
||||
|
||||
static enum link_training_result check_link_loss_status(
|
||||
enum link_training_result dp_check_link_loss_status(
|
||||
struct dc_link *link,
|
||||
const struct link_training_settings *link_training_setting)
|
||||
{
|
||||
@ -1296,7 +1327,7 @@ static void initialize_training_settings(
|
||||
lt_settings->enhanced_framing = 1;
|
||||
}
|
||||
|
||||
static uint8_t convert_to_count(uint8_t lttpr_repeater_count)
|
||||
uint8_t dp_convert_to_count(uint8_t lttpr_repeater_count)
|
||||
{
|
||||
switch (lttpr_repeater_count) {
|
||||
case 0x80: // 1 lttpr repeater
|
||||
@ -1365,7 +1396,8 @@ static void configure_lttpr_mode_non_transparent(struct dc_link *link)
|
||||
link->dpcd_caps.lttpr_caps.mode = repeater_mode;
|
||||
}
|
||||
|
||||
repeater_cnt = convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
|
||||
repeater_cnt = dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
|
||||
|
||||
for (repeater_id = repeater_cnt; repeater_id > 0; repeater_id--) {
|
||||
aux_interval_address = DP_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1 +
|
||||
((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (repeater_id - 1));
|
||||
@ -1555,7 +1587,6 @@ enum link_training_result dc_link_dp_perform_link_training(
|
||||
{
|
||||
enum link_training_result status = LINK_TRAINING_SUCCESS;
|
||||
struct link_training_settings lt_settings;
|
||||
union dpcd_training_pattern dpcd_pattern = { { 0 } };
|
||||
|
||||
bool fec_enable;
|
||||
uint8_t repeater_cnt;
|
||||
@ -1591,7 +1622,7 @@ enum link_training_result dc_link_dp_perform_link_training(
|
||||
/* 2. perform link training (set link training done
|
||||
* to false is done as well)
|
||||
*/
|
||||
repeater_cnt = convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
|
||||
repeater_cnt = dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
|
||||
|
||||
for (repeater_id = repeater_cnt; (repeater_id > 0 && status == LINK_TRAINING_SUCCESS);
|
||||
repeater_id--) {
|
||||
@ -1621,8 +1652,7 @@ enum link_training_result dc_link_dp_perform_link_training(
|
||||
}
|
||||
|
||||
/* 3. set training not in progress*/
|
||||
dpcd_pattern.v1_4.TRAINING_PATTERN_SET = DPCD_TRAINING_PATTERN_VIDEOIDLE;
|
||||
dpcd_set_training_pattern(link, dpcd_pattern);
|
||||
dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE);
|
||||
if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern) {
|
||||
status = perform_link_training_int(link,
|
||||
&lt_settings,
|
||||
@ -1634,7 +1664,7 @@ enum link_training_result dc_link_dp_perform_link_training(
|
||||
*/
|
||||
if (link->connector_signal != SIGNAL_TYPE_EDP && status == LINK_TRAINING_SUCCESS) {
|
||||
msleep(5);
|
||||
status = check_link_loss_status(link, &lt_settings);
|
||||
status = dp_check_link_loss_status(link, &lt_settings);
|
||||
}
|
||||
|
||||
/* 6. print status message*/
|
||||
@ -1687,18 +1717,31 @@ bool perform_link_training_with_retries(
|
||||
bool skip_video_pattern,
|
||||
int attempts,
|
||||
struct pipe_ctx *pipe_ctx,
|
||||
enum signal_type signal)
|
||||
enum signal_type signal,
|
||||
bool do_fallback)
|
||||
{
|
||||
uint8_t j;
|
||||
uint8_t delay_between_attempts = LINK_TRAINING_RETRY_DELAY;
|
||||
struct dc_stream_state *stream = pipe_ctx->stream;
|
||||
struct dc_link *link = stream->link;
|
||||
enum dp_panel_mode panel_mode;
|
||||
struct link_encoder *link_enc;
|
||||
enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0;
|
||||
struct dc_link_settings currnet_setting = *link_setting;
|
||||
|
||||
/* Dynamically assigned link encoders associated with stream rather than
|
||||
* link.
|
||||
*/
|
||||
if (link->dc->res_pool->funcs->link_encs_assign)
|
||||
link_enc = stream->link_enc;
|
||||
else
|
||||
link_enc = link->link_enc;
|
||||
ASSERT(link_enc);
|
||||
|
||||
/* We need to do this before the link training to ensure the idle pattern in SST
|
||||
* mode will be sent right after the link training
|
||||
*/
|
||||
link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc,
|
||||
link_enc->funcs->connect_dig_be_to_fe(link_enc,
|
||||
pipe_ctx->stream_res.stream_enc->id, true);
|
||||
|
||||
for (j = 0; j < attempts; ++j) {
|
||||
@ -1710,7 +1753,7 @@ bool perform_link_training_with_retries(
|
||||
link,
|
||||
signal,
|
||||
pipe_ctx->clock_source->id,
|
||||
link_setting);
|
||||
&currnet_setting);
|
||||
|
||||
if (stream->sink_patches.dppowerup_delay > 0) {
|
||||
int delay_dp_power_up_in_ms = stream->sink_patches.dppowerup_delay;
|
||||
@ -1725,14 +1768,12 @@ bool perform_link_training_with_retries(
|
||||
panel_mode != DP_PANEL_MODE_DEFAULT);
|
||||
|
||||
if (link->aux_access_disabled) {
|
||||
dc_link_dp_perform_link_training_skip_aux(link, link_setting);
|
||||
dc_link_dp_perform_link_training_skip_aux(link, &currnet_setting);
|
||||
return true;
|
||||
} else {
|
||||
enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0;
|
||||
|
||||
status = dc_link_dp_perform_link_training(
|
||||
link,
|
||||
link_setting,
|
||||
&currnet_setting,
|
||||
skip_video_pattern);
|
||||
if (status == LINK_TRAINING_SUCCESS)
|
||||
return true;
|
||||
@ -1740,7 +1781,7 @@ bool perform_link_training_with_retries(
|
||||
|
||||
/* latest link training still fail, skip delay and keep PHY on
|
||||
*/
|
||||
if (j == (attempts - 1))
|
||||
if (j == (attempts - 1) && link->ep_type == DISPLAY_ENDPOINT_PHY)
|
||||
break;
|
||||
|
||||
DC_LOG_WARNING("%s: Link training attempt %u of %d failed\n",
|
||||
@ -1748,6 +1789,19 @@ bool perform_link_training_with_retries(
|
||||
|
||||
dp_disable_link_phy(link, signal);
|
||||
|
||||
/* Abort link training if failure due to sink being unplugged. */
|
||||
if (status == LINK_TRAINING_ABORT)
|
||||
break;
|
||||
else if (do_fallback) {
|
||||
decide_fallback_link_setting(*link_setting, &currnet_setting, status);
|
||||
/* Fail link training if reduced link bandwidth no longer meets
|
||||
* stream requirements.
|
||||
*/
|
||||
if (dc_bandwidth_in_kbps_from_timing(&stream->timing) <
|
||||
dc_link_bandwidth_kbps(link, &currnet_setting))
|
||||
break;
|
||||
}
|
||||
|
||||
msleep(delay_between_attempts);
|
||||
|
||||
delay_between_attempts += LINK_TRAINING_RETRY_DELAY;
|
||||
@ -2429,6 +2483,12 @@ bool dp_validate_mode_timing(
|
||||
|
||||
const struct dc_link_settings *link_setting;
|
||||
|
||||
/* According to spec, VSC SDP should be used if pixel format is YCbCr420 */
|
||||
if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420 &&
|
||||
!link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED &&
|
||||
dal_graphics_object_id_get_connector_id(link->link_id) != CONNECTOR_ID_VIRTUAL)
|
||||
return false;
|
||||
|
||||
/*always DP fail safe mode*/
|
||||
if ((timing->pix_clk_100hz / 10) == (uint32_t) 25175 &&
|
||||
timing->h_addressable == (uint32_t) 640 &&
|
||||
@ -2611,13 +2671,11 @@ static bool allow_hpd_rx_irq(const struct dc_link *link)
|
||||
/*
|
||||
* Don't handle RX IRQ unless one of following is met:
|
||||
* 1) The link is established (cur_link_settings != unknown)
|
||||
* 2) We kicked off MST detection
|
||||
* 3) We know we're dealing with an active dongle
|
||||
* 2) We know we're dealing with a branch device, SST or MST
|
||||
*/
|
||||
|
||||
if ((link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) ||
|
||||
(link->type == dc_connection_mst_branch) ||
|
||||
is_dp_active_dongle(link))
|
||||
is_dp_branch_device(link))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
@ -2917,6 +2975,22 @@ static void dp_test_send_link_test_pattern(struct dc_link *link)
|
||||
break;
|
||||
}
|
||||
|
||||
switch (dpcd_test_params.bits.CLR_FORMAT) {
|
||||
case 0:
|
||||
pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_RGB;
|
||||
break;
|
||||
case 1:
|
||||
pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_YCBCR422;
|
||||
break;
|
||||
case 2:
|
||||
pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_YCBCR444;
|
||||
break;
|
||||
default:
|
||||
pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_RGB;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
if (requestColorDepth != COLOR_DEPTH_UNDEFINED
|
||||
&& pipe_ctx->stream->timing.display_color_depth != requestColorDepth) {
|
||||
DC_LOG_DEBUG("%s: original bpc %d, changing to %d\n",
|
||||
@ -2924,9 +2998,10 @@ static void dp_test_send_link_test_pattern(struct dc_link *link)
|
||||
pipe_ctx->stream->timing.display_color_depth,
|
||||
requestColorDepth);
|
||||
pipe_ctx->stream->timing.display_color_depth = requestColorDepth;
|
||||
dp_update_dsc_config(pipe_ctx);
|
||||
}
|
||||
|
||||
dp_update_dsc_config(pipe_ctx);
|
||||
|
||||
dc_link_dp_set_test_pattern(
|
||||
link,
|
||||
test_pattern,
|
||||
@ -3182,7 +3257,7 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd
|
||||
*out_link_loss = true;
|
||||
}
|
||||
|
||||
if (link->type == dc_connection_active_dongle &&
|
||||
if (link->type == dc_connection_sst_branch &&
|
||||
hpd_irq_dpcd_data.bytes.sink_cnt.bits.SINK_COUNT
|
||||
!= link->dpcd_sink_count)
|
||||
status = true;
|
||||
@ -3232,6 +3307,12 @@ bool is_mst_supported(struct dc_link *link)
|
||||
}
|
||||
|
||||
bool is_dp_active_dongle(const struct dc_link *link)
|
||||
{
|
||||
return (link->dpcd_caps.dongle_type >= DISPLAY_DONGLE_DP_VGA_CONVERTER) &&
|
||||
(link->dpcd_caps.dongle_type <= DISPLAY_DONGLE_DP_HDMI_CONVERTER);
|
||||
}
|
||||
|
||||
bool is_dp_branch_device(const struct dc_link *link)
|
||||
{
|
||||
return link->dpcd_caps.is_branch_dev;
|
||||
}
|
||||

@ -3593,7 +3674,9 @@ static bool retrieve_link_cap(struct dc_link *link)
lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];

/* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */
is_lttpr_present = (link->dpcd_caps.lttpr_caps.phy_repeater_cnt > 0 &&
link->dpcd_caps.lttpr_caps.phy_repeater_cnt < 0xff &&
link->dpcd_caps.lttpr_caps.max_lane_count > 0 &&
link->dpcd_caps.lttpr_caps.max_lane_count <= 4 &&
link->dpcd_caps.lttpr_caps.revision.raw >= 0x14);
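
The is_lttpr_present expression above bundles several sanity checks on the repeater capability block read from DPCD: a plausible repeater count, a usable lane count, and a recent enough revision (0x14, i.e. rev 1.4). Pulled out as a standalone predicate over a simplified, invented caps struct, it reads as follows.

#include <stdbool.h>
#include <stdint.h>

/* Stand-in for the LTTPR capability fields used in the check above. */
struct sketch_lttpr_caps {
	uint8_t phy_repeater_cnt;   /* raw DPCD repeater-count field */
	uint8_t max_lane_count;
	uint8_t revision_raw;       /* 0x14 corresponds to rev 1.4 */
};

static bool sketch_lttpr_present(const struct sketch_lttpr_caps *caps)
{
	return caps->phy_repeater_cnt > 0 &&
	       caps->phy_repeater_cnt < 0xff &&
	       caps->max_lane_count > 0 &&
	       caps->max_lane_count <= 4 &&
	       caps->revision_raw >= 0x14;
}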

@ -112,8 +112,8 @@ static void update_link_enc_assignment(

/* Return first available DIG link encoder. */
static enum engine_id find_first_avail_link_enc(
struct dc_context *ctx,
struct dc_state *state)
const struct dc_context *ctx,
const struct dc_state *state)
{
enum engine_id eng_id = ENGINE_ID_UNKNOWN;
int i;

@ -270,7 +270,7 @@ struct dc_link *link_enc_cfg_get_link_using_link_enc(

struct link_encoder *link_enc_cfg_get_link_enc_used_by_link(
struct dc_state *state,
struct dc_link *link)
const struct dc_link *link)
{
struct link_encoder *link_enc = NULL;
struct display_endpoint_id ep_id;

@ -296,8 +296,20 @@ struct link_encoder *link_enc_cfg_get_link_enc_used_by_link(

if (stream_idx != -1)
link_enc = state->streams[stream_idx]->link_enc;
else
dm_output_to_console("%s: No link encoder used by link(%d).\n", __func__, link->link_index);

return link_enc;
}

struct link_encoder *link_enc_cfg_get_next_avail_link_enc(
const struct dc *dc,
const struct dc_state *state)
{
struct link_encoder *link_enc = NULL;
enum engine_id eng_id = ENGINE_ID_UNKNOWN;

eng_id = find_first_avail_link_enc(dc->ctx, state);
if (eng_id != ENGINE_ID_UNKNOWN)
link_enc = dc->res_pool->link_encoders[eng_id - ENGINE_ID_DIGA];

return link_enc;
}
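
The new link_enc_cfg_get_next_avail_link_enc() above follows a simple pattern: ask find_first_avail_link_enc() for an unused DIG engine ID, then index the resource pool's encoder array relative to ENGINE_ID_DIGA. A reduced model of that lookup, with an invented pool structure standing in for dc->res_pool, might look like this.

#include <stddef.h>

enum sketch_engine_id {
	SKETCH_ENGINE_ID_DIGA,
	SKETCH_ENGINE_ID_DIGB,
	SKETCH_ENGINE_ID_DIGC,
	SKETCH_ENGINE_ID_COUNT,
	SKETCH_ENGINE_ID_UNKNOWN = -1,
};

struct sketch_link_encoder { int id; };

struct sketch_pool {
	struct sketch_link_encoder *link_encoders[SKETCH_ENGINE_ID_COUNT];
	int in_use[SKETCH_ENGINE_ID_COUNT];     /* nonzero if assigned to a stream */
};

/* Return the first engine ID not currently assigned, or UNKNOWN. */
static enum sketch_engine_id sketch_find_first_avail(const struct sketch_pool *pool)
{
	int i;

	for (i = 0; i < SKETCH_ENGINE_ID_COUNT; i++)
		if (!pool->in_use[i])
			return (enum sketch_engine_id)i;
	return SKETCH_ENGINE_ID_UNKNOWN;
}

/* Map the engine ID back to an encoder, mirroring
 * res_pool->link_encoders[eng_id - ENGINE_ID_DIGA] in the hunk above. */
static struct sketch_link_encoder *sketch_next_avail_enc(const struct sketch_pool *pool)
{
	enum sketch_engine_id eng_id = sketch_find_first_avail(pool);

	if (eng_id == SKETCH_ENGINE_ID_UNKNOWN)
		return NULL;
	return pool->link_encoders[eng_id - SKETCH_ENGINE_ID_DIGA];
}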

@ -384,7 +384,8 @@ void dp_retrain_link_dp_test(struct dc_link *link,
skip_video_pattern,
LINK_TRAINING_ATTEMPTS,
&pipes[i],
SIGNAL_TYPE_DISPLAY_PORT);
SIGNAL_TYPE_DISPLAY_PORT,
false);

link->dc->hwss.enable_stream(&pipes[i]);

@ -1706,12 +1706,6 @@ static bool is_timing_changed(struct dc_stream_state *cur_stream,
if (cur_stream == NULL)
return true;

/* If sink pointer changed, it means this is a hotplug, we should do
* full hw setting.
*/
if (cur_stream->sink != new_stream->sink)
return true;

/* If output color space is changed, need to reprogram info frames */
if (cur_stream->output_color_space != new_stream->output_color_space)
return true;

@ -2679,6 +2673,7 @@ void dc_resource_state_destruct(struct dc_state *context)
dc_stream_release(context->streams[i]);
context->streams[i] = NULL;
}
context->stream_count = 0;
}

void dc_resource_state_copy_construct(

@ -45,7 +45,7 @@
/* forward declaration */
struct aux_payload;

#define DC_VER "3.2.132"
#define DC_VER "3.2.135.1"

#define MAX_SURFACES 3
#define MAX_PLANES 6

@ -308,8 +308,6 @@ struct dc_config {
#endif
uint64_t vblank_alignment_dto_params;
uint8_t vblank_alignment_max_frame_time_diff;
bool is_asymmetric_memory;
bool is_single_rank_dimm;
};

enum visual_confirm {

@ -600,7 +598,6 @@ struct dc_bounding_box_overrides {
int min_dcfclk_mhz;
};

struct dc_state;
struct resource_pool;
struct dce_hwseq;
struct gpu_info_soc_bounding_box_v1_0;

@ -719,7 +716,6 @@ void dc_init_callbacks(struct dc *dc,
void dc_deinit_callbacks(struct dc *dc);
void dc_destroy(struct dc **dc);

void dc_wait_for_vblank(struct dc *dc, struct dc_stream_state *stream);
/*******************************************************************************
* Surface Interfaces
******************************************************************************/

@ -180,5 +180,5 @@ bool dc_dmub_srv_get_dmub_outbox0_msg(const struct dc *dc, struct dmcub_trace_bu

void dc_dmub_trace_event_control(struct dc *dc, bool enable)
{
dm_helpers_dmub_outbox0_interrupt_control(dc->ctx, enable);
dm_helpers_dmub_outbox_interrupt_control(dc->ctx, enable);
}

@ -95,6 +95,7 @@ enum dc_dp_training_pattern {
DP_TRAINING_PATTERN_SEQUENCE_2,
DP_TRAINING_PATTERN_SEQUENCE_3,
DP_TRAINING_PATTERN_SEQUENCE_4,
DP_TRAINING_PATTERN_VIDEOIDLE,
};

struct dc_link_settings {

@ -78,7 +78,8 @@ bool dc_dsc_compute_config(
const struct dc_crtc_timing *timing,
struct dc_dsc_config *dsc_cfg);

uint32_t dc_dsc_stream_bandwidth_in_kbps(uint32_t pix_clk_100hz, uint32_t bpp_x16);
uint32_t dc_dsc_stream_bandwidth_in_kbps(const struct dc_crtc_timing *timing,
uint32_t bpp_x16, uint32_t num_slices_h, bool is_dp);

void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing,
uint32_t max_target_bpp_limit_override_x16,

@ -88,6 +89,6 @@ void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit);

void dc_dsc_policy_set_enable_dsc_when_not_needed(bool enable);

uint32_t dc_dsc_stream_bandwidth_in_kbps(uint32_t pix_clk_100hz, uint32_t bpp_x16);
void dc_dsc_policy_set_disable_dsc_stream_overhead(bool disable);

#endif
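
The dc_dsc_stream_bandwidth_in_kbps() prototype above now takes the full CRTC timing plus a horizontal slice count and a DP flag instead of a bare pixel clock, so call sites pass &timing rather than pix_clk_100hz. The sketch below only illustrates the new call shape; the struct, the helper body, and the example numbers are invented, and the placeholder arithmetic ignores the slice and DP-specific overhead the real helper accounts for.

#include <stdbool.h>
#include <stdint.h>

/* Minimal stand-in for dc_crtc_timing with just the field the sketch needs. */
struct sketch_crtc_timing {
	uint32_t pix_clk_100hz;
};

/* Hypothetical mirror of the new prototype; placeholder arithmetic only
 * (pixel rate times target bpp, in kbps, ignoring any DSC/DP overhead). */
static uint32_t sketch_dsc_stream_bandwidth_in_kbps(const struct sketch_crtc_timing *timing,
						    uint32_t bpp_x16,
						    uint32_t num_slices_h,
						    bool is_dp)
{
	(void)num_slices_h;
	(void)is_dp;
	return (uint32_t)(((uint64_t)timing->pix_clk_100hz * bpp_x16) / 160);
}

static uint32_t sketch_caller(void)
{
	/* Old shape:  kbps = dc_dsc_stream_bandwidth_in_kbps(pix_clk_100hz, bpp_x16);
	 * New shape:  kbps = dc_dsc_stream_bandwidth_in_kbps(&timing, bpp_x16, num_slices_h, is_dp);
	 */
	struct sketch_crtc_timing timing = { .pix_clk_100hz = 5940000 };  /* 594 MHz */

	return sketch_dsc_stream_bandwidth_in_kbps(&timing, 8 * 16, 4, true);
}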

Some files were not shown because too many files have changed in this diff.