Merge tag 'amd-drm-next-5.14-2021-05-19' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-5.14-2021-05-19:

amdgpu:
- Aldebaran updates
- More LTTPR display work
- Vangogh updates
- SDMA 5.x GCR fixes
- RAS fixes
- PCIe ASPM support
- Modifier fixes
- Enable TMZ on Renoir
- Buffer object code cleanup
- Display overlay fixes
- Initial support for multiple eDP panels
- Initial SR-IOV support for Aldebaran
- DP link training refactor
- Misc code cleanups and bug fixes
- SMU regression fixes for variable sized arrays
- MAINTAINERS fixes for amdgpu

amdkfd:
- Initial SR-IOV support for Aldebaran
- Topology fixes
- Initial HMM SVM support
- Misc code cleanups and bug fixes

radeon:
- Misc code cleanups and bug fixes
- SMU regression fixes for variable sized arrays
- Flickering fix for Oland with multiple 4K displays

UAPI:
- amdgpu: Drop AMDGPU_GEM_CREATE_SHADOW flag.
  This was always a kernel internal flag and userspace use of it has always been blocked.
  It's no longer needed so remove it (a hedged illustrative sketch of the userspace-side check follows this list).
- amdkfd: HMM SVM support
  Overview: https://patchwork.freedesktop.org/series/85562/
  Proposed userspace: https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip
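
As a hedged illustration of the first UAPI note above (not the exact upstream code: the function name and the abbreviated allow-list are placeholders), the GEM create path rejects flag bits that are not on its userspace allow-list, which is how a kernel-internal bit such as AMDGPU_GEM_CREATE_SHADOW was always blocked:

/* Hedged sketch: userspace-visible GEM create flags are checked against an
 * allow-list; kernel-internal bits like AMDGPU_GEM_CREATE_SHADOW are not in
 * it, so a request carrying them fails with -EINVAL.
 */
static int sketch_validate_gem_flags(u64 flags)
{
	/* abbreviated allow-list, for illustration only */
	const u64 allowed = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
			    AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
			    AMDGPU_GEM_CREATE_CPU_GTT_USWC |
			    AMDGPU_GEM_CREATE_VRAM_CLEARED;

	if (flags & ~allowed)
		return -EINVAL;
	return 0;
}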

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210520031258.231896-1-alexander.deucher@amd.com
Dave Airlie 2021-05-21 15:29:34 +10:00
commit c99c4d0ca5
159 changed files with 8889 additions and 1568 deletions


@ -878,7 +878,7 @@ M: Harry Wentland <harry.wentland@amd.com>
M: Leo Li <sunpeng.li@amd.com>
L: amd-gfx@lists.freedesktop.org
S: Supported
T: git git://people.freedesktop.org/~agd5f/linux
T: git https://gitlab.freedesktop.org/agd5f/linux.git
F: drivers/gpu/drm/amd/display/
AMD FAM15H PROCESSOR POWER MONITORING DRIVER
@ -954,7 +954,7 @@ AMD POWERPLAY
M: Evan Quan <evan.quan@amd.com>
L: amd-gfx@lists.freedesktop.org
S: Supported
T: git git://people.freedesktop.org/~agd5f/linux
T: git https://gitlab.freedesktop.org/agd5f/linux.git
F: drivers/gpu/drm/amd/pm/powerplay/
AMD SEATTLE DEVICE TREE SUPPORT


@ -56,7 +56,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o
amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o
amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o


@ -1075,7 +1075,8 @@ struct amdgpu_device {
atomic_t throttling_logging_enabled;
struct ratelimit_state throttling_logging_rs;
uint32_t ras_features;
uint32_t ras_hw_enabled;
uint32_t ras_enabled;
bool in_pci_err_recovery;
struct pci_saved_state *pci_state;


@ -76,7 +76,7 @@ struct amdgpu_atif {
/**
* amdgpu_atif_call - call an ATIF method
*
* @handle: acpi handle
* @atif: acpi handle
* @function: the ATIF function to execute
* @params: ATIF function params
*
@ -166,7 +166,6 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas
/**
* amdgpu_atif_verify_interface - verify ATIF
*
* @handle: acpi handle
* @atif: amdgpu atif struct
*
* Execute the ATIF_FUNCTION_VERIFY_INTERFACE ATIF function
@ -240,8 +239,7 @@ out:
/**
* amdgpu_atif_get_notification_params - determine notify configuration
*
* @handle: acpi handle
* @n: atif notification configuration struct
* @atif: acpi handle
*
* Execute the ATIF_FUNCTION_GET_SYSTEM_PARAMETERS ATIF function
* to determine if a notifier is used and if so which one
@ -304,7 +302,7 @@ out:
/**
* amdgpu_atif_query_backlight_caps - get min and max backlight input signal
*
* @handle: acpi handle
* @atif: acpi handle
*
* Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function
* to determine the acceptable range of backlight values
@ -363,7 +361,7 @@ out:
/**
* amdgpu_atif_get_sbios_requests - get requested sbios event
*
* @handle: acpi handle
* @atif: acpi handle
* @req: atif sbios request struct
*
* Execute the ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS ATIF function
@ -899,6 +897,8 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev)
/**
* amdgpu_acpi_is_s0ix_supported
*
* @adev: amdgpu_device pointer
*
* returns true if supported, false if not.
*/
bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)


@ -75,6 +75,7 @@ struct amdgpu_amdkfd_fence {
struct mm_struct *mm;
spinlock_t lock;
char timeline_name[TASK_COMM_LEN];
struct svm_range_bo *svm_bo;
};
struct amdgpu_kfd_dev {
@ -148,7 +149,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
int queue_bit);
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm);
struct mm_struct *mm,
struct svm_range_bo *svm_bo);
#if IS_ENABLED(CONFIG_HSA_AMD)
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
@ -234,22 +236,27 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s
})
/* GPUVM API */
#define drm_priv_to_vm(drm_priv) \
(&((struct amdgpu_fpriv *) \
((struct drm_file *)(drm_priv))->driver_priv)->vm)
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
struct file *filp, u32 pasid,
void **vm, void **process_info,
void **process_info,
struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm);
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv);
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct kgd_dev *kgd, uint64_t va, uint64_t size,
void *vm, struct kgd_mem **mem,
void *drm_priv, struct kgd_mem **mem,
uint64_t *offset, uint32_t flags);
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size);
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
uint64_t *size);
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_sync_memory(
struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
@ -260,7 +267,7 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
struct kfd_vm_fault_info *info);
int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
struct dma_buf *dmabuf,
uint64_t va, void *vm,
uint64_t va, void *drm_priv,
struct kgd_mem **mem, uint64_t *size,
uint64_t *mmap_offset);
int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
@ -270,6 +277,7 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
#else
static inline
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)


@ -25,6 +25,7 @@
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_arcturus.h"
#include "sdma0/sdma0_4_2_2_offset.h"
#include "sdma0/sdma0_4_2_2_sh_mask.h"
#include "sdma1/sdma1_4_2_2_offset.h"


@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include "amdgpu_amdkfd.h"
#include "kfd_svm.h"
static const struct dma_fence_ops amdkfd_fence_ops;
static atomic_t fence_seq = ATOMIC_INIT(0);
@ -60,7 +61,8 @@ static atomic_t fence_seq = ATOMIC_INIT(0);
*/
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm)
struct mm_struct *mm,
struct svm_range_bo *svm_bo)
{
struct amdgpu_amdkfd_fence *fence;
@ -73,7 +75,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
fence->mm = mm;
get_task_comm(fence->timeline_name, current);
spin_lock_init(&fence->lock);
fence->svm_bo = svm_bo;
dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
context, atomic_inc_return(&fence_seq));
@ -111,6 +113,8 @@ static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
* a KFD BO and schedules a job to move the BO.
* If fence is already signaled return true.
* If fence is not signaled schedule an evict KFD process work item.
*
* @f: dma_fence
*/
static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
{
@ -122,16 +126,20 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
if (dma_fence_is_signaled(f))
return true;
if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
return true;
if (!fence->svm_bo) {
if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
return true;
} else {
if (!svm_range_schedule_evict_svm_bo(fence))
return true;
}
return false;
}
/**
* amdkfd_fence_release - callback that fence can be freed
*
* @fence: fence
* @f: dma_fence
*
* This function is called when the reference count becomes zero.
* Drops the mm_struct reference and RCU schedules freeing up the fence.


@ -719,7 +719,7 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
}
/**
* @get_wave_count: Read device registers to get number of waves in flight for
* get_wave_count: Read device registers to get number of waves in flight for
* a particular queue. The method also returns the VMID associated with the
* queue.
*
@ -755,19 +755,19 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
}
/**
* @kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
* kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
* shader engine and aggregates the number of waves that are in flight for the
* process whose pasid is provided as a parameter. The process could have ZERO
* or more queues running and submitting waves to compute units.
*
* @kgd: Handle of device from which to get number of waves in flight
* @pasid: Identifies the process for which this query call is invoked
* @wave_cnt: Output parameter updated with number of waves in flight that
* @pasid_wave_cnt: Output parameter updated with number of waves in flight that
* belong to process with given pasid
* @max_waves_per_cu: Output parameter updated with maximum number of waves
* possible per Compute Unit
*
* @note: It's possible that the device has too many queues (oversubscription)
* Note: It's possible that the device has too many queues (oversubscription)
* in which case a VMID could be remapped to a different PASID. This could lead
* to an inaccurate wave count. Following is a high-level sequence:
* Time T1: vmid = getVmid(); vmid is associated with Pasid P1
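
As a rough sketch of the aggregation these comments describe (illustrative only: CSQ_QUEUES_PER_SE and vmid_to_pasid() below are placeholder names, and the real implementation selects queues through GRBM indexing), per-queue wave counts are only added when the queue's VMID resolves to the queried PASID:

/* Hedged sketch of the per-SE wave aggregation described above. */
static void sketch_cu_occupancy(struct amdgpu_device *adev, int pasid,
				int *pasid_wave_cnt)
{
	int se, queue_idx, wave_cnt, vmid;

	*pasid_wave_cnt = 0;
	for (se = 0; se < adev->gfx.config.max_shader_engines; se++) {
		for (queue_idx = 0; queue_idx < CSQ_QUEUES_PER_SE; queue_idx++) {
			/* placeholder for the SPI CSQ register reads */
			get_wave_count(adev, queue_idx, &wave_cnt, &vmid);
			if (wave_cnt && vmid_to_pasid(adev, vmid) == pasid)
				*pasid_wave_cnt += wave_cnt;
		}
	}
}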


@ -33,9 +33,6 @@
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_xgmi.h"
/* BO flag to indicate a KFD userptr BO */
#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
/* Userptr restore delay, just long enough to allow consecutive VM
* changes to accumulate
*/
@ -108,6 +105,11 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
(kfd_mem_limit.max_ttm_mem_limit >> 20));
}
void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
{
kfd_mem_limit.system_mem_used += size;
}
/* Estimate page table size needed to represent a given memory size
*
* With 4KB pages, we need one 8 byte PTE for each 4KB of memory
@ -217,7 +219,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
u32 domain = bo->preferred_domains;
bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
domain = AMDGPU_GEM_DOMAIN_CPU;
sg = false;
}
@ -967,7 +969,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
info->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
current->mm);
current->mm,
NULL);
if (!info->eviction_fence) {
pr_err("Failed to create eviction fence\n");
ret = -ENOMEM;
@ -1036,15 +1039,19 @@ create_evict_fence_fail:
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
struct file *filp, u32 pasid,
void **vm, void **process_info,
void **process_info,
struct dma_fence **ef)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct drm_file *drm_priv = filp->private_data;
struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
struct amdgpu_vm *avm = &drv_priv->vm;
struct amdgpu_fpriv *drv_priv;
struct amdgpu_vm *avm;
int ret;
ret = amdgpu_file_to_fpriv(filp, &drv_priv);
if (ret)
return ret;
avm = &drv_priv->vm;
/* Already a compute VM? */
if (avm->process_info)
return -EINVAL;
@ -1059,7 +1066,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
if (ret)
return ret;
*vm = (void *)avm;
amdgpu_vm_set_task_info(avm);
return 0;
}
@ -1100,15 +1107,17 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
}
}
void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
struct amdgpu_vm *avm;
if (WARN_ON(!kgd || !vm))
if (WARN_ON(!kgd || !drm_priv))
return;
pr_debug("Releasing process vm %p\n", vm);
avm = drm_priv_to_vm(drm_priv);
pr_debug("Releasing process vm %p\n", avm);
/* The original pasid of amdgpu vm has already been
* released during making a amdgpu vm to a compute vm
@ -1119,9 +1128,9 @@ void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
amdgpu_vm_release_compute(adev, avm);
}
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
{
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
struct amdgpu_bo *pd = avm->root.base.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
@ -1132,11 +1141,11 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct kgd_dev *kgd, uint64_t va, uint64_t size,
void *vm, struct kgd_mem **mem,
void *drm_priv, struct kgd_mem **mem,
uint64_t *offset, uint32_t flags)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
enum ttm_bo_type bo_type = ttm_bo_type_device;
struct sg_table *sg = NULL;
uint64_t user_addr = 0;
@ -1216,6 +1225,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
domain_string(alloc_domain), ret);
goto err_bo_create;
}
ret = drm_vma_node_allow(&gobj->vma_node, drm_priv);
if (ret) {
pr_debug("Failed to allow vma node access. ret %d\n", ret);
goto err_node_allow;
}
bo = gem_to_amdgpu_bo(gobj);
if (bo_type == ttm_bo_type_sg) {
bo->tbo.sg = sg;
@ -1224,7 +1238,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
bo->kfd_bo = *mem;
(*mem)->bo = bo;
if (user_addr)
bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO;
(*mem)->va = va;
(*mem)->domain = domain;
@ -1245,6 +1259,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
allocate_init_user_pages_failed:
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
drm_vma_node_revoke(&gobj->vma_node, drm_priv);
err_node_allow:
amdgpu_bo_unref(&bo);
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
@ -1262,7 +1278,8 @@ err:
}
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
uint64_t *size)
{
struct amdkfd_process_info *process_info = mem->process_info;
unsigned long bo_size = mem->bo->tbo.base.size;
@ -1339,6 +1356,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
}
/* Free the BO*/
drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
drm_gem_object_put(&mem->bo->tbo.base);
mutex_destroy(&mem->lock);
kfree(mem);
@ -1347,10 +1365,10 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
}
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
int ret;
struct amdgpu_bo *bo;
uint32_t domain;
@ -1391,9 +1409,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
mem->va,
mem->va + bo_size * (1 + mem->aql_queue),
vm, domain_string(domain));
avm, domain_string(domain));
ret = reserve_bo_and_vm(mem, vm, &ctx);
ret = reserve_bo_and_vm(mem, avm, &ctx);
if (unlikely(ret))
goto out;
@ -1437,7 +1455,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
}
list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
if (entry->bo_va->base.vm == avm && !entry->is_mapped) {
pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
entry->va, entry->va + bo_size,
entry);
@ -1449,7 +1467,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
goto map_bo_to_gpuvm_failed;
}
ret = vm_update_pds(vm, ctx.sync);
ret = vm_update_pds(avm, ctx.sync);
if (ret) {
pr_err("Failed to update page directories\n");
goto map_bo_to_gpuvm_failed;
@ -1485,11 +1503,11 @@ out:
}
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdkfd_process_info *process_info =
((struct amdgpu_vm *)vm)->process_info;
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
struct amdkfd_process_info *process_info = avm->process_info;
unsigned long bo_size = mem->bo->tbo.base.size;
struct kfd_bo_va_list *entry;
struct bo_vm_reservation_context ctx;
@ -1497,7 +1515,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
mutex_lock(&mem->lock);
ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
ret = reserve_bo_and_cond_vms(mem, avm, BO_VM_MAPPED, &ctx);
if (unlikely(ret))
goto out;
/* If no VMs were reserved, it means the BO wasn't actually mapped */
@ -1506,17 +1524,17 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
goto unreserve_out;
}
ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm);
ret = vm_validate_pt_pd_bos(avm);
if (unlikely(ret))
goto unreserve_out;
pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
mem->va,
mem->va + bo_size * (1 + mem->aql_queue),
vm);
avm);
list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
if (entry->bo_va->base.vm == vm && entry->is_mapped) {
if (entry->bo_va->base.vm == avm && entry->is_mapped) {
pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
entry->va,
entry->va + bo_size,
@ -1642,14 +1660,15 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
struct dma_buf *dma_buf,
uint64_t va, void *vm,
uint64_t va, void *drm_priv,
struct kgd_mem **mem, uint64_t *size,
uint64_t *mmap_offset)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
struct drm_gem_object *obj;
struct amdgpu_bo *bo;
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
int ret;
if (dma_buf->ops != &amdgpu_dmabuf_ops)
/* Can't handle non-graphics buffers */
@ -1670,6 +1689,12 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
if (!*mem)
return -ENOMEM;
ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
if (ret) {
kfree(mem);
return ret;
}
if (size)
*size = amdgpu_bo_size(bo);
@ -2135,7 +2160,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
*/
new_fence = amdgpu_amdkfd_fence_create(
process_info->eviction_fence->base.context,
process_info->eviction_fence->mm);
process_info->eviction_fence->mm,
NULL);
if (!new_fence) {
pr_err("Failed to create eviction fence\n");
ret = -ENOMEM;


@ -672,7 +672,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
}
/**
* cs_parser_fini() - clean parser states
* amdgpu_cs_parser_fini() - clean parser states
* @parser: parser structure holding parsing context.
* @error: error number
* @backoff: indicator to backoff the reservation
@ -1488,7 +1488,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
}
/**
* amdgpu_cs_wait_all_fence - wait on all fences to signal
* amdgpu_cs_wait_all_fences - wait on all fences to signal
*
* @adev: amdgpu device
* @filp: file private
@ -1639,7 +1639,7 @@ err_free_fences:
}
/**
* amdgpu_cs_find_bo_va - find bo_va for VM address
* amdgpu_cs_find_mapping - find bo_va for VM address
*
* @parser: command submission parser context
* @addr: VM address


@ -2856,7 +2856,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
AMD_IP_BLOCK_TYPE_IH,
};
for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
for (i = 0; i < adev->num_ip_blocks; i++) {
int j;
struct amdgpu_ip_block *block;
@ -3179,8 +3179,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
int ret = 0;
/*
* By default timeout for non compute jobs is 10000.
* And there is no timeout enforced on compute jobs.
* By default timeout for non compute jobs is 10000
* and 60000 for compute jobs.
* In SR-IOV or passthrough mode, timeout for compute
* jobs are 60000 by default.
*/
@ -3189,10 +3189,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
else if (amdgpu_passthrough(adev))
adev->compute_timeout = msecs_to_jiffies(60000);
else
adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
adev->compute_timeout = msecs_to_jiffies(60000);
if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
while ((timeout_setting = strsep(&input, ",")) &&
@ -3741,7 +3739,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
{
struct amdgpu_device *adev = drm_to_adev(dev);
int r;
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
@ -3756,7 +3753,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
amdgpu_ras_suspend(adev);
r = amdgpu_device_ip_suspend_phase1(adev);
amdgpu_device_ip_suspend_phase1(adev);
if (!adev->in_s0ix)
amdgpu_amdkfd_suspend(adev, adev->in_runpm);
@ -3766,7 +3763,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
amdgpu_fence_driver_suspend(adev);
r = amdgpu_device_ip_suspend_phase2(adev);
amdgpu_device_ip_suspend_phase2(adev);
/* evict remaining vram memory
* This second call to evict vram is to evict the gart page table
* using the CPU.
@ -5124,7 +5121,8 @@ int amdgpu_device_baco_enter(struct drm_device *dev)
if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
return -ENOTSUPP;
if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
if (ras && adev->ras_enabled &&
adev->nbio.funcs->enable_doorbell_interrupt)
adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
return amdgpu_dpm_baco_enter(adev);
@ -5143,7 +5141,8 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
if (ret)
return ret;
if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
if (ras && adev->ras_enabled &&
adev->nbio.funcs->enable_doorbell_interrupt)
adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
return 0;


@ -288,9 +288,9 @@ module_param_named(msi, amdgpu_msi, int, 0444);
* for SDMA and Video.
*
* By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
* jobs is 10000. And there is no timeout enforced on compute jobs.
* jobs is 10000. The timeout for compute is 60000.
*/
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; "
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; "
"for passthrough or sriov, 10000 for all jobs."
" 0: keep default value. negative: infinity timeout), "
"format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
@ -641,7 +641,8 @@ module_param_named(mes, amdgpu_mes, int, 0444);
/**
* DOC: noretry (int)
* Disable retry faults in the GPU memory controller.
* Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that
* do not support per-process XNACK this also disables retry page faults.
* (0 = retry enabled, 1 = retry disabled, -1 auto (default))
*/
MODULE_PARM_DESC(noretry,
@ -1186,6 +1187,7 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
{0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
{0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
{0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
{0, 0, 0}
};
@ -1598,17 +1600,15 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
if (amdgpu_device_has_dc_support(adev)) {
struct drm_crtc *crtc;
drm_modeset_lock_all(drm_dev);
drm_for_each_crtc(crtc, drm_dev) {
if (crtc->state->active) {
drm_modeset_lock(&crtc->mutex, NULL);
if (crtc->state->active)
ret = -EBUSY;
drm_modeset_unlock(&crtc->mutex);
if (ret < 0)
break;
}
}
drm_modeset_unlock_all(drm_dev);
} else {
struct drm_connector *list_connector;
struct drm_connector_list_iter iter;


@ -434,6 +434,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
*
* @ring: ring to init the fence driver on
* @num_hw_submission: number of entries on the hardware queue
* @sched_score: optional score atomic shared with other schedulers
*
* Init the fence driver for the requested ring (all asics).
* Helper function for amdgpu_fence_driver_init().


@ -60,7 +60,7 @@
*/
/**
* amdgpu_dummy_page_init - init dummy page used by the driver
* amdgpu_gart_dummy_page_init - init dummy page used by the driver
*
* @adev: amdgpu_device pointer
*
@ -86,7 +86,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
}
/**
* amdgpu_dummy_page_fini - free dummy page used by the driver
* amdgpu_gart_dummy_page_fini - free dummy page used by the driver
*
* @adev: amdgpu_device pointer
*


@ -332,6 +332,17 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
mc->agp_size >> 20, mc->agp_start, mc->agp_end);
}
/**
* amdgpu_gmc_fault_key - get hash key from vm fault address and pasid
*
* @addr: 48 bit physical address, page aligned (36 significant bits)
* @pasid: 16 bit process address space identifier
*/
static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
{
return addr << 4 | pasid;
}
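/* Worked example (illustrative only): for a page-aligned addr of 0x7f0000 and
 * pasid 0x12, amdgpu_gmc_fault_key() returns (0x7f0000 << 4) | 0x12 =
 * 0x7f00012; hash_64() of that key then picks the fault-ring bucket used by
 * amdgpu_gmc_filter_faults() below.
 */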
/**
* amdgpu_gmc_filter_faults - filter VM faults
*
@ -348,8 +359,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
uint16_t pasid, uint64_t timestamp)
{
struct amdgpu_gmc *gmc = &adev->gmc;
uint64_t stamp, key = addr << 4 | pasid;
uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid);
struct amdgpu_gmc_fault *fault;
uint32_t hash;
@ -365,7 +375,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
while (fault->timestamp >= stamp) {
uint64_t tmp;
if (fault->key == key)
if (atomic64_read(&fault->key) == key)
return true;
tmp = fault->timestamp;
@ -378,7 +388,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
/* Add the fault to the ring */
fault = &gmc->fault_ring[gmc->last_fault];
fault->key = key;
atomic64_set(&fault->key, key);
fault->timestamp = timestamp;
/* And update the hash */
@ -387,6 +397,36 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
return false;
}
/**
* amdgpu_gmc_filter_faults_remove - remove address from VM faults filter
*
* @adev: amdgpu device structure
* @addr: address of the VM fault
* @pasid: PASID of the process causing the fault
*
* Remove the address from fault filter, then future vm fault on this address
* will pass to retry fault handler to recover.
*/
void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
uint16_t pasid)
{
struct amdgpu_gmc *gmc = &adev->gmc;
uint64_t key = amdgpu_gmc_fault_key(addr, pasid);
struct amdgpu_gmc_fault *fault;
uint32_t hash;
uint64_t tmp;
hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
do {
if (atomic64_cmpxchg(&fault->key, key, 0) == key)
break;
tmp = fault->timestamp;
fault = &gmc->fault_ring[fault->next];
} while (fault->timestamp < tmp);
}
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
{
int r;
@ -415,6 +455,13 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
if (adev->hdp.ras_funcs &&
adev->hdp.ras_funcs->ras_late_init) {
r = adev->hdp.ras_funcs->ras_late_init(adev);
if (r)
return r;
}
return 0;
}
@ -426,11 +473,15 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
if (adev->mmhub.ras_funcs &&
adev->mmhub.ras_funcs->ras_fini)
amdgpu_mmhub_ras_fini(adev);
adev->mmhub.ras_funcs->ras_fini(adev);
if (adev->gmc.xgmi.ras_funcs &&
adev->gmc.xgmi.ras_funcs->ras_fini)
adev->gmc.xgmi.ras_funcs->ras_fini(adev);
if (adev->hdp.ras_funcs &&
adev->hdp.ras_funcs->ras_fini)
adev->hdp.ras_funcs->ras_fini(adev);
}
/*


@ -66,9 +66,9 @@ struct firmware;
* GMC page fault information
*/
struct amdgpu_gmc_fault {
uint64_t timestamp;
uint64_t timestamp:48;
uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER;
uint64_t key:52;
atomic64_t key;
};
/*
@ -318,6 +318,8 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
uint16_t pasid, uint64_t timestamp);
void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
uint16_t pasid);
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);


@ -24,7 +24,8 @@
#include "amdgpu.h"
static inline struct amdgpu_gtt_mgr *to_gtt_mgr(struct ttm_resource_manager *man)
static inline struct amdgpu_gtt_mgr *
to_gtt_mgr(struct ttm_resource_manager *man)
{
return container_of(man, struct amdgpu_gtt_mgr, manager);
}
@ -43,12 +44,14 @@ struct amdgpu_gtt_node {
* the GTT block, in bytes
*/
static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev,
struct device_attribute *attr, char *buf)
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
struct ttm_resource_manager *man;
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
return sysfs_emit(buf, "%llu\n", man->size * PAGE_SIZE);
}
@ -61,12 +64,14 @@ static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev,
* size of the GTT block, in bytes
*/
static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev,
struct device_attribute *attr, char *buf)
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
struct ttm_resource_manager *man;
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
return sysfs_emit(buf, "%llu\n", amdgpu_gtt_mgr_usage(man));
}
@ -75,80 +80,6 @@ static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO,
static DEVICE_ATTR(mem_info_gtt_used, S_IRUGO,
amdgpu_mem_info_gtt_used_show, NULL);
static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func;
/**
* amdgpu_gtt_mgr_init - init GTT manager and DRM MM
*
* @adev: amdgpu_device pointer
* @gtt_size: maximum size of GTT
*
* Allocate and initialize the GTT manager.
*/
int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
{
struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
struct ttm_resource_manager *man = &mgr->manager;
uint64_t start, size;
int ret;
man->use_tt = true;
man->func = &amdgpu_gtt_mgr_func;
ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT);
start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
drm_mm_init(&mgr->mm, start, size);
spin_lock_init(&mgr->lock);
atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT);
ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total);
if (ret) {
DRM_ERROR("Failed to create device file mem_info_gtt_total\n");
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used);
if (ret) {
DRM_ERROR("Failed to create device file mem_info_gtt_used\n");
return ret;
}
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
ttm_resource_manager_set_used(man, true);
return 0;
}
/**
* amdgpu_gtt_mgr_fini - free and destroy GTT manager
*
* @adev: amdgpu_device pointer
*
* Destroy and free the GTT manager, returns -EBUSY if ranges are still
* allocated inside it.
*/
void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
{
struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
struct ttm_resource_manager *man = &mgr->manager;
int ret;
ttm_resource_manager_set_used(man, false);
ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
if (ret)
return;
spin_lock(&mgr->lock);
drm_mm_takedown(&mgr->mm);
spin_unlock(&mgr->lock);
device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total);
device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used);
ttm_resource_manager_cleanup(man);
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
}
/**
* amdgpu_gtt_mgr_has_gart_addr - Check if mem has address space
*
@ -265,6 +196,13 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man)
return (result > 0 ? result : 0) * PAGE_SIZE;
}
/**
* amdgpu_gtt_mgr_recover - re-init gart
*
* @man: TTM memory type manager
*
* Re-init the gart for each known BO in the GTT.
*/
int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
{
struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
@ -311,3 +249,76 @@ static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = {
.free = amdgpu_gtt_mgr_del,
.debug = amdgpu_gtt_mgr_debug
};
/**
* amdgpu_gtt_mgr_init - init GTT manager and DRM MM
*
* @adev: amdgpu_device pointer
* @gtt_size: maximum size of GTT
*
* Allocate and initialize the GTT manager.
*/
int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
{
struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
struct ttm_resource_manager *man = &mgr->manager;
uint64_t start, size;
int ret;
man->use_tt = true;
man->func = &amdgpu_gtt_mgr_func;
ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT);
start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
drm_mm_init(&mgr->mm, start, size);
spin_lock_init(&mgr->lock);
atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT);
ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total);
if (ret) {
DRM_ERROR("Failed to create device file mem_info_gtt_total\n");
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used);
if (ret) {
DRM_ERROR("Failed to create device file mem_info_gtt_used\n");
return ret;
}
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
ttm_resource_manager_set_used(man, true);
return 0;
}
/**
* amdgpu_gtt_mgr_fini - free and destroy GTT manager
*
* @adev: amdgpu_device pointer
*
* Destroy and free the GTT manager, returns -EBUSY if ranges are still
* allocated inside it.
*/
void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
{
struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
struct ttm_resource_manager *man = &mgr->manager;
int ret;
ttm_resource_manager_set_used(man, false);
ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
if (ret)
return;
spin_lock(&mgr->lock);
drm_mm_takedown(&mgr->mm);
spin_unlock(&mgr->lock);
device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total);
device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used);
ttm_resource_manager_cleanup(man);
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
}


@ -0,0 +1,69 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "amdgpu_ras.h"
int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev)
{
int r;
struct ras_ih_if ih_info = {
.cb = NULL,
};
struct ras_fs_if fs_info = {
.sysfs_name = "hdp_err_count",
};
if (!adev->hdp.ras_if) {
adev->hdp.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
if (!adev->hdp.ras_if)
return -ENOMEM;
adev->hdp.ras_if->block = AMDGPU_RAS_BLOCK__HDP;
adev->hdp.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
adev->hdp.ras_if->sub_block_index = 0;
strcpy(adev->hdp.ras_if->name, "hdp");
}
ih_info.head = fs_info.head = *adev->hdp.ras_if;
r = amdgpu_ras_late_init(adev, adev->hdp.ras_if,
&fs_info, &ih_info);
if (r || !amdgpu_ras_is_supported(adev, adev->hdp.ras_if->block)) {
kfree(adev->hdp.ras_if);
adev->hdp.ras_if = NULL;
}
return r;
}
void amdgpu_hdp_ras_fini(struct amdgpu_device *adev)
{
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
adev->hdp.ras_if) {
struct ras_common_if *ras_if = adev->hdp.ras_if;
struct ras_ih_if ih_info = {
.cb = NULL,
};
amdgpu_ras_late_fini(adev, ras_if, &ih_info);
kfree(ras_if);
}
}


@ -23,18 +23,29 @@
#ifndef __AMDGPU_HDP_H__
#define __AMDGPU_HDP_H__
struct amdgpu_hdp_ras_funcs {
int (*ras_late_init)(struct amdgpu_device *adev);
void (*ras_fini)(struct amdgpu_device *adev);
void (*query_ras_error_count)(struct amdgpu_device *adev,
void *ras_error_status);
void (*reset_ras_error_count)(struct amdgpu_device *adev);
};
struct amdgpu_hdp_funcs {
void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
void (*invalidate_hdp)(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
void (*reset_ras_error_count)(struct amdgpu_device *adev);
void (*update_clock_gating)(struct amdgpu_device *adev, bool enable);
void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
void (*init_registers)(struct amdgpu_device *adev);
};
struct amdgpu_hdp {
struct ras_common_if *ras_if;
const struct amdgpu_hdp_funcs *funcs;
const struct amdgpu_hdp_ras_funcs *ras_funcs;
};
int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev);
void amdgpu_hdp_ras_fini(struct amdgpu_device *adev);
#endif /* __AMDGPU_HDP_H__ */
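
A hedged sketch of how an ASIC-specific HDP block might wire up these callbacks (the sketch_* names are placeholders, not functions added by this series); the common amdgpu_hdp_ras_late_init()/amdgpu_hdp_ras_fini() helpers declared above slot straight into the ras_late_init/ras_fini hooks:

/* Hedged sketch: ASIC code fills in amdgpu_hdp_ras_funcs like this. */
static const struct amdgpu_hdp_ras_funcs sketch_hdp_ras_funcs = {
	.ras_late_init = amdgpu_hdp_ras_late_init,
	.ras_fini = amdgpu_hdp_ras_fini,
	.query_ras_error_count = sketch_hdp_query_ras_error_count,
	.reset_ras_error_count = sketch_hdp_reset_ras_error_count,
};

/* ...and points adev->hdp.ras_funcs at it during early init (placeholder):
 *	adev->hdp.ras_funcs = &sketch_hdp_ras_funcs;
 */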


@ -328,7 +328,7 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
if (i == AMDGPU_IB_POOL_DIRECT)
size = PAGE_SIZE * 2;
size = PAGE_SIZE * 6;
else
size = AMDGPU_IB_POOL_SIZE;


@ -175,7 +175,9 @@ static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev,
cur_rptr += ih->ptr_mask + 1;
*prev_rptr = cur_rptr;
return cur_rptr >= checkpoint_wptr;
/* check ring is empty to workaround missing wptr overflow flag */
return cur_rptr >= checkpoint_wptr ||
(cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih);
}
/**


@ -986,7 +986,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (!ras)
return -EINVAL;
ras_mask = (uint64_t)ras->supported << 32 | ras->features;
ras_mask = (uint64_t)adev->ras_enabled << 32 | ras->features;
return copy_to_user(out, &ras_mask,
min_t(u64, size, sizeof(ras_mask))) ?
@ -1114,7 +1114,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
dev_warn(adev->dev, "No more PASIDs available!");
pasid = 0;
}
r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid);
r = amdgpu_vm_init(adev, &fpriv->vm, pasid);
if (r)
goto error_pasid;


@ -28,6 +28,7 @@ struct amdgpu_mmhub_ras_funcs {
void *ras_error_status);
void (*query_ras_error_status)(struct amdgpu_device *adev);
void (*reset_ras_error_count)(struct amdgpu_device *adev);
void (*reset_ras_error_status)(struct amdgpu_device *adev);
};
struct amdgpu_mmhub_funcs {


@ -155,3 +155,89 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
mmu_interval_notifier_remove(&bo->notifier);
bo->notifier.mm = NULL;
}
int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
struct mm_struct *mm, struct page **pages,
uint64_t start, uint64_t npages,
struct hmm_range **phmm_range, bool readonly,
bool mmap_locked)
{
struct hmm_range *hmm_range;
unsigned long timeout;
unsigned long i;
unsigned long *pfns;
int r = 0;
hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL);
if (unlikely(!hmm_range))
return -ENOMEM;
pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
if (unlikely(!pfns)) {
r = -ENOMEM;
goto out_free_range;
}
hmm_range->notifier = notifier;
hmm_range->default_flags = HMM_PFN_REQ_FAULT;
if (!readonly)
hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
hmm_range->hmm_pfns = pfns;
hmm_range->start = start;
hmm_range->end = start + npages * PAGE_SIZE;
/* Assuming 512MB takes maximum 1 second to fault page address */
timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT;
timeout = jiffies + msecs_to_jiffies(timeout);
retry:
hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
if (likely(!mmap_locked))
mmap_read_lock(mm);
r = hmm_range_fault(hmm_range);
if (likely(!mmap_locked))
mmap_read_unlock(mm);
if (unlikely(r)) {
/*
* FIXME: This timeout should encompass the retry from
* mmu_interval_read_retry() as well.
*/
if (r == -EBUSY && !time_after(jiffies, timeout))
goto retry;
goto out_free_pfns;
}
/*
* Due to default_flags, all pages are HMM_PFN_VALID or
* hmm_range_fault() fails. FIXME: The pages cannot be touched outside
* the notifier_lock, and mmu_interval_read_retry() must be done first.
*/
for (i = 0; pages && i < npages; i++)
pages[i] = hmm_pfn_to_page(pfns[i]);
*phmm_range = hmm_range;
return 0;
out_free_pfns:
kvfree(pfns);
out_free_range:
kfree(hmm_range);
return r;
}
int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)
{
int r;
r = mmu_interval_read_retry(hmm_range->notifier,
hmm_range->notifier_seq);
kvfree(hmm_range->hmm_pfns);
kfree(hmm_range);
return r;
}
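
The two helpers above are meant to be used as a pair; a hedged caller sketch follows (the surrounding function, the bo/pages arguments and the locking comment are placeholders rather than the real call sites in this series):

/* Hedged usage sketch for the new HMM range helpers. */
static int sketch_fault_user_pages(struct amdgpu_bo *bo, struct page **pages,
				   uint64_t start, uint64_t npages)
{
	struct hmm_range *range;
	int r;

	r = amdgpu_hmm_range_get_pages(&bo->notifier, current->mm, pages,
				       start, npages, &range, false, false);
	if (r)
		return r;

	/* ... set up the GPU mapping under the notifier lock ... */

	/* non-zero means the range was invalidated and the caller must retry */
	return amdgpu_hmm_range_get_pages_done(range);
}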


@ -30,6 +30,13 @@
#include <linux/workqueue.h>
#include <linux/interval_tree.h>
int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
struct mm_struct *mm, struct page **pages,
uint64_t start, uint64_t npages,
struct hmm_range **phmm_range, bool readonly,
bool mmap_locked);
int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
#if defined(CONFIG_HMM_MIRROR)
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);


@ -491,7 +491,18 @@ bool amdgpu_bo_support_uswc(u64 bo_flags)
#endif
}
static int amdgpu_bo_do_create(struct amdgpu_device *adev,
/**
* amdgpu_bo_create - create an &amdgpu_bo buffer object
* @adev: amdgpu device object
* @bp: parameters to be used for the buffer object
* @bo_ptr: pointer to the buffer object pointer
*
* Creates an &amdgpu_bo buffer object.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_create(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr)
{
@ -601,9 +612,9 @@ fail_unreserve:
return r;
}
static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
unsigned long size,
struct amdgpu_bo *bo)
int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
unsigned long size,
struct amdgpu_bo *bo)
{
struct amdgpu_bo_param bp;
int r;
@ -614,13 +625,12 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
memset(&bp, 0, sizeof(bp));
bp.size = size;
bp.domain = AMDGPU_GEM_DOMAIN_GTT;
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
AMDGPU_GEM_CREATE_SHADOW;
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
bp.type = ttm_bo_type_kernel;
bp.resv = bo->tbo.base.resv;
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
r = amdgpu_bo_do_create(adev, &bp, &bo->shadow);
r = amdgpu_bo_create(adev, &bp, &bo->shadow);
if (!r) {
bo->shadow->parent = amdgpu_bo_ref(bo);
mutex_lock(&adev->shadow_list_lock);
@ -631,50 +641,6 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
return r;
}
/**
* amdgpu_bo_create - create an &amdgpu_bo buffer object
* @adev: amdgpu device object
* @bp: parameters to be used for the buffer object
* @bo_ptr: pointer to the buffer object pointer
*
* Creates an &amdgpu_bo buffer object; and if requested, also creates a
* shadow object.
* Shadow object is used to backup the original buffer object, and is always
* in GTT.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_create(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr)
{
u64 flags = bp->flags;
int r;
bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
r = amdgpu_bo_do_create(adev, bp, bo_ptr);
if (r)
return r;
if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) {
if (!bp->resv)
WARN_ON(dma_resv_lock((*bo_ptr)->tbo.base.resv,
NULL));
r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr);
if (!bp->resv)
dma_resv_unlock((*bo_ptr)->tbo.base.resv);
if (r)
amdgpu_bo_unref(bo_ptr);
}
return r;
}
/**
* amdgpu_bo_create_user - create an &amdgpu_bo_user buffer object
* @adev: amdgpu device object
@ -694,9 +660,8 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev,
struct amdgpu_bo *bo_ptr;
int r;
bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
bp->bo_ptr_size = sizeof(struct amdgpu_bo_user);
r = amdgpu_bo_do_create(adev, bp, &bo_ptr);
r = amdgpu_bo_create(adev, bp, &bo_ptr);
if (r)
return r;
@ -1595,7 +1560,6 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS);
amdgpu_bo_print_flag(m, bo, CPU_GTT_USWC);
amdgpu_bo_print_flag(m, bo, VRAM_CLEARED);
amdgpu_bo_print_flag(m, bo, SHADOW);
amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID);
amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC);


@ -37,6 +37,10 @@
#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
#define AMDGPU_BO_MAX_PLACEMENTS 3
/* BO flag to indicate a KFD userptr BO */
#define AMDGPU_AMDKFD_CREATE_USERPTR_BO (1ULL << 63)
#define AMDGPU_AMDKFD_CREATE_SVM_BO (1ULL << 62)
#define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
struct amdgpu_bo_param {
@ -267,6 +271,9 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev,
struct amdgpu_bo_user **ubo_ptr);
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr);
int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
unsigned long size,
struct amdgpu_bo *bo);
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
void amdgpu_bo_kunmap(struct amdgpu_bo *bo);


@ -417,31 +417,12 @@ static int psp_tmr_init(struct psp_context *psp)
return ret;
}
static int psp_clear_vf_fw(struct psp_context *psp)
{
int ret;
struct psp_gfx_cmd_resp *cmd;
if (!amdgpu_sriov_vf(psp->adev) || psp->adev->asic_type != CHIP_NAVI12)
return 0;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
cmd->cmd_id = GFX_CMD_ID_CLEAR_VF_FW;
ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
kfree(cmd);
return ret;
}
static bool psp_skip_tmr(struct psp_context *psp)
{
switch (psp->adev->asic_type) {
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
case CHIP_ALDEBARAN:
return true;
default:
return false;
@ -1037,6 +1018,13 @@ static int psp_ras_load(struct psp_context *psp)
memset(psp->fw_pri_buf, 0, PSP_1_MEG);
memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, psp->ta_ras_ucode_size);
ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
if (psp->adev->gmc.xgmi.connected_to_cpu)
ras_cmd->ras_in_message.init_flags.poison_mode_en = 1;
else
ras_cmd->ras_in_message.init_flags.dgpu_mode = 1;
psp_prep_ta_load_cmd_buf(cmd,
psp->fw_pri_mc_addr,
psp->ta_ras_ucode_size,
@ -1046,8 +1034,6 @@ static int psp_ras_load(struct psp_context *psp)
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
if (!ret) {
psp->ras.session_id = cmd->resp.session_id;
@ -1128,6 +1114,31 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
return ret;
}
static int psp_ras_status_to_errno(struct amdgpu_device *adev,
enum ta_ras_status ras_status)
{
int ret = -EINVAL;
switch (ras_status) {
case TA_RAS_STATUS__SUCCESS:
ret = 0;
break;
case TA_RAS_STATUS__RESET_NEEDED:
ret = -EAGAIN;
break;
case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE:
dev_warn(adev->dev, "RAS WARN: ras function unavailable\n");
break;
case TA_RAS_STATUS__ERROR_ASD_READ_WRITE:
dev_warn(adev->dev, "RAS WARN: asd read or write failed\n");
break;
default:
dev_err(adev->dev, "RAS ERROR: ras function failed ret 0x%X\n", ret);
}
return ret;
}
int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable)
{
@ -1151,7 +1162,7 @@ int psp_ras_enable_features(struct psp_context *psp,
if (ret)
return -EINVAL;
return ras_cmd->ras_status;
return psp_ras_status_to_errno(psp->adev, ras_cmd->ras_status);
}
static int psp_ras_terminate(struct psp_context *psp)
@ -1234,7 +1245,7 @@ int psp_ras_trigger_error(struct psp_context *psp,
if (amdgpu_ras_intr_triggered())
return 0;
return ras_cmd->ras_status;
return psp_ras_status_to_errno(psp->adev, ras_cmd->ras_status);
}
// ras end
@ -1920,12 +1931,6 @@ static int psp_hw_start(struct psp_context *psp)
return ret;
}
ret = psp_clear_vf_fw(psp);
if (ret) {
DRM_ERROR("PSP clear vf fw!\n");
return ret;
}
ret = psp_boot_config_set(adev);
if (ret) {
DRM_WARN("PSP set boot config@\n");
@ -2166,7 +2171,7 @@ static int psp_load_smu_fw(struct psp_context *psp)
return 0;
if ((amdgpu_in_reset(adev) &&
ras && ras->supported &&
ras && adev->ras_enabled &&
(adev->asic_type == CHIP_ARCTURUS ||
adev->asic_type == CHIP_VEGA20)) ||
(adev->in_runpm &&
@ -2434,7 +2439,6 @@ static int psp_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct psp_context *psp = &adev->psp;
int ret;
if (psp->adev->psp.ta_fw) {
psp_ras_terminate(psp);
@ -2445,11 +2449,6 @@ static int psp_hw_fini(void *handle)
}
psp_asd_unload(psp);
ret = psp_clear_vf_fw(psp);
if (ret) {
DRM_ERROR("PSP clear vf fw!\n");
return ret;
}
psp_tmr_terminate(psp);
psp_ring_destroy(psp, PSP_RING_TYPE__KM);


@ -33,6 +33,7 @@
#include "amdgpu_atomfirmware.h"
#include "amdgpu_xgmi.h"
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include "atom.h"
static const char *RAS_FS_NAME = "ras";
@ -320,11 +321,14 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
* "disable" requires only the block.
* "enable" requires the block and error type.
* "inject" requires the block, error type, address, and value.
*
* The block is one of: umc, sdma, gfx, etc.
* see ras_block_string[] for details
*
* The error type is one of: ue, ce, where,
* ue is multi-uncorrectable
* ce is single-correctable
*
* The sub-block is the sub-block index; pass 0 if there is no sub-block.
* The address and value are hexadecimal numbers, leading 0x is optional.
*
@ -531,7 +535,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
if (!adev->ras_features || !con)
if (!adev->ras_enabled || !con)
return NULL;
if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
@ -558,7 +562,7 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
struct ras_manager *obj;
int i;
if (!adev->ras_features || !con)
if (!adev->ras_enabled || !con)
return NULL;
if (head) {
@ -585,36 +589,11 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
}
/* obj end */
static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev,
const char* invoke_type,
const char* block_name,
enum ta_ras_status ret)
{
switch (ret) {
case TA_RAS_STATUS__SUCCESS:
return;
case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE:
dev_warn(adev->dev,
"RAS WARN: %s %s currently unavailable\n",
invoke_type,
block_name);
break;
default:
dev_err(adev->dev,
"RAS ERROR: %s %s error failed ret 0x%X\n",
invoke_type,
block_name,
ret);
}
}
/* feature ctl begin */
static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
struct ras_common_if *head)
struct ras_common_if *head)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
return con->hw_supported & BIT(head->block);
return adev->ras_hw_enabled & BIT(head->block);
}
static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev,
@ -658,11 +637,7 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
con->features |= BIT(head->block);
} else {
if (obj && amdgpu_ras_is_feature_enabled(adev, head)) {
/* skip clean gfx ras context feature for VEGA20 Gaming.
* will clean later
*/
if (!(!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)))
con->features &= ~BIT(head->block);
con->features &= ~BIT(head->block);
put_obj(obj);
}
}
@ -708,15 +683,10 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
if (!amdgpu_ras_intr_triggered()) {
ret = psp_ras_enable_features(&adev->psp, info, enable);
if (ret) {
amdgpu_ras_parse_status_code(adev,
enable ? "enable":"disable",
ras_block_str(head->block),
(enum ta_ras_status)ret);
if (ret == TA_RAS_STATUS__RESET_NEEDED)
ret = -EAGAIN;
else
ret = -EINVAL;
dev_err(adev->dev, "ras %s %s failed %d\n",
enable ? "enable":"disable",
ras_block_str(head->block),
ret);
goto out;
}
}
@ -770,6 +740,10 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
con->features |= BIT(head->block);
ret = amdgpu_ras_feature_enable(adev, head, 0);
/* clean gfx block ras features flag */
if (adev->ras_enabled && head->block == AMDGPU_RAS_BLOCK__GFX)
con->features &= ~BIT(head->block);
}
} else
ret = amdgpu_ras_feature_enable(adev, head, enable);
@ -890,6 +864,11 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
adev->gmc.xgmi.ras_funcs->query_ras_error_count)
adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data);
break;
case AMDGPU_RAS_BLOCK__HDP:
if (adev->hdp.ras_funcs &&
adev->hdp.ras_funcs->query_ras_error_count)
adev->hdp.ras_funcs->query_ras_error_count(adev, &err_data);
break;
default:
break;
}
@ -901,17 +880,42 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
info->ce_count = obj->err_data.ce_count;
if (err_data.ce_count) {
dev_info(adev->dev, "%ld correctable hardware errors "
if (adev->smuio.funcs &&
adev->smuio.funcs->get_socket_id &&
adev->smuio.funcs->get_die_id) {
dev_info(adev->dev, "socket: %d, die: %d "
"%ld correctable hardware errors "
"detected in %s block, no user "
"action is needed.\n",
adev->smuio.funcs->get_socket_id(adev),
adev->smuio.funcs->get_die_id(adev),
obj->err_data.ce_count,
ras_block_str(info->head.block));
} else {
dev_info(adev->dev, "%ld correctable hardware errors "
"detected in %s block, no user "
"action is needed.\n",
obj->err_data.ce_count,
ras_block_str(info->head.block));
}
}
if (err_data.ue_count) {
dev_info(adev->dev, "%ld uncorrectable hardware errors "
if (adev->smuio.funcs &&
adev->smuio.funcs->get_socket_id &&
adev->smuio.funcs->get_die_id) {
dev_info(adev->dev, "socket: %d, die: %d "
"%ld uncorrectable hardware errors "
"detected in %s block\n",
adev->smuio.funcs->get_socket_id(adev),
adev->smuio.funcs->get_die_id(adev),
obj->err_data.ue_count,
ras_block_str(info->head.block));
} else {
dev_info(adev->dev, "%ld uncorrectable hardware errors "
"detected in %s block\n",
obj->err_data.ue_count,
ras_block_str(info->head.block));
}
}
return 0;
@ -937,11 +941,20 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
if (adev->mmhub.ras_funcs &&
adev->mmhub.ras_funcs->reset_ras_error_count)
adev->mmhub.ras_funcs->reset_ras_error_count(adev);
if (adev->mmhub.ras_funcs &&
adev->mmhub.ras_funcs->reset_ras_error_status)
adev->mmhub.ras_funcs->reset_ras_error_status(adev);
break;
case AMDGPU_RAS_BLOCK__SDMA:
if (adev->sdma.funcs->reset_ras_error_count)
adev->sdma.funcs->reset_ras_error_count(adev);
break;
case AMDGPU_RAS_BLOCK__HDP:
if (adev->hdp.ras_funcs &&
adev->hdp.ras_funcs->reset_ras_error_count)
adev->hdp.ras_funcs->reset_ras_error_count(adev);
break;
default:
break;
}
@ -1022,10 +1035,9 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
ret = -EINVAL;
}
amdgpu_ras_parse_status_code(adev,
"inject",
ras_block_str(info->head.block),
(enum ta_ras_status)ret);
if (ret)
dev_err(adev->dev, "ras inject %s failed %d\n",
ras_block_str(info->head.block), ret);
return ret;
}
@ -1038,7 +1050,7 @@ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
struct ras_manager *obj;
struct ras_err_data data = {0, 0};
if (!adev->ras_features || !con)
if (!adev->ras_enabled || !con)
return 0;
list_for_each_entry(obj, &con->head, node) {
@ -1265,8 +1277,8 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct dentry *dir;
struct drm_minor *minor = adev_to_drm(adev)->primary;
struct drm_minor *minor = adev_to_drm(adev)->primary;
struct dentry *dir;
dir = debugfs_create_dir(RAS_FS_NAME, minor->debugfs_root);
debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, dir, adev,
@ -1275,6 +1287,8 @@ static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *
&amdgpu_ras_debugfs_eeprom_ops);
debugfs_create_u32("bad_page_cnt_threshold", 0444, dir,
&con->bad_page_cnt_threshold);
debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled);
debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled);
/*
* After one uncorrectable error happens, usually GPU recovery will
@ -1561,7 +1575,7 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
if (!adev->ras_features || !con)
if (!adev->ras_enabled || !con)
return;
list_for_each_entry(obj, &con->head, node) {
@ -1611,7 +1625,7 @@ static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
if (!adev->ras_features || !con)
if (!adev->ras_enabled || !con)
return;
list_for_each_entry(obj, &con->head, node) {
@ -1925,7 +1939,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
bool exc_err_limit = false;
int ret;
if (adev->ras_features && con)
if (adev->ras_enabled && con)
data = &con->eh_data;
else
return 0;
@ -2028,6 +2042,23 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
adev->asic_type == CHIP_SIENNA_CICHLID;
}
/*
* this is a workaround for the vega20 workstation sku:
* force enable gfx ras and ignore the vbios gfx ras flag,
* since GC EDC can not be written
*/
static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
{
struct atom_context *ctx = adev->mode_info.atom_context;
if (!ctx)
return;
if (strnstr(ctx->vbios_version, "D16406",
sizeof(ctx->vbios_version)))
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
}
/*
* check hardware's ras ability which will be saved in hw_supported.
* if hardware does not support ras, we can skip some ras initialization and
@ -2037,11 +2068,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
* we have to initialize ras as normal, but we need to check if the operation is
* allowed or not in each function.
*/
static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
uint32_t *hw_supported, uint32_t *supported)
static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
{
*hw_supported = 0;
*supported = 0;
adev->ras_hw_enabled = adev->ras_enabled = 0;
if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
!amdgpu_ras_asic_supported(adev))
@ -2050,33 +2079,34 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
if (!adev->gmc.xgmi.connected_to_cpu) {
if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
dev_info(adev->dev, "MEM ECC is active.\n");
*hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC |
1 << AMDGPU_RAS_BLOCK__DF);
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
1 << AMDGPU_RAS_BLOCK__DF);
} else {
dev_info(adev->dev, "MEM ECC is not presented.\n");
}
if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
dev_info(adev->dev, "SRAM ECC is active.\n");
*hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
1 << AMDGPU_RAS_BLOCK__DF);
adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
1 << AMDGPU_RAS_BLOCK__DF);
} else {
dev_info(adev->dev, "SRAM ECC is not presented.\n");
}
} else {
/* the driver only manages the RAS features of a few IP blocks
* when the GPU is connected to the CPU through XGMI */
*hw_supported |= (1 << AMDGPU_RAS_BLOCK__GFX |
1 << AMDGPU_RAS_BLOCK__SDMA |
1 << AMDGPU_RAS_BLOCK__MMHUB);
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX |
1 << AMDGPU_RAS_BLOCK__SDMA |
1 << AMDGPU_RAS_BLOCK__MMHUB);
}
/* hw_supported needs to be aligned with RAS block mask. */
*hw_supported &= AMDGPU_RAS_BLOCK_MASK;
amdgpu_ras_get_quirks(adev);
*supported = amdgpu_ras_enable == 0 ?
0 : *hw_supported & amdgpu_ras_mask;
adev->ras_features = *supported;
/* hw_supported needs to be aligned with RAS block mask. */
adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;
adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
adev->ras_hw_enabled & amdgpu_ras_mask;
}
int amdgpu_ras_init(struct amdgpu_device *adev)
@ -2097,13 +2127,13 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
amdgpu_ras_set_context(adev, con);
amdgpu_ras_check_supported(adev, &con->hw_supported,
&con->supported);
if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) {
amdgpu_ras_check_supported(adev);
if (!adev->ras_enabled || adev->asic_type == CHIP_VEGA10) {
/* set gfx block ras context feature for VEGA20 Gaming
* send ras disable cmd to ras ta during ras late init.
*/
if (!adev->ras_features && adev->asic_type == CHIP_VEGA20) {
if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) {
con->features |= BIT(AMDGPU_RAS_BLOCK__GFX);
return 0;
@ -2153,8 +2183,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
}
dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
"hardware ability[%x] ras_mask[%x]\n",
con->hw_supported, con->supported);
"hardware ability[%x] ras_mask[%x]\n",
adev->ras_hw_enabled, adev->ras_enabled);
return 0;
release_con:
amdgpu_ras_set_context(adev, NULL);
@ -2268,7 +2299,7 @@ void amdgpu_ras_resume(struct amdgpu_device *adev)
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj, *tmp;
if (!adev->ras_features || !con) {
if (!adev->ras_enabled || !con) {
/* clean ras context for VEGA20 Gaming after sending the ras disable cmd */
amdgpu_release_ras_context(adev);
@ -2314,7 +2345,7 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
if (!adev->ras_features || !con)
if (!adev->ras_enabled || !con)
return;
amdgpu_ras_disable_all_features(adev, 0);
@ -2328,7 +2359,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
if (!adev->ras_features || !con)
if (!adev->ras_enabled || !con)
return 0;
/* Need to disable ras on all IPs here before ip [hw/sw]fini */
@ -2341,7 +2372,7 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
if (!adev->ras_features || !con)
if (!adev->ras_enabled || !con)
return 0;
amdgpu_ras_fs_fini(adev);
@ -2360,10 +2391,8 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
{
uint32_t hw_supported, supported;
amdgpu_ras_check_supported(adev, &hw_supported, &supported);
if (!hw_supported)
amdgpu_ras_check_supported(adev);
if (!adev->ras_hw_enabled)
return;
if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
@ -2392,7 +2421,7 @@ void amdgpu_release_ras_context(struct amdgpu_device *adev)
if (!con)
return;
if (!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
if (!adev->ras_enabled && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
con->features &= ~BIT(AMDGPU_RAS_BLOCK__GFX);
amdgpu_ras_set_context(adev, NULL);
kfree(con);

View File

@ -313,9 +313,6 @@ struct ras_common_if {
struct amdgpu_ras {
/* ras infrastructure */
/* for ras itself. */
uint32_t hw_supported;
/* for IP to check its ras ability. */
uint32_t supported;
uint32_t features;
struct list_head head;
/* sysfs */
@ -478,7 +475,7 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
if (block >= AMDGPU_RAS_BLOCK_COUNT)
return 0;
return ras && (ras->supported & (1 << block));
return ras && (adev->ras_enabled & (1 << block));
}
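With hw_supported/supported folded into adev->ras_hw_enabled and adev->ras_enabled, the inline helper above is what IP code keys off. A minimal, hypothetical caller sketch (the function name is made up; only the helper and the GFX block enum come from this series):

	/* hedged sketch: gate an IP block's RAS late init on the new mask */
	static int some_ip_ras_late_init(struct amdgpu_device *adev)
	{
		if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
			return 0;	/* RAS disabled or not wired up for this block */

		/* ... register the interrupt source, create sysfs nodes, etc. ... */
		return 0;
	}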
int amdgpu_ras_recovery_init(struct amdgpu_device *adev);

View File

@ -158,6 +158,7 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
* @irq_src: interrupt source to use for this ring
* @irq_type: interrupt type to use for this ring
* @hw_prio: ring priority (NORMAL/HIGH)
* @sched_score: optional score atomic shared with other schedulers
*
* Initialize the driver information for the selected ring (all asics).
* Returns 0 on success, error on failure.

View File

@ -29,6 +29,7 @@ struct amdgpu_smuio_funcs {
void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable);
void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
u32 (*get_die_id)(struct amdgpu_device *adev);
u32 (*get_socket_id)(struct amdgpu_device *adev);
bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev);
};
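get_socket_id is a new, optional per-ASIC smuio callback. A hedged caller sketch, mirroring the NULL checks used for the socket/die aware RAS messages earlier in this merge:

	/* hedged sketch: the callback may be absent on older smuio versions */
	u32 socket_id = 0;

	if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id)
		socket_id = adev->smuio.funcs->get_socket_id(adev);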

View File

@ -32,7 +32,6 @@
#include <linux/dma-mapping.h>
#include <linux/iommu.h>
#include <linux/hmm.h>
#include <linux/pagemap.h>
#include <linux/sched/task.h>
#include <linux/sched/mm.h>
@ -112,6 +111,20 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
}
abo = ttm_to_amdgpu_bo(bo);
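/* KFD SVM BOs are not migrated by TTM: make sure the exclusive fence
 * has software signaling enabled and report no placements for eviction.
 */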
if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) {
struct dma_fence *fence;
struct dma_resv *resv = &bo->base._resv;
rcu_read_lock();
fence = rcu_dereference(resv->fence_excl);
if (fence && !fence->ops->signaled)
dma_fence_enable_sw_signaling(fence);
placement->num_placement = 0;
placement->num_busy_placement = 0;
rcu_read_unlock();
return;
}
switch (bo->mem.mem_type) {
case AMDGPU_PL_GDS:
case AMDGPU_PL_GWS:
@ -165,13 +178,6 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
{
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
/*
* Don't verify access for KFD BOs. They don't have a GEM
* object associated with them.
*/
if (abo->kfd_bo)
return 0;
if (amdgpu_ttm_tt_get_usermm(bo->ttm))
return -EPERM;
return drm_vma_node_verify_access(&abo->tbo.base.vma_node,
@ -288,7 +294,7 @@ error_free:
}
/**
* amdgpu_copy_ttm_mem_to_mem - Helper function for copy
* amdgpu_ttm_copy_mem_to_mem - Helper function for copy
* @adev: amdgpu device
* @src: buffer/address where to read from
* @dst: buffer/address where to write to
@ -670,10 +676,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
struct amdgpu_ttm_tt *gtt = (void *)ttm;
unsigned long start = gtt->userptr;
struct vm_area_struct *vma;
struct hmm_range *range;
unsigned long timeout;
struct mm_struct *mm;
unsigned long i;
bool readonly;
int r = 0;
mm = bo->notifier.mm;
@ -689,76 +693,26 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
if (!mmget_not_zero(mm)) /* Happens during process shutdown */
return -ESRCH;
range = kzalloc(sizeof(*range), GFP_KERNEL);
if (unlikely(!range)) {
r = -ENOMEM;
goto out;
}
range->notifier = &bo->notifier;
range->start = bo->notifier.interval_tree.start;
range->end = bo->notifier.interval_tree.last + 1;
range->default_flags = HMM_PFN_REQ_FAULT;
if (!amdgpu_ttm_tt_is_readonly(ttm))
range->default_flags |= HMM_PFN_REQ_WRITE;
range->hmm_pfns = kvmalloc_array(ttm->num_pages,
sizeof(*range->hmm_pfns), GFP_KERNEL);
if (unlikely(!range->hmm_pfns)) {
r = -ENOMEM;
goto out_free_ranges;
}
mmap_read_lock(mm);
vma = find_vma(mm, start);
mmap_read_unlock(mm);
if (unlikely(!vma || start < vma->vm_start)) {
r = -EFAULT;
goto out_unlock;
goto out_putmm;
}
if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
vma->vm_file)) {
r = -EPERM;
goto out_unlock;
}
mmap_read_unlock(mm);
timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
retry:
range->notifier_seq = mmu_interval_read_begin(&bo->notifier);
mmap_read_lock(mm);
r = hmm_range_fault(range);
mmap_read_unlock(mm);
if (unlikely(r)) {
/*
* FIXME: This timeout should encompass the retry from
* mmu_interval_read_retry() as well.
*/
if (r == -EBUSY && !time_after(jiffies, timeout))
goto retry;
goto out_free_pfns;
goto out_putmm;
}
/*
* Due to default_flags, all pages are HMM_PFN_VALID or
* hmm_range_fault() fails. FIXME: The pages cannot be touched outside
* the notifier_lock, and mmu_interval_read_retry() must be done first.
*/
for (i = 0; i < ttm->num_pages; i++)
pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
gtt->range = range;
readonly = amdgpu_ttm_tt_is_readonly(ttm);
r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
ttm->num_pages, &gtt->range, readonly,
false);
out_putmm:
mmput(mm);
return 0;
out_unlock:
mmap_read_unlock(mm);
out_free_pfns:
kvfree(range->hmm_pfns);
out_free_ranges:
kfree(range);
out:
mmput(mm);
return r;
}
@ -787,10 +741,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
* FIXME: Must always hold notifier_lock for this, and must
* not ignore the return code.
*/
r = mmu_interval_read_retry(gtt->range->notifier,
gtt->range->notifier_seq);
kvfree(gtt->range->hmm_pfns);
kfree(gtt->range);
r = amdgpu_hmm_range_get_pages_done(gtt->range);
gtt->range = NULL;
}
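The hunks above replace the open-coded hmm_range_fault() loop with helpers shared with the new KFD SVM code. A minimal sketch of the expected get/done pairing, assuming the argument order shown in the hunks (local names and the exact return-value semantics of the done call are inferred from context, not verified):

	/* hedged sketch of the helper pairing used above */
	r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
				       ttm->num_pages, &gtt->range,
				       amdgpu_ttm_tt_is_readonly(ttm), false);
	if (r)
		return r;

	/* ... consume 'pages' ... */

	/* presumably returns non-zero when the range was invalidated and the
	 * pages must be faulted in again; it also releases the hmm_range */
	r = amdgpu_hmm_range_get_pages_done(gtt->range);
	gtt->range = NULL;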

View File

@ -50,9 +50,12 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
struct drm_device *ddev = adev_to_drm(adev);
/* enable virtual display */
if (adev->mode_info.num_crtc == 0)
adev->mode_info.num_crtc = 1;
adev->enable_virtual_display = true;
if (adev->asic_type != CHIP_ALDEBARAN &&
adev->asic_type != CHIP_ARCTURUS) {
if (adev->mode_info.num_crtc == 0)
adev->mode_info.num_crtc = 1;
adev->enable_virtual_display = true;
}
ddev->driver_features &= ~DRIVER_ATOMIC;
adev->cg_flags = 0;
adev->pg_flags = 0;
@ -679,6 +682,7 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
case CHIP_VEGA10:
case CHIP_VEGA20:
case CHIP_ARCTURUS:
case CHIP_ALDEBARAN:
soc15_set_virt_ops(adev);
break;
case CHIP_NAVI10:

View File

@ -38,6 +38,7 @@
#include "amdgpu_gmc.h"
#include "amdgpu_xgmi.h"
#include "amdgpu_dma_buf.h"
#include "kfd_svm.h"
/**
* DOC: GPUVM
@ -850,35 +851,60 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
}
/**
* amdgpu_vm_bo_param - fill in parameters for PD/PT allocation
* amdgpu_vm_pt_create - create bo for PD/PT
*
* @adev: amdgpu_device pointer
* @vm: requesting vm
* @level: the page table level
* @immediate: use a immediate update
* @bp: resulting BO allocation parameters
* @bo: pointer to the buffer object pointer
*/
static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
int level, bool immediate,
struct amdgpu_bo_param *bp)
struct amdgpu_bo **bo)
{
memset(bp, 0, sizeof(*bp));
struct amdgpu_bo_param bp;
int r;
bp->size = amdgpu_vm_bo_size(adev, level);
bp->byte_align = AMDGPU_GPU_PAGE_SIZE;
bp->domain = AMDGPU_GEM_DOMAIN_VRAM;
bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain);
bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
memset(&bp, 0, sizeof(bp));
bp.size = amdgpu_vm_bo_size(adev, level);
bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
bp.domain = amdgpu_bo_get_preferred_pin_domain(adev, bp.domain);
bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_CPU_GTT_USWC;
bp->bo_ptr_size = sizeof(struct amdgpu_bo);
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
if (vm->use_cpu_for_update)
bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
else if (!vm->root.base.bo || vm->root.base.bo->shadow)
bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
bp->type = ttm_bo_type_kernel;
bp->no_wait_gpu = immediate;
bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
bp.type = ttm_bo_type_kernel;
bp.no_wait_gpu = immediate;
if (vm->root.base.bo)
bp->resv = vm->root.base.bo->tbo.base.resv;
bp.resv = vm->root.base.bo->tbo.base.resv;
r = amdgpu_bo_create(adev, &bp, bo);
if (r)
return r;
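/* page tables of compute VMs on APUs get no shadow BO */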
if (vm->is_compute_context && (adev->flags & AMD_IS_APU))
return 0;
if (!bp.resv)
WARN_ON(dma_resv_lock((*bo)->tbo.base.resv,
NULL));
r = amdgpu_bo_create_shadow(adev, bp.size, *bo);
if (!bp.resv)
dma_resv_unlock((*bo)->tbo.base.resv);
if (r) {
amdgpu_bo_unref(bo);
return r;
}
return 0;
}
/**
@ -901,7 +927,6 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
bool immediate)
{
struct amdgpu_vm_pt *entry = cursor->entry;
struct amdgpu_bo_param bp;
struct amdgpu_bo *pt;
int r;
@ -919,9 +944,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
if (entry->base.bo)
return 0;
amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp);
r = amdgpu_bo_create(adev, &bp, &pt);
r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
if (r)
return r;
@ -1593,15 +1616,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
* Returns:
* 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
struct amdgpu_device *bo_adev,
struct amdgpu_vm *vm, bool immediate,
bool unlocked, struct dma_resv *resv,
uint64_t start, uint64_t last,
uint64_t flags, uint64_t offset,
struct drm_mm_node *nodes,
dma_addr_t *pages_addr,
struct dma_fence **fence)
int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
struct amdgpu_device *bo_adev,
struct amdgpu_vm *vm, bool immediate,
bool unlocked, struct dma_resv *resv,
uint64_t start, uint64_t last,
uint64_t flags, uint64_t offset,
struct drm_mm_node *nodes,
dma_addr_t *pages_addr,
struct dma_fence **fence)
{
struct amdgpu_vm_update_params params;
enum amdgpu_sync_mode sync_mode;
@ -2818,7 +2841,6 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
*
* @adev: amdgpu_device pointer
* @vm: requested vm
* @vm_context: Indicates if it GFX or Compute context
* @pasid: Process address space identifier
*
* Init @vm fields.
@ -2826,10 +2848,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
* Returns:
* 0 for success, error for failure.
*/
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int vm_context, u32 pasid)
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid)
{
struct amdgpu_bo_param bp;
struct amdgpu_bo *root;
int r, i;
@ -2861,16 +2881,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->pte_support_ats = false;
vm->is_compute_context = false;
if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_COMPUTE);
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_GFX);
if (adev->asic_type == CHIP_RAVEN)
vm->pte_support_ats = true;
} else {
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_GFX);
}
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update &&
@ -2887,10 +2900,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
mutex_init(&vm->eviction_lock);
vm->evicting = false;
amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp);
if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
r = amdgpu_bo_create(adev, &bp, &root);
r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
false, &root);
if (r)
goto error_free_delayed;
@ -3349,6 +3360,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
uint64_t addr)
{
bool is_compute_context = false;
struct amdgpu_bo *root;
uint64_t value, flags;
struct amdgpu_vm *vm;
@ -3356,15 +3368,25 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
spin_lock(&adev->vm_manager.pasid_lock);
vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
if (vm)
if (vm) {
root = amdgpu_bo_ref(vm->root.base.bo);
else
is_compute_context = vm->is_compute_context;
} else {
root = NULL;
}
spin_unlock(&adev->vm_manager.pasid_lock);
if (!root)
return false;
addr /= AMDGPU_GPU_PAGE_SIZE;
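/* retry faults on a compute VM are first handed to the SVM code; if it
 * can restore the pages there is nothing left to do here
 */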
if (is_compute_context &&
!svm_range_restore_pages(adev, pasid, addr)) {
amdgpu_bo_unref(&root);
return true;
}
r = amdgpu_bo_reserve(root, true);
if (r)
goto error_unref;
@ -3378,18 +3400,16 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
if (!vm)
goto error_unlock;
addr /= AMDGPU_GPU_PAGE_SIZE;
flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
AMDGPU_PTE_SYSTEM;
if (vm->is_compute_context) {
if (is_compute_context) {
/* Intentionally setting invalid PTE flag
* combination to force a no-retry-fault
*/
flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
AMDGPU_PTE_TF;
value = 0;
} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
/* Redirect the access to the dummy page */
value = adev->dummy_page_addr;

View File

@ -121,9 +121,6 @@ struct amdgpu_bo_list_entry;
/* max vmids dedicated for process */
#define AMDGPU_VM_MAX_RESERVED_VMID 1
#define AMDGPU_VM_CONTEXT_GFX 0
#define AMDGPU_VM_CONTEXT_COMPUTE 1
/* See vm_update_mode */
#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
@ -367,6 +364,8 @@ struct amdgpu_vm_manager {
spinlock_t pasid_lock;
};
struct amdgpu_bo_va_mapping;
#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
@ -378,8 +377,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev);
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int vm_context, u32 pasid);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid);
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid);
void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
@ -398,6 +396,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct dma_fence **fence);
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
struct amdgpu_device *bo_adev,
struct amdgpu_vm *vm, bool immediate,
bool unlocked, struct dma_resv *resv,
uint64_t start, uint64_t last,
uint64_t flags, uint64_t offset,
struct drm_mm_node *nodes,
dma_addr_t *pages_addr,
struct dma_fence **fence);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear);

View File

@ -29,12 +29,14 @@
#include "amdgpu_atomfirmware.h"
#include "atom.h"
static inline struct amdgpu_vram_mgr *to_vram_mgr(struct ttm_resource_manager *man)
static inline struct amdgpu_vram_mgr *
to_vram_mgr(struct ttm_resource_manager *man)
{
return container_of(man, struct amdgpu_vram_mgr, manager);
}
static inline struct amdgpu_device *to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
static inline struct amdgpu_device *
to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
{
return container_of(mgr, struct amdgpu_device, mman.vram_mgr);
}
@ -82,12 +84,14 @@ static ssize_t amdgpu_mem_info_vis_vram_total_show(struct device *dev,
* amount of currently used VRAM in bytes
*/
static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
struct device_attribute *attr, char *buf)
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
struct ttm_resource_manager *man;
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_usage(man));
}
@ -100,18 +104,28 @@ static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
* amount of currently used visible VRAM in bytes
*/
static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev,
struct device_attribute *attr, char *buf)
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
struct ttm_resource_manager *man;
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_vis_usage(man));
}
/**
* DOC: mem_info_vram_vendor
*
* The amdgpu driver provides a sysfs API for reporting the vendor of the
* installed VRAM
* The file mem_info_vram_vendor is used for this and returns the name of the
* vendor.
*/
static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev,
struct device_attribute *attr,
char *buf)
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
@ -162,78 +176,6 @@ static const struct attribute *amdgpu_vram_mgr_attributes[] = {
NULL
};
static const struct ttm_resource_manager_func amdgpu_vram_mgr_func;
/**
* amdgpu_vram_mgr_init - init VRAM manager and DRM MM
*
* @adev: amdgpu_device pointer
*
* Allocate and initialize the VRAM manager.
*/
int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
{
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
struct ttm_resource_manager *man = &mgr->manager;
int ret;
ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT);
man->func = &amdgpu_vram_mgr_func;
drm_mm_init(&mgr->mm, 0, man->size);
spin_lock_init(&mgr->lock);
INIT_LIST_HEAD(&mgr->reservations_pending);
INIT_LIST_HEAD(&mgr->reserved_pages);
/* Add the two VRAM-related sysfs files */
ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
if (ret)
DRM_ERROR("Failed to register sysfs\n");
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
ttm_resource_manager_set_used(man, true);
return 0;
}
/**
* amdgpu_vram_mgr_fini - free and destroy VRAM manager
*
* @adev: amdgpu_device pointer
*
* Destroy and free the VRAM manager, returns -EBUSY if ranges are still
* allocated inside it.
*/
void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
{
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
struct ttm_resource_manager *man = &mgr->manager;
int ret;
struct amdgpu_vram_reservation *rsv, *temp;
ttm_resource_manager_set_used(man, false);
ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
if (ret)
return;
spin_lock(&mgr->lock);
list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
kfree(rsv);
list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
drm_mm_remove_node(&rsv->mm_node);
kfree(rsv);
}
drm_mm_takedown(&mgr->mm);
spin_unlock(&mgr->lock);
sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
ttm_resource_manager_cleanup(man);
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
}
/**
* amdgpu_vram_mgr_vis_size - Calculate visible node size
*
@ -283,6 +225,7 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
return usage;
}
/* Commit the reservation of VRAM pages */
static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
@ -415,13 +358,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
const struct ttm_place *place,
struct ttm_resource *mem)
{
unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
struct drm_mm *mm = &mgr->mm;
struct drm_mm_node *nodes;
enum drm_mm_insert_mode mode;
unsigned long lpfn, num_nodes, pages_per_node, pages_left;
uint64_t vis_usage = 0, mem_bytes, max_bytes;
struct drm_mm *mm = &mgr->mm;
enum drm_mm_insert_mode mode;
struct drm_mm_node *nodes;
unsigned i;
int r;
@ -448,10 +391,10 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
pages_per_node = HPAGE_PMD_NR;
#else
/* default to 2MB */
pages_per_node = (2UL << (20UL - PAGE_SHIFT));
pages_per_node = 2UL << (20UL - PAGE_SHIFT);
#endif
pages_per_node = max((uint32_t)pages_per_node,
tbo->page_alignment);
pages_per_node = max_t(uint32_t, pages_per_node,
tbo->page_alignment);
num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
}
@ -469,42 +412,37 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
mem->start = 0;
pages_left = mem->num_pages;
/* Limit maximum size to 2GB due to SG table limitations */
pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
i = 0;
spin_lock(&mgr->lock);
for (i = 0; pages_left >= pages_per_node; ++i) {
unsigned long pages = rounddown_pow_of_two(pages_left);
/* Limit maximum size to 2GB due to SG table limitations */
pages = min(pages, (2UL << (30 - PAGE_SHIFT)));
r = drm_mm_insert_node_in_range(mm, &nodes[i], pages,
pages_per_node, 0,
place->fpfn, lpfn,
mode);
if (unlikely(r))
break;
vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
pages_left -= pages;
}
for (; pages_left; ++i) {
unsigned long pages = min(pages_left, pages_per_node);
while (pages_left) {
uint32_t alignment = tbo->page_alignment;
if (pages == pages_per_node)
if (pages >= pages_per_node)
alignment = pages_per_node;
r = drm_mm_insert_node_in_range(mm, &nodes[i],
pages, alignment, 0,
place->fpfn, lpfn,
mode);
if (unlikely(r))
r = drm_mm_insert_node_in_range(mm, &nodes[i], pages, alignment,
0, place->fpfn, lpfn, mode);
if (unlikely(r)) {
if (pages > pages_per_node) {
if (is_power_of_2(pages))
pages = pages / 2;
else
pages = rounddown_pow_of_two(pages);
continue;
}
goto error;
}
vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
pages_left -= pages;
++i;
if (pages > pages_left)
pages = pages_left;
}
spin_unlock(&mgr->lock);
@ -728,3 +666,73 @@ static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
.free = amdgpu_vram_mgr_del,
.debug = amdgpu_vram_mgr_debug
};
/**
* amdgpu_vram_mgr_init - init VRAM manager and DRM MM
*
* @adev: amdgpu_device pointer
*
* Allocate and initialize the VRAM manager.
*/
int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
{
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
struct ttm_resource_manager *man = &mgr->manager;
int ret;
ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT);
man->func = &amdgpu_vram_mgr_func;
drm_mm_init(&mgr->mm, 0, man->size);
spin_lock_init(&mgr->lock);
INIT_LIST_HEAD(&mgr->reservations_pending);
INIT_LIST_HEAD(&mgr->reserved_pages);
/* Add the two VRAM-related sysfs files */
ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
if (ret)
DRM_ERROR("Failed to register sysfs\n");
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
ttm_resource_manager_set_used(man, true);
return 0;
}
/**
* amdgpu_vram_mgr_fini - free and destroy VRAM manager
*
* @adev: amdgpu_device pointer
*
* Destroy and free the VRAM manager, returns -EBUSY if ranges are still
* allocated inside it.
*/
void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
{
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
struct ttm_resource_manager *man = &mgr->manager;
int ret;
struct amdgpu_vram_reservation *rsv, *temp;
ttm_resource_manager_set_used(man, false);
ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
if (ret)
return;
spin_lock(&mgr->lock);
list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
kfree(rsv);
list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
drm_mm_remove_node(&rsv->mm_node);
kfree(rsv);
}
drm_mm_takedown(&mgr->mm);
spin_unlock(&mgr->lock);
sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
ttm_resource_manager_cleanup(man);
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
}

View File

@ -98,9 +98,9 @@ union amd_sriov_msg_feature_flags {
union amd_sriov_reg_access_flags {
struct {
uint32_t vf_reg_access_ih : 1;
uint32_t vf_reg_access_mmhub : 1;
uint32_t vf_reg_access_gc : 1;
uint32_t vf_reg_psp_access_ih : 1;
uint32_t vf_reg_rlc_access_mmhub : 1;
uint32_t vf_reg_rlc_access_gc : 1;
uint32_t reserved : 29;
} flags;
uint32_t all;

View File

@ -421,6 +421,11 @@ static int dce_virtual_sw_init(void *handle)
static int dce_virtual_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i = 0;
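/* cancel the emulated vblank hrtimers for all crtcs */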
for (i = 0; i < adev->mode_info.num_crtc; i++)
if (adev->mode_info.crtcs[i])
hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer);
kfree(adev->mode_info.bios_hardcoded_edid);
@ -480,13 +485,6 @@ static int dce_virtual_hw_init(void *handle)
static int dce_virtual_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i = 0;
for (i = 0; i<adev->mode_info.num_crtc; i++)
if (adev->mode_info.crtcs[i])
hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer);
return 0;
}

View File

@ -219,11 +219,11 @@ static void df_v3_6_query_hashes(struct amdgpu_device *adev)
adev->df.hash_status.hash_2m = false;
adev->df.hash_status.hash_1g = false;
if (adev->asic_type != CHIP_ARCTURUS)
return;
/* encoding for hash-enabled on Arcturus */
if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) {
/* encoding for hash-enabled on Arcturus and Aldebaran */
if ((adev->asic_type == CHIP_ARCTURUS &&
adev->df.funcs->get_fb_channel_number(adev) == 0xe) ||
(adev->asic_type == CHIP_ALDEBARAN &&
adev->df.funcs->get_fb_channel_number(adev) == 0x1e)) {
tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
DF_CS_UMC_AON0_DfGlobalCtrl,
@ -278,7 +278,12 @@ static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
u32 tmp;
tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
if (adev->asic_type == CHIP_ALDEBARAN)
tmp &=
ALDEBARAN_DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
else
tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
return tmp;

View File

@ -3937,7 +3937,8 @@ static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
{
u32 tmp;
if (adev->asic_type != CHIP_ARCTURUS)
if (adev->asic_type != CHIP_ARCTURUS &&
adev->asic_type != CHIP_ALDEBARAN)
return;
tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
@ -4559,8 +4560,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
if (!ring->sched.ready)
return 0;
if (adev->asic_type == CHIP_ARCTURUS ||
adev->asic_type == CHIP_ALDEBARAN) {
if (adev->asic_type == CHIP_ARCTURUS) {
vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
@ -4745,7 +4745,11 @@ static int gfx_v9_0_ecc_late_init(void *handle)
}
/* requires IBs so do in late init after IB pool is initialized */
r = gfx_v9_0_do_edc_gpr_workarounds(adev);
if (adev->asic_type == CHIP_ALDEBARAN)
r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
else
r = gfx_v9_0_do_edc_gpr_workarounds(adev);
if (r)
return r;

View File

@ -22,6 +22,7 @@
*/
#include "amdgpu.h"
#include "soc15.h"
#include "soc15d.h"
#include "gc/gc_9_4_2_offset.h"
#include "gc/gc_9_4_2_sh_mask.h"
@ -31,6 +32,11 @@
#include "amdgpu_ras.h"
#include "amdgpu_gfx.h"
#define SE_ID_MAX 8
#define CU_ID_MAX 16
#define SIMD_ID_MAX 4
#define WAVE_ID_MAX 10
enum gfx_v9_4_2_utc_type {
VML2_MEM,
VML2_WALKER_MEM,
@ -79,6 +85,634 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_2_alde[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20),
};
/**
* This shader is used to clear VGPRS and LDS, and also write the input
* pattern into the write back buffer, which will be used by the driver to
* check whether all SIMDs have been covered.
*/
static const u32 vgpr_init_compute_shader_aldebaran[] = {
0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xd3d94000,
0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 0xd3d94003,
0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 0xd3d94006,
0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 0xd3d94009,
0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 0xd3d9400c,
0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 0xd3d9400f,
0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 0xd3d94012,
0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 0xd3d94015,
0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 0xd3d94018,
0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 0xd3d9401b,
0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 0xd3d9401e,
0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 0xd3d94021,
0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 0xd3d94024,
0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 0xd3d94027,
0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 0xd3d9402a,
0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 0xd3d9402d,
0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 0xd3d94030,
0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 0xd3d94033,
0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 0xd3d94036,
0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 0xd3d94039,
0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 0xd3d9403c,
0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 0xd3d9403f,
0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 0xd3d94042,
0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 0xd3d94045,
0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 0xd3d94048,
0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 0xd3d9404b,
0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 0xd3d9404e,
0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 0xd3d94051,
0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 0xd3d94054,
0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 0xd3d94057,
0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 0xd3d9405a,
0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 0xd3d9405d,
0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 0xd3d94060,
0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 0xd3d94063,
0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 0xd3d94066,
0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 0xd3d94069,
0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 0xd3d9406c,
0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 0xd3d9406f,
0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080, 0xd3d94072,
0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 0xd3d94075,
0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 0xd3d94078,
0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 0xd3d9407b,
0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 0xd3d9407e,
0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 0xd3d94081,
0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 0xd3d94084,
0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 0xd3d94087,
0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 0xd3d9408a,
0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 0xd3d9408d,
0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 0xd3d94090,
0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 0xd3d94093,
0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 0xd3d94096,
0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 0xd3d94099,
0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 0xd3d9409c,
0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 0xd3d9409f,
0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 0xd3d940a2,
0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 0xd3d940a5,
0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 0xd3d940a8,
0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 0xd3d940ab,
0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 0xd3d940ae,
0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 0xd3d940b1,
0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 0xd3d940b4,
0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 0xd3d940b7,
0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 0xd3d940ba,
0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 0xd3d940bd,
0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 0xd3d940c0,
0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 0xd3d940c3,
0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 0xd3d940c6,
0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 0xd3d940c9,
0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 0xd3d940cc,
0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 0xd3d940cf,
0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 0xd3d940d2,
0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 0xd3d940d5,
0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 0xd3d940d8,
0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 0xd3d940db,
0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 0xd3d940de,
0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 0xd3d940e1,
0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 0xd3d940e4,
0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 0xd3d940e7,
0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 0xd3d940ea,
0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 0xd3d940ed,
0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 0xd3d940f0,
0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 0xd3d940f3,
0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 0xd3d940f6,
0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 0xd3d940f9,
0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 0xd3d940fc,
0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 0xd3d940ff,
0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 0x7e000280,
0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 0x7e0c0280,
0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 0xd28c0001,
0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xbe8b0004, 0xb78b4000,
0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 0x00020201,
0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 0xbf84fff8,
0xbf810000,
};
const struct soc15_reg_entry vgpr_init_regs_aldebaran[] = {
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 4 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0xbf },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x400006 }, /* 64KB LDS */
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /* 63 - accum-offset = 256 */
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
};
/**
* The shaders below are used to clear SGPRS, and also write the input
* pattern into the write back buffer. The first two dispatches should be
* scheduled simultaneously to make sure that all SGPRS can be allocated,
* so dispatch 1 needs to check the write back buffer before it is
* scheduled, to make sure that the waves of dispatch 0 have been spread
* across all SIMDs in a balanced way. Both dispatch 0 and dispatch 1
* should be halted until all waves are dispatched, and then the driver
* writes a pattern to the shared memory to make all waves continue.
*/
static const u32 sgpr112_init_compute_shader_aldebaran[] = {
0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080,
0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080,
0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080,
0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080,
0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080,
0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080,
0xbeda0080, 0xbedb0080, 0xbedc0080, 0xbedd0080, 0xbede0080, 0xbedf0080,
0xbee00080, 0xbee10080, 0xbee20080, 0xbee30080, 0xbee40080, 0xbee50080,
0xbf810000
};
const struct soc15_reg_entry sgpr112_init_regs_aldebaran[] = {
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 8 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x340 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
};
static const u32 sgpr96_init_compute_shader_aldebaran[] = {
0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080,
0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080,
0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080,
0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080,
0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080,
0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080,
0xbf810000,
};
const struct soc15_reg_entry sgpr96_init_regs_aldebaran[] = {
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0xc },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x2c0 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
};
/**
* This shader is used to clear the uninitialized sgprs left after the
* above two dispatches; because of a hardware feature, dispatch 0
* couldn't clear the top hole sgprs. Therefore 4 waves per SIMD are
* needed to cover these sgprs
*/
static const u32 sgpr64_init_compute_shader_aldebaran[] = {
0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbf810000,
};
const struct soc15_reg_entry sgpr64_init_regs_aldebaran[] = {
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0x10 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x1c0 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
};
static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
const u32 *shader_ptr, u32 shader_size,
const struct soc15_reg_entry *init_regs, u32 regs_size,
u32 compute_dim_x, u64 wb_gpu_addr, u32 pattern,
struct dma_fence **fence_ptr)
{
int r, i;
uint32_t total_size, shader_offset;
u64 gpu_addr;
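/* IB layout: 3 dwords per SET_SH_REG register write, 4 dwords for the
 * COMPUTE_PGM_LO/HI write, 5 for the write back buffer address/pattern,
 * 5 for the dispatch packet, then the shader itself, 256-byte aligned.
 */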
total_size = (regs_size * 3 + 4 + 5 + 5) * 4;
total_size = ALIGN(total_size, 256);
shader_offset = total_size;
total_size += ALIGN(shader_size, 256);
/* allocate an indirect buffer to put the commands in */
memset(ib, 0, sizeof(*ib));
r = amdgpu_ib_get(adev, NULL, total_size,
AMDGPU_IB_POOL_DIRECT, ib);
if (r) {
dev_err(adev->dev, "failed to get ib (%d).\n", r);
return r;
}
/* load the compute shaders */
for (i = 0; i < shader_size/sizeof(u32); i++)
ib->ptr[i + (shader_offset / 4)] = shader_ptr[i];
/* init the ib length to 0 */
ib->length_dw = 0;
/* write the register state for the compute dispatch */
for (i = 0; i < regs_size; i++) {
ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
ib->ptr[ib->length_dw++] = SOC15_REG_ENTRY_OFFSET(init_regs[i])
- PACKET3_SET_SH_REG_START;
ib->ptr[ib->length_dw++] = init_regs[i].reg_value;
}
/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
gpu_addr = (ib->gpu_addr + (u64)shader_offset) >> 8;
ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_PGM_LO)
- PACKET3_SET_SH_REG_START;
ib->ptr[ib->length_dw++] = lower_32_bits(gpu_addr);
ib->ptr[ib->length_dw++] = upper_32_bits(gpu_addr);
/* write the wb buffer address */
ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 3);
ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_USER_DATA_0)
- PACKET3_SET_SH_REG_START;
ib->ptr[ib->length_dw++] = lower_32_bits(wb_gpu_addr);
ib->ptr[ib->length_dw++] = upper_32_bits(wb_gpu_addr);
ib->ptr[ib->length_dw++] = pattern;
/* write dispatch packet */
ib->ptr[ib->length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
ib->ptr[ib->length_dw++] = compute_dim_x; /* x */
ib->ptr[ib->length_dw++] = 1; /* y */
ib->ptr[ib->length_dw++] = 1; /* z */
ib->ptr[ib->length_dw++] =
REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
/* schedule the ib on the ring */
r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr);
if (r) {
dev_err(adev->dev, "ib submit failed (%d).\n", r);
amdgpu_ib_free(adev, ib, NULL);
}
return r;
}
static void gfx_v9_4_2_log_wave_assignment(struct amdgpu_device *adev, uint32_t *wb_ptr)
{
uint32_t se, cu, simd, wave;
uint32_t offset = 0;
char *str;
int size;
str = kmalloc(256, GFP_KERNEL);
if (!str)
return;
dev_dbg(adev->dev, "wave assignment:\n");
for (se = 0; se < adev->gfx.config.max_shader_engines; se++) {
for (cu = 0; cu < CU_ID_MAX; cu++) {
memset(str, 0, 256);
size = sprintf(str, "SE[%02d]CU[%02d]: ", se, cu);
for (simd = 0; simd < SIMD_ID_MAX; simd++) {
size += sprintf(str + size, "[");
for (wave = 0; wave < WAVE_ID_MAX; wave++) {
size += sprintf(str + size, "%x", wb_ptr[offset]);
offset++;
}
size += sprintf(str + size, "] ");
}
dev_dbg(adev->dev, "%s\n", str);
}
}
kfree(str);
}
static int gfx_v9_4_2_wait_for_waves_assigned(struct amdgpu_device *adev,
uint32_t *wb_ptr, uint32_t mask,
uint32_t pattern, uint32_t num_wave, bool wait)
{
uint32_t se, cu, simd, wave;
uint32_t loop = 0;
uint32_t wave_cnt;
uint32_t offset;
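/* Each wave started by the init shaders writes 'pattern' into its own
 * SE/CU/SIMD/wave slot of the write back buffer. Count the slots that
 * match for the wave ids selected by 'mask' and, when 'wait' is set,
 * poll for up to ~2 seconds until 'num_wave' of them have checked in.
 */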
do {
wave_cnt = 0;
offset = 0;
for (se = 0; se < adev->gfx.config.max_shader_engines; se++)
for (cu = 0; cu < CU_ID_MAX; cu++)
for (simd = 0; simd < SIMD_ID_MAX; simd++)
for (wave = 0; wave < WAVE_ID_MAX; wave++) {
if (((1 << wave) & mask) &&
(wb_ptr[offset] == pattern))
wave_cnt++;
offset++;
}
if (wave_cnt == num_wave)
return 0;
mdelay(1);
} while (++loop < 2000 && wait);
dev_err(adev->dev, "actual wave num: %d, expected wave num: %d\n",
wave_cnt, num_wave);
gfx_v9_4_2_log_wave_assignment(adev, wb_ptr);
return -EBADSLT;
}
static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev)
{
int r;
int wb_size = adev->gfx.config.max_shader_engines *
CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX;
struct amdgpu_ib wb_ib;
struct amdgpu_ib disp_ibs[3];
struct dma_fence *fences[3];
u32 pattern[3] = { 0x1, 0x5, 0xa };
/* bail if the compute ring is not ready */
if (!adev->gfx.compute_ring[0].sched.ready ||
!adev->gfx.compute_ring[1].sched.ready)
return 0;
/* allocate the write-back buffer from IB */
memset(&wb_ib, 0, sizeof(wb_ib));
r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t),
AMDGPU_IB_POOL_DIRECT, &wb_ib);
if (r) {
dev_err(adev->dev, "failed to get ib (%d) for wb\n", r);
return r;
}
memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
r = gfx_v9_4_2_run_shader(adev,
&adev->gfx.compute_ring[0],
&disp_ibs[0],
sgpr112_init_compute_shader_aldebaran,
sizeof(sgpr112_init_compute_shader_aldebaran),
sgpr112_init_regs_aldebaran,
ARRAY_SIZE(sgpr112_init_regs_aldebaran),
adev->gfx.cu_info.number,
wb_ib.gpu_addr, pattern[0], &fences[0]);
if (r) {
dev_err(adev->dev, "failed to clear first 224 sgprs\n");
goto pro_end;
}
r = gfx_v9_4_2_wait_for_waves_assigned(adev,
&wb_ib.ptr[1], 0b11,
pattern[0],
adev->gfx.cu_info.number * SIMD_ID_MAX * 2,
true);
if (r) {
dev_err(adev->dev, "wave coverage failed when clear first 224 sgprs\n");
wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
goto disp0_failed;
}
r = gfx_v9_4_2_run_shader(adev,
&adev->gfx.compute_ring[1],
&disp_ibs[1],
sgpr96_init_compute_shader_aldebaran,
sizeof(sgpr96_init_compute_shader_aldebaran),
sgpr96_init_regs_aldebaran,
ARRAY_SIZE(sgpr96_init_regs_aldebaran),
adev->gfx.cu_info.number * 2,
wb_ib.gpu_addr, pattern[1], &fences[1]);
if (r) {
dev_err(adev->dev, "failed to clear next 576 sgprs\n");
goto disp0_failed;
}
r = gfx_v9_4_2_wait_for_waves_assigned(adev,
&wb_ib.ptr[1], 0b11111100,
pattern[1], adev->gfx.cu_info.number * SIMD_ID_MAX * 6,
true);
if (r) {
dev_err(adev->dev, "wave coverage failed when clear first 576 sgprs\n");
wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
goto disp1_failed;
}
wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
/* wait for the GPU to finish processing the IB */
r = dma_fence_wait(fences[0], false);
if (r) {
dev_err(adev->dev, "timeout to clear first 224 sgprs\n");
goto disp1_failed;
}
r = dma_fence_wait(fences[1], false);
if (r) {
dev_err(adev->dev, "timeout to clear first 576 sgprs\n");
goto disp1_failed;
}
memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
r = gfx_v9_4_2_run_shader(adev,
&adev->gfx.compute_ring[0],
&disp_ibs[2],
sgpr64_init_compute_shader_aldebaran,
sizeof(sgpr64_init_compute_shader_aldebaran),
sgpr64_init_regs_aldebaran,
ARRAY_SIZE(sgpr64_init_regs_aldebaran),
adev->gfx.cu_info.number,
wb_ib.gpu_addr, pattern[2], &fences[2]);
if (r) {
dev_err(adev->dev, "failed to clear first 256 sgprs\n");
goto disp1_failed;
}
r = gfx_v9_4_2_wait_for_waves_assigned(adev,
&wb_ib.ptr[1], 0b1111,
pattern[2],
adev->gfx.cu_info.number * SIMD_ID_MAX * 4,
true);
if (r) {
dev_err(adev->dev, "wave coverage failed when clear first 256 sgprs\n");
wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
goto disp2_failed;
}
wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
r = dma_fence_wait(fences[2], false);
if (r) {
dev_err(adev->dev, "timeout to clear first 256 sgprs\n");
goto disp2_failed;
}
disp2_failed:
amdgpu_ib_free(adev, &disp_ibs[2], NULL);
dma_fence_put(fences[2]);
disp1_failed:
amdgpu_ib_free(adev, &disp_ibs[1], NULL);
dma_fence_put(fences[1]);
disp0_failed:
amdgpu_ib_free(adev, &disp_ibs[0], NULL);
dma_fence_put(fences[0]);
pro_end:
amdgpu_ib_free(adev, &wb_ib, NULL);
if (r)
dev_info(adev->dev, "Init SGPRS Failed\n");
else
dev_info(adev->dev, "Init SGPRS Successfully\n");
return r;
}
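/* Clear the VGPRs with a single dispatch of the Aldebaran VGPR init shader
* on compute ring 0 and verify that one wave ran on every SIMD.
*/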
static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev)
{
int r;
/* CU_ID: 0~15, SIMD_ID: 0~3, WAVE_ID: 0 ~ 9 */
int wb_size = adev->gfx.config.max_shader_engines *
CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX;
struct amdgpu_ib wb_ib;
struct amdgpu_ib disp_ib;
struct dma_fence *fence;
u32 pattern = 0xa;
/* bail if the compute ring is not ready */
if (!adev->gfx.compute_ring[0].sched.ready)
return 0;
/* allocate the write-back buffer from IB */
memset(&wb_ib, 0, sizeof(wb_ib));
r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t),
AMDGPU_IB_POOL_DIRECT, &wb_ib);
if (r) {
dev_err(adev->dev, "failed to get ib (%d) for wb.\n", r);
return r;
}
memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
r = gfx_v9_4_2_run_shader(adev,
&adev->gfx.compute_ring[0],
&disp_ib,
vgpr_init_compute_shader_aldebaran,
sizeof(vgpr_init_compute_shader_aldebaran),
vgpr_init_regs_aldebaran,
ARRAY_SIZE(vgpr_init_regs_aldebaran),
adev->gfx.cu_info.number,
wb_ib.gpu_addr, pattern, &fence);
if (r) {
dev_err(adev->dev, "failed to clear vgprs\n");
goto pro_end;
}
/* wait for the GPU to finish processing the IB */
r = dma_fence_wait(fence, false);
if (r) {
dev_err(adev->dev, "timeout to clear vgprs\n");
goto disp_failed;
}
r = gfx_v9_4_2_wait_for_waves_assigned(adev,
&wb_ib.ptr[1], 0b1,
pattern,
adev->gfx.cu_info.number * SIMD_ID_MAX,
false);
if (r) {
dev_err(adev->dev, "failed to cover all simds when clearing vgprs\n");
goto disp_failed;
}
disp_failed:
amdgpu_ib_free(adev, &disp_ib, NULL);
dma_fence_put(fence);
pro_end:
amdgpu_ib_free(adev, &wb_ib, NULL);
if (r)
dev_info(adev->dev, "Init VGPRS Failed\n");
else
dev_info(adev->dev, "Init VGPRS Successfully\n");
return r;
}
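/* EDC workaround entry point: initialize all SGPRs and VGPRs. Only runs
* when GFX RAS is supported on this device.
*/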
int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
/* only support when RAS is enabled */
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
return 0;
gfx_v9_4_2_do_sgprs_init(adev);
gfx_v9_4_2_do_vgprs_init(adev);
return 0;
}
static void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev);
static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev);
@ -808,8 +1442,9 @@ static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = {
REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) },
};
static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs =
{ SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 };
static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = {
SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16
};
static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev,
const struct soc15_reg_entry *reg,
@ -1039,13 +1674,16 @@ static void gfx_v9_4_2_reset_utc_err_status(struct amdgpu_device *adev)
static void gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev)
{
uint32_t i, j;
uint32_t value;
value = REG_SET_FIELD(0, GCEA_ERR_STATUS, CLEAR_ERROR_STATUS, 0x1);
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
j++) {
gfx_v9_4_2_select_se_sh(adev, i, 0, j);
WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10);
WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), value);
}
}
gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

View File

@ -29,6 +29,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev,
uint32_t die_id);
void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev);
int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev);
extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs;

View File

@ -283,10 +283,14 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
block_size);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
/* Send no-retry XNACK on fault to suppress VM fault storm.
* On Aldebaran, XNACK can be enabled in the SQ per-process.
* Retry faults need to be enabled for that to work.
*/
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!adev->gmc.noretry);
!adev->gmc.noretry ||
adev->asic_type == CHIP_ALDEBARAN);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,

View File

@ -53,6 +53,7 @@
#include "mmhub_v1_7.h"
#include "umc_v6_1.h"
#include "umc_v6_0.h"
#include "hdp_v4_0.h"
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
@ -1210,6 +1211,11 @@ static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
}
static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev)
{
adev->hdp.ras_funcs = &hdp_v4_0_ras_funcs;
}
static int gmc_v9_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@ -1230,6 +1236,7 @@ static int gmc_v9_0_early_init(void *handle)
gmc_v9_0_set_mmhub_funcs(adev);
gmc_v9_0_set_mmhub_ras_funcs(adev);
gmc_v9_0_set_gfxhub_funcs(adev);
gmc_v9_0_set_hdp_ras_funcs(adev);
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
adev->gmc.shared_aperture_end =
@ -1255,7 +1262,7 @@ static int gmc_v9_0_late_init(void *handle)
* writes, while disables HBM ECC for vega10.
*/
if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) {
if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) {
if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
}
@ -1265,6 +1272,10 @@ static int gmc_v9_0_late_init(void *handle)
adev->mmhub.ras_funcs->reset_ras_error_count)
adev->mmhub.ras_funcs->reset_ras_error_count(adev);
if (adev->hdp.ras_funcs &&
adev->hdp.ras_funcs->reset_ras_error_count)
adev->hdp.ras_funcs->reset_ras_error_count(adev);
r = amdgpu_gmc_ras_late_init(adev);
if (r)
return r;

View File

@ -59,12 +59,31 @@ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
}
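/* Accumulate the HDP_EDC_CNT register into the uncorrectable error count. */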
static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
err_data->ue_count = 0;
err_data->ce_count = 0;
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
return;
/* HDP SRAM errors are uncorrectable ones (i.e. fatal errors) */
err_data->ue_count += RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
}
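/* Clear the HDP EDC counter: Aldebaran and newer need an explicit write of 0,
* older ASICs clear the counter on read.
*/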
static void hdp_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
{
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
return;
/*read back hdp ras counter to reset it to 0 */
RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
if (adev->asic_type >= CHIP_ALDEBARAN)
WREG32_SOC15(HDP, 0, mmHDP_EDC_CNT, 0);
else
/*read back hdp ras counter to reset it to 0 */
RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
}
static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev,
@ -130,10 +149,16 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev)
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
}
const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs = {
.ras_late_init = amdgpu_hdp_ras_late_init,
.ras_fini = amdgpu_hdp_ras_fini,
.query_ras_error_count = hdp_v4_0_query_ras_error_count,
.reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
};
const struct amdgpu_hdp_funcs hdp_v4_0_funcs = {
.flush_hdp = hdp_v4_0_flush_hdp,
.invalidate_hdp = hdp_v4_0_invalidate_hdp,
.reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
.update_clock_gating = hdp_v4_0_update_clock_gating,
.get_clock_gating_state = hdp_v4_0_get_clockgating_state,
.init_registers = hdp_v4_0_init_registers,

View File

@ -27,5 +27,6 @@
#include "soc15_common.h"
extern const struct amdgpu_hdp_funcs hdp_v4_0_funcs;
extern const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs;
#endif

View File

@ -296,10 +296,12 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
block_size);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
/* On Aldebaran, XNACK can be enabled in the SQ per-process.
* Retry faults need to be enabled for that to work.
*/
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!adev->gmc.noretry);
1);
WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
@ -1313,12 +1315,31 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev)
}
}
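/* Clear the sticky error status in every MMEA instance by setting the
* CLEAR_ERROR_STATUS field of its ERR_STATUS register.
*/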
static void mmhub_v1_7_reset_ras_error_status(struct amdgpu_device *adev)
{
int i;
uint32_t reg_value;
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
return;
for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
mmhub_v1_7_ea_err_status_regs[i]));
reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
CLEAR_ERROR_STATUS, 0x01);
WREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]),
reg_value);
}
}
const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = {
.ras_late_init = amdgpu_mmhub_ras_late_init,
.ras_fini = amdgpu_mmhub_ras_fini,
.query_ras_error_count = mmhub_v1_7_query_ras_error_count,
.reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
.query_ras_error_status = mmhub_v1_7_query_ras_error_status,
.reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
};
const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = {

View File

@ -569,9 +569,9 @@ static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev,
return 0;
mmhub_v2_3_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
state == AMD_CG_STATE_GATE);
mmhub_v2_3_update_medium_grain_light_sleep(adev,
state == AMD_CG_STATE_GATE ? true : false);
state == AMD_CG_STATE_GATE);
return 0;
}

View File

@ -598,7 +598,7 @@ static void nv_pcie_gen3_enable(struct amdgpu_device *adev)
static void nv_program_aspm(struct amdgpu_device *adev)
{
if (amdgpu_aspm != 1)
if (!amdgpu_aspm)
return;
if (!(adev->flags & AMD_IS_APU) &&
@ -1068,6 +1068,7 @@ static int nv_common_early_init(void *handle)
case CHIP_SIENNA_CICHLID:
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_MC_MGCG |
AMD_CG_SUPPORT_VCN_MGCG |
@ -1091,6 +1092,7 @@ static int nv_common_early_init(void *handle)
case CHIP_NAVY_FLOUNDER:
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG |
@ -1121,6 +1123,8 @@ static int nv_common_early_init(void *handle)
AMD_CG_SUPPORT_MC_LS |
AMD_CG_SUPPORT_GFX_FGCG |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_SDMA_MGCG |
AMD_CG_SUPPORT_SDMA_LS |
AMD_CG_SUPPORT_JPEG_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
AMD_PG_SUPPORT_VCN |
@ -1132,6 +1136,7 @@ static int nv_common_early_init(void *handle)
case CHIP_DIMGREY_CAVEFISH:
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG |

View File

@ -97,7 +97,6 @@ enum psp_gfx_cmd_id
GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */
GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */
GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */
GFX_CMD_ID_CLEAR_VF_FW = 0x0000000D, /* Clear VF FW, to be used on VF shutdown. */
GFX_CMD_ID_GET_FW_ATTESTATION = 0x0000000F, /* Query GPUVA of the Fw Attestation DB */
/* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */
GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */

View File

@ -1109,6 +1109,8 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
if (adev->asic_type == CHIP_ARCTURUS &&
adev->sdma.instance[i].fw_version >= 14)
WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable);
/* Extend page fault timeout to avoid interrupt storm */
WREG32_SDMA(i, mmSDMA0_UTCL1_TIMEOUT, 0x00800080);
}
}
@ -2227,7 +2229,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev,
memset(&task_info, 0, sizeof(struct amdgpu_task_info));
amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
dev_info(adev->dev,
dev_dbg_ratelimited(adev->dev,
"[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u "
"pasid:%u, for process %s pid %d thread %s pid %d\n",
instance, addr, entry->src_id, entry->ring_id, entry->vmid,
@ -2240,7 +2242,7 @@ static int sdma_v4_0_process_vm_hole_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
dev_err(adev->dev, "MC or SEM address in VM hole\n");
dev_dbg_ratelimited(adev->dev, "MC or SEM address in VM hole\n");
sdma_v4_0_print_iv_entry(adev, entry);
return 0;
}
@ -2249,7 +2251,7 @@ static int sdma_v4_0_process_doorbell_invalid_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
dev_err(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n");
dev_dbg_ratelimited(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n");
sdma_v4_0_print_iv_entry(adev, entry);
return 0;
}
@ -2258,7 +2260,7 @@ static int sdma_v4_0_process_pool_timeout_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
dev_err(adev->dev,
dev_dbg_ratelimited(adev->dev,
"Polling register/memory timeout executing POLL_REG/MEM with finite timer\n");
sdma_v4_0_print_iv_entry(adev, entry);
return 0;
@ -2268,7 +2270,7 @@ static int sdma_v4_0_process_srbm_write_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
dev_err(adev->dev,
dev_dbg_ratelimited(adev->dev,
"SDMA gets a Register Write SRBM_WRITE command in a non-privileged command buffer\n");
sdma_v4_0_print_iv_entry(adev, entry);
return 0;
@ -2597,27 +2599,18 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_srbm_write_irq_funcs = {
static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
adev->sdma.trap_irq.num_types = adev->sdma.num_instances;
adev->sdma.ecc_irq.num_types = adev->sdma.num_instances;
/*For Arcturus and Aldebaran, add another 4 irq handler*/
switch (adev->sdma.num_instances) {
case 1:
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
break;
case 5:
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
break;
case 8:
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
adev->sdma.vm_hole_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
adev->sdma.doorbell_invalid_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
adev->sdma.pool_timeout_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
adev->sdma.srbm_write_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
adev->sdma.vm_hole_irq.num_types = adev->sdma.num_instances;
adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances;
adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances;
adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances;
break;
case 2:
default:
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
break;
}
adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;

View File

@ -405,18 +405,6 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
/* Invalidate L2, because if we don't do it, we might get stale cache
* lines from previous IBs.
*/
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV |
SDMA_GCR_GL2_WB |
SDMA_GCR_GLM_INV |
SDMA_GCR_GLM_WB) << 16);
amdgpu_ring_write(ring, 0xffffff80);
amdgpu_ring_write(ring, 0xffff);
/* An IB packet must end on a 8 DW boundary--the next dword
* must be on a 8-dword boundary. Our IB packet below is 6
* dwords long, thus add x number of NOPs, such that, in
@ -437,6 +425,33 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
}
/**
* sdma_v5_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
*
* @ring: amdgpu ring pointer
*
* flush the IB by graphics cache rinse.
*/
static void sdma_v5_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
{
uint32_t gcr_cntl =
SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
SDMA_GCR_GLI_INV(1);
/* flush the entire L0/L1/L2 cache; this could be narrowed based on performance requirements */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
}
/**
* sdma_v5_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
*
@ -1643,6 +1658,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */
.emit_ib = sdma_v5_0_ring_emit_ib,
.emit_mem_sync = sdma_v5_0_ring_emit_mem_sync,
.emit_fence = sdma_v5_0_ring_emit_fence,
.emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,
.emit_vm_flush = sdma_v5_0_ring_emit_vm_flush,

View File

@ -1556,6 +1556,10 @@ static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *ade
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
if (adev->sdma.instance[i].fw_version < 70 && adev->asic_type == CHIP_VANGOGH)
adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_MGCG;
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
/* Enable sdma clock gating */
def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
@ -1589,6 +1593,10 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
if (adev->sdma.instance[i].fw_version < 70 && adev->asic_type == CHIP_VANGOGH)
adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_LS;
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
/* Enable sdma mem light sleep */
def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));

View File

@ -88,6 +88,23 @@ static u32 smuio_v13_0_get_die_id(struct amdgpu_device *adev)
return die_id;
}
/**
* smuio_v13_0_get_socket_id - query socket id from FCH
*
* @adev: amdgpu device pointer
*
* Returns socket id
*/
static u32 smuio_v13_0_get_socket_id(struct amdgpu_device *adev)
{
u32 data, socket_id;
data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
socket_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, SOCKET_ID);
return socket_id;
}
/**
* smuio_v13_0_supports_host_gpu_xgmi - detect xgmi interface between cpu and gpu/s.
*
@ -115,6 +132,7 @@ const struct amdgpu_smuio_funcs smuio_v13_0_funcs = {
.get_rom_index_offset = smuio_v13_0_get_rom_index_offset,
.get_rom_data_offset = smuio_v13_0_get_rom_data_offset,
.get_die_id = smuio_v13_0_get_die_id,
.get_socket_id = smuio_v13_0_get_socket_id,
.is_host_gpu_xgmi_supported = smuio_v13_0_is_host_gpu_xgmi_supported,
.update_rom_clock_gating = smuio_v13_0_update_rom_clock_gating,
.get_clock_gating_state = smuio_v13_0_get_clock_gating_state,

View File

@ -655,7 +655,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
int ret = 0;
/* avoid NBIF got stuck when do RAS recovery in BACO reset */
if (ras && ras->supported)
if (ras && adev->ras_enabled)
adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
ret = amdgpu_dpm_baco_reset(adev);
@ -663,7 +663,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
return ret;
/* re-enable doorbell interrupt after BACO exit */
if (ras && ras->supported)
if (ras && adev->ras_enabled)
adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
return 0;
@ -710,7 +710,8 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
* 1. PMFW version > 0x284300: all cases use baco
* 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco
*/
if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400)
if (ras && adev->ras_enabled &&
adev->pm.fw_version <= 0x283400)
baco_reset = false;
break;
case CHIP_ALDEBARAN:
@ -816,7 +817,7 @@ static void soc15_pcie_gen3_enable(struct amdgpu_device *adev)
static void soc15_program_aspm(struct amdgpu_device *adev)
{
if (amdgpu_aspm != 1)
if (!amdgpu_aspm)
return;
if (!(adev->flags & AMD_IS_APU) &&
@ -1522,9 +1523,6 @@ static int soc15_common_late_init(void *handle)
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_get_irq(adev);
if (adev->hdp.funcs->reset_ras_error_count)
adev->hdp.funcs->reset_ras_error_count(adev);
if (adev->nbio.ras_funcs &&
adev->nbio.ras_funcs->ras_late_init)
r = adev->nbio.ras_funcs->ras_late_init(adev);

View File

@ -105,6 +105,12 @@ struct ta_ras_trigger_error_input {
uint64_t value; // method of error injection, i.e. persistent, coherent, etc.
};
struct ta_ras_init_flags
{
uint8_t poison_mode_en;
uint8_t dgpu_mode;
};
struct ta_ras_output_flags
{
uint8_t ras_init_success_flag;
@ -115,6 +121,7 @@ struct ta_ras_output_flags
/* Common input structure for RAS callbacks */
/**********************************************************/
union ta_ras_cmd_input {
struct ta_ras_init_flags init_flags;
struct ta_ras_enable_features_input enable_features;
struct ta_ras_disable_features_input disable_features;
struct ta_ras_trigger_error_input trigger_error;

View File

@ -104,6 +104,7 @@ static int vega10_ih_toggle_ring_interrupts(struct amdgpu_device *adev,
tmp = RREG32(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
/* enable_intr field is only valid in ring0 */
if (ih == &adev->irq.ih)
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));

View File

@ -1136,7 +1136,7 @@ static void vi_program_aspm(struct amdgpu_device *adev)
bool bL1SS = false;
bool bClkReqSupport = true;
if (amdgpu_aspm != 1)
if (!amdgpu_aspm)
return;
if (adev->flags & AMD_IS_APU ||

View File

@ -12,3 +12,16 @@ config HSA_AMD
select DRM_AMDGPU_USERPTR
help
Enable this if you want to use HSA features on AMD GPU devices.
config HSA_AMD_SVM
bool "Enable HMM-based shared virtual memory manager"
depends on HSA_AMD && DEVICE_PRIVATE
default y
select HMM_MIRROR
select MMU_NOTIFIER
help
Enable this to use unified memory and managed memory in HIP. This
memory manager supports two modes of operation. One based on
preemptions and one based on page faults. To enable page fault
based memory management on most GFXv9 GPUs, set the module
parameter amdgpu.noretry=0.

View File

@ -63,3 +63,8 @@ endif
ifneq ($(CONFIG_DEBUG_FS),)
AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o
endif
ifneq ($(CONFIG_HSA_AMD_SVM),)
AMDKFD_FILES += $(AMDKFD_PATH)/kfd_svm.o \
$(AMDKFD_PATH)/kfd_migrate.o
endif

View File

@ -38,6 +38,7 @@
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_svm.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
@ -1297,7 +1298,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
dev->kgd, args->va_addr, args->size,
pdd->vm, (struct kgd_mem **) &mem, &offset,
pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
flags);
if (err)
@ -1328,7 +1329,8 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
return 0;
err_free:
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
pdd->drm_priv, NULL);
err_unlock:
mutex_unlock(&p->mutex);
return err;
@ -1365,7 +1367,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
}
ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
(struct kgd_mem *)mem, &size);
(struct kgd_mem *)mem, pdd->drm_priv, &size);
/* If freeing the buffer failed, leave the handle in place for
* clean-up during process tear-down.
@ -1448,7 +1450,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
goto get_mem_obj_from_handle_failed;
}
err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
if (err) {
pr_err("Failed to map to gpu %d/%d\n",
i, args->n_devices);
@ -1555,7 +1557,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
goto get_mem_obj_from_handle_failed;
}
err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
if (err) {
pr_err("Failed to unmap from gpu %d/%d\n",
i, args->n_devices);
@ -1701,7 +1703,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
}
r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
args->va_addr, pdd->vm,
args->va_addr, pdd->drm_priv,
(struct kgd_mem **)&mem, &size,
NULL);
if (r)
@ -1721,7 +1723,8 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
return 0;
err_free:
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
pdd->drm_priv, NULL);
err_unlock:
mutex_unlock(&p->mutex);
dma_buf_put(dmabuf);
@ -1742,6 +1745,64 @@ static int kfd_ioctl_smi_events(struct file *filep,
return kfd_smi_event_open(dev, &args->anon_fd);
}
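/* Set or query the per-process XNACK mode. Changing the mode is refused with
* -EBUSY while the process has user queues and with -EPERM if enabling XNACK
* is not supported; a negative xnack_enabled only queries the current mode.
*/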
static int kfd_ioctl_set_xnack_mode(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_set_xnack_mode_args *args = data;
int r = 0;
mutex_lock(&p->mutex);
if (args->xnack_enabled >= 0) {
if (!list_empty(&p->pqm.queues)) {
pr_debug("Process has user queues running\n");
mutex_unlock(&p->mutex);
return -EBUSY;
}
if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
r = -EPERM;
else
p->xnack_enabled = args->xnack_enabled;
} else {
args->xnack_enabled = p->xnack_enabled;
}
mutex_unlock(&p->mutex);
return r;
}
#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
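/* Forward SVM range operations to svm_ioctl() under the process mutex after
* checking that SVM is enabled for the process and that the range is page
* aligned and non-empty.
*/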
static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
{
struct kfd_ioctl_svm_args *args = data;
int r = 0;
if (p->svm_disabled)
return -EPERM;
pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
args->start_addr, args->size, args->op, args->nattr);
if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))
return -EINVAL;
if (!args->start_addr || !args->size)
return -EINVAL;
mutex_lock(&p->mutex);
r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
args->attrs);
mutex_unlock(&p->mutex);
return r;
}
#else
static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
{
return -EPERM;
}
#endif
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
.cmd_drv = 0, .name = #ioctl}
@ -1840,6 +1901,11 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
kfd_ioctl_smi_events, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
kfd_ioctl_set_xnack_mode, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)

View File

@ -55,7 +55,7 @@ struct kfd_gpu_cache_info {
uint32_t cache_level;
uint32_t flags;
/* Indicates how many Compute Units share this cache
* Value = 1 indicates the cache is not shared
* within a SA. Value = 1 indicates the cache is not shared
*/
uint32_t num_cu_shared;
};
@ -69,7 +69,6 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 1,
},
{
/* Scalar L1 Instruction Cache (in SQC module) per bank */
@ -126,9 +125,6 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
/* TODO: Add L2 Cache information */
};
/* NOTE: In future if more information is added to struct kfd_gpu_cache_info
* the following ASICs may need a separate table.
*/
#define hawaii_cache_info kaveri_cache_info
#define tonga_cache_info carrizo_cache_info
#define fiji_cache_info carrizo_cache_info
@ -136,13 +132,562 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
#define polaris11_cache_info carrizo_cache_info
#define polaris12_cache_info carrizo_cache_info
#define vegam_cache_info carrizo_cache_info
/* TODO - check & update Vega10 cache details */
#define vega10_cache_info carrizo_cache_info
#define raven_cache_info carrizo_cache_info
#define renoir_cache_info carrizo_cache_info
/* TODO - check & update Navi10 cache details */
#define navi10_cache_info carrizo_cache_info
#define vangogh_cache_info carrizo_cache_info
/* NOTE: L1 cache information has been updated and L2/L3
* cache information has been added for Vega10 and
* newer ASICs. The unit for cache_size is KiB.
* In the future, cache details must be checked and
* updated for every new ASIC.
*/
static struct kfd_gpu_cache_info vega10_cache_info[] = {
{
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 1,
},
{
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 3,
},
{
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 3,
},
{
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 4096,
.cache_level = 2,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 16,
},
};
static struct kfd_gpu_cache_info raven_cache_info[] = {
{
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 1,
},
{
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 3,
},
{
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 3,
},
{
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 1024,
.cache_level = 2,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 11,
},
};
static struct kfd_gpu_cache_info renoir_cache_info[] = {
{
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 1,
},
{
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 3,
},
{
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 3,
},
{
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 1024,
.cache_level = 2,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 8,
},
};
static struct kfd_gpu_cache_info vega12_cache_info[] = {
{
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 1,
},
{
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 3,
},
{
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 3,
},
{
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 2048,
.cache_level = 2,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 5,
},
};
static struct kfd_gpu_cache_info vega20_cache_info[] = {
{
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 1,
},
{
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 3,
},
{
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 3,
},
{
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 8192,
.cache_level = 2,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 16,
},
};
static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
{
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 1,
},
{
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 2,
},
{
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 2,
},
{
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 8192,
.cache_level = 2,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 14,
},
};
static struct kfd_gpu_cache_info navi10_cache_info[] = {
{
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 1,
},
{
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 2,
},
{
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 2,
},
{
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 10,
},
{
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 4096,
.cache_level = 2,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 10,
},
};
static struct kfd_gpu_cache_info vangogh_cache_info[] = {
{
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 1,
},
{
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 2,
},
{
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 2,
},
{
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 8,
},
{
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 1024,
.cache_level = 2,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 8,
},
};
static struct kfd_gpu_cache_info navi14_cache_info[] = {
{
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 1,
},
{
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 2,
},
{
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 2,
},
{
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 12,
},
{
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 2048,
.cache_level = 2,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 12,
},
};
static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
{
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 1,
},
{
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 2,
},
{
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 2,
},
{
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 10,
},
{
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 4096,
.cache_level = 2,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 10,
},
{
/* L3 Data Cache per GPU */
.cache_size = 128*1024,
.cache_level = 3,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 10,
},
};
static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
{
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 1,
},
{
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 2,
},
{
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 2,
},
{
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 10,
},
{
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 3072,
.cache_level = 2,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 10,
},
{
/* L3 Data Cache per GPU */
.cache_size = 96*1024,
.cache_level = 3,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 10,
},
};
static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
{
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 1,
},
{
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 2,
},
{
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 2,
},
{
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 8,
},
{
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 2048,
.cache_level = 2,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 8,
},
{
/* L3 Data Cache per GPU */
.cache_size = 32*1024,
.cache_level = 3,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
.num_cu_shared = 8,
},
};
static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
struct crat_subtype_computeunit *cu)
@ -544,7 +1089,7 @@ err:
}
/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
static int fill_in_pcache(struct crat_subtype_cache *pcache,
static int fill_in_l1_pcache(struct crat_subtype_cache *pcache,
struct kfd_gpu_cache_info *pcache_info,
struct kfd_cu_info *cu_info,
int mem_available,
@ -597,6 +1142,70 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache,
return 1;
}
/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
struct kfd_gpu_cache_info *pcache_info,
struct kfd_cu_info *cu_info,
int mem_available,
int cache_type, unsigned int cu_processor_id)
{
unsigned int cu_sibling_map_mask;
int first_active_cu;
int i, j, k;
/* First check if enough memory is available */
if (sizeof(struct crat_subtype_cache) > mem_available)
return -ENOMEM;
cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
cu_sibling_map_mask &=
((1 << pcache_info[cache_type].num_cu_shared) - 1);
first_active_cu = ffs(cu_sibling_map_mask);
/* A CU could be inactive. In case of a shared cache, find the first
* active CU; in case of a non-shared cache, skip the CU if it is
* inactive.
*/
if (first_active_cu) {
memset(pcache, 0, sizeof(struct crat_subtype_cache));
pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
pcache->length = sizeof(struct crat_subtype_cache);
pcache->flags = pcache_info[cache_type].flags;
pcache->processor_id_low = cu_processor_id
+ (first_active_cu - 1);
pcache->cache_level = pcache_info[cache_type].cache_level;
pcache->cache_size = pcache_info[cache_type].cache_size;
/* Sibling map is w.r.t processor_id_low, so shift out
* inactive CU
*/
cu_sibling_map_mask =
cu_sibling_map_mask >> (first_active_cu - 1);
k = 0;
for (i = 0; i < cu_info->num_shader_engines; i++) {
for (j = 0; j < cu_info->num_shader_arrays_per_engine;
j++) {
pcache->sibling_map[k] =
(uint8_t)(cu_sibling_map_mask & 0xFF);
pcache->sibling_map[k+1] =
(uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
pcache->sibling_map[k+2] =
(uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
pcache->sibling_map[k+3] =
(uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
k += 4;
cu_sibling_map_mask =
cu_info->cu_bitmap[i % 4][j + i / 4];
cu_sibling_map_mask &= (
(1 << pcache_info[cache_type].num_cu_shared)
- 1);
}
}
return 0;
}
return 1;
}
/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
* tables
*
@ -624,6 +1233,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
int mem_available = available_size;
unsigned int cu_processor_id;
int ret;
unsigned int num_cu_shared;
switch (kdev->device_info->asic_family) {
case CHIP_KAVERI:
@ -663,13 +1273,22 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_ARCTURUS:
case CHIP_ALDEBARAN:
pcache_info = vega10_cache_info;
num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
break;
case CHIP_VEGA12:
pcache_info = vega12_cache_info;
num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
break;
case CHIP_VEGA20:
case CHIP_ARCTURUS:
pcache_info = vega20_cache_info;
num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
break;
case CHIP_ALDEBARAN:
pcache_info = aldebaran_cache_info;
num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
break;
case CHIP_RAVEN:
pcache_info = raven_cache_info;
num_of_cache_types = ARRAY_SIZE(raven_cache_info);
@ -680,13 +1299,25 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
break;
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
case CHIP_DIMGREY_CAVEFISH:
pcache_info = navi10_cache_info;
num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
break;
case CHIP_NAVI14:
pcache_info = navi14_cache_info;
num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
break;
case CHIP_SIENNA_CICHLID:
pcache_info = sienna_cichlid_cache_info;
num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
break;
case CHIP_NAVY_FLOUNDER:
pcache_info = navy_flounder_cache_info;
num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
break;
case CHIP_DIMGREY_CAVEFISH:
pcache_info = dimgrey_cavefish_cache_info;
num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
break;
case CHIP_VANGOGH:
pcache_info = vangogh_cache_info;
num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
@ -709,40 +1340,58 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
*/
for (ct = 0; ct < num_of_cache_types; ct++) {
cu_processor_id = gpu_processor_id;
for (i = 0; i < cu_info->num_shader_engines; i++) {
for (j = 0; j < cu_info->num_shader_arrays_per_engine;
j++) {
for (k = 0; k < cu_info->num_cu_per_sh;
k += pcache_info[ct].num_cu_shared) {
cu_processor_id = gpu_processor_id;
if (pcache_info[ct].cache_level == 1) {
for (i = 0; i < cu_info->num_shader_engines; i++) {
for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
for (k = 0; k < cu_info->num_cu_per_sh;
k += pcache_info[ct].num_cu_shared) {
ret = fill_in_l1_pcache(pcache,
pcache_info,
cu_info,
mem_available,
cu_info->cu_bitmap[i % 4][j + i / 4],
ct,
cu_processor_id,
k);
ret = fill_in_pcache(pcache,
pcache_info,
cu_info,
mem_available,
cu_info->cu_bitmap[i % 4][j + i / 4],
ct,
cu_processor_id,
k);
if (ret < 0)
break;
if (ret < 0)
break;
if (!ret) {
pcache++;
(*num_of_entries)++;
mem_available -= sizeof(*pcache);
(*size_filled) += sizeof(*pcache);
}
if (!ret) {
pcache++;
(*num_of_entries)++;
mem_available -=
sizeof(*pcache);
(*size_filled) +=
sizeof(*pcache);
}
/* Move to next CU block */
cu_processor_id +=
pcache_info[ct].num_cu_shared;
}
}
/* Move to next CU block */
num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
cu_info->num_cu_per_sh) ?
pcache_info[ct].num_cu_shared :
(cu_info->num_cu_per_sh - k);
cu_processor_id += num_cu_shared;
}
}
}
} else {
ret = fill_in_l2_l3_pcache(pcache,
pcache_info,
cu_info,
mem_available,
ct,
cu_processor_id);
if (ret < 0)
break;
if (!ret) {
pcache++;
(*num_of_entries)++;
mem_available -= sizeof(*pcache);
(*size_filled) += sizeof(*pcache);
}
}
}
pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
@ -1100,6 +1749,92 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
return 0;
}
#ifdef CONFIG_ACPI_NUMA
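/* Walk the ACPI SRAT table looking for a generic affinity entry whose device
* handle matches this GPU's PCI domain/BDF, and assign the matching NUMA node
* to the device. Falls back to node 0 if the reported node looks bogus.
*/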
static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
{
struct acpi_table_header *table_header = NULL;
struct acpi_subtable_header *sub_header = NULL;
unsigned long table_end, subtable_len;
u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
pci_dev_id(kdev->pdev);
u32 bdf;
acpi_status status;
struct acpi_srat_cpu_affinity *cpu;
struct acpi_srat_generic_affinity *gpu;
int pxm = 0, max_pxm = 0;
int numa_node = NUMA_NO_NODE;
bool found = false;
/* Fetch the SRAT table from ACPI */
status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
if (status == AE_NOT_FOUND) {
pr_warn("SRAT table not found\n");
return;
} else if (ACPI_FAILURE(status)) {
const char *err = acpi_format_exception(status);
pr_err("SRAT table error: %s\n", err);
return;
}
table_end = (unsigned long)table_header + table_header->length;
/* Parse all entries looking for a match. */
sub_header = (struct acpi_subtable_header *)
((unsigned long)table_header +
sizeof(struct acpi_table_srat));
subtable_len = sub_header->length;
while (((unsigned long)sub_header) + subtable_len < table_end) {
/*
* If length is 0, break from this loop to avoid
* infinite loop.
*/
if (subtable_len == 0) {
pr_err("SRAT invalid zero length\n");
break;
}
switch (sub_header->type) {
case ACPI_SRAT_TYPE_CPU_AFFINITY:
cpu = (struct acpi_srat_cpu_affinity *)sub_header;
pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
cpu->proximity_domain_lo;
if (pxm > max_pxm)
max_pxm = pxm;
break;
case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
gpu = (struct acpi_srat_generic_affinity *)sub_header;
bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
*((u16 *)(&gpu->device_handle[2]));
if (bdf == pci_id) {
found = true;
numa_node = pxm_to_node(gpu->proximity_domain);
}
break;
default:
break;
}
if (found)
break;
sub_header = (struct acpi_subtable_header *)
((unsigned long)sub_header + subtable_len);
subtable_len = sub_header->length;
}
acpi_put_table(table_header);
/* Workaround bad cpu-gpu binding case */
if (found && (numa_node < 0 ||
numa_node > pxm_to_node(max_pxm)))
numa_node = 0;
if (numa_node != NUMA_NO_NODE)
set_dev_node(&kdev->pdev->dev, numa_node);
}
#endif
/* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
* to its NUMA node
* @avail_size: Available size in the memory
@ -1140,11 +1875,17 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
*/
sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
sub_type_hdr->num_hops_xgmi = 1;
} else {
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
}
sub_type_hdr->proximity_domain_from = proximity_domain;
#ifdef CONFIG_ACPI_NUMA
if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
kfd_find_numa_node_in_srat(kdev);
#endif
#ifdef CONFIG_NUMA
if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
sub_type_hdr->proximity_domain_to = 0;

View File

@ -26,10 +26,12 @@
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_headers_aldebaran.h"
#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
#include "kfd_migrate.h"
#define MQD_SIZE_ALIGNED 768
@ -576,7 +578,7 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = {
[CHIP_VEGA20] = {&vega20_device_info, NULL},
[CHIP_RENOIR] = {&renoir_device_info, NULL},
[CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info},
[CHIP_ALDEBARAN] = {&aldebaran_device_info, NULL},
[CHIP_ALDEBARAN] = {&aldebaran_device_info, &aldebaran_device_info},
[CHIP_NAVI10] = {&navi10_device_info, NULL},
[CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
[CHIP_NAVI14] = {&navi14_device_info, NULL},
@ -697,7 +699,9 @@ static int kfd_gws_init(struct kfd_dev *kfd)
&& kfd->device_info->asic_family <= CHIP_RAVEN
&& kfd->mec2_fw_version >= 0x1b3)
|| (kfd->device_info->asic_family == CHIP_ARCTURUS
&& kfd->mec2_fw_version >= 0x30))
&& kfd->mec2_fw_version >= 0x30)
|| (kfd->device_info->asic_family == CHIP_ALDEBARAN
&& kfd->mec2_fw_version >= 0x28))
ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);
@ -713,7 +717,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
struct drm_device *ddev,
const struct kgd2kfd_shared_resources *gpu_resources)
{
unsigned int size;
unsigned int size, map_process_packet_size;
kfd->ddev = ddev;
kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
@ -748,7 +752,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
* calculate max size of runlist packet.
* There can be only 2 packets at once
*/
size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) +
map_process_packet_size =
kfd->device_info->asic_family == CHIP_ALDEBARAN ?
sizeof(struct pm4_mes_map_process_aldebaran) :
sizeof(struct pm4_mes_map_process);
size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
+ sizeof(struct pm4_mes_runlist)) * 2;
@ -814,6 +822,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd_cwsr_init(kfd);
svm_migrate_init((struct amdgpu_device *)kfd->kgd);
if (kfd_resume(kfd))
goto kfd_resume_error;
@ -862,6 +872,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
if (kfd->init_complete) {
kgd2kfd_suspend(kfd, false);
svm_migrate_fini((struct amdgpu_device *)kfd->kgd);
device_queue_manager_uninit(kfd->dqm);
kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd);

View File

@ -738,7 +738,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
pdd = qpd_to_pdd(qpd);
/* Retrieve PD base */
pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
dqm_lock(dqm);
if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
@ -821,7 +821,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
pdd = qpd_to_pdd(qpd);
/* Retrieve PD base */
pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
dqm_lock(dqm);
if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
@ -873,7 +873,7 @@ static int register_process(struct device_queue_manager *dqm,
pdd = qpd_to_pdd(qpd);
/* Retrieve PD base */
pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
dqm_lock(dqm);
list_add(&n->list, &dqm->queues);

View File

@ -61,10 +61,19 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
qpd->sh_mem_config =
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
if (dqm->dev->noretry &&
!dqm->dev->use_iommu_v2)
if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN) {
/* Aldebaran can safely support different XNACK modes
* per process
*/
if (!pdd->process->xnack_enabled)
qpd->sh_mem_config |=
1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
} else if (dqm->dev->noretry &&
!dqm->dev->use_iommu_v2) {
qpd->sh_mem_config |=
1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
}
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
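A minimal sketch of the retry-disable decision above (illustrative, not from the patch): on Aldebaran the bit follows the per-process XNACK setting, on older ASICs it follows the global noretry/IOMMU configuration. The shift value is an assumed placeholder for SH_MEM_CONFIG__RETRY_DISABLE__SHIFT.
#include <stdbool.h>
#include <stdint.h>
#define RETRY_DISABLE_SHIFT 3   /* assumed placeholder value */
static uint32_t retry_disable_bit(bool is_aldebaran, bool xnack_enabled,
				  bool noretry, bool use_iommu_v2)
{
	if (is_aldebaran)
		/* per-process choice: disable retry only when XNACK is off */
		return xnack_enabled ? 0 : (1u << RETRY_DISABLE_SHIFT);
	/* older ASICs: one boot-time setting for the whole GPU */
	return (noretry && !use_iommu_v2) ? (1u << RETRY_DISABLE_SHIFT) : 0;
}
int main(void)
{
	return retry_disable_bit(true, false, false, false) ? 0 : 1;
}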

View File

@ -405,6 +405,10 @@ int kfd_init_apertures(struct kfd_process *process)
case CHIP_POLARIS12:
case CHIP_VEGAM:
kfd_init_apertures_vi(pdd, id);
/* VI GPUs cannot support SVM with only
* 40 bits of virtual address space.
*/
process->svm_disabled = true;
break;
case CHIP_VEGA10:
case CHIP_VEGA12:

View File

@ -25,7 +25,6 @@
#include "soc15_int.h"
#include "kfd_device_queue_manager.h"
#include "kfd_smi_events.h"
#include "amdgpu.h"
enum SQ_INTERRUPT_WORD_ENCODING {
SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0,

View File

@ -0,0 +1,937 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright 2020-2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/types.h>
#include <linux/hmm.h>
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_mn.h"
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"
static uint64_t
svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
{
return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
}
static int
svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
unsigned int num_dw, num_bytes;
struct dma_fence *fence;
uint64_t src_addr, dst_addr;
uint64_t pte_flags;
void *cpu_addr;
int r;
/* use gart window 0 */
*gart_addr = adev->gmc.gart_start;
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = npages * 8;
r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
AMDGPU_IB_POOL_DELAYED, &job);
if (r)
return r;
src_addr = num_dw * 4;
src_addr += job->ibs[0].gpu_addr;
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
dst_addr, num_bytes, false);
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
pte_flags |= AMDGPU_PTE_WRITEABLE;
pte_flags |= adev->gart.gart_pte_flags;
cpu_addr = &job->ibs[0].ptr[num_dw];
r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
if (r)
goto error_free;
r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
if (r)
goto error_free;
dma_fence_put(fence);
return r;
error_free:
amdgpu_job_free(job);
return r;
}
/**
* svm_migrate_copy_memory_gart - sdma copy data between ram and vram
*
* @adev: amdgpu device the sdma ring is running on
* @sys: dma address array of the system memory pages
* @vram: physical address array of the vram pages
* @npages: number of pages to copy
* @direction: enum MIGRATION_COPY_DIR
* @mfence: output, sdma fence to signal after sdma is done
*
* The ram side is addressed through contiguous GART table entries that map the
* ram pages; the vram side uses the direct mapping of the vram pages, which
* therefore must be npages contiguous pages.
* GART updates and the sdma copy share the same buffer-copy ring. The copy is
* split into transfers of at most GTT_MAX_PAGES pages; all sdma operations are
* serialized, so waiting on the returned fence of the last one is enough to
* know the whole copy is done.
*
* Context: Process context, takes and releases gtt_window_lock
*
* Return:
* 0 - OK, otherwise error code
*/
static int
svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
uint64_t *vram, uint64_t npages,
enum MIGRATION_COPY_DIR direction,
struct dma_fence **mfence)
{
const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
uint64_t gart_s, gart_d;
struct dma_fence *next;
uint64_t size;
int r;
mutex_lock(&adev->mman.gtt_window_lock);
while (npages) {
size = min(GTT_MAX_PAGES, npages);
if (direction == FROM_VRAM_TO_RAM) {
gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);
} else if (direction == FROM_RAM_TO_VRAM) {
r = svm_migrate_gart_map(ring, size, sys, &gart_s,
KFD_IOCTL_SVM_FLAG_GPU_RO);
gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
}
if (r) {
pr_debug("failed %d to create gart mapping\n", r);
goto out_unlock;
}
r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
NULL, &next, false, true, false);
if (r) {
pr_debug("failed %d to copy memory\n", r);
goto out_unlock;
}
dma_fence_put(*mfence);
*mfence = next;
npages -= size;
if (npages) {
sys += size;
vram += size;
}
}
out_unlock:
mutex_unlock(&adev->mman.gtt_window_lock);
return r;
}
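A standalone sketch (not part of the patch) of the chunking pattern used by svm_migrate_copy_memory_gart above: the copy is broken into windows of at most GTT_MAX_PAGES pages, and because the submissions are serialized on one ring only the last fence needs to be waited on. The constant below is an illustrative assumption.
#include <stdint.h>
#include <stdio.h>
#define GTT_MAX_PAGES 2048   /* assumed stand-in for AMDGPU_GTT_MAX_TRANSFER_SIZE */
int main(void)
{
	uint64_t npages = 5000, done = 0;
	while (npages) {
		uint64_t size = npages < GTT_MAX_PAGES ? npages : GTT_MAX_PAGES;
		/* the real code maps 'size' pages through the GART window,
		 * submits one sdma copy, and keeps only the newest fence */
		printf("copy pages [%llu, %llu)\n",
		       (unsigned long long)done,
		       (unsigned long long)(done + size));
		done += size;
		npages -= size;
	}
	return 0;
}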
/**
* svm_migrate_copy_done - wait for memory copy sdma is done
*
* @adev: amdgpu device the sdma memory copy is executing on
* @mfence: migrate fence
*
* Wait for the dma fence to be signaled. If the copy was split into multiple
* sdma operations, this is the fence of the last one.
*
* Context: called after svm_migrate_copy_memory
*
* Return:
* 0 - success
* otherwise - error code from dma fence signal
*/
static int
svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
{
int r = 0;
if (mfence) {
r = dma_fence_wait(mfence, false);
dma_fence_put(mfence);
pr_debug("sdma copy memory fence done\n");
}
return r;
}
static uint64_t
svm_migrate_node_physical_addr(struct amdgpu_device *adev,
struct drm_mm_node **mm_node, uint64_t *offset)
{
struct drm_mm_node *node = *mm_node;
uint64_t pos = *offset;
if (node->start == AMDGPU_BO_INVALID_OFFSET) {
pr_debug("drm node is not validated\n");
return 0;
}
pr_debug("vram node start 0x%llx npages 0x%llx\n", node->start,
node->size);
if (pos >= node->size) {
do {
pos -= node->size;
node++;
} while (pos >= node->size);
*mm_node = node;
*offset = pos;
}
return (node->start + pos) << PAGE_SHIFT;
}
unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
{
return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
}
static void
svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
{
struct page *page;
page = pfn_to_page(pfn);
page->zone_device_data = prange;
get_page(page);
lock_page(page);
}
static void
svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
{
struct page *page;
page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr));
unlock_page(page);
put_page(page);
}
static unsigned long
svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
{
unsigned long addr;
addr = page_to_pfn(page) << PAGE_SHIFT;
return (addr - adev->kfd.dev->pgmap.range.start);
}
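A small illustrative sketch of the two conversions above (not from the patch): device-private pages live in a host physical range starting at pgmap.range.start, so a VRAM byte offset and a pfn convert back and forth with one add or subtract plus a shift. PAGE_SHIFT and the start address are assumptions for illustration.
#include <assert.h>
#include <stdint.h>
#define PAGE_SHIFT 12
int main(void)
{
	uint64_t pgmap_start = 0x100000000ull;   /* assumed pgmap.range.start */
	uint64_t vram_addr   = 0x2000;           /* page-aligned offset into vram */
	uint64_t pfn  = (vram_addr + pgmap_start) >> PAGE_SHIFT;   /* as in svm_migrate_addr_to_pfn */
	uint64_t back = (pfn << PAGE_SHIFT) - pgmap_start;         /* as in svm_migrate_addr */
	assert(back == vram_addr);   /* round-trips for page-aligned addresses */
	return 0;
}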
static struct page *
svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
{
struct page *page;
page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
if (page)
lock_page(page);
return page;
}
static void svm_migrate_put_sys_page(unsigned long addr)
{
struct page *page;
page = pfn_to_page(addr >> PAGE_SHIFT);
unlock_page(page);
put_page(page);
}
static int
svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
dma_addr_t *scratch)
{
uint64_t npages = migrate->cpages;
struct device *dev = adev->dev;
struct drm_mm_node *node;
dma_addr_t *src;
uint64_t *dst;
uint64_t vram_addr;
uint64_t offset;
uint64_t i, j;
int r;
pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
prange->last);
src = scratch;
dst = (uint64_t *)(scratch + npages);
r = svm_range_vram_node_new(adev, prange, true);
if (r) {
pr_debug("failed %d get 0x%llx pages from vram\n", r, npages);
goto out;
}
node = prange->ttm_res->mm_node;
offset = prange->offset;
vram_addr = svm_migrate_node_physical_addr(adev, &node, &offset);
if (!vram_addr) {
WARN_ONCE(1, "vram node address is 0\n");
r = -ENOMEM;
goto out;
}
for (i = j = 0; i < npages; i++) {
struct page *spage;
dst[i] = vram_addr + (j << PAGE_SHIFT);
migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
svm_migrate_get_vram_page(prange, migrate->dst[i]);
migrate->dst[i] = migrate_pfn(migrate->dst[i]);
migrate->dst[i] |= MIGRATE_PFN_LOCKED;
if (migrate->src[i] & MIGRATE_PFN_VALID) {
spage = migrate_pfn_to_page(migrate->src[i]);
src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
DMA_TO_DEVICE);
r = dma_mapping_error(dev, src[i]);
if (r) {
pr_debug("failed %d dma_map_page\n", r);
goto out_free_vram_pages;
}
} else {
if (j) {
r = svm_migrate_copy_memory_gart(
adev, src + i - j,
dst + i - j, j,
FROM_RAM_TO_VRAM,
mfence);
if (r)
goto out_free_vram_pages;
offset += j;
vram_addr = (node->start + offset) << PAGE_SHIFT;
j = 0;
} else {
offset++;
vram_addr += PAGE_SIZE;
}
if (offset >= node->size) {
node++;
pr_debug("next node size 0x%llx\n", node->size);
vram_addr = node->start << PAGE_SHIFT;
offset = 0;
}
continue;
}
pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n",
src[i] >> PAGE_SHIFT, page_to_pfn(spage));
if (j + offset >= node->size - 1 && i < npages - 1) {
r = svm_migrate_copy_memory_gart(adev, src + i - j,
dst + i - j, j + 1,
FROM_RAM_TO_VRAM,
mfence);
if (r)
goto out_free_vram_pages;
node++;
pr_debug("next node size 0x%llx\n", node->size);
vram_addr = node->start << PAGE_SHIFT;
offset = 0;
j = 0;
} else {
j++;
}
}
r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j,
FROM_RAM_TO_VRAM, mfence);
out_free_vram_pages:
if (r) {
pr_debug("failed %d to copy memory to vram\n", r);
while (i--) {
svm_migrate_put_vram_page(adev, dst[i]);
migrate->dst[i] = 0;
}
}
out:
return r;
}
static int
svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start,
uint64_t end)
{
uint64_t npages = (end - start) >> PAGE_SHIFT;
struct dma_fence *mfence = NULL;
struct migrate_vma migrate;
dma_addr_t *scratch;
size_t size;
void *buf;
int r = -ENOMEM;
int retry = 0;
memset(&migrate, 0, sizeof(migrate));
migrate.vma = vma;
migrate.start = start;
migrate.end = end;
migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
migrate.pgmap_owner = adev;
size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
size *= npages;
buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
if (!buf)
goto out;
migrate.src = buf;
migrate.dst = migrate.src + npages;
scratch = (dma_addr_t *)(migrate.dst + npages);
retry:
r = migrate_vma_setup(&migrate);
if (r) {
pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
r, prange->svms, prange->start, prange->last);
goto out_free;
}
if (migrate.cpages != npages) {
pr_debug("collect 0x%lx/0x%llx pages, retry\n", migrate.cpages,
npages);
migrate_vma_finalize(&migrate);
if (retry++ >= 3) {
r = -ENOMEM;
pr_debug("failed %d migrate svms 0x%p [0x%lx 0x%lx]\n",
r, prange->svms, prange->start, prange->last);
goto out_free;
}
goto retry;
}
if (migrate.cpages) {
r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence,
scratch);
migrate_vma_pages(&migrate);
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
}
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
svm_range_free_dma_mappings(prange);
out_free:
kvfree(buf);
out:
return r;
}
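An illustrative sketch of the single scratch allocation carved up above: per page the buffer holds a migrate src slot, a migrate dst slot, one GPU-side address and one dma address. The element sizes assume a typical 64-bit build and are not taken from the patch.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
int main(void)
{
	uint64_t npages  = 512;
	size_t per_page = 2 * sizeof(unsigned long)   /* migrate.src + migrate.dst */
			+ sizeof(uint64_t)            /* GPU-side address scratch */
			+ sizeof(uint64_t);           /* dma_addr_t scratch (assumed 8 bytes) */
	unsigned long *src, *dst;
	uint64_t *scratch;
	void *buf = calloc((size_t)npages, per_page);
	if (!buf)
		return 1;
	src     = buf;                          /* first npages entries */
	dst     = src + npages;                 /* next npages entries  */
	scratch = (uint64_t *)(dst + npages);   /* remaining scratch    */
	printf("buffer %zu bytes, scratch at offset %zu\n",
	       (size_t)npages * per_page, (size_t)((char *)scratch - (char *)buf));
	free(buf);
	return 0;
}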
/**
* svm_migrate_ram_to_vram - migrate svm range from system to device
* @prange: range structure
* @best_loc: the device to migrate to
* @mm: the process mm structure
*
* Context: Process context, caller hold mmap read lock, svms lock, prange lock
*
* Return:
* 0 - OK, otherwise error code
*/
static int
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm)
{
unsigned long addr, start, end;
struct vm_area_struct *vma;
struct amdgpu_device *adev;
int r = 0;
if (prange->actual_loc == best_loc) {
pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
prange->svms, prange->start, prange->last, best_loc);
return 0;
}
adev = svm_range_get_adev_by_id(prange, best_loc);
if (!adev) {
pr_debug("failed to get device by id 0x%x\n", best_loc);
return -ENODEV;
}
pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
prange->start, prange->last, best_loc);
/* FIXME: workaround for page locking bug with invalid pages */
svm_range_prefault(prange, mm);
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
for (addr = start; addr < end;) {
unsigned long next;
vma = find_vma(mm, addr);
if (!vma || addr < vma->vm_start)
break;
next = min(vma->vm_end, end);
r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
if (r) {
pr_debug("failed to migrate\n");
break;
}
addr = next;
}
if (!r)
prange->actual_loc = best_loc;
return r;
}
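A compilable sketch (not part of the patch) of the VMA walk used above and in svm_migrate_vram_to_ram: the range is migrated one VMA at a time, each step clamped to the VMA end, and the loop stops on the first failure. vma_end() and migrate_one() are hypothetical stand-ins for find_vma() and the per-VMA migrate helpers.
#include <stdint.h>
#include <stdio.h>
/* hypothetical stand-in: pretend every VMA ends on a 2 MiB boundary */
static uint64_t vma_end(uint64_t addr)
{
	return (addr | ((2ull << 20) - 1)) + 1;
}
/* hypothetical stand-in for the per-VMA migrate helper */
static int migrate_one(uint64_t start, uint64_t end)
{
	printf("migrate [%#llx, %#llx)\n",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}
int main(void)
{
	uint64_t start = 0x100000, end = 0x600000, addr;
	int r = 0;
	for (addr = start; addr < end; ) {
		uint64_t next = vma_end(addr);
		if (next > end)
			next = end;        /* clamp to the requested range */
		r = migrate_one(addr, next);
		if (r)
			break;             /* stop on the first failed VMA */
		addr = next;
	}
	return r;
}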
static void svm_migrate_page_free(struct page *page)
{
/* Keep this function to avoid warning */
}
static int
svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
dma_addr_t *scratch)
{
uint64_t npages = migrate->cpages;
struct device *dev = adev->dev;
uint64_t *src;
dma_addr_t *dst;
struct page *dpage;
uint64_t i = 0, j;
uint64_t addr;
int r = 0;
pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
prange->last);
addr = prange->start << PAGE_SHIFT;
src = (uint64_t *)(scratch + npages);
dst = scratch;
for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) {
struct page *spage;
spage = migrate_pfn_to_page(migrate->src[i]);
if (!spage) {
pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n",
prange->svms, prange->start, prange->last);
r = -ENOMEM;
goto out_oom;
}
src[i] = svm_migrate_addr(adev, spage);
if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
r = svm_migrate_copy_memory_gart(adev, dst + i - j,
src + i - j, j,
FROM_VRAM_TO_RAM,
mfence);
if (r)
goto out_oom;
j = 0;
}
dpage = svm_migrate_get_sys_page(migrate->vma, addr);
if (!dpage) {
pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
prange->svms, prange->start, prange->last);
r = -ENOMEM;
goto out_oom;
}
dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
r = dma_mapping_error(dev, dst[i]);
if (r) {
pr_debug("failed %d dma_map_page\n", r);
goto out_oom;
}
pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n",
dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
migrate->dst[i] |= MIGRATE_PFN_LOCKED;
}
r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j,
FROM_VRAM_TO_RAM, mfence);
out_oom:
if (r) {
pr_debug("failed %d copy to ram\n", r);
while (i--) {
svm_migrate_put_sys_page(dst[i]);
migrate->dst[i] = 0;
}
}
return r;
}
static int
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start, uint64_t end)
{
uint64_t npages = (end - start) >> PAGE_SHIFT;
struct dma_fence *mfence = NULL;
struct migrate_vma migrate;
dma_addr_t *scratch;
size_t size;
void *buf;
int r = -ENOMEM;
memset(&migrate, 0, sizeof(migrate));
migrate.vma = vma;
migrate.start = start;
migrate.end = end;
migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
migrate.pgmap_owner = adev;
size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
size *= npages;
buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
if (!buf)
goto out;
migrate.src = buf;
migrate.dst = migrate.src + npages;
scratch = (dma_addr_t *)(migrate.dst + npages);
r = migrate_vma_setup(&migrate);
if (r) {
pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
r, prange->svms, prange->start, prange->last);
goto out_free;
}
pr_debug("cpages %ld\n", migrate.cpages);
if (migrate.cpages) {
r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
scratch);
migrate_vma_pages(&migrate);
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
} else {
pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
prange->start, prange->last);
}
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
out_free:
kvfree(buf);
out:
return r;
}
/**
* svm_migrate_vram_to_ram - migrate svm range from device to system
* @prange: range structure
* @mm: process mm, use current->mm if NULL
*
* Context: Process context, caller hold mmap read lock, svms lock, prange lock
*
* Return:
* 0 - OK, otherwise error code
*/
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
{
struct amdgpu_device *adev;
struct vm_area_struct *vma;
unsigned long addr;
unsigned long start;
unsigned long end;
int r = 0;
if (!prange->actual_loc) {
pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
prange->start, prange->last);
return 0;
}
adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
if (!adev) {
pr_debug("failed to get device by id 0x%x\n",
prange->actual_loc);
return -ENODEV;
}
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
prange->svms, prange, prange->start, prange->last,
prange->actual_loc);
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
for (addr = start; addr < end;) {
unsigned long next;
vma = find_vma(mm, addr);
if (!vma || addr < vma->vm_start)
break;
next = min(vma->vm_end, end);
r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
if (r) {
pr_debug("failed %d to migrate\n", r);
break;
}
addr = next;
}
if (!r) {
svm_range_vram_node_free(prange);
prange->actual_loc = 0;
}
return r;
}
/**
* svm_migrate_vram_to_vram - migrate svm range from device to device
* @prange: range structure
* @best_loc: the device to migrate to
* @mm: process mm, use current->mm if NULL
*
* Context: Process context, caller hold mmap read lock, svms lock, prange lock
*
* Return:
* 0 - OK, otherwise error code
*/
static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm)
{
int r;
/*
* TODO: when both devices have a large-BAR PCIe connection or are on the same
* xgmi hive, skip using system memory as the migration bridge
*/
pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
r = svm_migrate_vram_to_ram(prange, mm);
if (r)
return r;
return svm_migrate_ram_to_vram(prange, best_loc, mm);
}
int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm)
{
if (!prange->actual_loc)
return svm_migrate_ram_to_vram(prange, best_loc, mm);
else
return svm_migrate_vram_to_vram(prange, best_loc, mm);
}
/**
* svm_migrate_to_ram - CPU page fault handler
* @vmf: CPU vm fault information (vma and address)
*
* Context: vm fault handler, caller holds the mmap read lock
*
* Return:
* 0 - OK
* VM_FAULT_SIGBUS - notify the application of the fault with SIGBUS
*/
static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
{
unsigned long addr = vmf->address;
struct vm_area_struct *vma;
enum svm_work_list_ops op;
struct svm_range *parent;
struct svm_range *prange;
struct kfd_process *p;
struct mm_struct *mm;
int r = 0;
vma = vmf->vma;
mm = vma->vm_mm;
p = kfd_lookup_process_by_mm(vma->vm_mm);
if (!p) {
pr_debug("failed find process at fault address 0x%lx\n", addr);
return VM_FAULT_SIGBUS;
}
addr >>= PAGE_SHIFT;
pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);
mutex_lock(&p->svms.lock);
prange = svm_range_from_addr(&p->svms, addr, &parent);
if (!prange) {
pr_debug("cannot find svm range at 0x%lx\n", addr);
r = -EFAULT;
goto out;
}
mutex_lock(&parent->migrate_mutex);
if (prange != parent)
mutex_lock_nested(&prange->migrate_mutex, 1);
if (!prange->actual_loc)
goto out_unlock_prange;
svm_range_lock(parent);
if (prange != parent)
mutex_lock_nested(&prange->lock, 1);
r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
if (prange != parent)
mutex_unlock(&prange->lock);
svm_range_unlock(parent);
if (r) {
pr_debug("failed %d to split range by granularity\n", r);
goto out_unlock_prange;
}
r = svm_migrate_vram_to_ram(prange, mm);
if (r)
pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
prange, prange->start, prange->last);
/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
if (p->xnack_enabled && parent == prange)
op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
else
op = SVM_OP_UPDATE_RANGE_NOTIFIER;
svm_range_add_list_work(&p->svms, parent, mm, op);
schedule_deferred_list_work(&p->svms);
out_unlock_prange:
if (prange != parent)
mutex_unlock(&prange->migrate_mutex);
mutex_unlock(&parent->migrate_mutex);
out:
mutex_unlock(&p->svms.lock);
kfd_unref_process(p);
pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);
return r ? VM_FAULT_SIGBUS : 0;
}
static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
.page_free = svm_migrate_page_free,
.migrate_to_ram = svm_migrate_to_ram,
};
/* Each VRAM page uses sizeof(struct page) on system memory */
#define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))
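A worked example for the macro above (outside the patch): with 4 KiB pages and an assumed 64-byte struct page, registering 16 GiB of VRAM reserves roughly 256 MiB of system memory for page structs. Both constants are assumptions, not values taken from the kernel headers.
#include <stdint.h>
#include <stdio.h>
int main(void)
{
	uint64_t page_size   = 4096;
	uint64_t sizeof_page = 64;            /* assumed sizeof(struct page) */
	uint64_t vram        = 16ull << 30;   /* 16 GiB of device memory */
	uint64_t cost        = vram / page_size * sizeof_page;
	printf("%llu MiB reserved for VRAM page structs\n",
	       (unsigned long long)(cost >> 20));
	return 0;
}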
int svm_migrate_init(struct amdgpu_device *adev)
{
struct kfd_dev *kfddev = adev->kfd.dev;
struct dev_pagemap *pgmap;
struct resource *res;
unsigned long size;
void *r;
/* Page migration works on Vega10 or newer */
if (kfddev->device_info->asic_family < CHIP_VEGA10)
return -EINVAL;
pgmap = &kfddev->pgmap;
memset(pgmap, 0, sizeof(*pgmap));
/* TODO: register all vram to HMM for now.
* should remove reserved size
*/
size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
if (IS_ERR(res))
return -ENOMEM;
pgmap->type = MEMORY_DEVICE_PRIVATE;
pgmap->nr_range = 1;
pgmap->range.start = res->start;
pgmap->range.end = res->end;
pgmap->ops = &svm_migrate_pgmap_ops;
pgmap->owner = adev;
pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
r = devm_memremap_pages(adev->dev, pgmap);
if (IS_ERR(r)) {
pr_err("failed to register HMM device memory\n");
devm_release_mem_region(adev->dev, res->start,
res->end - res->start + 1);
return PTR_ERR(r);
}
pr_debug("reserve %ldMB system memory for VRAM pages struct\n",
SVM_HMM_PAGE_STRUCT_SIZE(size) >> 20);
amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));
pr_info("HMM registered %ldMB device memory\n", size >> 20);
return 0;
}
void svm_migrate_fini(struct amdgpu_device *adev)
{
struct dev_pagemap *pgmap = &adev->kfd.dev->pgmap;
devm_memunmap_pages(adev->dev, pgmap);
devm_release_mem_region(adev->dev, pgmap->range.start,
pgmap->range.end - pgmap->range.start + 1);
}

View File

@ -0,0 +1,65 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright 2020-2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef KFD_MIGRATE_H_
#define KFD_MIGRATE_H_
#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
#include <linux/rwsem.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/sched/mm.h>
#include <linux/hmm.h>
#include "kfd_priv.h"
#include "kfd_svm.h"
enum MIGRATION_COPY_DIR {
FROM_RAM_TO_VRAM = 0,
FROM_VRAM_TO_RAM
};
int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm);
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
int svm_migrate_init(struct amdgpu_device *adev);
void svm_migrate_fini(struct amdgpu_device *adev);
#else
static inline int svm_migrate_init(struct amdgpu_device *adev)
{
return 0;
}
static inline void svm_migrate_fini(struct amdgpu_device *adev)
{
/* empty */
}
#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
#endif /* KFD_MIGRATE_H_ */

View File

@ -124,14 +124,14 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
{
unsigned int alloc_size_bytes;
unsigned int *rl_buffer, rl_wptr, i;
int retval, proccesses_mapped;
int retval, processes_mapped;
struct device_process_node *cur;
struct qcm_process_device *qpd;
struct queue *q;
struct kernel_queue *kq;
bool is_over_subscription;
rl_wptr = retval = proccesses_mapped = 0;
rl_wptr = retval = processes_mapped = 0;
retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
&alloc_size_bytes, &is_over_subscription);
@ -148,7 +148,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
list_for_each_entry(cur, queues, list) {
qpd = cur->qpd;
/* build map process packet */
if (proccesses_mapped >= pm->dqm->processes_count) {
if (processes_mapped >= pm->dqm->processes_count) {
pr_debug("Not enough space left in runlist IB\n");
pm_release_ib(pm);
return -ENOMEM;
@ -158,7 +158,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
if (retval)
return retval;
proccesses_mapped++;
processes_mapped++;
inc_wptr(&rl_wptr, pm->pmf->map_process_size,
alloc_size_bytes);
@ -242,7 +242,6 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
case CHIP_RAVEN:
case CHIP_RENOIR:
case CHIP_ARCTURUS:
case CHIP_ALDEBARAN:
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
@ -252,6 +251,9 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
case CHIP_DIMGREY_CAVEFISH:
pm->pmf = &kfd_v9_pm_funcs;
break;
case CHIP_ALDEBARAN:
pm->pmf = &kfd_aldebaran_pm_funcs;
break;
default:
WARN(1, "Unexpected ASIC family %u",
dqm->dev->device_info->asic_family);

View File

@ -24,6 +24,7 @@
#include "kfd_kernel_queue.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_ai.h"
#include "kfd_pm4_headers_aldebaran.h"
#include "kfd_pm4_opcodes.h"
#include "gc/gc_10_1_0_sh_mask.h"
@ -35,7 +36,6 @@ static int pm_map_process_v9(struct packet_manager *pm,
packet = (struct pm4_mes_map_process *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_map_process));
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
sizeof(struct pm4_mes_map_process));
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
@ -73,6 +73,45 @@ static int pm_map_process_v9(struct packet_manager *pm,
return 0;
}
static int pm_map_process_aldebaran(struct packet_manager *pm,
uint32_t *buffer, struct qcm_process_device *qpd)
{
struct pm4_mes_map_process_aldebaran *packet;
uint64_t vm_page_table_base_addr = qpd->page_table_base;
packet = (struct pm4_mes_map_process_aldebaran *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran));
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
sizeof(struct pm4_mes_map_process_aldebaran));
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 10;
packet->bitfields2.pasid = qpd->pqm->process->pasid;
packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
packet->bitfields14.num_oac = qpd->num_oac;
packet->bitfields14.sdma_enable = 1;
packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
packet->sh_mem_config = qpd->sh_mem_config;
packet->sh_mem_bases = qpd->sh_mem_bases;
if (qpd->tba_addr) {
packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
}
packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
packet->vm_context_page_table_base_addr_lo32 =
lower_32_bits(vm_page_table_base_addr);
packet->vm_context_page_table_base_addr_hi32 =
upper_32_bits(vm_page_table_base_addr);
return 0;
}
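A quick sketch (illustrative, not from the patch) of the gds_size split performed above: the Aldebaran MAP_PROCESS packet keeps the low 6 bits in gds_size and the next 4 bits in gds_size_hi, so any 10-bit value round-trips. The struct here is a trimmed illustration of those two bitfields only.
#include <assert.h>
#include <stdint.h>
struct gds_bits {
	uint32_t gds_size:6;
	uint32_t gds_size_hi:4;
};
int main(void)
{
	uint32_t gds = 0x2AB;   /* any value up to 10 bits wide */
	struct gds_bits b = {
		.gds_size    = gds & 0x3F,
		.gds_size_hi = (gds >> 6) & 0xF,
	};
	uint32_t back = ((uint32_t)b.gds_size_hi << 6) | b.gds_size;
	assert(back == gds);
	return 0;
}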
static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
uint64_t ib, size_t ib_size_in_dwords, bool chain)
{
@ -324,3 +363,20 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0,
};
const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
.map_process = pm_map_process_aldebaran,
.runlist = pm_runlist_v9,
.set_resources = pm_set_resources_v9,
.map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9,
.query_status = pm_query_status_v9,
.release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process_aldebaran),
.runlist_size = sizeof(struct pm4_mes_runlist),
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0,
};

View File

@ -0,0 +1,93 @@
/*
* Copyright 2020 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
/*--------------------MES_MAP_PROCESS (PER DEBUG VMID)--------------------*/
#ifndef PM4_MES_MAP_PROCESS_PER_DEBUG_VMID_DEFINED
#define PM4_MES_MAP_PROCESS_PER_DEBUG_VMID_DEFINED
struct pm4_mes_map_process_aldebaran {
union {
union PM4_MES_TYPE_3_HEADER header; /* header */
uint32_t ordinal1;
};
union {
struct {
uint32_t pasid:16; /* 0 - 15 */
uint32_t single_memops:1; /* 16 */
uint32_t reserved1:1; /* 17 */
uint32_t debug_vmid:4; /* 18 - 21 */
uint32_t new_debug:1; /* 22 */
uint32_t tmz:1; /* 23 */
uint32_t diq_enable:1; /* 24 */
uint32_t process_quantum:7; /* 25 - 31 */
} bitfields2;
uint32_t ordinal2;
};
uint32_t vm_context_page_table_base_addr_lo32;
uint32_t vm_context_page_table_base_addr_hi32;
uint32_t sh_mem_bases;
uint32_t sh_mem_config;
uint32_t sq_shader_tba_lo;
uint32_t sq_shader_tba_hi;
uint32_t sq_shader_tma_lo;
uint32_t sq_shader_tma_hi;
uint32_t reserved6;
uint32_t gds_addr_lo;
uint32_t gds_addr_hi;
union {
struct {
uint32_t num_gws:7;
uint32_t sdma_enable:1;
uint32_t num_oac:4;
uint32_t gds_size_hi:4;
uint32_t gds_size:6;
uint32_t num_queues:10;
} bitfields14;
uint32_t ordinal14;
};
uint32_t spi_gdbg_per_vmid_cntl;
uint32_t tcp_watch_cntl[4];
uint32_t completion_signal_lo;
uint32_t completion_signal_hi;
};
#endif

View File

@ -322,6 +322,9 @@ struct kfd_dev {
unsigned int max_doorbell_slices;
int noretry;
/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
struct dev_pagemap pgmap;
};
enum kfd_mempool {
@ -669,7 +672,7 @@ struct kfd_process_device {
/* VM context for GPUVM allocations */
struct file *drm_file;
void *vm;
void *drm_priv;
/* GPUVM allocations storage */
struct idr alloc_idr;
@ -731,6 +734,17 @@ struct kfd_process_device {
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
struct svm_range_list {
struct mutex lock;
struct rb_root_cached objects;
struct list_head list;
struct work_struct deferred_list_work;
struct list_head deferred_range_list;
spinlock_t deferred_list_lock;
atomic_t evicted_ranges;
struct delayed_work restore_work;
};
/* Process data */
struct kfd_process {
/*
@ -809,6 +823,12 @@ struct kfd_process {
struct kobject *kobj;
struct kobject *kobj_queues;
struct attribute attr_pasid;
/* shared virtual memory registered by this process */
struct svm_range_list svms;
bool svm_disabled;
bool xnack_enabled;
};
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
@ -842,6 +862,20 @@ struct kfd_process *kfd_create_process(struct file *filep);
struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
int kfd_process_gpuid_from_kgd(struct kfd_process *p,
struct amdgpu_device *adev, uint32_t *gpuid,
uint32_t *gpuidx);
static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
uint32_t gpuidx, uint32_t *gpuid) {
return gpuidx < p->n_pdds ? p->pdds[gpuidx]->dev->id : -EINVAL;
}
static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
struct kfd_process *p, uint32_t gpuidx) {
return gpuidx < p->n_pdds ? p->pdds[gpuidx] : NULL;
}
void kfd_unref_process(struct kfd_process *p);
int kfd_process_evict_queues(struct kfd_process *p);
int kfd_process_restore_queues(struct kfd_process *p);
@ -857,6 +891,8 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process *p);
bool kfd_process_xnack_mode(struct kfd_process *p, bool supported);
int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
struct vm_area_struct *vma);
@ -1052,6 +1088,7 @@ struct packet_manager_funcs {
extern const struct packet_manager_funcs kfd_vi_pm_funcs;
extern const struct packet_manager_funcs kfd_v9_pm_funcs;
extern const struct packet_manager_funcs kfd_aldebaran_pm_funcs;
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
void pm_uninit(struct packet_manager *pm, bool hanging);

View File

@ -35,6 +35,7 @@
#include <linux/pm_runtime.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu.h"
#include "kfd_svm.h"
struct mm_struct;
@ -42,6 +43,7 @@ struct mm_struct;
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"
#include "kfd_svm.h"
/*
* List of struct kfd_process (field kfd_process).
@ -250,7 +252,7 @@ cleanup:
}
/**
* @kfd_get_cu_occupancy() - Collect number of waves in-flight on this device
* @kfd_get_cu_occupancy - Collect number of waves in-flight on this device
* by current process. Translates acquired wave count into number of compute units
* that are occupied.
*
@ -647,8 +649,9 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
{
struct kfd_dev *dev = pdd->dev;
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL);
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv);
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv,
NULL);
}
/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
@ -667,11 +670,11 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
int err;
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
pdd->vm, &mem, NULL, flags);
pdd->drm_priv, &mem, NULL, flags);
if (err)
goto err_alloc_mem;
err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv);
if (err)
goto err_map_mem;
@ -712,7 +715,8 @@ sync_memory_failed:
return err;
err_map_mem:
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL);
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, pdd->drm_priv,
NULL);
err_alloc_mem:
*kptr = NULL;
return err;
@ -901,13 +905,14 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *peer_pdd = p->pdds[i];
if (!peer_pdd->vm)
if (!peer_pdd->drm_priv)
continue;
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
peer_pdd->dev->kgd, mem, peer_pdd->vm);
peer_pdd->dev->kgd, mem, peer_pdd->drm_priv);
}
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL);
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem,
pdd->drm_priv, NULL);
kfd_process_device_remove_obj_handle(pdd, id);
}
}
@ -932,7 +937,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
if (pdd->drm_file) {
amdgpu_amdkfd_gpuvm_release_process_vm(
pdd->dev->kgd, pdd->vm);
pdd->dev->kgd, pdd->drm_priv);
fput(pdd->drm_file);
}
@ -1000,6 +1005,7 @@ static void kfd_process_wq_release(struct work_struct *work)
kfd_iommu_unbind_process(p);
kfd_process_free_outstanding_kfd_bos(p);
svm_range_list_fini(p);
kfd_process_destroy_pdds(p);
dma_fence_put(p->ef);
@ -1058,6 +1064,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
cancel_delayed_work_sync(&p->svms.restore_work);
mutex_lock(&p->mutex);
@ -1186,6 +1193,56 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
}
}
bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
{
int i;
/* On most GFXv9 GPUs, the retry mode in the SQ must match the
* boot time retry setting. Mixing processes with different
* XNACK/retry settings can hang the GPU.
*
* Different GPUs can have different noretry settings depending
* on HW bugs or limitations. We need to find at least one
* XNACK mode for this process that's compatible with all GPUs.
* Fortunately GPUs with retry enabled (noretry=0) can run code
* built for XNACK-off. On GFXv9 it may perform slower.
*
* Therefore applications built for XNACK-off can always be
* supported and will be our fallback if any GPU does not
* support retry.
*/
for (i = 0; i < p->n_pdds; i++) {
struct kfd_dev *dev = p->pdds[i]->dev;
/* Only consider GFXv9 and higher GPUs. Older GPUs don't
* support the SVM APIs and don't need to be considered
* for the XNACK mode selection.
*/
if (dev->device_info->asic_family < CHIP_VEGA10)
continue;
/* Aldebaran can always support XNACK because it can support
* per-process XNACK mode selection. But let the dev->noretry
* setting still influence the default XNACK mode.
*/
if (supported &&
dev->device_info->asic_family == CHIP_ALDEBARAN)
continue;
/* GFXv10 and later GPUs do not support shader preemption
* during page faults. This can lead to poor QoS for queue
* management and memory-manager-related preemptions or
* even deadlocks.
*/
if (dev->device_info->asic_family >= CHIP_NAVI10)
return false;
if (dev->noretry)
return false;
}
return true;
}
/*
* On return the kfd_process is fully operational and will be freed when the
* mm is released
@ -1205,6 +1262,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
process->mm = thread->mm;
process->lead_thread = thread->group_leader;
process->n_pdds = 0;
process->svm_disabled = false;
INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
process->last_restore_timestamp = get_jiffies_64();
@ -1224,6 +1282,13 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (err != 0)
goto err_init_apertures;
/* Check XNACK support after PDDs are created in kfd_init_apertures */
process->xnack_enabled = kfd_process_xnack_mode(process, false);
err = svm_range_list_init(process);
if (err)
goto err_init_svm_range_list;
/* alloc_notifier needs to find the process in the hash table */
hash_add_rcu(kfd_processes_table, &process->kfd_processes,
(uintptr_t)process->mm);
@ -1246,6 +1311,8 @@ static struct kfd_process *create_process(const struct task_struct *thread)
err_register_notifier:
hash_del_rcu(&process->kfd_processes);
svm_range_list_fini(process);
err_init_svm_range_list:
kfd_process_free_outstanding_kfd_bos(process);
kfd_process_destroy_pdds(process);
err_init_apertures:
@ -1375,7 +1442,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
if (!drm_file)
return -EINVAL;
if (pdd->vm)
if (pdd->drm_priv)
return -EBUSY;
p = pdd->process;
@ -1383,13 +1450,12 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
dev->kgd, drm_file, p->pasid,
&pdd->vm, &p->kgd_process_info, &p->ef);
&p->kgd_process_info, &p->ef);
if (ret) {
pr_err("Failed to create process VM object\n");
return ret;
}
amdgpu_vm_set_task_info(pdd->vm);
pdd->drm_priv = drm_file->private_data;
ret = kfd_process_device_reserve_ib_mem(pdd);
if (ret)
@ -1405,7 +1471,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
err_init_cwsr:
err_reserve_ib_mem:
kfd_process_device_free_bos(pdd);
pdd->vm = NULL;
pdd->drm_priv = NULL;
return ret;
}
@ -1429,7 +1495,7 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
return ERR_PTR(-ENOMEM);
}
if (!pdd->vm)
if (!pdd->drm_priv)
return ERR_PTR(-ENODEV);
/*
@ -1600,6 +1666,32 @@ int kfd_process_restore_queues(struct kfd_process *p)
return ret;
}
int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
{
int i;
for (i = 0; i < p->n_pdds; i++)
if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id)
return i;
return -EINVAL;
}
int
kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev,
uint32_t *gpuid, uint32_t *gpuidx)
{
struct kgd_dev *kgd = (struct kgd_dev *)adev;
int i;
for (i = 0; i < p->n_pdds; i++)
if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) {
*gpuid = p->pdds[i]->dev->id;
*gpuidx = i;
return 0;
}
return -EINVAL;
}
static void evict_process_worker(struct work_struct *work)
{
int ret;

File diff suppressed because it is too large

View File

@ -0,0 +1,206 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright 2020-2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef KFD_SVM_H_
#define KFD_SVM_H_
#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
#include <linux/rwsem.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/sched/mm.h>
#include <linux/hmm.h>
#include "amdgpu.h"
#include "kfd_priv.h"
struct svm_range_bo {
struct amdgpu_bo *bo;
struct kref kref;
struct list_head range_list; /* all svm ranges shared this bo */
spinlock_t list_lock;
struct amdgpu_amdkfd_fence *eviction_fence;
struct work_struct eviction_work;
struct svm_range_list *svms;
uint32_t evicting;
};
enum svm_work_list_ops {
SVM_OP_NULL,
SVM_OP_UNMAP_RANGE,
SVM_OP_UPDATE_RANGE_NOTIFIER,
SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP,
SVM_OP_ADD_RANGE,
SVM_OP_ADD_RANGE_AND_MAP
};
struct svm_work_list_item {
enum svm_work_list_ops op;
struct mm_struct *mm;
};
/**
* struct svm_range - shared virtual memory range
*
* @svms: list of svm ranges, structure defined in kfd_process
* @migrate_mutex: to serialize range migration, validation and mapping update
* @start: range start address in pages
* @last: range last address in pages
* @it_node: node [start, last] stored in the interval tree; start and last are
* page numbers and the range covers (last - start + 1) pages
* @list: link list node, used to scan all ranges of svms
* @update_list:link list node used to add to update_list
* @remove_list:link list node used to add to remove list
* @insert_list:link list node used to add to insert list
* @mapping: bo_va mapping structure to create and update GPU page table
* @npages: number of pages
* @dma_addr: dma mapping address on each GPU for system memory physical page
* @ttm_res: vram ttm resource map
* @offset: range start offset within mm_nodes
* @svm_bo: struct to manage the split amdgpu_bo
* @svm_bo_list:link list node, to scan all ranges which share same svm_bo
* @lock: protect prange start, last, child_list, svm_bo_list
* @saved_flags:save/restore current PF_MEMALLOC flags
* @flags: flags defined as KFD_IOCTL_SVM_FLAG_*
* @preferred_loc: preferred location, 0 for CPU, or GPU id
* @prefetch_loc: last prefetch location, 0 for CPU, or GPU id
* @actual_loc: the actual location, 0 for CPU, or GPU id
* @granularity:migration granularity, log2 num pages
* @invalid: not 0 means cpu page table is invalidated
* @validate_timestamp: system timestamp when range is validated
* @notifier: register mmu interval notifier
* @work_item: deferred work item information
* @deferred_list: list header used to add range to deferred list
* @child_list: list header for split ranges which are not added to svms yet
* @bitmap_access: index bitmap of GPUs which can access the range
* @bitmap_aip: index bitmap of GPUs which can access the range in place
*
* Data structure for virtual memory range shared by CPU and GPUs, it can be
* allocated from system memory ram or device vram, and migrated from ram to vram
* or from vram to ram.
*/
struct svm_range {
struct svm_range_list *svms;
struct mutex migrate_mutex;
unsigned long start;
unsigned long last;
struct interval_tree_node it_node;
struct list_head list;
struct list_head update_list;
struct list_head remove_list;
struct list_head insert_list;
struct amdgpu_bo_va_mapping mapping;
uint64_t npages;
dma_addr_t *dma_addr[MAX_GPU_INSTANCE];
struct ttm_resource *ttm_res;
uint64_t offset;
struct svm_range_bo *svm_bo;
struct list_head svm_bo_list;
struct mutex lock;
unsigned int saved_flags;
uint32_t flags;
uint32_t preferred_loc;
uint32_t prefetch_loc;
uint32_t actual_loc;
uint8_t granularity;
atomic_t invalid;
uint64_t validate_timestamp;
struct mmu_interval_notifier notifier;
struct svm_work_list_item work_item;
struct list_head deferred_list;
struct list_head child_list;
DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
bool validated_once;
};
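A small illustrative sketch of the page-granular addressing documented above: start and last are page numbers (inclusive), so the range covers last - start + 1 pages and a faulting byte address is shifted down by PAGE_SHIFT before the interval-tree lookup. The values are examples only.
#include <assert.h>
#include <stdint.h>
#define PAGE_SHIFT 12
int main(void)
{
	uint64_t start  = 0x1000;             /* first page of the range     */
	uint64_t last   = 0x10ff;             /* last page, inclusive        */
	uint64_t npages = last - start + 1;   /* 0x100 pages in this example */
	uint64_t fault_addr = 0x1080000;      /* byte address of a CPU fault */
	uint64_t fault_page = fault_addr >> PAGE_SHIFT;
	assert(npages == 0x100);
	assert(fault_page >= start && fault_page <= last);
	return 0;
}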
static inline void svm_range_lock(struct svm_range *prange)
{
mutex_lock(&prange->lock);
prange->saved_flags = memalloc_noreclaim_save();
}
static inline void svm_range_unlock(struct svm_range *prange)
{
memalloc_noreclaim_restore(prange->saved_flags);
mutex_unlock(&prange->lock);
}
int svm_range_list_init(struct kfd_process *p);
void svm_range_list_fini(struct kfd_process *p);
int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
uint64_t size, uint32_t nattrs,
struct kfd_ioctl_svm_attribute *attrs);
struct svm_range *svm_range_from_addr(struct svm_range_list *svms,
unsigned long addr,
struct svm_range **parent);
struct amdgpu_device *svm_range_get_adev_by_id(struct svm_range *prange,
uint32_t id);
int svm_range_vram_node_new(struct amdgpu_device *adev,
struct svm_range *prange, bool clear);
void svm_range_vram_node_free(struct svm_range *prange);
int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
unsigned long addr, struct svm_range *parent,
struct svm_range *prange);
int svm_range_restore_pages(struct amdgpu_device *adev,
unsigned int pasid, uint64_t addr);
int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
void svm_range_add_list_work(struct svm_range_list *svms,
struct svm_range *prange, struct mm_struct *mm,
enum svm_work_list_ops op);
void schedule_deferred_list_work(struct svm_range_list *svms);
void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
unsigned long offset, unsigned long npages);
void svm_range_free_dma_mappings(struct svm_range *prange);
void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm);
#else
struct kfd_process;
static inline int svm_range_list_init(struct kfd_process *p)
{
return 0;
}
static inline void svm_range_list_fini(struct kfd_process *p)
{
/* empty */
}
static inline int svm_range_restore_pages(struct amdgpu_device *adev,
unsigned int pasid, uint64_t addr)
{
return -EFAULT;
}
static inline int svm_range_schedule_evict_svm_bo(
struct amdgpu_amdkfd_fence *fence)
{
WARN_ONCE(1, "SVM eviction fence triggered, but SVM is disabled");
return -EINVAL;
}
#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
#endif /* KFD_SVM_H_ */

View File

@ -1192,40 +1192,60 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
mem->mem_clk_max = local_mem_info.mem_clk_max;
}
static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev,
struct kfd_topology_device *target_gpu_dev,
struct kfd_iolink_properties *link)
{
/* xgmi always supports atomics between links. */
if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI)
return;
/* check pcie support to set cpu(dev) flags for target_gpu_dev link. */
if (target_gpu_dev) {
uint32_t cap;
pcie_capability_read_dword(target_gpu_dev->gpu->pdev,
PCI_EXP_DEVCAP2, &cap);
if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
/* set gpu (dev) flags. */
} else {
if (!dev->gpu->pci_atomic_requested ||
dev->gpu->device_info->asic_family ==
CHIP_HAWAII)
link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
}
}
static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
{
struct kfd_iolink_properties *link, *cpu_link;
struct kfd_topology_device *cpu_dev;
uint32_t cap;
uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED;
uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED;
struct kfd_iolink_properties *link, *inbound_link;
struct kfd_topology_device *peer_dev;
if (!dev || !dev->gpu)
return;
pcie_capability_read_dword(dev->gpu->pdev,
PCI_EXP_DEVCAP2, &cap);
if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
if (!dev->gpu->pci_atomic_requested ||
dev->gpu->device_info->asic_family == CHIP_HAWAII)
flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
/* GPU only creates direct links so apply flags setting to all */
list_for_each_entry(link, &dev->io_link_props, list) {
link->flags = flag;
cpu_dev = kfd_topology_device_by_proximity_domain(
link->flags = CRAT_IOLINK_FLAGS_ENABLED;
kfd_set_iolink_no_atomics(dev, NULL, link);
peer_dev = kfd_topology_device_by_proximity_domain(
link->node_to);
if (cpu_dev) {
list_for_each_entry(cpu_link,
&cpu_dev->io_link_props, list)
if (cpu_link->node_to == link->node_from)
cpu_link->flags = cpu_flag;
if (!peer_dev)
continue;
list_for_each_entry(inbound_link, &peer_dev->io_link_props,
list) {
if (inbound_link->node_to != link->node_from)
continue;
inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
}
}
}
@ -1410,15 +1430,21 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
adev = (struct amdgpu_device *)(dev->gpu->kgd);
/* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
dev->node_props.capability |=
((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
HSA_CAP_SRAM_EDCSUPPORTED : 0;
dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
HSA_CAP_MEM_EDCSUPPORTED : 0;
if (adev->asic_type != CHIP_VEGA10)
dev->node_props.capability |= (adev->ras_features != 0) ?
dev->node_props.capability |= (adev->ras_enabled != 0) ?
HSA_CAP_RASEVENTNOTIFY : 0;
/* SVM API and HMM page migration work together, device memory type
* is set to a non-zero value when page migration registers device memory.
*/
if (adev->kfd.dev->pgmap.type != 0)
dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;
kfd_debug_print_topology();
if (!res)

View File

@ -53,8 +53,9 @@
#define HSA_CAP_ASIC_REVISION_MASK 0x03c00000
#define HSA_CAP_ASIC_REVISION_SHIFT 22
#define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000
#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000
#define HSA_CAP_RESERVED 0xf80f8000
#define HSA_CAP_RESERVED 0xf00f8000
struct kfd_node_properties {
uint64_t hive_id;
@ -98,9 +99,10 @@ struct kfd_node_properties {
#define HSA_MEM_HEAP_TYPE_GPU_LDS 4
#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH 5
#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001
#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002
#define HSA_MEM_FLAGS_RESERVED 0xfffffffc
#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001
#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002
#define HSA_MEM_FLAGS_COHERENTHOSTACCESS 0x00000004
#define HSA_MEM_FLAGS_RESERVED 0xfffffff8
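An illustrative sanity check of the two mask changes above: each newly defined flag bit has to be carved out of the corresponding RESERVED mask, which is why HSA_CAP_RESERVED drops bit 27 and HSA_MEM_FLAGS_RESERVED drops bit 2.
#include <assert.h>
#include <stdint.h>
int main(void)
{
	uint32_t cap_svmapi   = 0x08000000;   /* HSA_CAP_SVMAPI_SUPPORTED         */
	uint32_t cap_reserved = 0xf00f8000;   /* new HSA_CAP_RESERVED             */
	uint32_t mem_coherent = 0x00000004;   /* HSA_MEM_FLAGS_COHERENTHOSTACCESS */
	uint32_t mem_reserved = 0xfffffff8;   /* new HSA_MEM_FLAGS_RESERVED       */
	/* defined flags no longer overlap the reserved masks... */
	assert((cap_svmapi & cap_reserved) == 0);
	assert((mem_coherent & mem_reserved) == 0);
	/* ...while they did overlap the old masks */
	assert((cap_svmapi & 0xf80f8000u) != 0);
	assert((mem_coherent & 0xfffffffcu) != 0);
	return 0;
}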
struct kfd_mem_properties {
struct list_head list;

View File

@ -35,6 +35,7 @@
#include "dc/inc/hw/abm.h"
#include "dc/dc_dmub_srv.h"
#include "dc/dc_edid_parser.h"
#include "dc/dc_stat.h"
#include "amdgpu_dm_trace.h"
#include "vid.h"
@ -59,6 +60,7 @@
#include "ivsrcid/ivsrcid_vislands30.h"
#include "i2caux_interface.h"
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
@ -618,6 +620,58 @@ static void dm_dcn_vertical_interrupt0_high_irq(void *interrupt_params)
amdgpu_dm_crtc_handle_crc_window_irq(&acrtc->base);
}
#endif
/**
* dm_dmub_outbox1_low_irq() - Handles Outbox interrupt
* @interrupt_params: used for determining the Outbox instance
*
* Handles the Outbox interrupt event.
*/
#define DMUB_TRACE_MAX_READ 64
static void dm_dmub_outbox1_low_irq(void *interrupt_params)
{
struct dmub_notification notify;
struct common_irq_params *irq_params = interrupt_params;
struct amdgpu_device *adev = irq_params->adev;
struct amdgpu_display_manager *dm = &adev->dm;
struct dmcub_trace_buf_entry entry = { 0 };
uint32_t count = 0;
if (dc_enable_dmub_notifications(adev->dm.dc)) {
if (irq_params->irq_src == DC_IRQ_SOURCE_DMCUB_OUTBOX) {
do {
dc_stat_get_dmub_notification(adev->dm.dc, &notify);
} while (notify.pending_notification);
if (adev->dm.dmub_notify)
memcpy(adev->dm.dmub_notify, &notify, sizeof(struct dmub_notification));
if (notify.type == DMUB_NOTIFICATION_AUX_REPLY)
complete(&adev->dm.dmub_aux_transfer_done);
// TODO : HPD Implementation
} else {
DRM_ERROR("DM: Failed to receive correct outbox IRQ !");
}
}
do {
if (dc_dmub_srv_get_dmub_outbox0_msg(dm->dc, &entry)) {
trace_amdgpu_dmub_trace_high_irq(entry.trace_code, entry.tick_count,
entry.param0, entry.param1);
DRM_DEBUG_DRIVER("trace_code:%u, tick_count:%u, param0:%u, param1:%u\n",
entry.trace_code, entry.tick_count, entry.param0, entry.param1);
} else
break;
count++;
} while (count <= DMUB_TRACE_MAX_READ);
ASSERT(count <= DMUB_TRACE_MAX_READ);
}
#endif
static int dm_set_clockgating_state(void *handle,
@ -938,32 +992,6 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
}
#if defined(CONFIG_DRM_AMD_DC_DCN)
#define DMUB_TRACE_MAX_READ 64
static void dm_dmub_trace_high_irq(void *interrupt_params)
{
struct common_irq_params *irq_params = interrupt_params;
struct amdgpu_device *adev = irq_params->adev;
struct amdgpu_display_manager *dm = &adev->dm;
struct dmcub_trace_buf_entry entry = { 0 };
uint32_t count = 0;
do {
if (dc_dmub_srv_get_dmub_outbox0_msg(dm->dc, &entry)) {
trace_amdgpu_dmub_trace_high_irq(entry.trace_code, entry.tick_count,
entry.param0, entry.param1);
DRM_DEBUG_DRIVER("trace_code:%u, tick_count:%u, param0:%u, param1:%u\n",
entry.trace_code, entry.tick_count, entry.param0, entry.param1);
} else
break;
count++;
} while (count <= DMUB_TRACE_MAX_READ);
ASSERT(count <= DMUB_TRACE_MAX_READ);
}
static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_addr_space_config *pa_config)
{
uint64_t pt_base;
@ -1220,6 +1248,16 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
adev->dm.crc_rd_wrk = amdgpu_dm_crtc_secure_display_create_work();
#endif
if (dc_enable_dmub_notifications(adev->dm.dc)) {
init_completion(&adev->dm.dmub_aux_transfer_done);
adev->dm.dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_KERNEL);
if (!adev->dm.dmub_notify) {
DRM_INFO("amdgpu: fail to allocate adev->dm.dmub_notify");
goto error;
}
amdgpu_dm_outbox_init(adev);
}
if (amdgpu_dm_initialize_drm_device(adev)) {
DRM_ERROR(
"amdgpu: failed to initialize sw for display support.\n");
@ -1293,6 +1331,11 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
adev->dm.dc->ctx->dmub_srv = NULL;
}
if (dc_enable_dmub_notifications(adev->dm.dc)) {
kfree(adev->dm.dmub_notify);
adev->dm.dmub_notify = NULL;
}
if (adev->dm.dmub_bo)
amdgpu_bo_free_kernel(&adev->dm.dmub_bo,
&adev->dm.dmub_bo_gpu_addr,
@ -2708,8 +2751,7 @@ static void handle_hpd_rx_irq(void *param)
* conflict, after implement i2c helper, this mutex should be
* retired.
*/
if (dc_link->type != dc_connection_mst_branch)
mutex_lock(&aconnector->hpd_lock);
mutex_lock(&aconnector->hpd_lock);
read_hpd_rx_irq_data(dc_link, &hpd_irq_data);
@ -2726,13 +2768,15 @@ static void handle_hpd_rx_irq(void *param)
}
}
mutex_lock(&adev->dm.dc_lock);
if (!amdgpu_in_reset(adev)) {
mutex_lock(&adev->dm.dc_lock);
#ifdef CONFIG_DRM_AMD_DC_HDCP
result = dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data, NULL);
#else
result = dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL);
#endif
mutex_unlock(&adev->dm.dc_lock);
mutex_unlock(&adev->dm.dc_lock);
}
out:
if (result && !is_mst_root_connector) {
@ -2776,10 +2820,10 @@ out:
}
#endif
if (dc_link->type != dc_connection_mst_branch) {
if (dc_link->type != dc_connection_mst_branch)
drm_dp_cec_irq(&aconnector->dm_dp_aux.aux);
mutex_unlock(&aconnector->hpd_lock);
}
mutex_unlock(&aconnector->hpd_lock);
}
static void register_hpd_handlers(struct amdgpu_device *adev)
@ -3151,28 +3195,6 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
}
if (dc->ctx->dmub_srv) {
i = DCN_1_0__SRCID__DMCUB_OUTBOX_HIGH_PRIORITY_READY_INT;
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->dmub_trace_irq);
if (r) {
DRM_ERROR("Failed to add dmub trace irq id!\n");
return r;
}
int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT;
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
c_irq_params = &adev->dm.dmub_trace_params[0];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
amdgpu_dm_irq_register_interrupt(adev, &int_params,
dm_dmub_trace_high_irq, c_irq_params);
}
/* HPD */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DC_HPD1_INT,
&adev->hpd_irq);
@ -3185,6 +3207,41 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
return 0;
}
/* Register Outbox IRQ sources and initialize IRQ callbacks */
static int register_outbox_irq_handlers(struct amdgpu_device *adev)
{
struct dc *dc = adev->dm.dc;
struct common_irq_params *c_irq_params;
struct dc_interrupt_params int_params = {0};
int r, i;
int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT;
int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT;
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT,
&adev->dmub_outbox_irq);
if (r) {
DRM_ERROR("Failed to add outbox irq id!\n");
return r;
}
if (dc->ctx->dmub_srv) {
i = DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT;
int_params.int_context = INTERRUPT_LOW_IRQ_CONTEXT;
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
c_irq_params = &adev->dm.dmub_outbox_params[0];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
amdgpu_dm_irq_register_interrupt(adev, &int_params,
dm_dmub_outbox1_low_irq, c_irq_params);
}
return 0;
}
#endif
/*
@ -3414,22 +3471,37 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
{
struct amdgpu_display_manager *dm = bl_get_data(bd);
struct amdgpu_dm_backlight_caps caps;
struct dc_link *link = NULL;
struct dc_link *link[AMDGPU_DM_MAX_NUM_EDP];
u32 brightness;
bool rc;
int i;
amdgpu_dm_update_backlight_caps(dm);
caps = dm->backlight_caps;
link = (struct dc_link *)dm->backlight_link;
for (i = 0; i < dm->num_of_edps; i++)
link[i] = (struct dc_link *)dm->backlight_link[i];
brightness = convert_brightness_from_user(&caps, bd->props.brightness);
// Change brightness based on AUX property
if (caps.aux_support)
rc = dc_link_set_backlight_level_nits(link, true, brightness,
AUX_BL_DEFAULT_TRANSITION_TIME_MS);
else
rc = dc_link_set_backlight_level(dm->backlight_link, brightness, 0);
if (caps.aux_support) {
for (i = 0; i < dm->num_of_edps; i++) {
rc = dc_link_set_backlight_level_nits(link[i], true, brightness,
AUX_BL_DEFAULT_TRANSITION_TIME_MS);
if (!rc) {
DRM_ERROR("DM: Failed to update backlight via AUX on eDP[%d]\n", i);
break;
}
}
} else {
for (i = 0; i < dm->num_of_edps; i++) {
rc = dc_link_set_backlight_level(dm->backlight_link[i], brightness, 0);
if (!rc) {
DRM_ERROR("DM: Failed to update backlight on eDP[%d]\n", i);
break;
}
}
}
return rc ? 0 : 1;
}
@ -3443,7 +3515,7 @@ static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd)
caps = dm->backlight_caps;
if (caps.aux_support) {
struct dc_link *link = (struct dc_link *)dm->backlight_link;
struct dc_link *link = (struct dc_link *)dm->backlight_link[0];
u32 avg, peak;
bool rc;
@ -3452,7 +3524,7 @@ static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd)
return bd->props.brightness;
return convert_brightness_to_user(&caps, avg);
} else {
int ret = dc_link_get_backlight_level(dm->backlight_link);
int ret = dc_link_get_backlight_level(dm->backlight_link[0]);
if (ret == DC_ERROR_UNEXPECTED)
return bd->props.brightness;
@ -3549,10 +3621,13 @@ static void register_backlight_device(struct amdgpu_display_manager *dm,
* DM initialization because not having a backlight control
* is better than a black screen.
*/
amdgpu_dm_register_backlight_device(dm);
if (!dm->backlight_dev)
amdgpu_dm_register_backlight_device(dm);
if (dm->backlight_dev)
dm->backlight_link = link;
if (dm->backlight_dev) {
dm->backlight_link[dm->num_of_edps] = link;
dm->num_of_edps++;
}
}
#endif
}
@ -3643,6 +3718,22 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
goto fail;
}
#if defined(CONFIG_DRM_AMD_DC_DCN)
/* Use Outbox interrupt */
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
case CHIP_RENOIR:
if (register_outbox_irq_handlers(dm->adev)) {
DRM_ERROR("DM: Failed to initialize IRQ\n");
goto fail;
}
break;
default:
DRM_DEBUG_KMS("Unsupported ASIC type for outbox: 0x%X\n", adev->asic_type);
}
#endif
/* loops over all connectors on the board */
for (i = 0; i < link_cnt; i++) {
struct dc_link *link = NULL;
@ -6560,13 +6651,13 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
{
struct dc_stream_state *stream = NULL;
struct drm_connector *connector;
struct drm_connector_state *new_con_state, *old_con_state;
struct drm_connector_state *new_con_state;
struct amdgpu_dm_connector *aconnector;
struct dm_connector_state *dm_conn_state;
int i, j, clock, bpp;
int vcpi, pbn_div, pbn = 0;
for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
for_each_new_connector_in_state(state, connector, new_con_state, i) {
aconnector = to_amdgpu_dm_connector(connector);
@ -8164,15 +8255,14 @@ static void amdgpu_dm_handle_vrr_transition(struct dm_crtc_state *old_state,
static void amdgpu_dm_commit_cursors(struct drm_atomic_state *state)
{
struct drm_plane *plane;
struct drm_plane_state *old_plane_state, *new_plane_state;
struct drm_plane_state *old_plane_state;
int i;
/*
* TODO: Make this per-stream so we don't issue redundant updates for
* commits with multiple streams.
*/
for_each_oldnew_plane_in_state(state, plane, old_plane_state,
new_plane_state, i)
for_each_old_plane_in_state(state, plane, old_plane_state, i)
if (plane->type == DRM_PLANE_TYPE_CURSOR)
handle_cursor_update(plane, old_plane_state);
}
@ -10668,3 +10758,30 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address,
return value;
}
int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int linkIndex,
struct aux_payload *payload, enum aux_return_code_type *operation_result)
{
struct amdgpu_device *adev = ctx->driver_context;
int ret = 0;
dc_process_dmub_aux_transfer_async(ctx->dc, linkIndex, payload);
ret = wait_for_completion_interruptible_timeout(&adev->dm.dmub_aux_transfer_done, 10*HZ);
if (ret == 0) {
*operation_result = AUX_RET_ERROR_TIMEOUT;
return -1;
}
*operation_result = (enum aux_return_code_type)adev->dm.dmub_notify->result;
if (adev->dm.dmub_notify->result == AUX_RET_SUCCESS) {
(*payload->reply) = adev->dm.dmub_notify->aux_reply.command;
// For read case, Copy data to payload
if (!payload->write && adev->dm.dmub_notify->aux_reply.length &&
(*payload->reply == AUX_TRANSACTION_REPLY_AUX_ACK))
memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data,
adev->dm.dmub_notify->aux_reply.length);
}
return adev->dm.dmub_notify->aux_reply.length;
}
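The helper above is a synchronous wrapper around an asynchronous DMUB notification: it fires the AUX request, sleeps on dm.dmub_aux_transfer_done with a 10-second timeout, and then reads the reply that the outbox IRQ handler copied into dm.dmub_notify. As a rough illustration of that shape only, here is a small user-space model of the same completion-with-timeout pattern; it is a sketch with made-up names, not kernel code and not part of the patch.

/*
 * User-space model (illustrative only): one thread stands in for the DMUB
 * outbox IRQ delivering the notification, while the caller blocks on a
 * completion with a timeout and then reads the reply.
 */
#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

struct fake_notify {
	int result;               /* 0 == success, mirrors AUX_RET_SUCCESS */
	unsigned char reply[16];
	unsigned int length;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done = PTHREAD_COND_INITIALIZER;
static int completed;
static struct fake_notify notify;

static void *fake_outbox_irq(void *arg)        /* stand-in for the IRQ path */
{
	(void)arg;
	pthread_mutex_lock(&lock);
	notify.result = 0;
	notify.length = 3;
	memcpy(notify.reply, "ACK", 3);
	completed = 1;
	pthread_cond_signal(&done);
	pthread_mutex_unlock(&lock);
	return NULL;
}

static int aux_transfer_sync(struct fake_notify *out)  /* wait up to 10 s */
{
	struct timespec deadline;
	int err = 0;

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 10;

	pthread_mutex_lock(&lock);
	while (!completed && err == 0)
		err = pthread_cond_timedwait(&done, &lock, &deadline);
	if (completed)
		*out = notify;
	pthread_mutex_unlock(&lock);

	return completed ? (int)out->length : -1;  /* -1 mirrors the timeout path */
}

int main(void)
{
	pthread_t irq;
	struct fake_notify reply;

	pthread_create(&irq, NULL, fake_outbox_irq, NULL); /* "fire" the request */
	printf("reply length: %d\n", aux_transfer_sync(&reply));
	pthread_join(&irq, NULL);
	return 0;
}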

View File

@ -46,6 +46,7 @@
#define AMDGPU_DM_MAX_CRTC 6
#define AMDGPU_DM_MAX_NUM_EDP 2
/*
#include "include/amdgpu_dal_power_if.h"
#include "amdgpu_dm_irq.h"
@ -54,6 +55,8 @@
#include "irq_types.h"
#include "signal_types.h"
#include "amdgpu_dm_crc.h"
struct aux_payload;
enum aux_return_code_type;
/* Forward declarations */
struct amdgpu_device;
@ -62,6 +65,7 @@ struct dc;
struct amdgpu_bo;
struct dmub_srv;
struct dc_plane_state;
struct dmub_notification;
struct common_irq_params {
struct amdgpu_device *adev;
@ -135,6 +139,10 @@ struct amdgpu_dm_backlight_caps {
/**
* struct dal_allocation - Tracks mapped FB memory for SMU communication
* @list: list of dal allocations
* @bo: GPU buffer object
* @cpu_ptr: CPU virtual address of the GPU buffer object
* @gpu_addr: GPU virtual address of the GPU buffer object
*/
struct dal_allocation {
struct list_head list;
@ -164,6 +172,7 @@ struct dal_allocation {
* @compressor: Frame buffer compression buffer. See &struct dm_compressor_info
* @force_timing_sync: set via debugfs. When set, indicates that all connected
* displays will be forced to synchronize.
* @dmcub_trace_event_en: enable dmcub trace events
*/
struct amdgpu_display_manager {
@ -178,6 +187,8 @@ struct amdgpu_display_manager {
*/
struct dmub_srv *dmub_srv;
struct dmub_notification *dmub_notify;
/**
* @dmub_fb_info:
*
@ -349,11 +360,17 @@ struct amdgpu_display_manager {
struct common_irq_params
dmub_trace_params[1];
struct common_irq_params
dmub_outbox_params[1];
spinlock_t irq_handler_list_table_lock;
struct backlight_device *backlight_dev;
const struct dc_link *backlight_link;
const struct dc_link *backlight_link[AMDGPU_DM_MAX_NUM_EDP];
uint8_t num_of_edps;
struct amdgpu_dm_backlight_caps backlight_caps;
struct mod_freesync *freesync_module;
@ -418,6 +435,7 @@ struct amdgpu_display_manager {
* DAL fb memory allocation list, for communication with SMU.
*/
struct list_head da_list;
struct completion dmub_aux_transfer_done;
};
enum dsc_clock_force_state {
@ -600,4 +618,6 @@ void amdgpu_dm_update_connector_after_detect(
extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs;
int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int linkIndex,
struct aux_payload *payload, enum aux_return_code_type *operation_result);
#endif /* __AMDGPU_DM_H__ */

View File

@ -925,6 +925,22 @@ static int hdcp_sink_capability_show(struct seq_file *m, void *data)
return 0;
}
#endif
/*
* Returns whether the connected display is internal and not hotpluggable.
* Example usage: cat /sys/kernel/debug/dri/0/DP-1/internal_display
*/
static int internal_display_show(struct seq_file *m, void *data)
{
struct drm_connector *connector = m->private;
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
struct dc_link *link = aconnector->dc_link;
seq_printf(m, "Internal: %u\n", link->is_internal_display);
return 0;
}
/* function description
*
* generic SDP message access for testing
@ -2361,6 +2377,44 @@ unlock:
return size;
}
/*
* Backlight at this moment. Read only.
* As written to display, taking ABM and backlight lut into account.
* Ranges from 0x0 to 0x10000 (= 100% PWM)
*
* Example usage: cat /sys/kernel/debug/dri/0/eDP-1/current_backlight
*/
static int current_backlight_show(struct seq_file *m, void *unused)
{
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(m->private);
struct dc_link *link = aconnector->dc_link;
unsigned int backlight;
backlight = dc_link_get_backlight_level(link);
seq_printf(m, "0x%x\n", backlight);
return 0;
}
/*
* Backlight value that is being approached. Read only.
* As written to display, taking ABM and backlight lut into account.
* Ranges from 0x0 to 0x10000 (= 100% PWM)
*
* Example usage: cat /sys/kernel/debug/dri/0/eDP-1/target_backlight
*/
static int target_backlight_show(struct seq_file *m, void *unused)
{
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(m->private);
struct dc_link *link = aconnector->dc_link;
unsigned int backlight;
backlight = dc_link_get_target_backlight_pwm(link);
seq_printf(m, "0x%x\n", backlight);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support);
DEFINE_SHOW_ATTRIBUTE(dmub_fw_state);
DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer);
@ -2369,6 +2423,7 @@ DEFINE_SHOW_ATTRIBUTE(dp_lttpr_status);
#ifdef CONFIG_DRM_AMD_DC_HDCP
DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability);
#endif
DEFINE_SHOW_ATTRIBUTE(internal_display);
static const struct file_operations dp_dsc_clock_en_debugfs_fops = {
.owner = THIS_MODULE,
@ -2594,13 +2649,17 @@ DEFINE_DEBUGFS_ATTRIBUTE(dmcub_trace_event_state_fops, dmcub_trace_event_state_g
DEFINE_DEBUGFS_ATTRIBUTE(psr_fops, psr_get, NULL, "%llu\n");
DEFINE_SHOW_ATTRIBUTE(current_backlight);
DEFINE_SHOW_ATTRIBUTE(target_backlight);
static const struct {
char *name;
const struct file_operations *fops;
} connector_debugfs_entries[] = {
{"force_yuv420_output", &force_yuv420_output_fops},
{"output_bpc", &output_bpc_fops},
{"trigger_hotplug", &trigger_hotplug_debugfs_fops}
{"trigger_hotplug", &trigger_hotplug_debugfs_fops},
{"internal_display", &internal_display_fops}
};
void connector_debugfs_init(struct amdgpu_dm_connector *connector)
@ -2616,8 +2675,13 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector)
dp_debugfs_entries[i].fops);
}
}
if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP)
if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) {
debugfs_create_file_unsafe("psr_state", 0444, dir, connector, &psr_fops);
debugfs_create_file("amdgpu_current_backlight_pwm", 0444, dir, connector,
&current_backlight_fops);
debugfs_create_file("amdgpu_target_backlight_pwm", 0444, dir, connector,
&target_backlight_fops);
}
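With this change the backlight PWM debugfs nodes are registered under each eDP connector's debugfs directory instead of the global DRI directory (the old global files are removed further down in this file). A trivial user-space reader, purely illustrative; the dri/0/eDP-1 path is an assumption about the local DRM topology.

#include <stdio.h>

int main(void)
{
	char buf[32];
	FILE *f = fopen("/sys/kernel/debug/dri/0/eDP-1/amdgpu_current_backlight_pwm", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("current backlight (PWM): %s", buf);
	fclose(f);
	return 0;
}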
for (i = 0; i < ARRAY_SIZE(connector_debugfs_entries); i++) {
debugfs_create_file(connector_debugfs_entries[i].name,
@ -2920,38 +2984,6 @@ static ssize_t dtn_log_write(
return size;
}
/*
* Backlight at this moment. Read only.
* As written to display, taking ABM and backlight lut into account.
* Ranges from 0x0 to 0x10000 (= 100% PWM)
*/
static int current_backlight_show(struct seq_file *m, void *unused)
{
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
struct amdgpu_display_manager *dm = &adev->dm;
unsigned int backlight = dc_link_get_backlight_level(dm->backlight_link);
seq_printf(m, "0x%x\n", backlight);
return 0;
}
/*
* Backlight value that is being approached. Read only.
* As written to display, taking ABM and backlight lut into account.
* Ranges from 0x0 to 0x10000 (= 100% PWM)
*/
static int target_backlight_show(struct seq_file *m, void *unused)
{
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
struct amdgpu_display_manager *dm = &adev->dm;
unsigned int backlight = dc_link_get_target_backlight_pwm(dm->backlight_link);
seq_printf(m, "0x%x\n", backlight);
return 0;
}
static int mst_topo_show(struct seq_file *m, void *unused)
{
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
@ -3134,8 +3166,6 @@ static int visual_confirm_get(void *data, u64 *val)
return 0;
}
DEFINE_SHOW_ATTRIBUTE(current_backlight);
DEFINE_SHOW_ATTRIBUTE(target_backlight);
DEFINE_SHOW_ATTRIBUTE(mst_topo);
DEFINE_DEBUGFS_ATTRIBUTE(visual_confirm_fops, visual_confirm_get,
visual_confirm_set, "%llu\n");
@ -3215,10 +3245,6 @@ void dtn_debugfs_init(struct amdgpu_device *adev)
struct drm_minor *minor = adev_to_drm(adev)->primary;
struct dentry *root = minor->debugfs_root;
debugfs_create_file("amdgpu_current_backlight_pwm", 0444,
root, adev, &current_backlight_fops);
debugfs_create_file("amdgpu_target_backlight_pwm", 0444,
root, adev, &target_backlight_fops);
debugfs_create_file("amdgpu_mst_topology", 0444, root,
adev, &mst_topo_fops);
debugfs_create_file("amdgpu_dm_dtn_log", 0644, root, adev,

View File

@ -544,8 +544,10 @@ bool dm_helpers_dp_write_dsc_enable(
ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1);
}
if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT)
return dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1);
if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT) {
ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1);
DC_LOG_DC("Send DSC %s to sst display\n", enable_dsc ? "enable" : "disable");
}
return (ret > 0);
}
@ -640,7 +642,14 @@ enum dc_edid_status dm_helpers_read_local_edid(
return edid_status;
}
int dm_helper_dmub_aux_transfer_sync(
struct dc_context *ctx,
const struct dc_link *link,
struct aux_payload *payload,
enum aux_return_code_type *operation_result)
{
return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload, operation_result);
}
void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)
{
/* TODO: something */
@ -698,12 +707,12 @@ void dm_helpers_free_gpu_mem(
}
}
bool dm_helpers_dmub_outbox0_interrupt_control(struct dc_context *ctx, bool enable)
bool dm_helpers_dmub_outbox_interrupt_control(struct dc_context *ctx, bool enable)
{
enum dc_irq_source irq_source;
bool ret;
irq_source = DC_IRQ_SOURCE_DMCUB_OUTBOX0;
irq_source = DC_IRQ_SOURCE_DMCUB_OUTBOX;
ret = dc_interrupt_set(ctx->dc, irq_source, enable);

View File

@ -769,6 +769,18 @@ static int amdgpu_dm_set_vline0_irq_state(struct amdgpu_device *adev,
__func__);
}
static int amdgpu_dm_set_dmub_outbox_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned int crtc_id,
enum amdgpu_interrupt_state state)
{
enum dc_irq_source irq_source = DC_IRQ_SOURCE_DMCUB_OUTBOX;
bool st = (state == AMDGPU_IRQ_STATE_ENABLE);
dc_interrupt_set(adev->dm.dc, irq_source, st);
return 0;
}
static int amdgpu_dm_set_vupdate_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned int crtc_id,
@ -805,6 +817,11 @@ static const struct amdgpu_irq_src_funcs dm_vline0_irq_funcs = {
.process = amdgpu_dm_irq_handler,
};
static const struct amdgpu_irq_src_funcs dm_dmub_outbox_irq_funcs = {
.set = amdgpu_dm_set_dmub_outbox_irq_state,
.process = amdgpu_dm_irq_handler,
};
static const struct amdgpu_irq_src_funcs dm_vupdate_irq_funcs = {
.set = amdgpu_dm_set_vupdate_irq_state,
.process = amdgpu_dm_irq_handler,
@ -827,13 +844,15 @@ static const struct amdgpu_irq_src_funcs dm_hpd_irq_funcs = {
void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev)
{
adev->crtc_irq.num_types = adev->mode_info.num_crtc;
adev->crtc_irq.funcs = &dm_crtc_irq_funcs;
adev->vline0_irq.num_types = adev->mode_info.num_crtc;
adev->vline0_irq.funcs = &dm_vline0_irq_funcs;
adev->dmub_outbox_irq.num_types = 1;
adev->dmub_outbox_irq.funcs = &dm_dmub_outbox_irq_funcs;
adev->vupdate_irq.num_types = adev->mode_info.num_crtc;
adev->vupdate_irq.funcs = &dm_vupdate_irq_funcs;
@ -846,6 +865,12 @@ void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev)
adev->hpd_irq.num_types = adev->mode_info.num_hpd;
adev->hpd_irq.funcs = &dm_hpd_irq_funcs;
}
void amdgpu_dm_outbox_init(struct amdgpu_device *adev)
{
dc_interrupt_set(adev->dm.dc,
DC_IRQ_SOURCE_DMCUB_OUTBOX,
true);
}
/**
* amdgpu_dm_hpd_init - hpd setup callback.

View File

@ -82,6 +82,7 @@ void amdgpu_dm_irq_unregister_interrupt(struct amdgpu_device *adev,
void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev);
void amdgpu_dm_outbox_init(struct amdgpu_device *adev);
void amdgpu_dm_hpd_init(struct amdgpu_device *adev);
void amdgpu_dm_hpd_fini(struct amdgpu_device *adev);

View File

@ -278,6 +278,9 @@ dm_dp_mst_detect(struct drm_connector *connector,
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
struct amdgpu_dm_connector *master = aconnector->mst_port;
if (drm_connector_is_unregistered(connector))
return connector_status_disconnected;
return drm_dp_mst_detect_port(connector, ctx, &master->mst_mgr,
aconnector->port);
}

View File

@ -54,7 +54,7 @@ AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LI
include $(AMD_DC)
DISPLAY_CORE = dc.o dc_link.o dc_resource.o dc_hw_sequencer.o dc_sink.o \
DISPLAY_CORE = dc.o dc_stat.o dc_link.o dc_resource.o dc_hw_sequencer.o dc_sink.o \
dc_surface.o dc_link_hwss.o dc_link_dp.o dc_link_ddc.o dc_debug.o dc_stream.o \
dc_link_enc_cfg.o

View File

@ -836,8 +836,10 @@ static enum bp_result bios_parser_get_spread_spectrum_info(
return get_ss_info_v4_1(bp, signal, index, ss_info);
case 2:
case 3:
case 4:
return get_ss_info_v4_2(bp, signal, index, ss_info);
default:
ASSERT(0);
break;
}
break;

View File

@ -106,10 +106,10 @@ static void rn_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
int dpp_inst, dppclk_khz, prev_dppclk_khz;
/* Loop index will match dpp->inst if resource exists,
* and we want to avoid dependency on dpp object
/* Loop index may not match dpp->inst if some pipes are disabled,
* so select the correct inst from res_pool
*/
dpp_inst = i;
dpp_inst = clk_mgr->base.ctx->dc->res_pool->dpps[i]->inst;
dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz;
prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[i];
@ -128,7 +128,7 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
struct dc *dc = clk_mgr_base->ctx->dc;
int display_count, i;
int display_count;
bool update_dppclk = false;
bool update_dispclk = false;
bool dpp_clock_lowered = false;
@ -210,14 +210,6 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
clk_mgr_base->clks.dppclk_khz,
safe_to_lower);
for (i = 0; i < context->stream_count; i++) {
if (context->streams[i]->signal == SIGNAL_TYPE_EDP &&
context->streams[i]->apply_seamless_boot_optimization) {
dc_wait_for_vblank(dc, context->streams[i]);
break;
}
}
clk_mgr_base->clks.actual_dppclk_khz =
rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
@ -769,43 +761,6 @@ static struct wm_table ddr4_wm_table_rn = {
}
};
static struct wm_table ddr4_1R_wm_table_rn = {
.entries = {
{
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 13.90,
.sr_enter_plus_exit_time_us = 14.80,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 13.90,
.sr_enter_plus_exit_time_us = 14.80,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 13.90,
.sr_enter_plus_exit_time_us = 14.80,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 13.90,
.sr_enter_plus_exit_time_us = 14.80,
.valid = true,
},
}
};
static struct wm_table lpddr4_wm_table_rn = {
.entries = {
{
@ -842,46 +797,67 @@ static struct wm_table lpddr4_wm_table_rn = {
},
}
};
static unsigned int find_socclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage)
static unsigned int find_max_fclk_for_voltage(struct dpm_clocks *clock_table,
unsigned int voltage)
{
int i;
uint32_t max_clk = 0;
for (i = 0; i < PP_SMU_NUM_FCLK_DPM_LEVELS; i++) {
if (clock_table->FClocks[i].Vol <= voltage) {
max_clk = clock_table->FClocks[i].Freq > max_clk ?
clock_table->FClocks[i].Freq : max_clk;
}
}
return max_clk;
}
static unsigned int find_max_memclk_for_voltage(struct dpm_clocks *clock_table,
unsigned int voltage)
{
int i;
uint32_t max_clk = 0;
for (i = 0; i < PP_SMU_NUM_MEMCLK_DPM_LEVELS; i++) {
if (clock_table->MemClocks[i].Vol <= voltage) {
max_clk = clock_table->MemClocks[i].Freq > max_clk ?
clock_table->MemClocks[i].Freq : max_clk;
}
}
return max_clk;
}
static unsigned int find_max_socclk_for_voltage(struct dpm_clocks *clock_table,
unsigned int voltage)
{
int i;
uint32_t max_clk = 0;
for (i = 0; i < PP_SMU_NUM_SOCCLK_DPM_LEVELS; i++) {
if (clock_table->SocClocks[i].Vol == voltage)
return clock_table->SocClocks[i].Freq;
if (clock_table->SocClocks[i].Vol <= voltage) {
max_clk = clock_table->SocClocks[i].Freq > max_clk ?
clock_table->SocClocks[i].Freq : max_clk;
}
}
ASSERT(0);
return 0;
}
static unsigned int find_dcfclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage)
{
int i;
for (i = 0; i < PP_SMU_NUM_DCFCLK_DPM_LEVELS; i++) {
if (clock_table->DcfClocks[i].Vol == voltage)
return clock_table->DcfClocks[i].Freq;
}
ASSERT(0);
return 0;
return max_clk;
}
static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params, struct dpm_clocks *clock_table, struct integrated_info *bios_info)
{
int i, j = 0;
unsigned int volt;
j = -1;
ASSERT(PP_SMU_NUM_FCLK_DPM_LEVELS <= MAX_NUM_DPM_LVL);
/* Find lowest DPM, FCLK is filled in reverse order*/
for (i = PP_SMU_NUM_FCLK_DPM_LEVELS - 1; i >= 0; i--) {
if (clock_table->FClocks[i].Freq != 0 && clock_table->FClocks[i].Vol != 0) {
/* Find max DPM */
for (i = 0; i < PP_SMU_NUM_DCFCLK_DPM_LEVELS; ++i) {
if (clock_table->DcfClocks[i].Freq != 0 &&
clock_table->DcfClocks[i].Vol != 0)
j = i;
break;
}
}
if (j == -1) {
@ -892,13 +868,18 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params
bw_params->clk_table.num_entries = j + 1;
for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) {
bw_params->clk_table.entries[i].fclk_mhz = clock_table->FClocks[j].Freq;
bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemClocks[j].Freq;
bw_params->clk_table.entries[i].voltage = clock_table->FClocks[j].Vol;
bw_params->clk_table.entries[i].dcfclk_mhz = find_dcfclk_for_voltage(clock_table, clock_table->FClocks[j].Vol);
bw_params->clk_table.entries[i].socclk_mhz = find_socclk_for_voltage(clock_table,
bw_params->clk_table.entries[i].voltage);
for (i = 0; i < bw_params->clk_table.num_entries; i++) {
volt = clock_table->DcfClocks[i].Vol;
bw_params->clk_table.entries[i].voltage = volt;
bw_params->clk_table.entries[i].dcfclk_mhz =
clock_table->DcfClocks[i].Freq;
bw_params->clk_table.entries[i].fclk_mhz =
find_max_fclk_for_voltage(clock_table, volt);
bw_params->clk_table.entries[i].memclk_mhz =
find_max_memclk_for_voltage(clock_table, volt);
bw_params->clk_table.entries[i].socclk_mhz =
find_max_socclk_for_voltage(clock_table, volt);
}
bw_params->vram_type = bios_info->memory_type;
@ -990,12 +971,8 @@ void rn_clk_mgr_construct(
} else {
if (is_green_sardine)
rn_bw_params.wm_table = ddr4_wm_table_gs;
else {
if (ctx->dc->config.is_single_rank_dimm)
rn_bw_params.wm_table = ddr4_1R_wm_table_rn;
else
rn_bw_params.wm_table = ddr4_wm_table_rn;
}
else
rn_bw_params.wm_table = ddr4_wm_table_rn;
}
/* Saved clocks configured at boot for debug purposes */
rn_dump_clk_registers(&clk_mgr->base.boot_snapshot, &clk_mgr->base, &log_info);
@ -1013,9 +990,6 @@ void rn_clk_mgr_construct(
if (status == PP_SMU_RESULT_OK &&
ctx->dc_bios && ctx->dc_bios->integrated_info) {
rn_clk_mgr_helper_populate_bw_params (clk_mgr->base.bw_params, &clock_table, ctx->dc_bios->integrated_info);
/* treat memory config as single channel if memory is asymmetrics. */
if (ctx->dc->config.is_asymmetric_memory)
clk_mgr->base.bw_params->num_channels = 1;
}
}

View File

@ -59,7 +59,6 @@
#include "dc_link_ddc.h"
#include "dm_helpers.h"
#include "mem_input.h"
#include "hubp.h"
#include "dc_link_dp.h"
#include "dc_dmub_srv.h"
@ -3219,19 +3218,6 @@ void dc_link_remove_remote_sink(struct dc_link *link, struct dc_sink *sink)
}
}
void dc_wait_for_vblank(struct dc *dc, struct dc_stream_state *stream)
{
int i;
for (i = 0; i < dc->res_pool->pipe_count; i++)
if (dc->current_state->res_ctx.pipe_ctx[i].stream == stream) {
struct timing_generator *tg =
dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg;
tg->funcs->wait_for_state(tg, CRTC_STATE_VBLANK);
break;
}
}
void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx *info)
{
info->displayClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dispclk_khz;
@ -3287,7 +3273,7 @@ void dc_allow_idle_optimizations(struct dc *dc, bool allow)
if (dc->debug.disable_idle_power_optimizations)
return;
if (dc->clk_mgr->funcs->is_smu_present)
if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->is_smu_present)
if (!dc->clk_mgr->funcs->is_smu_present(dc->clk_mgr))
return;

View File

@ -48,6 +48,7 @@
#include "dce/dmub_psr.h"
#include "dmub/dmub_srv.h"
#include "inc/hw/panel_cntl.h"
#include "inc/link_enc_cfg.h"
#define DC_LOGGER_INIT(logger)
@ -247,6 +248,16 @@ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type)
link->dc->hwss.edp_wait_for_hpd_ready(link, true);
}
/* Link may not have physical HPD pin. */
if (link->ep_type != DISPLAY_ENDPOINT_PHY) {
if (link->hpd_status)
*type = dc_connection_single;
else
*type = dc_connection_none;
return true;
}
/* todo: may need to lock gpio access */
hpd_pin = get_hpd_gpio(link->ctx->dc_bios, link->link_id,
link->ctx->gpio_service);
@ -432,8 +443,18 @@ bool dc_link_is_dp_sink_present(struct dc_link *link)
static enum signal_type link_detect_sink(struct dc_link *link,
enum dc_detect_reason reason)
{
enum signal_type result = get_basic_signal_type(link->link_enc->id,
link->link_id);
enum signal_type result;
struct graphics_object_id enc_id;
if (link->is_dig_mapping_flexible)
enc_id = (struct graphics_object_id){.id = ENCODER_ID_UNKNOWN};
else
enc_id = link->link_enc->id;
result = get_basic_signal_type(enc_id, link->link_id);
/* Use basic signal type for link without physical connector. */
if (link->ep_type != DISPLAY_ENDPOINT_PHY)
return result;
/* Internal digital encoder will detect only dongles
* that require digital signal
@ -762,19 +783,20 @@ static bool detect_dp(struct dc_link *link,
}
if (link->type != dc_connection_mst_branch &&
is_dp_active_dongle(link)) {
/* DP active dongles */
link->type = dc_connection_active_dongle;
is_dp_branch_device(link)) {
/* DP SST branch */
link->type = dc_connection_sst_branch;
if (!link->dpcd_caps.sink_count.bits.SINK_COUNT) {
/*
* active dongle unplug processing for short irq
* SST branch unplug processing for short irq
*/
link_disconnect_sink(link);
return true;
}
if (link->dpcd_caps.dongle_type !=
DISPLAY_DONGLE_DP_HDMI_CONVERTER)
if (is_dp_active_dongle(link) &&
(link->dpcd_caps.dongle_type !=
DISPLAY_DONGLE_DP_HDMI_CONVERTER))
*converter_disable_audio = true;
}
} else {
@ -954,7 +976,8 @@ static bool dc_link_detect_helper(struct dc_link *link,
case SIGNAL_TYPE_DISPLAY_PORT: {
/* wa HPD high coming too early*/
if (link->link_enc->features.flags.bits.DP_IS_USB_C == 1) {
if (link->ep_type == DISPLAY_ENDPOINT_PHY &&
link->link_enc->features.flags.bits.DP_IS_USB_C == 1) {
/* if alt mode times out, return false */
if (!wait_for_entering_dp_alt_mode(link))
return false;
@ -974,8 +997,8 @@ static bool dc_link_detect_helper(struct dc_link *link,
sizeof(struct dpcd_caps)))
same_dpcd = false;
}
/* Active dongle downstream unplug*/
if (link->type == dc_connection_active_dongle &&
/* Active SST downstream branch device unplug*/
if (link->type == dc_connection_sst_branch &&
link->dpcd_caps.sink_count.bits.SINK_COUNT == 0) {
if (prev_sink)
/* Downstream unplug */
@ -1206,14 +1229,25 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
{
const struct dc *dc = link->dc;
bool ret;
bool can_apply_seamless_boot = false;
int i;
for (i = 0; i < dc->current_state->stream_count; i++) {
if (dc->current_state->streams[i]->apply_seamless_boot_optimization) {
can_apply_seamless_boot = true;
break;
}
}
/* get out of low power state */
clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
if (!can_apply_seamless_boot && reason != DETECT_REASON_BOOT)
clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
ret = dc_link_detect_helper(link, reason);
/* Go back to power optimized state */
clk_mgr_optimize_pwr_state(dc, dc->clk_mgr);
if (!can_apply_seamless_boot && reason != DETECT_REASON_BOOT)
clk_mgr_optimize_pwr_state(dc, dc->clk_mgr);
return ret;
}
@ -1716,6 +1750,8 @@ static enum dc_status enable_link_dp(struct dc_state *state,
bool apply_seamless_boot_optimization = false;
uint32_t bl_oled_enable_delay = 50; // in ms
const uint32_t post_oui_delay = 30; // 30ms
/* Reduce link bandwidth between failed link training attempts. */
bool do_fallback = false;
// check for seamless boot
for (i = 0; i < state->stream_count; i++) {
@ -1754,7 +1790,8 @@ static enum dc_status enable_link_dp(struct dc_state *state,
skip_video_pattern,
LINK_TRAINING_ATTEMPTS,
pipe_ctx,
pipe_ctx->stream->signal)) {
pipe_ctx->stream->signal,
do_fallback)) {
link->cur_link_settings = link_settings;
status = DC_OK;
} else {
@ -3475,9 +3512,11 @@ uint32_t dc_bandwidth_in_kbps_from_timing(
uint32_t kbps;
#if defined(CONFIG_DRM_AMD_DC_DCN)
if (timing->flags.DSC) {
return dc_dsc_stream_bandwidth_in_kbps(timing->pix_clk_100hz, timing->dsc_cfg.bits_per_pixel);
}
if (timing->flags.DSC)
return dc_dsc_stream_bandwidth_in_kbps(timing,
timing->dsc_cfg.bits_per_pixel,
timing->dsc_cfg.num_slices_h,
timing->dsc_cfg.is_dp);
#endif
switch (timing->display_color_depth) {
@ -3539,19 +3578,6 @@ void dc_link_set_drive_settings(struct dc *dc,
dc_link_dp_set_drive_settings(dc->links[i], lt_settings);
}
void dc_link_perform_link_training(struct dc *dc,
struct dc_link_settings *link_setting,
bool skip_video_pattern)
{
int i;
for (i = 0; i < dc->link_count; i++)
dc_link_dp_perform_link_training(
dc->links[i],
link_setting,
skip_video_pattern);
}
void dc_link_set_preferred_link_settings(struct dc *dc,
struct dc_link_settings *link_setting,
struct dc_link *link)
@ -3702,8 +3728,22 @@ void dc_link_overwrite_extended_receiver_cap(
bool dc_link_is_fec_supported(const struct dc_link *link)
{
struct link_encoder *link_enc = NULL;
/* Links supporting dynamically assigned link encoders will be assigned the next
* available encoder if one is not already assigned.
*/
if (link->is_dig_mapping_flexible &&
link->dc->res_pool->funcs->link_encs_assign) {
link_enc = link_enc_cfg_get_link_enc_used_by_link(link->dc->current_state, link);
if (link_enc == NULL)
link_enc = link_enc_cfg_get_next_avail_link_enc(link->dc, link->dc->current_state);
} else
link_enc = link->link_enc;
ASSERT(link_enc);
return (dc_is_dp_signal(link->connector_signal) &&
link->link_enc->features.fec_supported &&
link_enc->features.fec_supported &&
link->dpcd_caps.fec_cap.bits.FEC_CAPABLE &&
!IS_FPGA_MAXIMUS_DC(link->ctx->dce_environment));
}

View File

@ -658,7 +658,10 @@ int dc_link_aux_transfer_raw(struct ddc_service *ddc,
struct aux_payload *payload,
enum aux_return_code_type *operation_result)
{
return dce_aux_transfer_raw(ddc, payload, operation_result);
if (dc_enable_dmub_notifications(ddc->ctx->dc))
return dce_aux_transfer_dmub_raw(ddc, payload, operation_result);
else
return dce_aux_transfer_raw(ddc, payload, operation_result);
}
/* dc_link_aux_transfer_with_retries() - Attempt to submit an
@ -682,6 +685,10 @@ bool dc_link_aux_try_to_configure_timeout(struct ddc_service *ddc,
bool result = false;
struct ddc *ddc_pin = ddc->ddc_pin;
/* Do not try to access nonexistent DDC pin. */
if (ddc->link->ep_type != DISPLAY_ENDPOINT_PHY)
return true;
if (ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]->funcs->configure_timeout) {
ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]->funcs->configure_timeout(ddc, timeout);
result = true;

View File

@ -14,6 +14,7 @@
#include "dpcd_defs.h"
#include "dc_dmub_srv.h"
#include "dce/dmub_hw_lock_mgr.h"
#include "inc/link_enc_cfg.h"
/*Travis*/
static const uint8_t DP_VGA_LVDS_CONVERTER_ID_2[] = "sivarT";
@ -107,10 +108,50 @@ static void wait_for_training_aux_rd_interval(
wait_in_micro_secs);
}
static enum dpcd_training_patterns
dc_dp_training_pattern_to_dpcd_training_pattern(
struct dc_link *link,
enum dc_dp_training_pattern pattern)
{
enum dpcd_training_patterns dpcd_tr_pattern =
DPCD_TRAINING_PATTERN_VIDEOIDLE;
switch (pattern) {
case DP_TRAINING_PATTERN_SEQUENCE_1:
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_1;
break;
case DP_TRAINING_PATTERN_SEQUENCE_2:
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_2;
break;
case DP_TRAINING_PATTERN_SEQUENCE_3:
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_3;
break;
case DP_TRAINING_PATTERN_SEQUENCE_4:
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_4;
break;
case DP_TRAINING_PATTERN_VIDEOIDLE:
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_VIDEOIDLE;
break;
default:
ASSERT(0);
DC_LOG_HW_LINK_TRAINING("%s: Invalid HW Training pattern: %d\n",
__func__, pattern);
break;
}
return dpcd_tr_pattern;
}
static void dpcd_set_training_pattern(
struct dc_link *link,
union dpcd_training_pattern dpcd_pattern)
enum dc_dp_training_pattern training_pattern)
{
union dpcd_training_pattern dpcd_pattern = { {0} };
dpcd_pattern.v1_4.TRAINING_PATTERN_SET =
dc_dp_training_pattern_to_dpcd_training_pattern(
link, training_pattern);
core_link_write_dpcd(
link,
DP_TRAINING_PATTERN_SET,
@ -132,10 +173,22 @@ static enum dc_dp_training_pattern decide_cr_training_pattern(
static enum dc_dp_training_pattern decide_eq_training_pattern(struct dc_link *link,
const struct dc_link_settings *link_settings)
{
struct link_encoder *link_enc;
enum dc_dp_training_pattern highest_tp = DP_TRAINING_PATTERN_SEQUENCE_2;
struct encoder_feature_support *features = &link->link_enc->features;
struct encoder_feature_support *features;
struct dpcd_caps *dpcd_caps = &link->dpcd_caps;
/* Access link encoder capability based on whether it is statically
* or dynamically assigned to a link.
*/
if (link->is_dig_mapping_flexible &&
link->dc->res_pool->funcs->link_encs_assign)
link_enc = link_enc_cfg_get_link_enc_used_by_link(link->dc->current_state, link);
else
link_enc = link->link_enc;
ASSERT(link_enc);
features = &link_enc->features;
if (features->flags.bits.IS_TPS3_CAPABLE)
highest_tp = DP_TRAINING_PATTERN_SEQUENCE_3;
@ -227,37 +280,6 @@ static void dpcd_set_link_settings(
}
}
static enum dpcd_training_patterns
dc_dp_training_pattern_to_dpcd_training_pattern(
struct dc_link *link,
enum dc_dp_training_pattern pattern)
{
enum dpcd_training_patterns dpcd_tr_pattern =
DPCD_TRAINING_PATTERN_VIDEOIDLE;
switch (pattern) {
case DP_TRAINING_PATTERN_SEQUENCE_1:
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_1;
break;
case DP_TRAINING_PATTERN_SEQUENCE_2:
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_2;
break;
case DP_TRAINING_PATTERN_SEQUENCE_3:
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_3;
break;
case DP_TRAINING_PATTERN_SEQUENCE_4:
dpcd_tr_pattern = DPCD_TRAINING_PATTERN_4;
break;
default:
ASSERT(0);
DC_LOG_HW_LINK_TRAINING("%s: Invalid HW Training pattern: %d\n",
__func__, pattern);
break;
}
return dpcd_tr_pattern;
}
static uint8_t dc_dp_initialize_scrambling_data_symbols(
struct dc_link *link,
enum dc_dp_training_pattern pattern)
@ -420,20 +442,30 @@ static bool is_cr_done(enum dc_lane_count ln_count,
}
static bool is_ch_eq_done(enum dc_lane_count ln_count,
union lane_status *dpcd_lane_status,
union lane_align_status_updated *lane_status_updated)
union lane_status *dpcd_lane_status)
{
bool done = true;
uint32_t lane;
if (!lane_status_updated->bits.INTERLANE_ALIGN_DONE)
return false;
else {
for (lane = 0; lane < (uint32_t)(ln_count); lane++) {
if (!dpcd_lane_status[lane].bits.SYMBOL_LOCKED_0 ||
!dpcd_lane_status[lane].bits.CHANNEL_EQ_DONE_0)
return false;
}
}
return true;
for (lane = 0; lane < (uint32_t)(ln_count); lane++)
if (!dpcd_lane_status[lane].bits.CHANNEL_EQ_DONE_0)
done = false;
return done;
}
static bool is_symbol_locked(enum dc_lane_count ln_count,
union lane_status *dpcd_lane_status)
{
bool locked = true;
uint32_t lane;
for (lane = 0; lane < (uint32_t)(ln_count); lane++)
if (!dpcd_lane_status[lane].bits.SYMBOL_LOCKED_0)
locked = false;
return locked;
}
static inline bool is_interlane_aligned(union lane_align_status_updated align_status)
{
return align_status.bits.INTERLANE_ALIGN_DONE == 1;
}
static void update_drive_settings(
@ -835,10 +867,9 @@ static bool perform_post_lt_adj_req_sequence(
if (!is_cr_done(lane_count, dpcd_lane_status))
return false;
if (!is_ch_eq_done(
lane_count,
dpcd_lane_status,
&dpcd_lane_status_updated))
if (!is_ch_eq_done(lane_count, dpcd_lane_status) ||
!is_symbol_locked(lane_count, dpcd_lane_status) ||
!is_interlane_aligned(dpcd_lane_status_updated))
return false;
for (lane = 0; lane < (uint32_t)(lane_count); lane++) {
@ -992,9 +1023,9 @@ static enum link_training_result perform_channel_equalization_sequence(
return LINK_TRAINING_EQ_FAIL_CR;
/* 6. check CHEQ done*/
if (is_ch_eq_done(lane_count,
dpcd_lane_status,
&dpcd_lane_status_updated))
if (is_ch_eq_done(lane_count, dpcd_lane_status) &&
is_symbol_locked(lane_count, dpcd_lane_status) &&
is_interlane_aligned(dpcd_lane_status_updated))
return LINK_TRAINING_SUCCESS;
/* 7. update VS/PE/PC2 in lt_settings*/
@ -1162,7 +1193,7 @@ static inline enum link_training_result perform_link_training_int(
return status;
}
static enum link_training_result check_link_loss_status(
enum link_training_result dp_check_link_loss_status(
struct dc_link *link,
const struct link_training_settings *link_training_setting)
{
@ -1296,7 +1327,7 @@ static void initialize_training_settings(
lt_settings->enhanced_framing = 1;
}
static uint8_t convert_to_count(uint8_t lttpr_repeater_count)
uint8_t dp_convert_to_count(uint8_t lttpr_repeater_count)
{
switch (lttpr_repeater_count) {
case 0x80: // 1 lttpr repeater
@ -1365,7 +1396,8 @@ static void configure_lttpr_mode_non_transparent(struct dc_link *link)
link->dpcd_caps.lttpr_caps.mode = repeater_mode;
}
repeater_cnt = convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
repeater_cnt = dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
for (repeater_id = repeater_cnt; repeater_id > 0; repeater_id--) {
aux_interval_address = DP_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1 +
((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (repeater_id - 1));
@ -1555,7 +1587,6 @@ enum link_training_result dc_link_dp_perform_link_training(
{
enum link_training_result status = LINK_TRAINING_SUCCESS;
struct link_training_settings lt_settings;
union dpcd_training_pattern dpcd_pattern = { { 0 } };
bool fec_enable;
uint8_t repeater_cnt;
@ -1591,7 +1622,7 @@ enum link_training_result dc_link_dp_perform_link_training(
/* 2. perform link training (set link training done
* to false is done as well)
*/
repeater_cnt = convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
repeater_cnt = dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
for (repeater_id = repeater_cnt; (repeater_id > 0 && status == LINK_TRAINING_SUCCESS);
repeater_id--) {
@ -1621,8 +1652,7 @@ enum link_training_result dc_link_dp_perform_link_training(
}
/* 3. set training not in progress*/
dpcd_pattern.v1_4.TRAINING_PATTERN_SET = DPCD_TRAINING_PATTERN_VIDEOIDLE;
dpcd_set_training_pattern(link, dpcd_pattern);
dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE);
if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern) {
status = perform_link_training_int(link,
&lt_settings,
@ -1634,7 +1664,7 @@ enum link_training_result dc_link_dp_perform_link_training(
*/
if (link->connector_signal != SIGNAL_TYPE_EDP && status == LINK_TRAINING_SUCCESS) {
msleep(5);
status = check_link_loss_status(link, &lt_settings);
status = dp_check_link_loss_status(link, &lt_settings);
}
/* 6. print status message*/
@ -1687,18 +1717,31 @@ bool perform_link_training_with_retries(
bool skip_video_pattern,
int attempts,
struct pipe_ctx *pipe_ctx,
enum signal_type signal)
enum signal_type signal,
bool do_fallback)
{
uint8_t j;
uint8_t delay_between_attempts = LINK_TRAINING_RETRY_DELAY;
struct dc_stream_state *stream = pipe_ctx->stream;
struct dc_link *link = stream->link;
enum dp_panel_mode panel_mode;
struct link_encoder *link_enc;
enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0;
struct dc_link_settings current_setting = *link_setting;
/* Dynamically assigned link encoders are associated with the stream rather
* than the link.
*/
if (link->dc->res_pool->funcs->link_encs_assign)
link_enc = stream->link_enc;
else
link_enc = link->link_enc;
ASSERT(link_enc);
/* We need to do this before the link training to ensure the idle pattern in SST
* mode will be sent right after the link training
*/
link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc,
link_enc->funcs->connect_dig_be_to_fe(link_enc,
pipe_ctx->stream_res.stream_enc->id, true);
for (j = 0; j < attempts; ++j) {
@ -1710,7 +1753,7 @@ bool perform_link_training_with_retries(
link,
signal,
pipe_ctx->clock_source->id,
link_setting);
&currnet_setting);
if (stream->sink_patches.dppowerup_delay > 0) {
int delay_dp_power_up_in_ms = stream->sink_patches.dppowerup_delay;
@ -1725,14 +1768,12 @@ bool perform_link_training_with_retries(
panel_mode != DP_PANEL_MODE_DEFAULT);
if (link->aux_access_disabled) {
dc_link_dp_perform_link_training_skip_aux(link, link_setting);
dc_link_dp_perform_link_training_skip_aux(link, &current_setting);
return true;
} else {
enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0;
status = dc_link_dp_perform_link_training(
link,
link_setting,
&current_setting,
skip_video_pattern);
if (status == LINK_TRAINING_SUCCESS)
return true;
@ -1740,7 +1781,7 @@ bool perform_link_training_with_retries(
/* latest link training still fail, skip delay and keep PHY on
*/
if (j == (attempts - 1))
if (j == (attempts - 1) && link->ep_type == DISPLAY_ENDPOINT_PHY)
break;
DC_LOG_WARNING("%s: Link training attempt %u of %d failed\n",
@ -1748,6 +1789,19 @@ bool perform_link_training_with_retries(
dp_disable_link_phy(link, signal);
/* Abort link training if failure due to sink being unplugged. */
if (status == LINK_TRAINING_ABORT)
break;
else if (do_fallback) {
decide_fallback_link_setting(*link_setting, &current_setting, status);
/* Fail link training if reduced link bandwidth no longer meets
* stream requirements.
*/
if (dc_bandwidth_in_kbps_from_timing(&stream->timing) <
dc_link_bandwidth_kbps(link, &current_setting))
break;
}
msleep(delay_between_attempts);
delay_between_attempts += LINK_TRAINING_RETRY_DELAY;
@ -2429,6 +2483,12 @@ bool dp_validate_mode_timing(
const struct dc_link_settings *link_setting;
/* According to spec, VSC SDP should be used if pixel format is YCbCr420 */
if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420 &&
!link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED &&
dal_graphics_object_id_get_connector_id(link->link_id) != CONNECTOR_ID_VIRTUAL)
return false;
/*always DP fail safe mode*/
if ((timing->pix_clk_100hz / 10) == (uint32_t) 25175 &&
timing->h_addressable == (uint32_t) 640 &&
@ -2611,13 +2671,11 @@ static bool allow_hpd_rx_irq(const struct dc_link *link)
/*
* Don't handle RX IRQ unless one of the following is met:
* 1) The link is established (cur_link_settings != unknown)
* 2) We kicked off MST detection
* 3) We know we're dealing with an active dongle
* 2) We know we're dealing with a branch device, SST or MST
*/
if ((link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) ||
(link->type == dc_connection_mst_branch) ||
is_dp_active_dongle(link))
is_dp_branch_device(link))
return true;
return false;
@ -2917,6 +2975,22 @@ static void dp_test_send_link_test_pattern(struct dc_link *link)
break;
}
switch (dpcd_test_params.bits.CLR_FORMAT) {
case 0:
pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_RGB;
break;
case 1:
pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_YCBCR422;
break;
case 2:
pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_YCBCR444;
break;
default:
pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_RGB;
break;
}
if (requestColorDepth != COLOR_DEPTH_UNDEFINED
&& pipe_ctx->stream->timing.display_color_depth != requestColorDepth) {
DC_LOG_DEBUG("%s: original bpc %d, changing to %d\n",
@ -2924,9 +2998,10 @@ static void dp_test_send_link_test_pattern(struct dc_link *link)
pipe_ctx->stream->timing.display_color_depth,
requestColorDepth);
pipe_ctx->stream->timing.display_color_depth = requestColorDepth;
dp_update_dsc_config(pipe_ctx);
}
dp_update_dsc_config(pipe_ctx);
dc_link_dp_set_test_pattern(
link,
test_pattern,
@ -3182,7 +3257,7 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd
*out_link_loss = true;
}
if (link->type == dc_connection_active_dongle &&
if (link->type == dc_connection_sst_branch &&
hpd_irq_dpcd_data.bytes.sink_cnt.bits.SINK_COUNT
!= link->dpcd_sink_count)
status = true;
@ -3232,6 +3307,12 @@ bool is_mst_supported(struct dc_link *link)
}
bool is_dp_active_dongle(const struct dc_link *link)
{
return (link->dpcd_caps.dongle_type >= DISPLAY_DONGLE_DP_VGA_CONVERTER) &&
(link->dpcd_caps.dongle_type <= DISPLAY_DONGLE_DP_HDMI_CONVERTER);
}
bool is_dp_branch_device(const struct dc_link *link)
{
return link->dpcd_caps.is_branch_dev;
}
@ -3593,7 +3674,9 @@ static bool retrieve_link_cap(struct dc_link *link)
lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
/* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */
is_lttpr_present = (link->dpcd_caps.lttpr_caps.phy_repeater_cnt > 0 &&
link->dpcd_caps.lttpr_caps.phy_repeater_cnt < 0xff &&
link->dpcd_caps.lttpr_caps.max_lane_count > 0 &&
link->dpcd_caps.lttpr_caps.max_lane_count <= 4 &&
link->dpcd_caps.lttpr_caps.revision.raw >= 0x14);
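For reference, LTTPRs advertise their count in DPCD with a one-hot encoding (0x80 = 1 repeater, 0x40 = 2, down to 0x01 = 8), which is the mapping dp_convert_to_count() earlier in this file decodes. Below is a self-contained sketch of that decode, written against the encoding as I understand it from the DisplayPort spec rather than copied from the driver.

/* Illustrative decode of the DPCD PHY_REPEATER_CNT one-hot encoding.
 * Stand-alone sketch, not taken from the driver. */
#include <stdio.h>

static unsigned int repeater_count(unsigned char raw)
{
	unsigned int n;

	for (n = 1; n <= 8; n++)
		if (raw == (0x80 >> (n - 1)))
			return n;
	return 0;  /* 0x00 or a value outside the encoding: no usable LTTPRs */
}

int main(void)
{
	printf("0x80 -> %u, 0x01 -> %u, 0xff -> %u\n",
	       repeater_count(0x80), repeater_count(0x01), repeater_count(0xff));
	return 0;
}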

View File

@ -112,8 +112,8 @@ static void update_link_enc_assignment(
/* Return first available DIG link encoder. */
static enum engine_id find_first_avail_link_enc(
struct dc_context *ctx,
struct dc_state *state)
const struct dc_context *ctx,
const struct dc_state *state)
{
enum engine_id eng_id = ENGINE_ID_UNKNOWN;
int i;
@ -270,7 +270,7 @@ struct dc_link *link_enc_cfg_get_link_using_link_enc(
struct link_encoder *link_enc_cfg_get_link_enc_used_by_link(
struct dc_state *state,
struct dc_link *link)
const struct dc_link *link)
{
struct link_encoder *link_enc = NULL;
struct display_endpoint_id ep_id;
@ -296,8 +296,20 @@ struct link_encoder *link_enc_cfg_get_link_enc_used_by_link(
if (stream_idx != -1)
link_enc = state->streams[stream_idx]->link_enc;
else
dm_output_to_console("%s: No link encoder used by link(%d).\n", __func__, link->link_index);
return link_enc;
}
struct link_encoder *link_enc_cfg_get_next_avail_link_enc(
const struct dc *dc,
const struct dc_state *state)
{
struct link_encoder *link_enc = NULL;
enum engine_id eng_id = ENGINE_ID_UNKNOWN;
eng_id = find_first_avail_link_enc(dc->ctx, state);
if (eng_id != ENGINE_ID_UNKNOWN)
link_enc = dc->res_pool->link_encoders[eng_id - ENGINE_ID_DIGA];
return link_enc;
}

View File

@ -384,7 +384,8 @@ void dp_retrain_link_dp_test(struct dc_link *link,
skip_video_pattern,
LINK_TRAINING_ATTEMPTS,
&pipes[i],
SIGNAL_TYPE_DISPLAY_PORT);
SIGNAL_TYPE_DISPLAY_PORT,
false);
link->dc->hwss.enable_stream(&pipes[i]);

View File

@ -1706,12 +1706,6 @@ static bool is_timing_changed(struct dc_stream_state *cur_stream,
if (cur_stream == NULL)
return true;
/* If sink pointer changed, it means this is a hotplug, we should do
* full hw setting.
*/
if (cur_stream->sink != new_stream->sink)
return true;
/* If output color space is changed, need to reprogram info frames */
if (cur_stream->output_color_space != new_stream->output_color_space)
return true;
@ -2679,6 +2673,7 @@ void dc_resource_state_destruct(struct dc_state *context)
dc_stream_release(context->streams[i]);
context->streams[i] = NULL;
}
context->stream_count = 0;
}
void dc_resource_state_copy_construct(

View File

@ -45,7 +45,7 @@
/* forward declaration */
struct aux_payload;
#define DC_VER "3.2.132"
#define DC_VER "3.2.135.1"
#define MAX_SURFACES 3
#define MAX_PLANES 6
@ -308,8 +308,6 @@ struct dc_config {
#endif
uint64_t vblank_alignment_dto_params;
uint8_t vblank_alignment_max_frame_time_diff;
bool is_asymmetric_memory;
bool is_single_rank_dimm;
};
enum visual_confirm {
@ -600,7 +598,6 @@ struct dc_bounding_box_overrides {
int min_dcfclk_mhz;
};
struct dc_state;
struct resource_pool;
struct dce_hwseq;
struct gpu_info_soc_bounding_box_v1_0;
@ -719,7 +716,6 @@ void dc_init_callbacks(struct dc *dc,
void dc_deinit_callbacks(struct dc *dc);
void dc_destroy(struct dc **dc);
void dc_wait_for_vblank(struct dc *dc, struct dc_stream_state *stream);
/*******************************************************************************
* Surface Interfaces
******************************************************************************/

View File

@ -180,5 +180,5 @@ bool dc_dmub_srv_get_dmub_outbox0_msg(const struct dc *dc, struct dmcub_trace_bu
void dc_dmub_trace_event_control(struct dc *dc, bool enable)
{
dm_helpers_dmub_outbox0_interrupt_control(dc->ctx, enable);
dm_helpers_dmub_outbox_interrupt_control(dc->ctx, enable);
}

View File

@ -95,6 +95,7 @@ enum dc_dp_training_pattern {
DP_TRAINING_PATTERN_SEQUENCE_2,
DP_TRAINING_PATTERN_SEQUENCE_3,
DP_TRAINING_PATTERN_SEQUENCE_4,
DP_TRAINING_PATTERN_VIDEOIDLE,
};
struct dc_link_settings {

View File

@ -78,7 +78,8 @@ bool dc_dsc_compute_config(
const struct dc_crtc_timing *timing,
struct dc_dsc_config *dsc_cfg);
uint32_t dc_dsc_stream_bandwidth_in_kbps(uint32_t pix_clk_100hz, uint32_t bpp_x16);
uint32_t dc_dsc_stream_bandwidth_in_kbps(const struct dc_crtc_timing *timing,
uint32_t bpp_x16, uint32_t num_slices_h, bool is_dp);
void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing,
uint32_t max_target_bpp_limit_override_x16,
@ -88,6 +89,6 @@ void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit);
void dc_dsc_policy_set_enable_dsc_when_not_needed(bool enable);
uint32_t dc_dsc_stream_bandwidth_in_kbps(uint32_t pix_clk_100hz, uint32_t bpp_x16);
void dc_dsc_policy_set_disable_dsc_stream_overhead(bool disable);
#endif

Some files were not shown because too many files have changed in this diff.