Merge branch 'drm-next-4.14' of git://people.freedesktop.org/~agd5f/linux into drm-next
A few fixes for 4.14. Nothing too major.
This commit is contained in:
commit
47e0cd6b1d
@ -76,7 +76,7 @@
|
||||
extern int amdgpu_modeset;
|
||||
extern int amdgpu_vram_limit;
|
||||
extern int amdgpu_vis_vram_limit;
|
||||
extern unsigned amdgpu_gart_size;
|
||||
extern int amdgpu_gart_size;
|
||||
extern int amdgpu_gtt_size;
|
||||
extern int amdgpu_moverate;
|
||||
extern int amdgpu_benchmarking;
|
||||
|
@ -155,7 +155,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
|
||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
|
||||
{
|
||||
return (struct kfd2kgd_calls *)&kfd2kgd;
|
||||
return (struct kfd2kgd_calls *)&kfd2kgd;
|
||||
}
|
||||
|
||||
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
|
||||
|
@ -1079,6 +1079,9 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
|
||||
GFP_KERNEL);
|
||||
p->num_post_dep_syncobjs = 0;
|
||||
|
||||
if (!p->post_dep_syncobjs)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < num_deps; ++i) {
|
||||
p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
|
||||
if (!p->post_dep_syncobjs[i])
|
||||
@ -1150,7 +1153,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
|
||||
cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
|
||||
job->uf_sequence = cs->out.handle;
|
||||
amdgpu_job_free_resources(job);
|
||||
amdgpu_cs_parser_fini(p, 0, true);
|
||||
|
||||
trace_amdgpu_cs_ioctl(job);
|
||||
amd_sched_entity_push_job(&job->base);
|
||||
@ -1208,10 +1210,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
||||
goto out;
|
||||
|
||||
r = amdgpu_cs_submit(&parser, cs);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
return 0;
|
||||
out:
|
||||
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
|
||||
return r;
|
||||
|
@ -1062,11 +1062,11 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
|
||||
amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
|
||||
}
|
||||
|
||||
if (amdgpu_gart_size < 32) {
|
||||
if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
|
||||
/* gart size must be greater or equal to 32M */
|
||||
dev_warn(adev->dev, "gart size (%d) too small\n",
|
||||
amdgpu_gart_size);
|
||||
amdgpu_gart_size = 32;
|
||||
amdgpu_gart_size = -1;
|
||||
}
|
||||
|
||||
if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
|
||||
@ -2622,12 +2622,6 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
|
||||
goto err;
|
||||
}
|
||||
|
||||
r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem);
|
||||
if (r) {
|
||||
DRM_ERROR("%p bind failed\n", bo->shadow);
|
||||
goto err;
|
||||
}
|
||||
|
||||
r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
|
||||
NULL, fence, true);
|
||||
if (r) {
|
||||
|
@ -76,7 +76,7 @@
|
||||
|
||||
int amdgpu_vram_limit = 0;
|
||||
int amdgpu_vis_vram_limit = 0;
|
||||
unsigned amdgpu_gart_size = 256;
|
||||
int amdgpu_gart_size = -1; /* auto */
|
||||
int amdgpu_gtt_size = -1; /* auto */
|
||||
int amdgpu_moverate = -1; /* auto */
|
||||
int amdgpu_benchmarking = 0;
|
||||
@ -128,7 +128,7 @@ module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
|
||||
MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes");
|
||||
module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);
|
||||
|
||||
MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc.)");
|
||||
MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)");
|
||||
module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
|
||||
|
||||
MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)");
|
||||
|
@ -56,18 +56,6 @@
|
||||
* Common GART table functions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* amdgpu_gart_set_defaults - set the default gart_size
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Set the default gart_size based on parameters and available VRAM.
|
||||
*/
|
||||
void amdgpu_gart_set_defaults(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->mc.gart_size = (uint64_t)amdgpu_gart_size << 20;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
|
||||
*
|
||||
|
@ -56,7 +56,6 @@ struct amdgpu_gart {
|
||||
const struct amdgpu_gart_funcs *gart_funcs;
|
||||
};
|
||||
|
||||
void amdgpu_gart_set_defaults(struct amdgpu_device *adev);
|
||||
int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev);
|
||||
void amdgpu_gart_table_ram_free(struct amdgpu_device *adev);
|
||||
int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
|
||||
|
@ -108,10 +108,10 @@ bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem)
|
||||
*
|
||||
* Allocate the address space for a node.
|
||||
*/
|
||||
int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
|
||||
struct ttm_buffer_object *tbo,
|
||||
const struct ttm_place *place,
|
||||
struct ttm_mem_reg *mem)
|
||||
static int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
|
||||
struct ttm_buffer_object *tbo,
|
||||
const struct ttm_place *place,
|
||||
struct ttm_mem_reg *mem)
|
||||
{
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
|
||||
struct amdgpu_gtt_mgr *mgr = man->priv;
|
||||
@ -143,12 +143,8 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
|
||||
fpfn, lpfn, mode);
|
||||
spin_unlock(&mgr->lock);
|
||||
|
||||
if (!r) {
|
||||
if (!r)
|
||||
mem->start = node->start;
|
||||
if (&tbo->mem == mem)
|
||||
tbo->offset = (tbo->mem.start << PAGE_SHIFT) +
|
||||
tbo->bdev->man[tbo->mem.mem_type].gpu_offset;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -221,8 +221,9 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
|
||||
|
||||
spin_lock_init(&adev->irq.lock);
|
||||
|
||||
/* Disable vblank irqs aggressively for power-saving */
|
||||
adev->ddev->vblank_disable_immediate = true;
|
||||
if (!adev->enable_virtual_display)
|
||||
/* Disable vblank irqs aggressively for power-saving */
|
||||
adev->ddev->vblank_disable_immediate = true;
|
||||
|
||||
r = drm_vblank_init(adev->ddev, adev->mode_info.num_crtc);
|
||||
if (r) {
|
||||
|
@ -91,7 +91,10 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
|
||||
|
||||
if (domain & AMDGPU_GEM_DOMAIN_GTT) {
|
||||
places[c].fpfn = 0;
|
||||
places[c].lpfn = 0;
|
||||
if (flags & AMDGPU_GEM_CREATE_SHADOW)
|
||||
places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT;
|
||||
else
|
||||
places[c].lpfn = 0;
|
||||
places[c].flags = TTM_PL_FLAG_TT;
|
||||
if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
|
||||
places[c].flags |= TTM_PL_FLAG_WC |
|
||||
@ -446,17 +449,16 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
|
||||
if (bo->shadow)
|
||||
return 0;
|
||||
|
||||
bo->flags |= AMDGPU_GEM_CREATE_SHADOW;
|
||||
memset(&placements, 0,
|
||||
(AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
|
||||
|
||||
amdgpu_ttm_placement_init(adev, &placement,
|
||||
placements, AMDGPU_GEM_DOMAIN_GTT,
|
||||
AMDGPU_GEM_CREATE_CPU_GTT_USWC);
|
||||
memset(&placements, 0, sizeof(placements));
|
||||
amdgpu_ttm_placement_init(adev, &placement, placements,
|
||||
AMDGPU_GEM_DOMAIN_GTT,
|
||||
AMDGPU_GEM_CREATE_CPU_GTT_USWC |
|
||||
AMDGPU_GEM_CREATE_SHADOW);
|
||||
|
||||
r = amdgpu_bo_create_restricted(adev, size, byte_align, true,
|
||||
AMDGPU_GEM_DOMAIN_GTT,
|
||||
AMDGPU_GEM_CREATE_CPU_GTT_USWC,
|
||||
AMDGPU_GEM_CREATE_CPU_GTT_USWC |
|
||||
AMDGPU_GEM_CREATE_SHADOW,
|
||||
NULL, &placement,
|
||||
bo->tbo.resv,
|
||||
0,
|
||||
@ -484,30 +486,28 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
|
||||
{
|
||||
struct ttm_placement placement = {0};
|
||||
struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
|
||||
uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
|
||||
int r;
|
||||
|
||||
memset(&placements, 0,
|
||||
(AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
|
||||
memset(&placements, 0, sizeof(placements));
|
||||
amdgpu_ttm_placement_init(adev, &placement, placements,
|
||||
domain, parent_flags);
|
||||
|
||||
amdgpu_ttm_placement_init(adev, &placement,
|
||||
placements, domain, flags);
|
||||
|
||||
r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel,
|
||||
domain, flags, sg, &placement,
|
||||
resv, init_value, bo_ptr);
|
||||
r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, domain,
|
||||
parent_flags, sg, &placement, resv,
|
||||
init_value, bo_ptr);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (amdgpu_need_backup(adev) && (flags & AMDGPU_GEM_CREATE_SHADOW)) {
|
||||
if (!resv) {
|
||||
r = ww_mutex_lock(&(*bo_ptr)->tbo.resv->lock, NULL);
|
||||
WARN_ON(r != 0);
|
||||
}
|
||||
if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
|
||||
if (!resv)
|
||||
WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
|
||||
NULL));
|
||||
|
||||
r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));
|
||||
|
||||
if (!resv)
|
||||
ww_mutex_unlock(&(*bo_ptr)->tbo.resv->lock);
|
||||
reservation_object_unlock((*bo_ptr)->tbo.resv);
|
||||
|
||||
if (r)
|
||||
amdgpu_bo_unref(bo_ptr);
|
||||
|
@ -170,6 +170,16 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
|
||||
unsigned irq_type)
|
||||
{
|
||||
int r;
|
||||
int sched_hw_submission = amdgpu_sched_hw_submission;
|
||||
|
||||
/* Set the hw submission limit higher for KIQ because
|
||||
* it's used for a number of gfx/compute tasks by both
|
||||
* KFD and KGD which may have outstanding fences and
|
||||
* it doesn't really use the gpu scheduler anyway;
|
||||
* KIQ tasks get submitted directly to the ring.
|
||||
*/
|
||||
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
|
||||
sched_hw_submission = max(sched_hw_submission, 256);
|
||||
|
||||
if (ring->adev == NULL) {
|
||||
if (adev->num_rings >= AMDGPU_MAX_RINGS)
|
||||
@ -178,8 +188,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
|
||||
ring->adev = adev;
|
||||
ring->idx = adev->num_rings++;
|
||||
adev->rings[ring->idx] = ring;
|
||||
r = amdgpu_fence_driver_init_ring(ring,
|
||||
amdgpu_sched_hw_submission);
|
||||
r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
@ -218,8 +227,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
|
||||
return r;
|
||||
}
|
||||
|
||||
ring->ring_size = roundup_pow_of_two(max_dw * 4 *
|
||||
amdgpu_sched_hw_submission);
|
||||
ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
|
||||
|
||||
ring->buf_mask = (ring->ring_size / 4) - 1;
|
||||
ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
|
||||
|
@ -761,35 +761,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
|
||||
sg_free_table(ttm->sg);
|
||||
}
|
||||
|
||||
static int amdgpu_ttm_do_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
|
||||
{
|
||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
||||
uint64_t flags;
|
||||
int r;
|
||||
|
||||
spin_lock(&gtt->adev->gtt_list_lock);
|
||||
flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, mem);
|
||||
gtt->offset = (u64)mem->start << PAGE_SHIFT;
|
||||
r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
|
||||
ttm->pages, gtt->ttm.dma_address, flags);
|
||||
|
||||
if (r) {
|
||||
DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
|
||||
ttm->num_pages, gtt->offset);
|
||||
goto error_gart_bind;
|
||||
}
|
||||
|
||||
list_add_tail(&gtt->list, &gtt->adev->gtt_list);
|
||||
error_gart_bind:
|
||||
spin_unlock(&gtt->adev->gtt_list_lock);
|
||||
return r;
|
||||
|
||||
}
|
||||
|
||||
static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
|
||||
struct ttm_mem_reg *bo_mem)
|
||||
{
|
||||
struct amdgpu_ttm_tt *gtt = (void*)ttm;
|
||||
uint64_t flags;
|
||||
int r = 0;
|
||||
|
||||
if (gtt->userptr) {
|
||||
@ -809,9 +785,24 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
|
||||
bo_mem->mem_type == AMDGPU_PL_OA)
|
||||
return -EINVAL;
|
||||
|
||||
if (amdgpu_gtt_mgr_is_allocated(bo_mem))
|
||||
r = amdgpu_ttm_do_bind(ttm, bo_mem);
|
||||
if (!amdgpu_gtt_mgr_is_allocated(bo_mem))
|
||||
return 0;
|
||||
|
||||
spin_lock(&gtt->adev->gtt_list_lock);
|
||||
flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
|
||||
gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
|
||||
r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
|
||||
ttm->pages, gtt->ttm.dma_address, flags);
|
||||
|
||||
if (r) {
|
||||
DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
|
||||
ttm->num_pages, gtt->offset);
|
||||
goto error_gart_bind;
|
||||
}
|
||||
|
||||
list_add_tail(&gtt->list, &gtt->adev->gtt_list);
|
||||
error_gart_bind:
|
||||
spin_unlock(&gtt->adev->gtt_list_lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
@ -824,20 +815,39 @@ bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
|
||||
|
||||
int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
|
||||
{
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
|
||||
struct ttm_tt *ttm = bo->ttm;
|
||||
struct ttm_mem_reg tmp;
|
||||
|
||||
struct ttm_placement placement;
|
||||
struct ttm_place placements;
|
||||
int r;
|
||||
|
||||
if (!ttm || amdgpu_ttm_is_bound(ttm))
|
||||
return 0;
|
||||
|
||||
r = amdgpu_gtt_mgr_alloc(&bo->bdev->man[TTM_PL_TT], bo,
|
||||
NULL, bo_mem);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to allocate GTT address space (%d)\n", r);
|
||||
return r;
|
||||
}
|
||||
tmp = bo->mem;
|
||||
tmp.mm_node = NULL;
|
||||
placement.num_placement = 1;
|
||||
placement.placement = &placements;
|
||||
placement.num_busy_placement = 1;
|
||||
placement.busy_placement = &placements;
|
||||
placements.fpfn = 0;
|
||||
placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT;
|
||||
placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
|
||||
|
||||
return amdgpu_ttm_do_bind(ttm, bo_mem);
|
||||
r = ttm_bo_mem_space(bo, &placement, &tmp, true, false);
|
||||
if (unlikely(r))
|
||||
return r;
|
||||
|
||||
r = ttm_bo_move_ttm(bo, true, false, &tmp);
|
||||
if (unlikely(r))
|
||||
ttm_bo_mem_put(bo, &tmp);
|
||||
else
|
||||
bo->offset = (bo->mem.start << PAGE_SHIFT) +
|
||||
bo->bdev->man[bo->mem.mem_type].gpu_offset;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
|
||||
|
@ -62,10 +62,6 @@ extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
|
||||
extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
|
||||
|
||||
bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem);
|
||||
int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
|
||||
struct ttm_buffer_object *tbo,
|
||||
const struct ttm_place *place,
|
||||
struct ttm_mem_reg *mem);
|
||||
uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man);
|
||||
|
||||
uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
|
||||
|
@ -165,14 +165,6 @@ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
|
||||
unsigned i;
|
||||
int r;
|
||||
|
||||
if (parent->bo->shadow) {
|
||||
struct amdgpu_bo *shadow = parent->bo->shadow;
|
||||
|
||||
r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (use_cpu_for_update) {
|
||||
r = amdgpu_bo_kmap(parent->bo, NULL);
|
||||
if (r)
|
||||
@ -1277,7 +1269,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
|
||||
/* In the case of a mixed PT the PDE must point to it*/
|
||||
if (p->adev->asic_type < CHIP_VEGA10 ||
|
||||
nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
|
||||
p->func == amdgpu_vm_do_copy_ptes ||
|
||||
p->src ||
|
||||
!(flags & AMDGPU_PTE_VALID)) {
|
||||
|
||||
dst = amdgpu_bo_gpu_offset(entry->bo);
|
||||
@ -1294,9 +1286,23 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
|
||||
entry->addr = (dst | flags);
|
||||
|
||||
if (use_cpu_update) {
|
||||
/* In case a huge page is replaced with a system
|
||||
* memory mapping, p->pages_addr != NULL and
|
||||
* amdgpu_vm_cpu_set_ptes would try to translate dst
|
||||
* through amdgpu_vm_map_gart. But dst is already a
|
||||
* GPU address (of the page table). Disable
|
||||
* amdgpu_vm_map_gart temporarily.
|
||||
*/
|
||||
dma_addr_t *tmp;
|
||||
|
||||
tmp = p->pages_addr;
|
||||
p->pages_addr = NULL;
|
||||
|
||||
pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
|
||||
pde = pd_addr + (entry - parent->entries) * 8;
|
||||
amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
|
||||
|
||||
p->pages_addr = tmp;
|
||||
} else {
|
||||
if (parent->bo->shadow) {
|
||||
pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
|
||||
@ -1610,7 +1616,6 @@ error_free:
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @exclusive: fence we need to sync to
|
||||
* @gtt_flags: flags as they are used for GTT
|
||||
* @pages_addr: DMA addresses to use for mapping
|
||||
* @vm: requested vm
|
||||
* @mapping: mapped range and flags to use for the update
|
||||
@ -1624,7 +1629,6 @@ error_free:
|
||||
*/
|
||||
static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
|
||||
struct dma_fence *exclusive,
|
||||
uint64_t gtt_flags,
|
||||
dma_addr_t *pages_addr,
|
||||
struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo_va_mapping *mapping,
|
||||
@ -1679,11 +1683,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
|
||||
}
|
||||
|
||||
if (pages_addr) {
|
||||
if (flags == gtt_flags)
|
||||
src = adev->gart.table_addr +
|
||||
(addr >> AMDGPU_GPU_PAGE_SHIFT) * 8;
|
||||
else
|
||||
max_entries = min(max_entries, 16ull * 1024ull);
|
||||
max_entries = min(max_entries, 16ull * 1024ull);
|
||||
addr = 0;
|
||||
} else if (flags & AMDGPU_PTE_VALID) {
|
||||
addr += adev->vm_manager.vram_base_offset;
|
||||
@ -1728,10 +1728,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
|
||||
struct amdgpu_vm *vm = bo_va->base.vm;
|
||||
struct amdgpu_bo_va_mapping *mapping;
|
||||
dma_addr_t *pages_addr = NULL;
|
||||
uint64_t gtt_flags, flags;
|
||||
struct ttm_mem_reg *mem;
|
||||
struct drm_mm_node *nodes;
|
||||
struct dma_fence *exclusive;
|
||||
uint64_t flags;
|
||||
int r;
|
||||
|
||||
if (clear || !bo_va->base.bo) {
|
||||
@ -1751,15 +1751,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
|
||||
exclusive = reservation_object_get_excl(bo->tbo.resv);
|
||||
}
|
||||
|
||||
if (bo) {
|
||||
if (bo)
|
||||
flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
|
||||
gtt_flags = (amdgpu_ttm_is_bound(bo->tbo.ttm) &&
|
||||
adev == amdgpu_ttm_adev(bo->tbo.bdev)) ?
|
||||
flags : 0;
|
||||
} else {
|
||||
else
|
||||
flags = 0x0;
|
||||
gtt_flags = ~0x0;
|
||||
}
|
||||
|
||||
spin_lock(&vm->status_lock);
|
||||
if (!list_empty(&bo_va->base.vm_status))
|
||||
@ -1767,8 +1762,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
|
||||
spin_unlock(&vm->status_lock);
|
||||
|
||||
list_for_each_entry(mapping, &bo_va->invalids, list) {
|
||||
r = amdgpu_vm_bo_split_mapping(adev, exclusive,
|
||||
gtt_flags, pages_addr, vm,
|
||||
r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
|
||||
mapping, flags, nodes,
|
||||
&bo_va->last_pt_update);
|
||||
if (r)
|
||||
|
@ -4579,9 +4579,9 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
|
||||
mqd->compute_misc_reserved = 0x00000003;
|
||||
if (!(adev->flags & AMD_IS_APU)) {
|
||||
mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
|
||||
+ offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
|
||||
+ offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
|
||||
mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
|
||||
+ offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
|
||||
+ offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
|
||||
}
|
||||
eop_base_addr = ring->eop_gpu_addr >> 8;
|
||||
mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
|
||||
@ -4768,8 +4768,8 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
|
||||
mutex_unlock(&adev->srbm_mutex);
|
||||
} else {
|
||||
memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
|
||||
((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
|
||||
((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
|
||||
((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
|
||||
((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
|
||||
mutex_lock(&adev->srbm_mutex);
|
||||
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
|
||||
gfx_v8_0_mqd_init(ring);
|
||||
@ -4792,8 +4792,8 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
|
||||
|
||||
if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
|
||||
memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
|
||||
((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
|
||||
((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
|
||||
((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
|
||||
((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
|
||||
mutex_lock(&adev->srbm_mutex);
|
||||
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
|
||||
gfx_v8_0_mqd_init(ring);
|
||||
|
@ -124,7 +124,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
|
||||
|
||||
static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
|
||||
{
|
||||
uint32_t tmp, field;
|
||||
uint32_t tmp;
|
||||
|
||||
/* Setup L2 cache */
|
||||
tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL);
|
||||
@ -143,9 +143,8 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
|
||||
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
|
||||
WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
|
||||
|
||||
field = adev->vm_manager.fragment_size;
|
||||
tmp = mmVM_L2_CNTL3_DEFAULT;
|
||||
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field);
|
||||
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
|
||||
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
|
||||
WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
|
||||
|
||||
|
@ -332,7 +332,24 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
|
||||
adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
|
||||
adev->mc.visible_vram_size = adev->mc.aper_size;
|
||||
|
||||
amdgpu_gart_set_defaults(adev);
|
||||
/* set the gart size */
|
||||
if (amdgpu_gart_size == -1) {
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_HAINAN: /* no MM engines */
|
||||
default:
|
||||
adev->mc.gart_size = 256ULL << 20;
|
||||
break;
|
||||
case CHIP_VERDE: /* UVD, VCE do not support GPUVM */
|
||||
case CHIP_TAHITI: /* UVD, VCE do not support GPUVM */
|
||||
case CHIP_PITCAIRN: /* UVD, VCE do not support GPUVM */
|
||||
case CHIP_OLAND: /* UVD, VCE do not support GPUVM */
|
||||
adev->mc.gart_size = 1024ULL << 20;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
|
||||
}
|
||||
|
||||
gmc_v6_0_vram_gtt_location(adev, &adev->mc);
|
||||
|
||||
return 0;
|
||||
|
@ -386,7 +386,27 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
|
||||
if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
|
||||
adev->mc.visible_vram_size = adev->mc.real_vram_size;
|
||||
|
||||
amdgpu_gart_set_defaults(adev);
|
||||
/* set the gart size */
|
||||
if (amdgpu_gart_size == -1) {
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_TOPAZ: /* no MM engines */
|
||||
default:
|
||||
adev->mc.gart_size = 256ULL << 20;
|
||||
break;
|
||||
#ifdef CONFIG_DRM_AMDGPU_CIK
|
||||
case CHIP_BONAIRE: /* UVD, VCE do not support GPUVM */
|
||||
case CHIP_HAWAII: /* UVD, VCE do not support GPUVM */
|
||||
case CHIP_KAVERI: /* UVD, VCE do not support GPUVM */
|
||||
case CHIP_KABINI: /* UVD, VCE do not support GPUVM */
|
||||
case CHIP_MULLINS: /* UVD, VCE do not support GPUVM */
|
||||
adev->mc.gart_size = 1024ULL << 20;
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
|
||||
}
|
||||
|
||||
gmc_v7_0_vram_gtt_location(adev, &adev->mc);
|
||||
|
||||
return 0;
|
||||
|
@ -562,7 +562,26 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
|
||||
if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
|
||||
adev->mc.visible_vram_size = adev->mc.real_vram_size;
|
||||
|
||||
amdgpu_gart_set_defaults(adev);
|
||||
/* set the gart size */
|
||||
if (amdgpu_gart_size == -1) {
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_POLARIS11: /* all engines support GPUVM */
|
||||
case CHIP_POLARIS10: /* all engines support GPUVM */
|
||||
case CHIP_POLARIS12: /* all engines support GPUVM */
|
||||
default:
|
||||
adev->mc.gart_size = 256ULL << 20;
|
||||
break;
|
||||
case CHIP_TONGA: /* UVD, VCE do not support GPUVM */
|
||||
case CHIP_FIJI: /* UVD, VCE do not support GPUVM */
|
||||
case CHIP_CARRIZO: /* UVD, VCE do not support GPUVM, DCE SG support */
|
||||
case CHIP_STONEY: /* UVD does not support GPUVM, DCE SG support */
|
||||
adev->mc.gart_size = 1024ULL << 20;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
|
||||
}
|
||||
|
||||
gmc_v8_0_vram_gtt_location(adev, &adev->mc);
|
||||
|
||||
return 0;
|
||||
|
@ -499,7 +499,21 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
|
||||
if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
|
||||
adev->mc.visible_vram_size = adev->mc.real_vram_size;
|
||||
|
||||
amdgpu_gart_set_defaults(adev);
|
||||
/* set the gart size */
|
||||
if (amdgpu_gart_size == -1) {
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_VEGA10: /* all engines support GPUVM */
|
||||
default:
|
||||
adev->mc.gart_size = 256ULL << 20;
|
||||
break;
|
||||
case CHIP_RAVEN: /* DCE SG support */
|
||||
adev->mc.gart_size = 1024ULL << 20;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
|
||||
}
|
||||
|
||||
gmc_v9_0_vram_gtt_location(adev, &adev->mc);
|
||||
|
||||
return 0;
|
||||
|
@ -138,7 +138,7 @@ static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
|
||||
|
||||
static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
|
||||
{
|
||||
uint32_t tmp, field;
|
||||
uint32_t tmp;
|
||||
|
||||
/* Setup L2 cache */
|
||||
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
|
||||
@ -157,9 +157,8 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
|
||||
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
|
||||
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
|
||||
|
||||
field = adev->vm_manager.fragment_size;
|
||||
tmp = mmVM_L2_CNTL3_DEFAULT;
|
||||
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field);
|
||||
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
|
||||
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
|
||||
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
|
||||
|
||||
|
@ -419,8 +419,8 @@ struct vi_mqd_allocation {
|
||||
struct vi_mqd mqd;
|
||||
uint32_t wptr_poll_mem;
|
||||
uint32_t rptr_report_mem;
|
||||
uint32_t dyamic_cu_mask;
|
||||
uint32_t dyamic_rb_mask;
|
||||
uint32_t dynamic_cu_mask;
|
||||
uint32_t dynamic_rb_mask;
|
||||
};
|
||||
|
||||
struct cz_mqd {
|
||||
|
@ -1558,7 +1558,8 @@ static int vega10_populate_smc_link_levels(struct pp_hwmgr *hwmgr)
|
||||
*/
|
||||
|
||||
static int vega10_populate_single_gfx_level(struct pp_hwmgr *hwmgr,
|
||||
uint32_t gfx_clock, PllSetting_t *current_gfxclk_level)
|
||||
uint32_t gfx_clock, PllSetting_t *current_gfxclk_level,
|
||||
uint32_t *acg_freq)
|
||||
{
|
||||
struct phm_ppt_v2_information *table_info =
|
||||
(struct phm_ppt_v2_information *)(hwmgr->pptable);
|
||||
@ -1609,6 +1610,8 @@ static int vega10_populate_single_gfx_level(struct pp_hwmgr *hwmgr,
|
||||
cpu_to_le16(dividers.usPll_ss_slew_frac);
|
||||
current_gfxclk_level->Did = (uint8_t)(dividers.ulDid);
|
||||
|
||||
*acg_freq = gfx_clock / 100; /* 100 Khz to Mhz conversion */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1689,7 +1692,8 @@ static int vega10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr)
|
||||
for (i = 0; i < dpm_table->count; i++) {
|
||||
result = vega10_populate_single_gfx_level(hwmgr,
|
||||
dpm_table->dpm_levels[i].value,
|
||||
&(pp_table->GfxclkLevel[i]));
|
||||
&(pp_table->GfxclkLevel[i]),
|
||||
&(pp_table->AcgFreqTable[i]));
|
||||
if (result)
|
||||
return result;
|
||||
}
|
||||
@ -1698,7 +1702,8 @@ static int vega10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr)
|
||||
while (i < NUM_GFXCLK_DPM_LEVELS) {
|
||||
result = vega10_populate_single_gfx_level(hwmgr,
|
||||
dpm_table->dpm_levels[j].value,
|
||||
&(pp_table->GfxclkLevel[i]));
|
||||
&(pp_table->GfxclkLevel[i]),
|
||||
&(pp_table->AcgFreqTable[i]));
|
||||
if (result)
|
||||
return result;
|
||||
i++;
|
||||
|
@ -315,10 +315,12 @@ typedef struct {
|
||||
uint8_t AcgEnable[NUM_GFXCLK_DPM_LEVELS];
|
||||
GbVdroopTable_t AcgBtcGbVdroopTable;
|
||||
QuadraticInt_t AcgAvfsGb;
|
||||
uint32_t Reserved[4];
|
||||
|
||||
/* ACG Frequency Table, in Mhz */
|
||||
uint32_t AcgFreqTable[NUM_GFXCLK_DPM_LEVELS];
|
||||
|
||||
/* Padding - ignore */
|
||||
uint32_t MmHubPadding[7]; /* SMU internal use */
|
||||
uint32_t MmHubPadding[3]; /* SMU internal use */
|
||||
|
||||
} PPTable_t;
|
||||
|
||||
|
@ -380,7 +380,8 @@ static int smu7_populate_single_firmware_entry(struct pp_smumgr *smumgr,
|
||||
entry->num_register_entries = 0;
|
||||
}
|
||||
|
||||
if (fw_type == UCODE_ID_RLC_G)
|
||||
if ((fw_type == UCODE_ID_RLC_G)
|
||||
|| (fw_type == UCODE_ID_CP_MEC))
|
||||
entry->flags = 1;
|
||||
else
|
||||
entry->flags = 0;
|
||||
|
@ -205,17 +205,32 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
|
||||
struct amd_sched_entity *entity)
|
||||
{
|
||||
struct amd_sched_rq *rq = entity->rq;
|
||||
int r;
|
||||
|
||||
if (!amd_sched_entity_is_initialized(sched, entity))
|
||||
return;
|
||||
|
||||
/**
|
||||
* The client will not queue more IBs during this fini, consume existing
|
||||
* queued IBs
|
||||
* queued IBs or discard them on SIGKILL
|
||||
*/
|
||||
wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity));
|
||||
|
||||
if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL)
|
||||
r = -ERESTARTSYS;
|
||||
else
|
||||
r = wait_event_killable(sched->job_scheduled,
|
||||
amd_sched_entity_is_idle(entity));
|
||||
amd_sched_rq_remove_entity(rq, entity);
|
||||
if (r) {
|
||||
struct amd_sched_job *job;
|
||||
|
||||
/* Park the kernel for a moment to make sure it isn't processing
|
||||
* our enity.
|
||||
*/
|
||||
kthread_park(sched->thread);
|
||||
kthread_unpark(sched->thread);
|
||||
while (kfifo_out(&entity->job_queue, &job, sizeof(job)))
|
||||
sched->ops->free_job(job);
|
||||
|
||||
}
|
||||
kfifo_free(&entity->job_queue);
|
||||
}
|
||||
|
||||
|
@ -109,8 +109,8 @@ static ssize_t ttm_bo_global_show(struct kobject *kobj,
|
||||
struct ttm_bo_global *glob =
|
||||
container_of(kobj, struct ttm_bo_global, kobj);
|
||||
|
||||
return snprintf(buffer, PAGE_SIZE, "%lu\n",
|
||||
(unsigned long) atomic_read(&glob->bo_count));
|
||||
return snprintf(buffer, PAGE_SIZE, "%d\n",
|
||||
atomic_read(&glob->bo_count));
|
||||
}
|
||||
|
||||
static struct attribute *ttm_bo_global_attrs[] = {
|
||||
|
@ -469,6 +469,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
|
||||
* TODO: Explicit member copy would probably be better here.
|
||||
*/
|
||||
|
||||
atomic_inc(&bo->glob->bo_count);
|
||||
INIT_LIST_HEAD(&fbo->ddestroy);
|
||||
INIT_LIST_HEAD(&fbo->lru);
|
||||
INIT_LIST_HEAD(&fbo->swap);
|
||||
|
Loading…
Reference in New Issue
Block a user