drm/amdgpu: set sched_hw_submission higher for KIQ (v3)

KIQ doesn't really use the GPU scheduler.  The base
drivers generally use the KIQ ring directly rather than
submitting IBs.  However, amdgpu_sched_hw_submission
(which defaults to 2) limits the number of outstanding
fences to 2.  KFD uses the KIQ for TLB flushes, and the
2-fence limit hurts performance when several KFD
processes are running.

v2: move some expressions to one line
    change KIQ sched_hw_submission to at least 16
v3: bump the KIQ sched_hw_submission minimum to 256

Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Alex Deucher 2017-08-22 16:39:30 -04:00
parent c3db7b5a55
commit b249e18df1

View File

@ -170,6 +170,16 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned irq_type) unsigned irq_type)
{ {
int r; int r;
int sched_hw_submission = amdgpu_sched_hw_submission;
/* Set the hw submission limit higher for KIQ because
* it's used for a number of gfx/compute tasks by both
* KFD and KGD which may have outstanding fences and
* it doesn't really use the gpu scheduler anyway;
* KIQ tasks get submitted directly to the ring.
*/
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
sched_hw_submission = max(sched_hw_submission, 256);
if (ring->adev == NULL) { if (ring->adev == NULL) {
if (adev->num_rings >= AMDGPU_MAX_RINGS) if (adev->num_rings >= AMDGPU_MAX_RINGS)
@ -178,8 +188,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->adev = adev; ring->adev = adev;
ring->idx = adev->num_rings++; ring->idx = adev->num_rings++;
adev->rings[ring->idx] = ring; adev->rings[ring->idx] = ring;
r = amdgpu_fence_driver_init_ring(ring, r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission);
amdgpu_sched_hw_submission);
if (r) if (r)
return r; return r;
} }
@ -218,8 +227,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
return r; return r;
} }
ring->ring_size = roundup_pow_of_two(max_dw * 4 * ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
amdgpu_sched_hw_submission);
ring->buf_mask = (ring->ring_size / 4) - 1; ring->buf_mask = (ring->ring_size / 4) - 1;
ring->ptr_mask = ring->funcs->support_64bit_ptrs ? ring->ptr_mask = ring->funcs->support_64bit_ptrs ?