drm/amdgpu: add high priority compute support for gfx9

We follow the same approach as gfx8. The only changes are register
access macros.

Tested on vega10. The execution latency results fall within the expected
ranges from the polaris10 data.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Andres Rodriguez 2018-01-02 15:49:40 -05:00 committed by Alex Deucher
parent 10cd19c877
commit 761c77c195

View File

@ -3735,6 +3735,105 @@ static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
return wptr;
}
static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
bool acquire)
{
struct amdgpu_device *adev = ring->adev;
int pipe_num, tmp, reg;
int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
/* first me only has 2 entries, GFX and HP3D */
if (ring->me > 0)
pipe_num -= 2;
reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
tmp = RREG32(reg);
tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
WREG32(reg, tmp);
}
static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
bool acquire)
{
int i, pipe;
bool reserve;
struct amdgpu_ring *iring;
mutex_lock(&adev->gfx.pipe_reserve_mutex);
pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
if (acquire)
set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
else
clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
/* Clear all reservations - everyone reacquires all resources */
for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
true);
for (i = 0; i < adev->gfx.num_compute_rings; ++i)
gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
true);
} else {
/* Lower all pipes without a current reservation */
for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
iring = &adev->gfx.gfx_ring[i];
pipe = amdgpu_gfx_queue_to_bit(adev,
iring->me,
iring->pipe,
0);
reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
gfx_v9_0_ring_set_pipe_percent(iring, reserve);
}
for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
iring = &adev->gfx.compute_ring[i];
pipe = amdgpu_gfx_queue_to_bit(adev,
iring->me,
iring->pipe,
0);
reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
gfx_v9_0_ring_set_pipe_percent(iring, reserve);
}
}
mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
bool acquire)
{
uint32_t pipe_priority = acquire ? 0x2 : 0x0;
uint32_t queue_priority = acquire ? 0xf : 0x0;
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
enum drm_sched_priority priority)
{
struct amdgpu_device *adev = ring->adev;
bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
return;
gfx_v9_0_hqd_set_priority(adev, ring, acquire);
gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
}
static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
@ -4261,6 +4360,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.test_ib = gfx_v9_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.set_priority = gfx_v9_0_ring_set_priority_compute,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {