drm/amdgpu: fix sdma v4 startup under SRIOV
Under SRIOV we were enabling the ring buffer before it was initialized.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
105f20706f
commit
51235849d9
@ -675,13 +675,14 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
|
||||
* sdma_v4_0_gfx_resume - setup and start the async dma engines
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @i: instance to resume
|
||||
*
|
||||
* Set up the gfx DMA ring buffers and enable them (VEGA10).
|
||||
* Returns 0 for success, error for failure.
|
||||
*/
|
||||
static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
|
||||
static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
|
||||
{
|
||||
struct amdgpu_ring *ring;
|
||||
struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
|
||||
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
|
||||
u32 rb_bufsz;
|
||||
u32 wb_offset;
|
||||
@ -689,129 +690,108 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
|
||||
u32 doorbell_offset;
|
||||
u32 temp;
|
||||
u64 wptr_gpu_addr;
|
||||
int i, r;
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
ring = &adev->sdma.instance[i].ring;
|
||||
wb_offset = (ring->rptr_offs * 4);
|
||||
wb_offset = (ring->rptr_offs * 4);
|
||||
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
|
||||
|
||||
/* Set ring buffer size in dwords */
|
||||
rb_bufsz = order_base_2(ring->ring_size / 4);
|
||||
rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
|
||||
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
|
||||
/* Set ring buffer size in dwords */
|
||||
rb_bufsz = order_base_2(ring->ring_size / 4);
|
||||
rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
|
||||
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
|
||||
#ifdef __BIG_ENDIAN
|
||||
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
|
||||
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
|
||||
RPTR_WRITEBACK_SWAP_ENABLE, 1);
|
||||
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
|
||||
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
|
||||
RPTR_WRITEBACK_SWAP_ENABLE, 1);
|
||||
#endif
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
|
||||
|
||||
/* Initialize the ring buffer's read and write pointers */
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
|
||||
/* Initialize the ring buffer's read and write pointers */
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
|
||||
|
||||
/* set the wb address whether it's enabled or not */
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
|
||||
upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
|
||||
lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
|
||||
/* set the wb address whether it's enabled or not */
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
|
||||
upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
|
||||
lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
|
||||
|
||||
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
|
||||
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
|
||||
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
|
||||
|
||||
ring->wptr = 0;
|
||||
ring->wptr = 0;
|
||||
|
||||
/* before programming wptr to a lower value, minor_ptr_update must be set first */
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
|
||||
|
||||
if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
|
||||
}
|
||||
|
||||
doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
|
||||
doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
|
||||
|
||||
if (ring->use_doorbell) {
|
||||
doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
|
||||
doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
|
||||
OFFSET, ring->doorbell_index);
|
||||
} else {
|
||||
doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
|
||||
}
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
|
||||
adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
|
||||
ring->doorbell_index);
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
sdma_v4_0_ring_set_wptr(ring);
|
||||
|
||||
/* set minor_ptr_update to 0 after wptr is programmed */
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
|
||||
|
||||
/* set utc l1 enable flag always to 1 */
|
||||
temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
|
||||
temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
|
||||
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
/* unhalt engine */
|
||||
temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
|
||||
temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
|
||||
}
|
||||
|
||||
/* setup the wptr shadow polling */
|
||||
wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
|
||||
lower_32_bits(wptr_gpu_addr));
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
|
||||
upper_32_bits(wptr_gpu_addr));
|
||||
wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
|
||||
else
|
||||
wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
|
||||
|
||||
/* enable DMA RB */
|
||||
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
|
||||
|
||||
ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
|
||||
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
|
||||
#ifdef __BIG_ENDIAN
|
||||
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
|
||||
#endif
|
||||
/* enable DMA IBs */
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
|
||||
|
||||
ring->ready = true;
|
||||
|
||||
if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
|
||||
sdma_v4_0_ctx_switch_enable(adev, true);
|
||||
sdma_v4_0_enable(adev, true);
|
||||
}
|
||||
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
return r;
|
||||
}
|
||||
|
||||
if (adev->mman.buffer_funcs_ring == ring)
|
||||
amdgpu_ttm_set_buffer_funcs_status(adev, true);
|
||||
/* before programming wptr to a lower value, minor_ptr_update must be set first */
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
|
||||
|
||||
if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
|
||||
doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
|
||||
|
||||
if (ring->use_doorbell) {
|
||||
doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
|
||||
doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
|
||||
OFFSET, ring->doorbell_index);
|
||||
} else {
|
||||
doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
|
||||
}
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
|
||||
adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
|
||||
ring->doorbell_index);
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
sdma_v4_0_ring_set_wptr(ring);
|
||||
|
||||
/* set minor_ptr_update to 0 after wptr is programmed */
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
|
||||
|
||||
/* set utc l1 enable flag always to 1 */
|
||||
temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
|
||||
temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
|
||||
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
/* unhalt engine */
|
||||
temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
|
||||
temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
|
||||
}
|
||||
|
||||
/* setup the wptr shadow polling */
|
||||
wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
|
||||
lower_32_bits(wptr_gpu_addr));
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
|
||||
upper_32_bits(wptr_gpu_addr));
|
||||
wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
|
||||
else
|
||||
wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
|
||||
|
||||
/* enable DMA RB */
|
||||
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
|
||||
|
||||
ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
|
||||
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
|
||||
#ifdef __BIG_ENDIAN
|
||||
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
|
||||
#endif
|
||||
/* enable DMA IBs */
|
||||
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
|
||||
|
||||
ring->ready = true;
|
||||
}
|
||||
|
||||
static void
|
||||
@ -943,33 +923,51 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
|
||||
*/
|
||||
static int sdma_v4_0_start(struct amdgpu_device *adev)
|
||||
{
|
||||
int r = 0;
|
||||
struct amdgpu_ring *ring;
|
||||
int i, r;
|
||||
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
sdma_v4_0_ctx_switch_enable(adev, false);
|
||||
sdma_v4_0_enable(adev, false);
|
||||
} else {
|
||||
|
||||
/* set RB registers */
|
||||
r = sdma_v4_0_gfx_resume(adev);
|
||||
return r;
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
|
||||
r = sdma_v4_0_load_microcode(adev);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* unhalt the MEs */
|
||||
sdma_v4_0_enable(adev, true);
|
||||
/* enable sdma ring preemption */
|
||||
sdma_v4_0_ctx_switch_enable(adev, true);
|
||||
}
|
||||
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
|
||||
r = sdma_v4_0_load_microcode(adev);
|
||||
/* start the gfx rings and rlc compute queues */
|
||||
for (i = 0; i < adev->sdma.num_instances; i++)
|
||||
sdma_v4_0_gfx_resume(adev, i);
|
||||
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
sdma_v4_0_ctx_switch_enable(adev, true);
|
||||
sdma_v4_0_enable(adev, true);
|
||||
} else {
|
||||
r = sdma_v4_0_rlc_resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* unhalt the MEs */
|
||||
sdma_v4_0_enable(adev, true);
|
||||
/* enable sdma ring preemption */
|
||||
sdma_v4_0_ctx_switch_enable(adev, true);
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
ring = &adev->sdma.instance[i].ring;
|
||||
|
||||
/* start the gfx rings and rlc compute queues */
|
||||
r = sdma_v4_0_gfx_resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
r = sdma_v4_0_rlc_resume(adev);
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
return r;
|
||||
}
|
||||
|
||||
if (adev->mman.buffer_funcs_ring == ring)
|
||||
amdgpu_ttm_set_buffer_funcs_status(adev, true);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user