drm/amdgpu: implement burst NOP for SDMA
Customize the insert_nop func for SDMA rings, and use burst NOP for ring/IB submissions in other places as well.

Signed-off-by: Jammy Zhou <Jammy.Zhou@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
commit ac01db3dd5 (parent edff0e2826)
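The gist of the change: instead of padding a ring or IB with N separate NOP headers, the SDMA packet format lets a single NOP header carry a count, so the engine treats the following N-1 dwords as one burst. A minimal stand-alone sketch of that pattern, not the amdgpu API itself; the ring buffer, write_dw() and the packet macros below are illustrative stand-ins, and the COUNT field layout is an assumption for the example:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the ring buffer and amdgpu_ring_write(). */
static uint32_t ring_buf[64];
static unsigned int wptr;
static void write_dw(uint32_t dw) { ring_buf[wptr++ & 63] = dw; }

/* Assumed NOP packet layout for this sketch: opcode 0, repeat count in bits 16..29. */
#define NOP_HEADER   0x00000000u
#define NOP_COUNT(n) (((uint32_t)(n) & 0x3fffu) << 16)

/* Pad with 'count' dwords of NOP, using a burst NOP header when supported. */
static void insert_nop(uint32_t count, int burst_nop)
{
	uint32_t i;

	for (i = 0; i < count; i++) {
		if (burst_nop && i == 0)
			/* one header that covers the remaining count-1 dwords */
			write_dw(NOP_HEADER | NOP_COUNT(count - 1));
		else
			/* filler dwords skipped under the burst header,
			 * or plain NOPs when burst NOP is not supported */
			write_dw(NOP_HEADER);
	}
}

int main(void)
{
	insert_nop(6, 1);
	printf("first dword: 0x%08x, dwords written: %u\n",
	       (unsigned int)ring_buf[0], wptr);
	return 0;
}

With count = 6, the first dword carries COUNT = 5 and the remaining five dwords are plain filler, so the engine decodes one packet instead of six.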
drivers/gpu/drm/amd/amdgpu/cik_sdma.c

@@ -188,6 +188,19 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
 	WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
 }
 
+static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+	int i;
+
+	for (i = 0; i < count; i++)
+		if (sdma && sdma->burst_nop && (i == 0))
+			amdgpu_ring_write(ring, ring->nop |
+					  SDMA_NOP_COUNT(count - 1));
+		else
+			amdgpu_ring_write(ring, ring->nop);
+}
+
 /**
  * cik_sdma_ring_emit_ib - Schedule an IB on the DMA engine
  *
@@ -213,8 +226,8 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, next_rptr);
 
 	/* IB packet must end on a 8 DW boundary */
-	while ((ring->wptr & 7) != 4)
-		amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+	cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8);
+
 	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
 	amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
@@ -817,8 +830,19 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
  */
 static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib)
 {
-	while (ib->length_dw & 0x7)
-		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
+	struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+	u32 pad_count;
+	int i;
+
+	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+	for (i = 0; i < pad_count; i++)
+		if (sdma && sdma->burst_nop && (i == 0))
+			ib->ptr[ib->length_dw++] =
+				SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0) |
+				SDMA_NOP_COUNT(pad_count - 1);
+		else
+			ib->ptr[ib->length_dw++] =
+				SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
 }
 
 /**
@@ -1305,7 +1329,7 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
 	.test_ring = cik_sdma_ring_test_ring,
 	.test_ib = cik_sdma_ring_test_ib,
 	.is_lockup = cik_sdma_ring_is_lockup,
-	.insert_nop = amdgpu_ring_insert_nop,
+	.insert_nop = cik_sdma_ring_insert_nop,
 };
 
 static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
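The closed-form pad counts in the emit_ib and vm_pad_ib hunks are equivalent to what the old while loops produced; they are just computed up front so the whole pad can be handed to insert_nop as a single burst. A quick stand-alone check, not part of the patch, that the arithmetic lands on the intended offset for every starting position in the 8 DW window (offset 4 for CIK's IB packet, offset 2 for the SDMA v2.4/v3.0 variants below, offset 0 for vm_pad_ib):

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int w;

	for (w = 0; w < 8; w++) {
		unsigned int pad_cik = (12 - (w & 7)) % 8;  /* CIK emit_ib: pad to offset 4 */
		unsigned int pad_vi  = (10 - (w & 7)) % 8;  /* SDMA v2.4/v3.0 emit_ib: pad to offset 2 */
		unsigned int pad_ib  = (8 - (w & 7)) % 8;   /* vm_pad_ib: pad length_dw to a multiple of 8 */

		assert(((w + pad_cik) & 7) == 4);
		assert(((w + pad_vi)  & 7) == 2);
		assert(((w + pad_ib)  & 7) == 0);
		printf("offset %u: cik pad=%u  vi pad=%u  ib pad=%u\n",
		       w, pad_cik, pad_vi, pad_ib);
	}
	return 0;
}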
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c

@@ -220,6 +220,19 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
 	WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2);
 }
 
+static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+	int i;
+
+	for (i = 0; i < count; i++)
+		if (sdma && sdma->burst_nop && (i == 0))
+			amdgpu_ring_write(ring, ring->nop |
+					  SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+		else
+			amdgpu_ring_write(ring, ring->nop);
+}
+
 /**
  * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine
  *
@@ -247,8 +260,8 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, next_rptr);
 
 	/* IB packet must end on a 8 DW boundary */
-	while ((ring->wptr & 7) != 2)
-		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP));
+	sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
+
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
 			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
 	/* base must be 32 byte aligned */
@@ -881,8 +894,19 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
  */
 static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib)
 {
-	while (ib->length_dw & 0x7)
-		ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+	struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+	u32 pad_count;
+	int i;
+
+	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+	for (i = 0; i < pad_count; i++)
+		if (sdma && sdma->burst_nop && (i == 0))
+			ib->ptr[ib->length_dw++] =
+				SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+		else
+			ib->ptr[ib->length_dw++] =
+				SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
 }
 
 /**
@@ -1316,7 +1340,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
 	.test_ring = sdma_v2_4_ring_test_ring,
 	.test_ib = sdma_v2_4_ring_test_ib,
 	.is_lockup = sdma_v2_4_ring_is_lockup,
-	.insert_nop = amdgpu_ring_insert_nop,
+	.insert_nop = sdma_v2_4_ring_insert_nop,
 };
 
 static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c

@@ -306,6 +306,19 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
 	}
 }
 
+static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+	int i;
+
+	for (i = 0; i < count; i++)
+		if (sdma && sdma->burst_nop && (i == 0))
+			amdgpu_ring_write(ring, ring->nop |
+					  SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+		else
+			amdgpu_ring_write(ring, ring->nop);
+}
+
 /**
  * sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine
  *
@@ -332,8 +345,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, next_rptr);
 
 	/* IB packet must end on a 8 DW boundary */
-	while ((ring->wptr & 7) != 2)
-		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP));
+	sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
 			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
@@ -1001,8 +1013,19 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
  */
 static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib)
 {
-	while (ib->length_dw & 0x7)
-		ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+	struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+	u32 pad_count;
+	int i;
+
+	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+	for (i = 0; i < pad_count; i++)
+		if (sdma && sdma->burst_nop && (i == 0))
+			ib->ptr[ib->length_dw++] =
+				SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+		else
+			ib->ptr[ib->length_dw++] =
+				SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
 }
 
 /**
@@ -1440,7 +1463,7 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
 	.test_ring = sdma_v3_0_ring_test_ring,
 	.test_ib = sdma_v3_0_ring_test_ib,
 	.is_lockup = sdma_v3_0_ring_is_lockup,
-	.insert_nop = amdgpu_ring_insert_nop,
+	.insert_nop = sdma_v3_0_ring_insert_nop,
 };
 
 static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
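For context, the generic amdgpu_ring_insert_nop helper that these ring_funcs tables previously pointed at simply emits count independent NOP dwords, one ring write per dword. A simplified sketch of that fallback, with stub types so it compiles outside the kernel (generic_ring_insert_nop is a stand-in name, not the upstream symbol); the SDMA-specific overrides above differ only in folding a COUNT field into the first dword when sdma->burst_nop is set:

#include <stdint.h>

/* Stand-ins so the sketch is self-contained; the real struct amdgpu_ring and
 * amdgpu_ring_write() live inside the amdgpu driver. */
struct amdgpu_ring { uint32_t nop; };
static void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t dw)
{
	(void)ring;
	(void)dw;	/* a real ring write would store dw and advance wptr */
}

/* Simplified view of the generic fallback: count separate NOP headers,
 * no burst, so the engine decodes each padding dword as its own packet. */
static void generic_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	uint32_t i;

	for (i = 0; i < count; i++)
		amdgpu_ring_write(ring, ring->nop);
}

int main(void)
{
	struct amdgpu_ring ring = { .nop = 0 };

	generic_ring_insert_nop(&ring, 6);
	return 0;
}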