forked from Minki/linux
drm/radeon: allow semaphore emission to fail
To workaround bugs and/or certain limits it's sometimes useful to fall back to waiting on fences. Signed-off-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org
This commit is contained in:
parent
bd80c8ba99
commit
1654b817d8
@ -3556,7 +3556,7 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev,
|
||||
radeon_ring_write(ring, 0);
|
||||
}
|
||||
|
||||
void cik_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_ring *ring,
|
||||
struct radeon_semaphore *semaphore,
|
||||
bool emit_wait)
|
||||
@ -3567,6 +3567,8 @@ void cik_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
|
||||
radeon_ring_write(ring, addr & 0xffffffff);
|
||||
radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -3609,13 +3611,8 @@ int cik_copy_cpdma(struct radeon_device *rdev,
|
||||
return r;
|
||||
}
|
||||
|
||||
if (radeon_fence_need_sync(*fence, ring->idx)) {
|
||||
radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
|
||||
ring->idx);
|
||||
radeon_fence_note_sync(*fence, ring->idx);
|
||||
} else {
|
||||
radeon_semaphore_free(rdev, &sem, NULL);
|
||||
}
|
||||
radeon_semaphore_sync_to(sem, *fence);
|
||||
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
|
||||
|
||||
for (i = 0; i < num_loops; i++) {
|
||||
cur_size_in_bytes = size_in_bytes;
|
||||
|
@ -130,7 +130,7 @@ void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
|
||||
* Add a DMA semaphore packet to the ring wait on or signal
|
||||
* other rings (CIK).
|
||||
*/
|
||||
void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_ring *ring,
|
||||
struct radeon_semaphore *semaphore,
|
||||
bool emit_wait)
|
||||
@ -141,6 +141,8 @@ void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
|
||||
radeon_ring_write(ring, addr & 0xfffffff8);
|
||||
radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -443,13 +445,8 @@ int cik_copy_dma(struct radeon_device *rdev,
|
||||
return r;
|
||||
}
|
||||
|
||||
if (radeon_fence_need_sync(*fence, ring->idx)) {
|
||||
radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
|
||||
ring->idx);
|
||||
radeon_fence_note_sync(*fence, ring->idx);
|
||||
} else {
|
||||
radeon_semaphore_free(rdev, &sem, NULL);
|
||||
}
|
||||
radeon_semaphore_sync_to(sem, *fence);
|
||||
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
|
||||
|
||||
for (i = 0; i < num_loops; i++) {
|
||||
cur_size_in_bytes = size_in_bytes;
|
||||
|
@ -131,13 +131,8 @@ int evergreen_copy_dma(struct radeon_device *rdev,
|
||||
return r;
|
||||
}
|
||||
|
||||
if (radeon_fence_need_sync(*fence, ring->idx)) {
|
||||
radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
|
||||
ring->idx);
|
||||
radeon_fence_note_sync(*fence, ring->idx);
|
||||
} else {
|
||||
radeon_semaphore_free(rdev, &sem, NULL);
|
||||
}
|
||||
radeon_semaphore_sync_to(sem, *fence);
|
||||
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
|
||||
|
||||
for (i = 0; i < num_loops; i++) {
|
||||
cur_size_in_dw = size_in_dw;
|
||||
|
@ -869,13 +869,14 @@ void r100_fence_ring_emit(struct radeon_device *rdev,
|
||||
radeon_ring_write(ring, RADEON_SW_INT_FIRE);
|
||||
}
|
||||
|
||||
void r100_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
bool r100_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_ring *ring,
|
||||
struct radeon_semaphore *semaphore,
|
||||
bool emit_wait)
|
||||
{
|
||||
/* Unused on older asics, since we don't have semaphores or multiple rings */
|
||||
BUG();
|
||||
return false;
|
||||
}
|
||||
|
||||
int r100_copy_blit(struct radeon_device *rdev,
|
||||
|
@ -2650,7 +2650,7 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
|
||||
}
|
||||
}
|
||||
|
||||
void r600_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
bool r600_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_ring *ring,
|
||||
struct radeon_semaphore *semaphore,
|
||||
bool emit_wait)
|
||||
@ -2664,6 +2664,8 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
|
||||
radeon_ring_write(ring, addr & 0xffffffff);
|
||||
radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2706,13 +2708,8 @@ int r600_copy_cpdma(struct radeon_device *rdev,
|
||||
return r;
|
||||
}
|
||||
|
||||
if (radeon_fence_need_sync(*fence, ring->idx)) {
|
||||
radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
|
||||
ring->idx);
|
||||
radeon_fence_note_sync(*fence, ring->idx);
|
||||
} else {
|
||||
radeon_semaphore_free(rdev, &sem, NULL);
|
||||
}
|
||||
radeon_semaphore_sync_to(sem, *fence);
|
||||
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
|
||||
|
||||
radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
|
||||
radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
|
||||
|
@ -311,7 +311,7 @@ void r600_dma_fence_ring_emit(struct radeon_device *rdev,
|
||||
* Add a DMA semaphore packet to the ring wait on or signal
|
||||
* other rings (r6xx-SI).
|
||||
*/
|
||||
void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_ring *ring,
|
||||
struct radeon_semaphore *semaphore,
|
||||
bool emit_wait)
|
||||
@ -322,6 +322,8 @@ void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
|
||||
radeon_ring_write(ring, addr & 0xfffffffc);
|
||||
radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -462,13 +464,8 @@ int r600_copy_dma(struct radeon_device *rdev,
|
||||
return r;
|
||||
}
|
||||
|
||||
if (radeon_fence_need_sync(*fence, ring->idx)) {
|
||||
radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
|
||||
ring->idx);
|
||||
radeon_fence_note_sync(*fence, ring->idx);
|
||||
} else {
|
||||
radeon_semaphore_free(rdev, &sem, NULL);
|
||||
}
|
||||
radeon_semaphore_sync_to(sem, *fence);
|
||||
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
|
||||
|
||||
for (i = 0; i < num_loops; i++) {
|
||||
cur_size_in_dw = size_in_dw;
|
||||
|
@ -348,6 +348,7 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, i
|
||||
void radeon_fence_process(struct radeon_device *rdev, int ring);
|
||||
bool radeon_fence_signaled(struct radeon_fence *fence);
|
||||
int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
|
||||
int radeon_fence_wait_locked(struct radeon_fence *fence);
|
||||
int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring);
|
||||
int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
|
||||
int radeon_fence_wait_any(struct radeon_device *rdev,
|
||||
@ -548,17 +549,20 @@ struct radeon_semaphore {
|
||||
struct radeon_sa_bo *sa_bo;
|
||||
signed waiters;
|
||||
uint64_t gpu_addr;
|
||||
struct radeon_fence *sync_to[RADEON_NUM_RINGS];
|
||||
};
|
||||
|
||||
int radeon_semaphore_create(struct radeon_device *rdev,
|
||||
struct radeon_semaphore **semaphore);
|
||||
void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
|
||||
bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
|
||||
struct radeon_semaphore *semaphore);
|
||||
void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
|
||||
bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
|
||||
struct radeon_semaphore *semaphore);
|
||||
void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
|
||||
struct radeon_fence *fence);
|
||||
int radeon_semaphore_sync_rings(struct radeon_device *rdev,
|
||||
struct radeon_semaphore *semaphore,
|
||||
int signaler, int waiter);
|
||||
int waiting_ring);
|
||||
void radeon_semaphore_free(struct radeon_device *rdev,
|
||||
struct radeon_semaphore **semaphore,
|
||||
struct radeon_fence *fence);
|
||||
@ -765,7 +769,6 @@ struct radeon_ib {
|
||||
struct radeon_fence *fence;
|
||||
struct radeon_vm *vm;
|
||||
bool is_const_ib;
|
||||
struct radeon_fence *sync_to[RADEON_NUM_RINGS];
|
||||
struct radeon_semaphore *semaphore;
|
||||
};
|
||||
|
||||
@ -921,7 +924,6 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
|
||||
struct radeon_ib *ib, struct radeon_vm *vm,
|
||||
unsigned size);
|
||||
void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib);
|
||||
void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence);
|
||||
int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
|
||||
struct radeon_ib *const_ib);
|
||||
int radeon_ib_pool_init(struct radeon_device *rdev);
|
||||
@ -1638,7 +1640,7 @@ struct radeon_asic_ring {
|
||||
/* command emmit functions */
|
||||
void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
|
||||
void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence);
|
||||
void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
|
||||
bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
|
||||
struct radeon_semaphore *semaphore, bool emit_wait);
|
||||
void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
|
||||
|
||||
|
@ -80,7 +80,7 @@ int r100_irq_set(struct radeon_device *rdev);
|
||||
int r100_irq_process(struct radeon_device *rdev);
|
||||
void r100_fence_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_fence *fence);
|
||||
void r100_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
bool r100_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_ring *cp,
|
||||
struct radeon_semaphore *semaphore,
|
||||
bool emit_wait);
|
||||
@ -313,13 +313,13 @@ int r600_cs_parse(struct radeon_cs_parser *p);
|
||||
int r600_dma_cs_parse(struct radeon_cs_parser *p);
|
||||
void r600_fence_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_fence *fence);
|
||||
void r600_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
bool r600_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_ring *cp,
|
||||
struct radeon_semaphore *semaphore,
|
||||
bool emit_wait);
|
||||
void r600_dma_fence_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_fence *fence);
|
||||
void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_ring *ring,
|
||||
struct radeon_semaphore *semaphore,
|
||||
bool emit_wait);
|
||||
@ -566,10 +566,6 @@ int sumo_dpm_force_performance_level(struct radeon_device *rdev,
|
||||
*/
|
||||
void cayman_fence_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_fence *fence);
|
||||
void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
|
||||
struct radeon_ring *ring,
|
||||
struct radeon_semaphore *semaphore,
|
||||
bool emit_wait);
|
||||
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev);
|
||||
int cayman_init(struct radeon_device *rdev);
|
||||
void cayman_fini(struct radeon_device *rdev);
|
||||
@ -697,7 +693,7 @@ void cik_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
|
||||
int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
|
||||
void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_fence *fence);
|
||||
void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_ring *ring,
|
||||
struct radeon_semaphore *semaphore,
|
||||
bool emit_wait);
|
||||
@ -717,7 +713,7 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_fence *fence);
|
||||
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_fence *fence);
|
||||
void cik_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_ring *cp,
|
||||
struct radeon_semaphore *semaphore,
|
||||
bool emit_wait);
|
||||
@ -807,7 +803,7 @@ void uvd_v1_0_stop(struct radeon_device *rdev);
|
||||
|
||||
int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
|
||||
int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
|
||||
void uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
|
||||
bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
|
||||
struct radeon_ring *ring,
|
||||
struct radeon_semaphore *semaphore,
|
||||
bool emit_wait);
|
||||
@ -819,7 +815,7 @@ void uvd_v2_2_fence_emit(struct radeon_device *rdev,
|
||||
struct radeon_fence *fence);
|
||||
|
||||
/* uvd v3.1 */
|
||||
void uvd_v3_1_semaphore_emit(struct radeon_device *rdev,
|
||||
bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev,
|
||||
struct radeon_ring *ring,
|
||||
struct radeon_semaphore *semaphore,
|
||||
bool emit_wait);
|
||||
|
@ -159,7 +159,8 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
|
||||
if (!p->relocs[i].robj)
|
||||
continue;
|
||||
|
||||
radeon_ib_sync_to(&p->ib, p->relocs[i].robj->tbo.sync_obj);
|
||||
radeon_semaphore_sync_to(p->ib.semaphore,
|
||||
p->relocs[i].robj->tbo.sync_obj);
|
||||
}
|
||||
}
|
||||
|
||||
@ -411,9 +412,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
|
||||
goto out;
|
||||
}
|
||||
radeon_cs_sync_rings(parser);
|
||||
radeon_ib_sync_to(&parser->ib, vm->fence);
|
||||
radeon_ib_sync_to(&parser->ib, radeon_vm_grab_id(
|
||||
rdev, vm, parser->ring));
|
||||
radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);
|
||||
radeon_semaphore_sync_to(parser->ib.semaphore,
|
||||
radeon_vm_grab_id(rdev, vm, parser->ring));
|
||||
|
||||
if ((rdev->family >= CHIP_TAHITI) &&
|
||||
(parser->chunk_const_ib_idx != -1)) {
|
||||
|
@ -471,6 +471,36 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* radeon_fence_wait_locked - wait for a fence to signal
|
||||
*
|
||||
* @fence: radeon fence object
|
||||
*
|
||||
* Wait for the requested fence to signal (all asics).
|
||||
* Returns 0 if the fence has passed, error for all other cases.
|
||||
*/
|
||||
int radeon_fence_wait_locked(struct radeon_fence *fence)
|
||||
{
|
||||
uint64_t seq[RADEON_NUM_RINGS] = {};
|
||||
int r;
|
||||
|
||||
if (fence == NULL) {
|
||||
WARN(1, "Querying an invalid fence : %p !\n", fence);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
seq[fence->ring] = fence->seq;
|
||||
if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ)
|
||||
return 0;
|
||||
|
||||
r = radeon_fence_wait_seq(fence->rdev, seq, false, false);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
fence->seq = RADEON_FENCE_SIGNALED_SEQ;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* radeon_fence_wait_next_locked - wait for the next fence to signal
|
||||
*
|
||||
|
@ -651,7 +651,7 @@ retry:
|
||||
radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr,
|
||||
0, pd_entries, 0, 0);
|
||||
|
||||
radeon_ib_sync_to(&ib, vm->fence);
|
||||
radeon_semaphore_sync_to(ib.semaphore, vm->fence);
|
||||
r = radeon_ib_schedule(rdev, &ib, NULL);
|
||||
if (r) {
|
||||
radeon_ib_free(rdev, &ib);
|
||||
@ -1220,7 +1220,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
|
||||
radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
|
||||
addr, radeon_vm_page_flags(bo_va->flags));
|
||||
|
||||
radeon_ib_sync_to(&ib, vm->fence);
|
||||
radeon_semaphore_sync_to(ib.semaphore, vm->fence);
|
||||
r = radeon_ib_schedule(rdev, &ib, NULL);
|
||||
if (r) {
|
||||
radeon_ib_free(rdev, &ib);
|
||||
|
@ -61,7 +61,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
|
||||
struct radeon_ib *ib, struct radeon_vm *vm,
|
||||
unsigned size)
|
||||
{
|
||||
int i, r;
|
||||
int r;
|
||||
|
||||
r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true);
|
||||
if (r) {
|
||||
@ -87,8 +87,6 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
|
||||
ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo);
|
||||
}
|
||||
ib->is_const_ib = false;
|
||||
for (i = 0; i < RADEON_NUM_RINGS; ++i)
|
||||
ib->sync_to[i] = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -108,25 +106,6 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
|
||||
radeon_fence_unref(&ib->fence);
|
||||
}
|
||||
|
||||
/**
|
||||
* radeon_ib_sync_to - sync to fence before executing the IB
|
||||
*
|
||||
* @ib: IB object to add fence to
|
||||
* @fence: fence to sync to
|
||||
*
|
||||
* Sync to the fence before executing the IB
|
||||
*/
|
||||
void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence)
|
||||
{
|
||||
struct radeon_fence *other;
|
||||
|
||||
if (!fence)
|
||||
return;
|
||||
|
||||
other = ib->sync_to[fence->ring];
|
||||
ib->sync_to[fence->ring] = radeon_fence_later(fence, other);
|
||||
}
|
||||
|
||||
/**
|
||||
* radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring
|
||||
*
|
||||
@ -151,8 +130,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
|
||||
struct radeon_ib *const_ib)
|
||||
{
|
||||
struct radeon_ring *ring = &rdev->ring[ib->ring];
|
||||
bool need_sync = false;
|
||||
int i, r = 0;
|
||||
int r = 0;
|
||||
|
||||
if (!ib->length_dw || !ring->ready) {
|
||||
/* TODO: Nothings in the ib we should report. */
|
||||
@ -166,19 +144,15 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
|
||||
dev_err(rdev->dev, "scheduling IB failed (%d).\n", r);
|
||||
return r;
|
||||
}
|
||||
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
|
||||
struct radeon_fence *fence = ib->sync_to[i];
|
||||
if (radeon_fence_need_sync(fence, ib->ring)) {
|
||||
need_sync = true;
|
||||
radeon_semaphore_sync_rings(rdev, ib->semaphore,
|
||||
fence->ring, ib->ring);
|
||||
radeon_fence_note_sync(fence, ib->ring);
|
||||
}
|
||||
}
|
||||
/* immediately free semaphore when we don't need to sync */
|
||||
if (!need_sync) {
|
||||
radeon_semaphore_free(rdev, &ib->semaphore, NULL);
|
||||
|
||||
/* sync with other rings */
|
||||
r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring);
|
||||
if (r) {
|
||||
dev_err(rdev->dev, "failed to sync rings (%d)\n", r);
|
||||
radeon_ring_unlock_undo(rdev, ring);
|
||||
return r;
|
||||
}
|
||||
|
||||
/* if we can't remember our last VM flush then flush now! */
|
||||
/* XXX figure out why we have to flush for every IB */
|
||||
if (ib->vm /*&& !ib->vm->last_flush*/) {
|
||||
|
@ -34,7 +34,7 @@
|
||||
int radeon_semaphore_create(struct radeon_device *rdev,
|
||||
struct radeon_semaphore **semaphore)
|
||||
{
|
||||
int r;
|
||||
int i, r;
|
||||
|
||||
*semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL);
|
||||
if (*semaphore == NULL) {
|
||||
@ -50,59 +50,122 @@ int radeon_semaphore_create(struct radeon_device *rdev,
|
||||
(*semaphore)->waiters = 0;
|
||||
(*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo);
|
||||
*((uint64_t*)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0;
|
||||
|
||||
for (i = 0; i < RADEON_NUM_RINGS; ++i)
|
||||
(*semaphore)->sync_to[i] = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
|
||||
bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ridx,
|
||||
struct radeon_semaphore *semaphore)
|
||||
{
|
||||
trace_radeon_semaphore_signale(ring, semaphore);
|
||||
struct radeon_ring *ring = &rdev->ring[ridx];
|
||||
|
||||
--semaphore->waiters;
|
||||
radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, false);
|
||||
trace_radeon_semaphore_signale(ridx, semaphore);
|
||||
|
||||
if (radeon_semaphore_ring_emit(rdev, ridx, ring, semaphore, false)) {
|
||||
--semaphore->waiters;
|
||||
|
||||
/* for debugging lockup only, used by sysfs debug files */
|
||||
ring->last_semaphore_signal_addr = semaphore->gpu_addr;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
|
||||
bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ridx,
|
||||
struct radeon_semaphore *semaphore)
|
||||
{
|
||||
trace_radeon_semaphore_wait(ring, semaphore);
|
||||
struct radeon_ring *ring = &rdev->ring[ridx];
|
||||
|
||||
++semaphore->waiters;
|
||||
radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true);
|
||||
trace_radeon_semaphore_wait(ridx, semaphore);
|
||||
|
||||
if (radeon_semaphore_ring_emit(rdev, ridx, ring, semaphore, true)) {
|
||||
++semaphore->waiters;
|
||||
|
||||
/* for debugging lockup only, used by sysfs debug files */
|
||||
ring->last_semaphore_wait_addr = semaphore->gpu_addr;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* caller must hold ring lock */
|
||||
/**
|
||||
* radeon_semaphore_sync_to - use the semaphore to sync to a fence
|
||||
*
|
||||
* @semaphore: semaphore object to add fence to
|
||||
* @fence: fence to sync to
|
||||
*
|
||||
* Sync to the fence using this semaphore object
|
||||
*/
|
||||
void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
|
||||
struct radeon_fence *fence)
|
||||
{
|
||||
struct radeon_fence *other;
|
||||
|
||||
if (!fence)
|
||||
return;
|
||||
|
||||
other = semaphore->sync_to[fence->ring];
|
||||
semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other);
|
||||
}
|
||||
|
||||
/**
|
||||
* radeon_semaphore_sync_rings - sync ring to all registered fences
|
||||
*
|
||||
* @rdev: radeon_device pointer
|
||||
* @semaphore: semaphore object to use for sync
|
||||
* @ring: ring that needs sync
|
||||
*
|
||||
* Ensure that all registered fences are signaled before letting
|
||||
* the ring continue. The caller must hold the ring lock.
|
||||
*/
|
||||
int radeon_semaphore_sync_rings(struct radeon_device *rdev,
|
||||
struct radeon_semaphore *semaphore,
|
||||
int signaler, int waiter)
|
||||
int ring)
|
||||
{
|
||||
int r;
|
||||
int i, r;
|
||||
|
||||
/* no need to signal and wait on the same ring */
|
||||
if (signaler == waiter) {
|
||||
return 0;
|
||||
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
|
||||
struct radeon_fence *fence = semaphore->sync_to[i];
|
||||
|
||||
/* check if we really need to sync */
|
||||
if (!radeon_fence_need_sync(fence, ring))
|
||||
continue;
|
||||
|
||||
/* prevent GPU deadlocks */
|
||||
if (!rdev->ring[i].ready) {
|
||||
dev_err(rdev->dev, "Syncing to a disabled ring!");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* allocate enough space for sync command */
|
||||
r = radeon_ring_alloc(rdev, &rdev->ring[i], 16);
|
||||
if (r) {
|
||||
return r;
|
||||
}
|
||||
|
||||
/* emit the signal semaphore */
|
||||
if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) {
|
||||
/* signaling wasn't successful wait manually */
|
||||
radeon_ring_undo(&rdev->ring[i]);
|
||||
radeon_fence_wait_locked(fence);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* we assume caller has already allocated space on waiters ring */
|
||||
if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) {
|
||||
/* waiting wasn't successful wait manually */
|
||||
radeon_ring_undo(&rdev->ring[i]);
|
||||
radeon_fence_wait_locked(fence);
|
||||
continue;
|
||||
}
|
||||
|
||||
radeon_ring_commit(rdev, &rdev->ring[i]);
|
||||
radeon_fence_note_sync(fence, ring);
|
||||
}
|
||||
|
||||
/* prevent GPU deadlocks */
|
||||
if (!rdev->ring[signaler].ready) {
|
||||
dev_err(rdev->dev, "Trying to sync to a disabled ring!");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
r = radeon_ring_alloc(rdev, &rdev->ring[signaler], 8);
|
||||
if (r) {
|
||||
return r;
|
||||
}
|
||||
radeon_semaphore_emit_signal(rdev, signaler, semaphore);
|
||||
radeon_ring_commit(rdev, &rdev->ring[signaler]);
|
||||
|
||||
/* we assume caller has already allocated space on waiters ring */
|
||||
radeon_semaphore_emit_wait(rdev, waiter, semaphore);
|
||||
|
||||
/* for debugging lockup only, used by sysfs debug files */
|
||||
rdev->ring[signaler].last_semaphore_signal_addr = semaphore->gpu_addr;
|
||||
rdev->ring[waiter].last_semaphore_wait_addr = semaphore->gpu_addr;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -66,13 +66,8 @@ int rv770_copy_dma(struct radeon_device *rdev,
|
||||
return r;
|
||||
}
|
||||
|
||||
if (radeon_fence_need_sync(*fence, ring->idx)) {
|
||||
radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
|
||||
ring->idx);
|
||||
radeon_fence_note_sync(*fence, ring->idx);
|
||||
} else {
|
||||
radeon_semaphore_free(rdev, &sem, NULL);
|
||||
}
|
||||
radeon_semaphore_sync_to(sem, *fence);
|
||||
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
|
||||
|
||||
for (i = 0; i < num_loops; i++) {
|
||||
cur_size_in_dw = size_in_dw;
|
||||
|
@ -195,13 +195,8 @@ int si_copy_dma(struct radeon_device *rdev,
|
||||
return r;
|
||||
}
|
||||
|
||||
if (radeon_fence_need_sync(*fence, ring->idx)) {
|
||||
radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
|
||||
ring->idx);
|
||||
radeon_fence_note_sync(*fence, ring->idx);
|
||||
} else {
|
||||
radeon_semaphore_free(rdev, &sem, NULL);
|
||||
}
|
||||
radeon_semaphore_sync_to(sem, *fence);
|
||||
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
|
||||
|
||||
for (i = 0; i < num_loops; i++) {
|
||||
cur_size_in_bytes = size_in_bytes;
|
||||
|
@ -357,7 +357,7 @@ int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
|
||||
*
|
||||
* Emit a semaphore command (either wait or signal) to the UVD ring.
|
||||
*/
|
||||
void uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
|
||||
bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
|
||||
struct radeon_ring *ring,
|
||||
struct radeon_semaphore *semaphore,
|
||||
bool emit_wait)
|
||||
@ -372,6 +372,8 @@ void uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
|
||||
|
||||
radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
|
||||
radeon_ring_write(ring, emit_wait ? 1 : 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -37,7 +37,7 @@
|
||||
*
|
||||
* Emit a semaphore command (either wait or signal) to the UVD ring.
|
||||
*/
|
||||
void uvd_v3_1_semaphore_emit(struct radeon_device *rdev,
|
||||
bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev,
|
||||
struct radeon_ring *ring,
|
||||
struct radeon_semaphore *semaphore,
|
||||
bool emit_wait)
|
||||
@ -52,4 +52,6 @@ void uvd_v3_1_semaphore_emit(struct radeon_device *rdev,
|
||||
|
||||
radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
|
||||
radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user