drm/radeon: UVD bringup v8

Just everything needed to decode videos using UVD.

v6: just all the bugfixes and support for R7xx-SI merged in one patch
v7: UVD_CGC_GATE is a write only register, lockup detection fix
v8: split out VRAM fallback changes, remove support for RV770,
    add support for HEMLOCK, add buffer size checks

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Christian König 2013-04-08 12:41:29 +02:00 committed by Alex Deucher
parent 4474f3a91f
commit f2ba57b5ea
23 changed files with 1534 additions and 53 deletions

View File

@ -76,7 +76,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \ evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \
evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \ evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \
atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \ atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \
si_blit_shaders.o radeon_prime.o si_blit_shaders.o radeon_prime.o radeon_uvd.o
radeon-$(CONFIG_COMPAT) += radeon_ioc32.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o

View File

@ -3360,6 +3360,9 @@ restart_ih:
DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
break; break;
} }
case 124: /* UVD */
DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
break; break;
case 146: case 146:
case 147: case 147:
@ -3571,7 +3574,7 @@ int evergreen_copy_dma(struct radeon_device *rdev,
static int evergreen_startup(struct radeon_device *rdev) static int evergreen_startup(struct radeon_device *rdev)
{ {
struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; struct radeon_ring *ring;
int r; int r;
/* enable pcie gen2 link */ /* enable pcie gen2 link */
@ -3638,6 +3641,17 @@ static int evergreen_startup(struct radeon_device *rdev)
return r; return r;
} }
r = rv770_uvd_resume(rdev);
if (!r) {
r = radeon_fence_driver_start_ring(rdev,
R600_RING_TYPE_UVD_INDEX);
if (r)
dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
}
if (r)
rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
/* Enable IRQ */ /* Enable IRQ */
r = r600_irq_init(rdev); r = r600_irq_init(rdev);
if (r) { if (r) {
@ -3647,6 +3661,7 @@ static int evergreen_startup(struct radeon_device *rdev)
} }
evergreen_irq_set(rdev); evergreen_irq_set(rdev);
ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
R600_CP_RB_RPTR, R600_CP_RB_WPTR, R600_CP_RB_RPTR, R600_CP_RB_WPTR,
0, 0xfffff, RADEON_CP_PACKET2); 0, 0xfffff, RADEON_CP_PACKET2);
@ -3670,6 +3685,19 @@ static int evergreen_startup(struct radeon_device *rdev)
if (r) if (r)
return r; return r;
ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
if (ring->ring_size) {
r = radeon_ring_init(rdev, ring, ring->ring_size,
R600_WB_UVD_RPTR_OFFSET,
UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
0, 0xfffff, RADEON_CP_PACKET2);
if (!r)
r = r600_uvd_init(rdev);
if (r)
DRM_ERROR("radeon: error initializing UVD (%d).\n", r);
}
r = radeon_ib_pool_init(rdev); r = radeon_ib_pool_init(rdev);
if (r) { if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r); dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@ -3716,8 +3744,10 @@ int evergreen_resume(struct radeon_device *rdev)
int evergreen_suspend(struct radeon_device *rdev) int evergreen_suspend(struct radeon_device *rdev)
{ {
r600_audio_fini(rdev); r600_audio_fini(rdev);
radeon_uvd_suspend(rdev);
r700_cp_stop(rdev); r700_cp_stop(rdev);
r600_dma_stop(rdev); r600_dma_stop(rdev);
r600_uvd_rbc_stop(rdev);
evergreen_irq_suspend(rdev); evergreen_irq_suspend(rdev);
radeon_wb_disable(rdev); radeon_wb_disable(rdev);
evergreen_pcie_gart_disable(rdev); evergreen_pcie_gart_disable(rdev);
@ -3797,6 +3827,13 @@ int evergreen_init(struct radeon_device *rdev)
rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL; rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
r = radeon_uvd_init(rdev);
if (!r) {
rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX],
4096);
}
rdev->ih.ring_obj = NULL; rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024); r600_ih_ring_init(rdev, 64 * 1024);
@ -3843,6 +3880,7 @@ void evergreen_fini(struct radeon_device *rdev)
radeon_ib_pool_fini(rdev); radeon_ib_pool_fini(rdev);
radeon_irq_kms_fini(rdev); radeon_irq_kms_fini(rdev);
evergreen_pcie_gart_fini(rdev); evergreen_pcie_gart_fini(rdev);
radeon_uvd_fini(rdev);
r600_vram_scratch_fini(rdev); r600_vram_scratch_fini(rdev);
radeon_gem_fini(rdev); radeon_gem_fini(rdev);
radeon_fence_driver_fini(rdev); radeon_fence_driver_fini(rdev);

View File

@ -992,6 +992,13 @@
# define TARGET_LINK_SPEED_MASK (0xf << 0) # define TARGET_LINK_SPEED_MASK (0xf << 0)
# define SELECTABLE_DEEMPHASIS (1 << 6) # define SELECTABLE_DEEMPHASIS (1 << 6)
/*
* UVD
*/
#define UVD_RBC_RB_RPTR 0xf690
#define UVD_RBC_RB_WPTR 0xf694
/* /*
* PM4 * PM4
*/ */

View File

@ -933,6 +933,23 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
radeon_ring_write(ring, 10); /* poll interval */ radeon_ring_write(ring, 10); /* poll interval */
} }
/**
 * cayman_uvd_semaphore_emit - queue a semaphore command on the UVD ring
 *
 * @rdev: radeon device (unused here, kept for the common emit_semaphore hook)
 * @ring: ring the packets are written to
 * @semaphore: semaphore whose GPU address is programmed
 * @emit_wait: true sets bit 0 of the command word, false leaves it clear
 *
 * Writes three register/value pairs: address bits 3..22 into
 * UVD_SEMA_ADDR_LOW, address bits 23..42 into UVD_SEMA_ADDR_HIGH and the
 * command word into UVD_SEMA_CMD.
 */
void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
			       struct radeon_ring *ring,
			       struct radeon_semaphore *semaphore,
			       bool emit_wait)
{
	const uint64_t sema_addr = semaphore->gpu_addr;
	const uint32_t addr_low = (sema_addr >> 3) & 0x000FFFFF;
	const uint32_t addr_high = (sema_addr >> 23) & 0x000FFFFF;
	/* NOTE(review): 0x80 is always set on this variant; meaning not visible here */
	uint32_t cmd = 0x80;

	if (emit_wait)
		cmd |= 1;

	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
	radeon_ring_write(ring, addr_low);

	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
	radeon_ring_write(ring, addr_high);

	radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
	radeon_ring_write(ring, cmd);
}
static void cayman_cp_enable(struct radeon_device *rdev, bool enable) static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
{ {
if (enable) if (enable)
@ -1684,6 +1701,16 @@ static int cayman_startup(struct radeon_device *rdev)
return r; return r;
} }
r = rv770_uvd_resume(rdev);
if (!r) {
r = radeon_fence_driver_start_ring(rdev,
R600_RING_TYPE_UVD_INDEX);
if (r)
dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
}
if (r)
rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
if (r) { if (r) {
dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
@ -1750,6 +1777,18 @@ static int cayman_startup(struct radeon_device *rdev)
if (r) if (r)
return r; return r;
ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
if (ring->ring_size) {
r = radeon_ring_init(rdev, ring, ring->ring_size,
R600_WB_UVD_RPTR_OFFSET,
UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
0, 0xfffff, RADEON_CP_PACKET2);
if (!r)
r = r600_uvd_init(rdev);
if (r)
DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
}
r = radeon_ib_pool_init(rdev); r = radeon_ib_pool_init(rdev);
if (r) { if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r); dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@ -1796,6 +1835,8 @@ int cayman_suspend(struct radeon_device *rdev)
radeon_vm_manager_fini(rdev); radeon_vm_manager_fini(rdev);
cayman_cp_enable(rdev, false); cayman_cp_enable(rdev, false);
cayman_dma_stop(rdev); cayman_dma_stop(rdev);
r600_uvd_rbc_stop(rdev);
radeon_uvd_suspend(rdev);
evergreen_irq_suspend(rdev); evergreen_irq_suspend(rdev);
radeon_wb_disable(rdev); radeon_wb_disable(rdev);
cayman_pcie_gart_disable(rdev); cayman_pcie_gart_disable(rdev);
@ -1870,6 +1911,13 @@ int cayman_init(struct radeon_device *rdev)
ring->ring_obj = NULL; ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 64 * 1024); r600_ring_init(rdev, ring, 64 * 1024);
r = radeon_uvd_init(rdev);
if (!r) {
ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 4096);
}
rdev->ih.ring_obj = NULL; rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024); r600_ih_ring_init(rdev, 64 * 1024);
@ -1921,6 +1969,7 @@ void cayman_fini(struct radeon_device *rdev)
radeon_vm_manager_fini(rdev); radeon_vm_manager_fini(rdev);
radeon_ib_pool_fini(rdev); radeon_ib_pool_fini(rdev);
radeon_irq_kms_fini(rdev); radeon_irq_kms_fini(rdev);
radeon_uvd_fini(rdev);
cayman_pcie_gart_fini(rdev); cayman_pcie_gart_fini(rdev);
r600_vram_scratch_fini(rdev); r600_vram_scratch_fini(rdev);
radeon_gem_fini(rdev); radeon_gem_fini(rdev);

View File

@ -489,6 +489,15 @@
# define CACHE_FLUSH_AND_INV_EVENT_TS (0x14 << 0) # define CACHE_FLUSH_AND_INV_EVENT_TS (0x14 << 0)
# define CACHE_FLUSH_AND_INV_EVENT (0x16 << 0) # define CACHE_FLUSH_AND_INV_EVENT (0x16 << 0)
/*
* UVD
*/
#define UVD_SEMA_ADDR_LOW 0xEF00
#define UVD_SEMA_ADDR_HIGH 0xEF04
#define UVD_SEMA_CMD 0xEF08
#define UVD_RBC_RB_RPTR 0xF690
#define UVD_RBC_RB_WPTR 0xF694
/* /*
* PM4 * PM4
*/ */

View File

@ -2551,6 +2551,185 @@ void r600_dma_fini(struct radeon_device *rdev)
radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
} }
/*
* UVD
*/
/*
 * r600_uvd_rbc_start - program and start the UVD ring buffer controller
 *
 * Programs the rptr writeback address, the ring base and the ring size,
 * runs a ring test and then queues the semaphore timeout setup on the ring.
 *
 * Returns 0 on success, -EINVAL when the rptr writeback address and the ring
 * do not share the same upper 32 address bits, or the error returned by
 * radeon_ring_test() / radeon_ring_lock().
 */
int r600_uvd_rbc_start(struct radeon_device *rdev)
{
struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
uint64_t rptr_addr;
uint32_t rb_bufsz, tmp;
int r;
rptr_addr = rdev->wb.gpu_addr + R600_WB_UVD_RPTR_OFFSET;
/* only one upper-32-bit value is programmed below, so both must agree */
if (upper_32_bits(rptr_addr) != upper_32_bits(ring->gpu_addr)) {
DRM_ERROR("UVD ring and rptr not in the same 4GB segment!\n");
return -EINVAL;
}
/* force RBC into idle state */
WREG32(UVD_RBC_RB_CNTL, 0x11010101);
/* Set the write pointer delay */
WREG32(UVD_RBC_RB_WPTR_CNTL, 0);
/* set the wb address */
WREG32(UVD_RBC_RB_RPTR_ADDR, rptr_addr >> 2);
/* program the 4GB memory segment for rptr and ring buffer */
WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(rptr_addr) |
(0x7 << 16) | (0x1 << 31));
/* Initialize the ring buffer's read and write pointers */
WREG32(UVD_RBC_RB_RPTR, 0x0);
/* read the pointer back so the software copies match the register */
ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR);
WREG32(UVD_RBC_RB_WPTR, ring->wptr);
/* set the ring address */
WREG32(UVD_RBC_RB_BASE, ring->gpu_addr);
/* Set ring buffer size */
rb_bufsz = drm_order(ring->ring_size);
rb_bufsz = (0x1 << 8) | rb_bufsz;
WREG32(UVD_RBC_RB_CNTL, rb_bufsz);
/* mark the ring ready before testing it; cleared again if the test fails */
ring->ready = true;
r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring);
if (r) {
ring->ready = false;
return r;
}
/* the ten ring writes below are five register/value pairs */
r = radeon_ring_lock(rdev, ring, 10);
if (r) {
DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r);
return r;
}
/* set the three semaphore timeout controls to 0xFFFFF */
tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
radeon_ring_write(ring, tmp);
radeon_ring_write(ring, 0xFFFFF);
tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
radeon_ring_write(ring, tmp);
radeon_ring_write(ring, 0xFFFFF);
tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
radeon_ring_write(ring, tmp);
radeon_ring_write(ring, 0xFFFFF);
/* Clear timeout status bits */
radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0));
radeon_ring_write(ring, 0x8);
/* NOTE(review): presumably enables semaphore handling - confirm against hw docs */
radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0));
radeon_ring_write(ring, 1);
radeon_ring_unlock_commit(rdev, ring);
return 0;
}
/**
 * r600_uvd_rbc_stop - stop the UVD ring buffer controller
 *
 * @rdev: radeon device
 *
 * Writes the idle value to UVD_RBC_RB_CNTL and marks the UVD ring as
 * not ready.
 */
void r600_uvd_rbc_stop(struct radeon_device *rdev)
{
	/* force RBC into idle state */
	WREG32(UVD_RBC_RB_CNTL, 0x11010101);

	rdev->ring[R600_RING_TYPE_UVD_INDEX].ready = false;
}
/*
 * r600_uvd_init - bring the UVD block out of reset and start its ring
 *
 * Resets the UVD sub-blocks, programs the memory controller and MPC
 * registers, boots the VCPU and waits for UVD_STATUS bit 1 to be set.
 * The wait is attempted up to 10 times (each polling up to 100 times with a
 * 10 ms delay); between attempts the VCPU is put through a soft reset.
 * On success the interrupts are enabled and r600_uvd_rbc_start() is called.
 *
 * Returns 0 on success, -1 if the VCPU never responds, or the error from
 * r600_uvd_rbc_start().
 */
int r600_uvd_init(struct radeon_device *rdev)
{
int i, j, r;
/* disable clock gating */
WREG32(UVD_CGC_GATE, 0);
/* disable interrupt */
WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1));
/* put LMI, VCPU, RBC etc... into reset */
WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET |
LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET |
CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET);
mdelay(5);
/* take UVD block out of reset */
WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD);
mdelay(5);
/* initialize UVD memory controller */
WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
(1 << 21) | (1 << 9) | (1 << 20));
/* disable byte swapping */
WREG32(UVD_LMI_SWAP_CNTL, 0);
WREG32(UVD_MP_SWAP_CNTL, 0);
WREG32(UVD_MPC_SET_MUXA0, 0x40c2040);
WREG32(UVD_MPC_SET_MUXA1, 0x0);
WREG32(UVD_MPC_SET_MUXB0, 0x40c2040);
WREG32(UVD_MPC_SET_MUXB1, 0x0);
WREG32(UVD_MPC_SET_ALU, 0);
WREG32(UVD_MPC_SET_MUX, 0x88);
/* Stall UMC */
WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3));
/* take all subblocks out of reset, except VCPU */
WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET);
mdelay(5);
/* enable VCPU clock */
WREG32(UVD_VCPU_CNTL, 1 << 9);
/* enable UMC */
WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
/* boot up the VCPU */
WREG32(UVD_SOFT_RESET, 0);
mdelay(10);
WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
/* up to 10 attempts; r holds the result of the most recent one */
for (i = 0; i < 10; ++i) {
uint32_t status;
/* poll UVD_STATUS bit 1, at most 100 reads 10 ms apart */
for (j = 0; j < 100; ++j) {
status = RREG32(UVD_STATUS);
if (status & 2)
break;
mdelay(10);
}
r = 0;
if (status & 2)
break;
DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
/* pulse the VCPU soft reset before the next attempt */
WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET);
mdelay(10);
WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET);
mdelay(10);
r = -1;
}
if (r) {
DRM_ERROR("UVD not responding, giving up!!!\n");
return r;
}
/* enable interrupt */
WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1));
r = r600_uvd_rbc_start(rdev);
if (r)
return r;
DRM_INFO("UVD initialized successfully.\n");
return 0;
}
/* /*
* GPU scratch registers helpers function. * GPU scratch registers helpers function.
*/ */
@ -2660,6 +2839,40 @@ int r600_dma_ring_test(struct radeon_device *rdev,
return r; return r;
} }
/**
 * r600_uvd_ring_test - check that the UVD ring executes register writes
 *
 * @rdev: radeon device
 * @ring: ring to test
 *
 * Seeds UVD_CONTEXT_ID with 0xCAFEDEAD through MMIO, then queues a ring
 * packet that writes 0xDEADBEEF to the same register and polls (once per
 * microsecond, up to rdev->usec_timeout times) until the new value shows up.
 *
 * Returns 0 on success, the radeon_ring_lock() error if the ring cannot be
 * locked, or -EINVAL if the value never appears.
 */
int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t readback = 0;
	unsigned usecs = 0;
	int r;

	WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD);

	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}
	radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	while (usecs < rdev->usec_timeout) {
		readback = RREG32(UVD_CONTEXT_ID);
		if (readback == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
		usecs++;
	}

	if (usecs >= rdev->usec_timeout) {
		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
			  ring->idx, readback);
		return -EINVAL;
	}

	DRM_INFO("ring test on %d succeeded in %d usecs\n",
		 ring->idx, usecs);
	/* r is still 0 from the successful radeon_ring_lock() above */
	return r;
}
/* /*
* CP fences/semaphores * CP fences/semaphores
*/ */
@ -2711,6 +2924,30 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
} }
} }
/**
 * r600_uvd_fence_emit - queue a fence on the UVD ring
 *
 * @rdev: radeon device
 * @fence: fence to emit; fence->ring selects both the ring and the fence
 *         driver whose GPU address is sent to the VCPU
 *
 * Writes the fence sequence number to UVD_CONTEXT_ID, hands the fence
 * address to the VCPU through the GPCOM data registers and issues command 0,
 * then issues command 2 with both data registers zeroed.
 */
void r600_uvd_fence_emit(struct radeon_device *rdev,
			 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/*
	 * Keep the full 64-bit address: with a 32-bit local the
	 * upper_32_bits() term below is always 0 and the high address
	 * byte is silently dropped.
	 */
	uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr;

	radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
	radeon_ring_write(ring, fence->seq);

	/* low 32 address bits in DATA0, bits 32..39 in DATA1 */
	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
	radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
	/* NOTE(review): command 0 presumably triggers the fence write - confirm */
	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
	radeon_ring_write(ring, 0);
	/* NOTE(review): command 2 presumably raises the interrupt - confirm */
	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
	radeon_ring_write(ring, 2);
}
void r600_semaphore_ring_emit(struct radeon_device *rdev, void r600_semaphore_ring_emit(struct radeon_device *rdev,
struct radeon_ring *ring, struct radeon_ring *ring,
struct radeon_semaphore *semaphore, struct radeon_semaphore *semaphore,
@ -2780,6 +3017,23 @@ void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
radeon_ring_write(ring, upper_32_bits(addr) & 0xff); radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
} }
/**
 * r600_uvd_semaphore_emit - queue a semaphore command on the UVD ring
 *
 * @rdev: radeon device (unused here, kept for the common emit_semaphore hook)
 * @ring: ring the packets are written to
 * @semaphore: semaphore whose GPU address is programmed
 * @emit_wait: true writes command 1, false writes command 0
 *
 * Writes three register/value pairs: address bits 3..22 into
 * UVD_SEMA_ADDR_LOW, address bits 23..42 into UVD_SEMA_ADDR_HIGH and the
 * command word into UVD_SEMA_CMD.
 */
void r600_uvd_semaphore_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	const uint64_t sema_addr = semaphore->gpu_addr;
	const uint32_t addr_low = (sema_addr >> 3) & 0x000FFFFF;
	const uint32_t addr_high = (sema_addr >> 23) & 0x000FFFFF;
	uint32_t cmd = 0;

	if (emit_wait)
		cmd = 1;

	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
	radeon_ring_write(ring, addr_low);

	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
	radeon_ring_write(ring, addr_high);

	radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
	radeon_ring_write(ring, cmd);
}
int r600_copy_blit(struct radeon_device *rdev, int r600_copy_blit(struct radeon_device *rdev,
uint64_t src_offset, uint64_t src_offset,
uint64_t dst_offset, uint64_t dst_offset,
@ -3183,6 +3437,16 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
radeon_ring_write(ring, ib->length_dw); radeon_ring_write(ring, ib->length_dw);
} }
/**
 * r600_uvd_ib_execute - schedule an indirect buffer on the UVD ring
 *
 * @rdev: radeon device
 * @ib: indirect buffer; ib->ring selects the ring it is queued on
 *
 * Emits two register/value pairs: the IB GPU address into UVD_RBC_IB_BASE
 * and its length in dwords into UVD_RBC_IB_SIZE.
 */
void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *uvd_ring = &rdev->ring[ib->ring];

	/* where the IB lives */
	radeon_ring_write(uvd_ring, PACKET0(UVD_RBC_IB_BASE, 0));
	radeon_ring_write(uvd_ring, ib->gpu_addr);

	/* how long it is */
	radeon_ring_write(uvd_ring, PACKET0(UVD_RBC_IB_SIZE, 0));
	radeon_ring_write(uvd_ring, ib->length_dw);
}
int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{ {
struct radeon_ib ib; struct radeon_ib ib;
@ -3300,6 +3564,33 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
return r; return r;
} }
/**
 * r600_uvd_ib_test - test IB submission on the UVD ring
 *
 * @rdev: radeon device
 * @ring: UVD ring to test
 *
 * Submits a create message followed by a destroy message for handle 1 and
 * waits for the fence of the destroy message.
 *
 * Returns 0 on success or the error of the first step that failed.
 */
int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_fence *fence;
	int r;

	r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
	if (r) {
		DRM_ERROR("radeon: failed to get create msg (%d).\n", r);
		return r;
	}

	r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence);
	if (r) {
		DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r);
		return r;
	}

	r = radeon_fence_wait(fence, false);
	if (r)
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
	else
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);

	/*
	 * Drop our reference on both outcomes; returning early on a failed
	 * wait used to leak the fence.
	 */
	radeon_fence_unref(&fence);
	return r;
}
/** /**
* r600_dma_ring_ib_execute - Schedule an IB on the DMA engine * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
* *

View File

@ -691,6 +691,7 @@
#define SRBM_SOFT_RESET 0xe60 #define SRBM_SOFT_RESET 0xe60
# define SOFT_RESET_DMA (1 << 12) # define SOFT_RESET_DMA (1 << 12)
# define SOFT_RESET_RLC (1 << 13) # define SOFT_RESET_RLC (1 << 13)
# define SOFT_RESET_UVD (1 << 18)
# define RV770_SOFT_RESET_DMA (1 << 20) # define RV770_SOFT_RESET_DMA (1 << 20)
#define CP_INT_CNTL 0xc124 #define CP_INT_CNTL 0xc124
@ -1142,6 +1143,66 @@
# define AFMT_AZ_FORMAT_WTRIG_ACK (1 << 29) # define AFMT_AZ_FORMAT_WTRIG_ACK (1 << 29)
# define AFMT_AZ_AUDIO_ENABLE_CHG_ACK (1 << 30) # define AFMT_AZ_AUDIO_ENABLE_CHG_ACK (1 << 30)
/*
* UVD
*/
#define UVD_SEMA_ADDR_LOW 0xef00
#define UVD_SEMA_ADDR_HIGH 0xef04
#define UVD_SEMA_CMD 0xef08
#define UVD_GPCOM_VCPU_CMD 0xef0c
#define UVD_GPCOM_VCPU_DATA0 0xef10
#define UVD_GPCOM_VCPU_DATA1 0xef14
#define UVD_ENGINE_CNTL 0xef18
#define UVD_SEMA_CNTL 0xf400
#define UVD_RB_ARB_CTRL 0xf480
#define UVD_LMI_EXT40_ADDR 0xf498
#define UVD_CGC_GATE 0xf4a8
#define UVD_LMI_CTRL2 0xf4f4
#define UVD_MASTINT_EN 0xf500
#define UVD_LMI_ADDR_EXT 0xf594
#define UVD_LMI_CTRL 0xf598
#define UVD_LMI_SWAP_CNTL 0xf5b4
#define UVD_MP_SWAP_CNTL 0xf5bC
#define UVD_MPC_CNTL 0xf5dC
#define UVD_MPC_SET_MUXA0 0xf5e4
#define UVD_MPC_SET_MUXA1 0xf5e8
#define UVD_MPC_SET_MUXB0 0xf5eC
#define UVD_MPC_SET_MUXB1 0xf5f0
#define UVD_MPC_SET_MUX 0xf5f4
#define UVD_MPC_SET_ALU 0xf5f8
#define UVD_VCPU_CNTL 0xf660
#define UVD_SOFT_RESET 0xf680
#define RBC_SOFT_RESET (1<<0)
#define LBSI_SOFT_RESET (1<<1)
#define LMI_SOFT_RESET (1<<2)
#define VCPU_SOFT_RESET (1<<3)
#define CSM_SOFT_RESET (1<<5)
#define CXW_SOFT_RESET (1<<6)
#define TAP_SOFT_RESET (1<<7)
#define LMI_UMC_SOFT_RESET (1<<13)
#define UVD_RBC_IB_BASE 0xf684
#define UVD_RBC_IB_SIZE 0xf688
#define UVD_RBC_RB_BASE 0xf68c
#define UVD_RBC_RB_RPTR 0xf690
#define UVD_RBC_RB_WPTR 0xf694
#define UVD_RBC_RB_WPTR_CNTL 0xf698
#define UVD_STATUS 0xf6bc
#define UVD_SEMA_TIMEOUT_STATUS 0xf6c0
#define UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL 0xf6c4
#define UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL 0xf6c8
#define UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL 0xf6cc
#define UVD_RBC_RB_CNTL 0xf6a4
#define UVD_RBC_RB_RPTR_ADDR 0xf6a8
#define UVD_CONTEXT_ID 0xf6f4
/* /*
* PM4 * PM4
*/ */

View File

@ -110,24 +110,27 @@ extern int radeon_fastfb;
#define RADEON_BIOS_NUM_SCRATCH 8 #define RADEON_BIOS_NUM_SCRATCH 8
/* max number of rings */ /* max number of rings */
#define RADEON_NUM_RINGS 5 #define RADEON_NUM_RINGS 6
/* fence seq are set to this number when signaled */ /* fence seq are set to this number when signaled */
#define RADEON_FENCE_SIGNALED_SEQ 0LL #define RADEON_FENCE_SIGNALED_SEQ 0LL
/* internal ring indices */ /* internal ring indices */
/* r1xx+ has gfx CP ring */ /* r1xx+ has gfx CP ring */
#define RADEON_RING_TYPE_GFX_INDEX 0 #define RADEON_RING_TYPE_GFX_INDEX 0
/* cayman has 2 compute CP rings */ /* cayman has 2 compute CP rings */
#define CAYMAN_RING_TYPE_CP1_INDEX 1 #define CAYMAN_RING_TYPE_CP1_INDEX 1
#define CAYMAN_RING_TYPE_CP2_INDEX 2 #define CAYMAN_RING_TYPE_CP2_INDEX 2
/* R600+ has an async dma ring */ /* R600+ has an async dma ring */
#define R600_RING_TYPE_DMA_INDEX 3 #define R600_RING_TYPE_DMA_INDEX 3
/* cayman add a second async dma ring */ /* cayman add a second async dma ring */
#define CAYMAN_RING_TYPE_DMA1_INDEX 4 #define CAYMAN_RING_TYPE_DMA1_INDEX 4
/* R600+ */
#define R600_RING_TYPE_UVD_INDEX 5
/* hardcode those limit for now */ /* hardcode those limit for now */
#define RADEON_VA_IB_OFFSET (1 << 20) #define RADEON_VA_IB_OFFSET (1 << 20)
#define RADEON_VA_RESERVED_SIZE (8 << 20) #define RADEON_VA_RESERVED_SIZE (8 << 20)
@ -921,6 +924,7 @@ struct radeon_wb {
#define R600_WB_DMA_RPTR_OFFSET 1792 #define R600_WB_DMA_RPTR_OFFSET 1792
#define R600_WB_IH_WPTR_OFFSET 2048 #define R600_WB_IH_WPTR_OFFSET 2048
#define CAYMAN_WB_DMA1_RPTR_OFFSET 2304 #define CAYMAN_WB_DMA1_RPTR_OFFSET 2304
#define R600_WB_UVD_RPTR_OFFSET 2560
#define R600_WB_EVENT_OFFSET 3072 #define R600_WB_EVENT_OFFSET 3072
/** /**
@ -1121,6 +1125,33 @@ struct radeon_pm {
int radeon_pm_get_type_index(struct radeon_device *rdev, int radeon_pm_get_type_index(struct radeon_device *rdev,
enum radeon_pm_state_type ps_type, enum radeon_pm_state_type ps_type,
int instance); int instance);
/*
* UVD
*/
#define RADEON_MAX_UVD_HANDLES 10
#define RADEON_UVD_STACK_SIZE (1024*1024)
#define RADEON_UVD_HEAP_SIZE (1024*1024)
/* UVD state, embedded in struct radeon_device as the "uvd" member. */
struct radeon_uvd {
/* NOTE(review): presumably the buffer object backing the VCPU - confirm in radeon_uvd.c */
struct radeon_bo *vcpu_bo;
/* NOTE(review): presumably the CPU mapping of vcpu_bo - confirm */
void *cpu_addr;
/* NOTE(review): presumably the GPU address of vcpu_bo - confirm */
uint64_t gpu_addr;
/* one slot per handle, RADEON_MAX_UVD_HANDLES slots in total */
atomic_t handles[RADEON_MAX_UVD_HANDLES];
/* NOTE(review): presumably the file owning the handle in the same slot - confirm */
struct drm_file *filp[RADEON_MAX_UVD_HANDLES];
};
int radeon_uvd_init(struct radeon_device *rdev);
void radeon_uvd_fini(struct radeon_device *rdev);
int radeon_uvd_suspend(struct radeon_device *rdev);
int radeon_uvd_resume(struct radeon_device *rdev);
int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
uint32_t handle, struct radeon_fence **fence);
int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
uint32_t handle, struct radeon_fence **fence);
void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo);
void radeon_uvd_free_handles(struct radeon_device *rdev,
struct drm_file *filp);
int radeon_uvd_cs_parse(struct radeon_cs_parser *parser);
struct r600_audio { struct r600_audio {
int channels; int channels;
@ -1611,6 +1642,7 @@ struct radeon_device {
struct radeon_asic *asic; struct radeon_asic *asic;
struct radeon_gem gem; struct radeon_gem gem;
struct radeon_pm pm; struct radeon_pm pm;
struct radeon_uvd uvd;
uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH];
struct radeon_wb wb; struct radeon_wb wb;
struct radeon_dummy_page dummy_page; struct radeon_dummy_page dummy_page;
@ -1625,6 +1657,7 @@ struct radeon_device {
const struct firmware *rlc_fw; /* r6/700 RLC firmware */ const struct firmware *rlc_fw; /* r6/700 RLC firmware */
const struct firmware *mc_fw; /* NI MC firmware */ const struct firmware *mc_fw; /* NI MC firmware */
const struct firmware *ce_fw; /* SI CE firmware */ const struct firmware *ce_fw; /* SI CE firmware */
const struct firmware *uvd_fw; /* UVD firmware */
struct r600_blit r600_blit; struct r600_blit r600_blit;
struct r600_vram_scratch vram_scratch; struct r600_vram_scratch vram_scratch;
int msi_enabled; /* msi enabled */ int msi_enabled; /* msi enabled */

View File

@ -1130,6 +1130,15 @@ static struct radeon_asic rv770_asic = {
.ring_test = &r600_dma_ring_test, .ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test, .ib_test = &r600_dma_ib_test,
.is_lockup = &r600_dma_is_lockup, .is_lockup = &r600_dma_is_lockup,
},
[R600_RING_TYPE_UVD_INDEX] = {
.ib_execute = &r600_uvd_ib_execute,
.emit_fence = &r600_uvd_fence_emit,
.emit_semaphore = &r600_uvd_semaphore_emit,
.cs_parse = &radeon_uvd_cs_parse,
.ring_test = &r600_uvd_ring_test,
.ib_test = &r600_uvd_ib_test,
.is_lockup = &radeon_ring_test_lockup,
} }
}, },
.irq = { .irq = {
@ -1216,6 +1225,15 @@ static struct radeon_asic evergreen_asic = {
.ring_test = &r600_dma_ring_test, .ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test, .ib_test = &r600_dma_ib_test,
.is_lockup = &evergreen_dma_is_lockup, .is_lockup = &evergreen_dma_is_lockup,
},
[R600_RING_TYPE_UVD_INDEX] = {
.ib_execute = &r600_uvd_ib_execute,
.emit_fence = &r600_uvd_fence_emit,
.emit_semaphore = &r600_uvd_semaphore_emit,
.cs_parse = &radeon_uvd_cs_parse,
.ring_test = &r600_uvd_ring_test,
.ib_test = &r600_uvd_ib_test,
.is_lockup = &radeon_ring_test_lockup,
} }
}, },
.irq = { .irq = {
@ -1302,6 +1320,15 @@ static struct radeon_asic sumo_asic = {
.ring_test = &r600_dma_ring_test, .ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test, .ib_test = &r600_dma_ib_test,
.is_lockup = &evergreen_dma_is_lockup, .is_lockup = &evergreen_dma_is_lockup,
},
[R600_RING_TYPE_UVD_INDEX] = {
.ib_execute = &r600_uvd_ib_execute,
.emit_fence = &r600_uvd_fence_emit,
.emit_semaphore = &r600_uvd_semaphore_emit,
.cs_parse = &radeon_uvd_cs_parse,
.ring_test = &r600_uvd_ring_test,
.ib_test = &r600_uvd_ib_test,
.is_lockup = &radeon_ring_test_lockup,
} }
}, },
.irq = { .irq = {
@ -1388,6 +1415,15 @@ static struct radeon_asic btc_asic = {
.ring_test = &r600_dma_ring_test, .ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test, .ib_test = &r600_dma_ib_test,
.is_lockup = &evergreen_dma_is_lockup, .is_lockup = &evergreen_dma_is_lockup,
},
[R600_RING_TYPE_UVD_INDEX] = {
.ib_execute = &r600_uvd_ib_execute,
.emit_fence = &r600_uvd_fence_emit,
.emit_semaphore = &r600_uvd_semaphore_emit,
.cs_parse = &radeon_uvd_cs_parse,
.ring_test = &r600_uvd_ring_test,
.ib_test = &r600_uvd_ib_test,
.is_lockup = &radeon_ring_test_lockup,
} }
}, },
.irq = { .irq = {
@ -1517,6 +1553,15 @@ static struct radeon_asic cayman_asic = {
.ib_test = &r600_dma_ib_test, .ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup, .is_lockup = &cayman_dma_is_lockup,
.vm_flush = &cayman_dma_vm_flush, .vm_flush = &cayman_dma_vm_flush,
},
[R600_RING_TYPE_UVD_INDEX] = {
.ib_execute = &r600_uvd_ib_execute,
.emit_fence = &r600_uvd_fence_emit,
.emit_semaphore = &cayman_uvd_semaphore_emit,
.cs_parse = &radeon_uvd_cs_parse,
.ring_test = &r600_uvd_ring_test,
.ib_test = &r600_uvd_ib_test,
.is_lockup = &radeon_ring_test_lockup,
} }
}, },
.irq = { .irq = {
@ -1646,6 +1691,15 @@ static struct radeon_asic trinity_asic = {
.ib_test = &r600_dma_ib_test, .ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup, .is_lockup = &cayman_dma_is_lockup,
.vm_flush = &cayman_dma_vm_flush, .vm_flush = &cayman_dma_vm_flush,
},
[R600_RING_TYPE_UVD_INDEX] = {
.ib_execute = &r600_uvd_ib_execute,
.emit_fence = &r600_uvd_fence_emit,
.emit_semaphore = &cayman_uvd_semaphore_emit,
.cs_parse = &radeon_uvd_cs_parse,
.ring_test = &r600_uvd_ring_test,
.ib_test = &r600_uvd_ib_test,
.is_lockup = &radeon_ring_test_lockup,
} }
}, },
.irq = { .irq = {
@ -1775,6 +1829,15 @@ static struct radeon_asic si_asic = {
.ib_test = &r600_dma_ib_test, .ib_test = &r600_dma_ib_test,
.is_lockup = &si_dma_is_lockup, .is_lockup = &si_dma_is_lockup,
.vm_flush = &si_dma_vm_flush, .vm_flush = &si_dma_vm_flush,
},
[R600_RING_TYPE_UVD_INDEX] = {
.ib_execute = &r600_uvd_ib_execute,
.emit_fence = &r600_uvd_fence_emit,
.emit_semaphore = &cayman_uvd_semaphore_emit,
.cs_parse = &radeon_uvd_cs_parse,
.ring_test = &r600_uvd_ring_test,
.ib_test = &r600_uvd_ib_test,
.is_lockup = &radeon_ring_test_lockup,
} }
}, },
.irq = { .irq = {

View File

@ -330,6 +330,7 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
int r600_copy_blit(struct radeon_device *rdev, int r600_copy_blit(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset, uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages, struct radeon_fence **fence); unsigned num_gpu_pages, struct radeon_fence **fence);
@ -392,6 +393,19 @@ int r600_mc_wait_for_idle(struct radeon_device *rdev);
u32 r600_get_xclk(struct radeon_device *rdev); u32 r600_get_xclk(struct radeon_device *rdev);
uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev); uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev);
/* uvd */
int r600_uvd_init(struct radeon_device *rdev);
int r600_uvd_rbc_start(struct radeon_device *rdev);
void r600_uvd_rbc_stop(struct radeon_device *rdev);
int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
void r600_uvd_fence_emit(struct radeon_device *rdev,
struct radeon_fence *fence);
void r600_uvd_semaphore_emit(struct radeon_device *rdev,
struct radeon_ring *ring,
struct radeon_semaphore *semaphore,
bool emit_wait);
void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
/* /*
* rv770,rv730,rv710,rv740 * rv770,rv730,rv710,rv740
*/ */
@ -409,6 +423,7 @@ int rv770_copy_dma(struct radeon_device *rdev,
unsigned num_gpu_pages, unsigned num_gpu_pages,
struct radeon_fence **fence); struct radeon_fence **fence);
u32 rv770_get_xclk(struct radeon_device *rdev); u32 rv770_get_xclk(struct radeon_device *rdev);
int rv770_uvd_resume(struct radeon_device *rdev);
/* /*
* evergreen * evergreen
@ -465,6 +480,10 @@ int evergreen_copy_dma(struct radeon_device *rdev,
*/ */
void cayman_fence_ring_emit(struct radeon_device *rdev, void cayman_fence_ring_emit(struct radeon_device *rdev,
struct radeon_fence *fence); struct radeon_fence *fence);
void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
struct radeon_ring *ring,
struct radeon_semaphore *semaphore,
bool emit_wait);
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev); void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev);
int cayman_init(struct radeon_device *rdev); int cayman_init(struct radeon_device *rdev);
void cayman_fini(struct radeon_device *rdev); void cayman_fini(struct radeon_device *rdev);

View File

@ -53,7 +53,6 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
} }
for (i = 0; i < p->nrelocs; i++) { for (i = 0; i < p->nrelocs; i++) {
struct drm_radeon_cs_reloc *r; struct drm_radeon_cs_reloc *r;
uint32_t domain;
duplicate = false; duplicate = false;
r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4]; r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
@ -81,11 +80,25 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
p->relocs[i].lobj.bo = p->relocs[i].robj; p->relocs[i].lobj.bo = p->relocs[i].robj;
p->relocs[i].lobj.written = !!r->write_domain; p->relocs[i].lobj.written = !!r->write_domain;
domain = r->write_domain ? r->write_domain : r->read_domains; /* the first reloc of an UVD job is the
p->relocs[i].lobj.domain = domain; msg and that must be in VRAM */
if (domain == RADEON_GEM_DOMAIN_VRAM) if (p->ring == R600_RING_TYPE_UVD_INDEX && i == 0) {
domain |= RADEON_GEM_DOMAIN_GTT; /* TODO: is this still needed for NI+ ? */
p->relocs[i].lobj.alt_domain = domain; p->relocs[i].lobj.domain =
RADEON_GEM_DOMAIN_VRAM;
p->relocs[i].lobj.alt_domain =
RADEON_GEM_DOMAIN_VRAM;
} else {
uint32_t domain = r->write_domain ?
r->write_domain : r->read_domains;
p->relocs[i].lobj.domain = domain;
if (domain == RADEON_GEM_DOMAIN_VRAM)
domain |= RADEON_GEM_DOMAIN_GTT;
p->relocs[i].lobj.alt_domain = domain;
}
p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo; p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
p->relocs[i].handle = r->handle; p->relocs[i].handle = r->handle;
@ -93,7 +106,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
radeon_bo_list_add_object(&p->relocs[i].lobj, radeon_bo_list_add_object(&p->relocs[i].lobj,
&p->validated); &p->validated);
} }
return radeon_bo_list_validate(&p->validated); return radeon_bo_list_validate(&p->validated, p->ring);
} }
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority) static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
@ -128,6 +141,9 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
return -EINVAL; return -EINVAL;
} }
break; break;
case RADEON_CS_RING_UVD:
p->ring = R600_RING_TYPE_UVD_INDEX;
break;
} }
return 0; return 0;
} }

View File

@ -31,9 +31,9 @@
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/list.h>
#include <linux/kref.h> #include <linux/kref.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/firmware.h>
#include <drm/drmP.h> #include <drm/drmP.h>
#include "radeon_reg.h" #include "radeon_reg.h"
#include "radeon.h" #include "radeon.h"
@ -767,8 +767,21 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) { if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
rdev->fence_drv[ring].scratch_reg = 0; if (ring != R600_RING_TYPE_UVD_INDEX) {
index = R600_WB_EVENT_OFFSET + ring * 4; rdev->fence_drv[ring].scratch_reg = 0;
index = R600_WB_EVENT_OFFSET + ring * 4;
rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
index;
} else {
/* put fence directly behind firmware */
rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr +
rdev->uvd_fw->size;
rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr +
rdev->uvd_fw->size;
}
} else { } else {
r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg); r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
if (r) { if (r) {
@ -778,9 +791,9 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
index = RADEON_WB_SCRATCH_OFFSET + index = RADEON_WB_SCRATCH_OFFSET +
rdev->fence_drv[ring].scratch_reg - rdev->fence_drv[ring].scratch_reg -
rdev->scratch.reg_base; rdev->scratch.reg_base;
rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
} }
rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring); radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
rdev->fence_drv[ring].initialized = true; rdev->fence_drv[ring].initialized = true;
dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n", dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",

View File

@ -516,6 +516,7 @@ void radeon_driver_preclose_kms(struct drm_device *dev,
rdev->hyperz_filp = NULL; rdev->hyperz_filp = NULL;
if (rdev->cmask_filp == file_priv) if (rdev->cmask_filp == file_priv)
rdev->cmask_filp = NULL; rdev->cmask_filp = NULL;
radeon_uvd_free_handles(rdev, file_priv);
} }
/* /*

View File

@ -348,7 +348,7 @@ void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
} }
} }
int radeon_bo_list_validate(struct list_head *head) int radeon_bo_list_validate(struct list_head *head, int ring)
{ {
struct radeon_bo_list *lobj; struct radeon_bo_list *lobj;
struct radeon_bo *bo; struct radeon_bo *bo;
@ -366,6 +366,8 @@ int radeon_bo_list_validate(struct list_head *head)
retry: retry:
radeon_ttm_placement_from_domain(bo, domain); radeon_ttm_placement_from_domain(bo, domain);
if (ring == R600_RING_TYPE_UVD_INDEX)
radeon_uvd_force_into_uvd_segment(bo);
r = ttm_bo_validate(&bo->tbo, &bo->placement, r = ttm_bo_validate(&bo->tbo, &bo->placement,
true, false); true, false);
if (unlikely(r)) { if (unlikely(r)) {

View File

@ -128,7 +128,7 @@ extern int radeon_bo_init(struct radeon_device *rdev);
extern void radeon_bo_fini(struct radeon_device *rdev); extern void radeon_bo_fini(struct radeon_device *rdev);
extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj, extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
struct list_head *head); struct list_head *head);
extern int radeon_bo_list_validate(struct list_head *head); extern int radeon_bo_list_validate(struct list_head *head, int ring);
extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
struct vm_area_struct *vma); struct vm_area_struct *vma);
extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo, extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo,

View File

@ -368,7 +368,7 @@ void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring)
{ {
u32 rptr; u32 rptr;
if (rdev->wb.enabled) if (rdev->wb.enabled && ring != &rdev->ring[R600_RING_TYPE_UVD_INDEX])
rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
else else
rptr = RREG32(ring->rptr_reg); rptr = RREG32(ring->rptr_reg);
@ -821,18 +821,20 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data)
return 0; return 0;
} }
static int radeon_ring_type_gfx_index = RADEON_RING_TYPE_GFX_INDEX; static int radeon_gfx_index = RADEON_RING_TYPE_GFX_INDEX;
static int cayman_ring_type_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX; static int cayman_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX;
static int cayman_ring_type_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX; static int cayman_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX;
static int radeon_ring_type_dma1_index = R600_RING_TYPE_DMA_INDEX; static int radeon_dma1_index = R600_RING_TYPE_DMA_INDEX;
static int radeon_ring_type_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX; static int radeon_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX;
static int r600_uvd_index = R600_RING_TYPE_UVD_INDEX;
static struct drm_info_list radeon_debugfs_ring_info_list[] = { static struct drm_info_list radeon_debugfs_ring_info_list[] = {
{"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_ring_type_gfx_index}, {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index},
{"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp1_index}, {"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_cp1_index},
{"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp2_index}, {"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_cp2_index},
{"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma1_index}, {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index},
{"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma2_index}, {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index},
{"radeon_ring_uvd", radeon_debugfs_ring_info, 0, &r600_uvd_index},
}; };
static int radeon_debugfs_sa_info(struct seq_file *m, void *data) static int radeon_debugfs_sa_info(struct seq_file *m, void *data)

View File

@ -252,6 +252,36 @@ void radeon_test_moves(struct radeon_device *rdev)
radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT); radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT);
} }
/*
 * radeon_test_create_and_emit_fence - put a fence on @ring for the sync tests
 *
 * @rdev:  radeon device
 * @ring:  ring to emit the fence on
 * @fence: receives the emitted fence
 *
 * On the UVD ring the fence is produced by submitting a dummy create
 * message followed by a dummy destroy message (handle 1); the fence of the
 * destroy message is handed back.  On every other ring the ring is locked,
 * a plain fence is emitted and the ring is committed.
 *
 * Returns 0 on success or the error code of the step that failed.  If
 * emitting the fence fails the ring lock is undone instead of committed.
 */
static int radeon_test_create_and_emit_fence(struct radeon_device *rdev,
					     struct radeon_ring *ring,
					     struct radeon_fence **fence)
{
	int r;

	if (ring->idx == R600_RING_TYPE_UVD_INDEX) {
		r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
		if (r) {
			DRM_ERROR("Failed to get dummy create msg\n");
			return r;
		}

		r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, fence);
		if (r) {
			DRM_ERROR("Failed to get dummy destroy msg\n");
			return r;
		}
		return 0;
	}

	r = radeon_ring_lock(rdev, ring, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring A %d\n", ring->idx);
		return r;
	}

	/* do not commit a ring whose fence could not be emitted */
	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		DRM_ERROR("Failed to emit fence on ring %d\n", ring->idx);
		radeon_ring_unlock_undo(rdev, ring);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring);
	return 0;
}
void radeon_test_ring_sync(struct radeon_device *rdev, void radeon_test_ring_sync(struct radeon_device *rdev,
struct radeon_ring *ringA, struct radeon_ring *ringA,
struct radeon_ring *ringB) struct radeon_ring *ringB)
@ -272,21 +302,24 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
goto out_cleanup; goto out_cleanup;
} }
radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
r = radeon_fence_emit(rdev, &fence1, ringA->idx); radeon_ring_unlock_commit(rdev, ringA);
r = radeon_test_create_and_emit_fence(rdev, ringA, &fence1);
if (r)
goto out_cleanup;
r = radeon_ring_lock(rdev, ringA, 64);
if (r) { if (r) {
DRM_ERROR("Failed to emit fence 1\n"); DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
radeon_ring_unlock_undo(rdev, ringA);
goto out_cleanup; goto out_cleanup;
} }
radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
r = radeon_fence_emit(rdev, &fence2, ringA->idx);
if (r) {
DRM_ERROR("Failed to emit fence 2\n");
radeon_ring_unlock_undo(rdev, ringA);
goto out_cleanup;
}
radeon_ring_unlock_commit(rdev, ringA); radeon_ring_unlock_commit(rdev, ringA);
r = radeon_test_create_and_emit_fence(rdev, ringA, &fence2);
if (r)
goto out_cleanup;
mdelay(1000); mdelay(1000);
if (radeon_fence_signaled(fence1)) { if (radeon_fence_signaled(fence1)) {
@ -364,27 +397,22 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev,
goto out_cleanup; goto out_cleanup;
} }
radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
r = radeon_fence_emit(rdev, &fenceA, ringA->idx);
if (r) {
DRM_ERROR("Failed to emit sync fence 1\n");
radeon_ring_unlock_undo(rdev, ringA);
goto out_cleanup;
}
radeon_ring_unlock_commit(rdev, ringA); radeon_ring_unlock_commit(rdev, ringA);
r = radeon_test_create_and_emit_fence(rdev, ringA, &fenceA);
if (r)
goto out_cleanup;
r = radeon_ring_lock(rdev, ringB, 64); r = radeon_ring_lock(rdev, ringB, 64);
if (r) { if (r) {
DRM_ERROR("Failed to lock ring B %d\n", ringB->idx); DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
goto out_cleanup; goto out_cleanup;
} }
radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore); radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore);
r = radeon_fence_emit(rdev, &fenceB, ringB->idx);
if (r) {
DRM_ERROR("Failed to create sync fence 2\n");
radeon_ring_unlock_undo(rdev, ringB);
goto out_cleanup;
}
radeon_ring_unlock_commit(rdev, ringB); radeon_ring_unlock_commit(rdev, ringB);
r = radeon_test_create_and_emit_fence(rdev, ringB, &fenceB);
if (r)
goto out_cleanup;
mdelay(1000); mdelay(1000);
@ -393,7 +421,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev,
goto out_cleanup; goto out_cleanup;
} }
if (radeon_fence_signaled(fenceB)) { if (radeon_fence_signaled(fenceB)) {
DRM_ERROR("Fence A signaled without waiting for semaphore.\n"); DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
goto out_cleanup; goto out_cleanup;
} }

View File

@ -0,0 +1,664 @@
/*
* Copyright 2011 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
*/
/*
* Authors:
* Christian König <deathsimple@vodafone.de>
*/
#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include <drm/drm.h>
#include "radeon.h"
#include "r600d.h"
/* Firmware Names */
#define FIRMWARE_RV710 "radeon/RV710_uvd.bin"
#define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin"
#define FIRMWARE_SUMO "radeon/SUMO_uvd.bin"
#define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin"
MODULE_FIRMWARE(FIRMWARE_RV710);
MODULE_FIRMWARE(FIRMWARE_CYPRESS);
MODULE_FIRMWARE(FIRMWARE_SUMO);
MODULE_FIRMWARE(FIRMWARE_TAHITI);
/*
 * radeon_uvd_init - load the UVD firmware and set up the VCPU buffer
 *
 * @rdev: radeon device
 *
 * Registers a temporary platform device to request the firmware image that
 * matches the ASIC family, allocates a VRAM buffer large enough for the
 * firmware (plus 4 bytes), the stack and the heap, copies the firmware to
 * the start of the zeroed buffer and clears the handle bookkeeping.
 *
 * The buffer is pinned and mapped only while the firmware is copied; it is
 * left unpinned/unmapped again on return.
 *
 * Returns 0 on success, -EINVAL for an unsupported family or if the
 * platform device cannot be registered, or the error of the failing step.
 */
int radeon_uvd_init(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	unsigned long bo_size;
	const char *fw_name;
	int i, r;

	pdev = platform_device_register_simple("radeon_uvd", 0, NULL, 0);
	r = IS_ERR(pdev);
	if (r) {
		dev_err(rdev->dev, "radeon_uvd: Failed to register firmware\n");
		return -EINVAL;
	}

	switch (rdev->family) {
	case CHIP_RV710:
	case CHIP_RV730:
	case CHIP_RV740:
		fw_name = FIRMWARE_RV710;
		break;

	case CHIP_CYPRESS:
	case CHIP_HEMLOCK:
	case CHIP_JUNIPER:
	case CHIP_REDWOOD:
	case CHIP_CEDAR:
		fw_name = FIRMWARE_CYPRESS;
		break;

	case CHIP_SUMO:
	case CHIP_SUMO2:
	case CHIP_PALM:
	case CHIP_CAYMAN:
	case CHIP_BARTS:
	case CHIP_TURKS:
	case CHIP_CAICOS:
		fw_name = FIRMWARE_SUMO;
		break;

	case CHIP_TAHITI:
	case CHIP_VERDE:
	case CHIP_PITCAIRN:
	case CHIP_ARUBA:
		fw_name = FIRMWARE_TAHITI;
		break;

	default:
		/* the helper device must not outlive this function */
		platform_device_unregister(pdev);
		return -EINVAL;
	}

	r = request_firmware(&rdev->uvd_fw, fw_name, &pdev->dev);
	if (r) {
		dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
			fw_name);
		platform_device_unregister(pdev);
		return r;
	}

	/* the device was only needed as a handle for request_firmware() */
	platform_device_unregister(pdev);

	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) +
		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE;
	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
		return r;
	}

	/* pin + map the buffer so the firmware image can be copied in */
	r = radeon_uvd_resume(rdev);
	if (r)
		return r;

	memset(rdev->uvd.cpu_addr, 0, bo_size);
	memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);

	/* drop the mapping and pin again until the engine is started */
	r = radeon_uvd_suspend(rdev);
	if (r)
		return r;

	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		atomic_set(&rdev->uvd.handles[i], 0);
		rdev->uvd.filp[i] = NULL;
	}

	return 0;
}
/*
 * radeon_uvd_fini - release the UVD VCPU buffer
 *
 * @rdev: radeon device
 *
 * Unmaps and unpins the VCPU buffer through radeon_uvd_suspend() (its
 * return value is ignored here) and then drops the buffer reference.
 *
 * NOTE(review): rdev->uvd_fw is not released in this function; confirm it
 * is released elsewhere.
 */
void radeon_uvd_fini(struct radeon_device *rdev)
{
/* best effort: a failed reserve inside suspend is not reported */
radeon_uvd_suspend(rdev);
radeon_bo_unref(&rdev->uvd.vcpu_bo);
}
/*
 * radeon_uvd_suspend - unmap and unpin the UVD VCPU buffer
 *
 * @rdev: radeon device
 *
 * Does nothing (and reports success) when no VCPU buffer exists.
 * Otherwise the buffer is reserved, its kernel mapping and pin are
 * dropped and the reservation is released again.
 *
 * Returns 0 on success or the error from reserving the buffer.
 */
int radeon_uvd_suspend(struct radeon_device *rdev)
{
	struct radeon_bo *vcpu_bo = rdev->uvd.vcpu_bo;
	int ret;

	if (!vcpu_bo)
		return 0;

	ret = radeon_bo_reserve(vcpu_bo, false);
	if (ret)
		return ret;

	radeon_bo_kunmap(vcpu_bo);
	radeon_bo_unpin(vcpu_bo);
	radeon_bo_unreserve(vcpu_bo);

	return 0;
}
/*
 * radeon_uvd_resume - pin the UVD VCPU buffer into VRAM and map it
 *
 * @rdev: radeon device
 *
 * On success rdev->uvd.gpu_addr holds the pinned GPU address and
 * rdev->uvd.cpu_addr the kernel mapping of the buffer.
 *
 * Every failure path leaves the buffer unreserved and drops the buffer
 * reference, so the three error cases behave the same way.
 *
 * Returns 0 on success, -EINVAL when no VCPU buffer exists, or the error
 * of the failing reserve/pin/map step.
 */
int radeon_uvd_resume(struct radeon_device *rdev)
{
	int r;

	if (rdev->uvd.vcpu_bo == NULL)
		return -EINVAL;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (r) {
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
		return r;
	}

	r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->uvd.gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
		return r;
	}

	r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
	if (r) {
		/* undo the pin and never return with the bo still reserved */
		radeon_bo_unpin(rdev->uvd.vcpu_bo);
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) UVD map failed\n", r);
		return r;
	}

	radeon_bo_unreserve(rdev->uvd.vcpu_bo);

	return 0;
}
/*
 * radeon_uvd_force_into_uvd_segment - limit a BO's placement range
 *
 * @rbo: buffer object whose placement is restricted
 *
 * Sets the allowed page frame range of the placement to start at page 0
 * and end at the page number that corresponds to 256MB.
 */
void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo)
{
	const unsigned long segment_bytes = 256 * 1024 * 1024;

	rbo->placement.fpfn = 0;
	rbo->placement.lpfn = segment_bytes >> PAGE_SHIFT;
}
/*
 * radeon_uvd_free_handles - destroy all UVD sessions owned by a file
 *
 * @rdev: radeon device
 * @filp: drm file that is going away
 *
 * For every handle slot that is in use and was registered by @filp a
 * destroy message is submitted on the UVD ring, the resulting fence is
 * waited for and the slot is cleared.  If submitting the destroy message
 * fails, the error is logged and the slot is left untouched.
 *
 * Slots whose handle is 0 are skipped: a destroy message coming through
 * the command stream clears the handle but not the recorded file, so a
 * matching file pointer alone does not mean the slot is still in use.
 */
void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
{
	int i, r;

	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
		struct radeon_fence *fence;

		/* free slot (possibly with a stale filp) or someone else's */
		if (handle == 0 || rdev->uvd.filp[i] != filp)
			continue;

		r = radeon_uvd_get_destroy_msg(rdev,
			R600_RING_TYPE_UVD_INDEX, handle, &fence);
		if (r) {
			DRM_ERROR("Error destroying UVD (%d)!\n", r);
			continue;
		}

		radeon_fence_wait(fence, false);
		radeon_fence_unref(&fence);

		rdev->uvd.filp[i] = NULL;
		atomic_set(&rdev->uvd.handles[i], 0);
	}
}
/*
 * radeon_uvd_cs_msg_decode - validate a decode message, derive buffer sizes
 *
 * @msg:       the decode message (dwords 4, 6, 7, 9 and 28 are read)
 * @buf_sizes: minimum buffer sizes; entries 1 and 2 are updated on success
 *
 * Computes the minimum DPB size for the stream type and picture
 * dimensions found in the message and rejects the message when the stream
 * type is unknown, the width exceeds the pitch, or the DPB size given in
 * the message is smaller than the computed minimum.
 *
 * Returns 0 on success, -EINVAL otherwise.
 */
static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
{
	unsigned codec = msg[4];
	unsigned pic_w = msg[6];
	unsigned pic_h = msg[7];
	unsigned dpb_size = msg[9];
	unsigned pitch = msg[28];

	unsigned mb_cols = pic_w / 16;
	unsigned mb_rows = ALIGN(pic_h / 16, 2);
	unsigned mb_total = mb_cols * mb_rows;

	unsigned frame_bytes, longest, needed;

	/* one and a half bytes per pixel, rounded up to 1KB */
	frame_bytes = pic_w * pic_h;
	frame_bytes = ALIGN(frame_bytes + frame_bytes / 2, 1024);

	if (codec == 0) {
		/* H264: reference pictures, macroblock context, IT surface */
		needed = frame_bytes * 17;
		needed += mb_total * 17 * 192;
		needed += mb_total * 32;
	} else if (codec == 1) {
		/* VC1: reference pictures, context, IT surface, DB surface, BP */
		needed = frame_bytes * 3;
		needed += mb_total * 128;
		needed += mb_cols * 64;
		needed += mb_cols * 128;
		longest = max(mb_cols, mb_rows);
		needed += ALIGN(longest * 7 * 16, 64);
	} else if (codec == 3) {
		/* MPEG2: reference pictures only */
		needed = frame_bytes * 3;
	} else if (codec == 4) {
		/* MPEG4: reference pictures, CM, IT surface */
		needed = frame_bytes * 3;
		needed += mb_total * 64;
		needed += ALIGN(mb_total * 32, 64);
	} else {
		DRM_ERROR("UVD codec not handled %d!\n", codec);
		return -EINVAL;
	}

	if (pic_w > pitch) {
		DRM_ERROR("Invalid UVD decoding target pitch!\n");
		return -EINVAL;
	}

	if (dpb_size < needed) {
		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
			  dpb_size, needed);
		return -EINVAL;
	}

	buf_sizes[0x1] = dpb_size;
	buf_sizes[0x2] = frame_bytes;
	return 0;
}
/*
 * radeon_uvd_cs_msg - inspect an UVD message and track its session handle
 *
 * @p:         command stream parser
 * @bo:        buffer object holding the message
 * @offset:    byte offset of the message inside @bo, must be 64 byte aligned
 * @buf_sizes: minimum buffer sizes, updated for decode messages
 *
 * The message is mapped, its type (dword 1) and handle (dword 2) are read
 * and the mapping is dropped again on every path:
 *  - type 1 (decode): buffer sizes are validated/derived from the message
 *  - type 2 (destroy): the handle is removed from the handle table
 *  - anything else is treated as a create message
 * For create and decode messages the handle must already be known or a
 * free slot is claimed for it and the submitting file is recorded.
 *
 * Returns 0 on success, -EINVAL for a misaligned offset, a zero handle or
 * when no free slot is left, or the error of the mapping/decoding step.
 */
static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
			     unsigned offset, unsigned buf_sizes[])
{
	int32_t *msg, msg_type, handle;
	void *ptr;
	int i, r;

	if (offset & 0x3F) {
		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
		return -EINVAL;
	}

	r = radeon_bo_kmap(bo, &ptr);
	if (r)
		return r;

	msg = ptr + offset;

	msg_type = msg[1];
	handle = msg[2];

	if (handle == 0) {
		/* do not leave the message mapped when rejecting it */
		radeon_bo_kunmap(bo);
		DRM_ERROR("Invalid UVD handle!\n");
		return -EINVAL;
	}

	if (msg_type == 1) {
		/* it's a decode msg, calc buffer sizes */
		r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
		radeon_bo_kunmap(bo);
		if (r)
			return r;

	} else if (msg_type == 2) {
		/* it's a destroy msg, free the handle */
		for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
			atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
		radeon_bo_kunmap(bo);
		return 0;
	} else {
		/* it's a create msg, no special handling needed */
		radeon_bo_kunmap(bo);
	}

	/* create or decode, validate the handle */
	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		if (atomic_read(&p->rdev->uvd.handles[i]) == handle)
			return 0;
	}

	/* handle not found try to alloc a new one */
	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
			p->rdev->uvd.filp[i] = p->filp;
			return 0;
		}
	}

	DRM_ERROR("No more free UVD handles!\n");
	return -EINVAL;
}
/*
 * radeon_uvd_cs_reloc - patch and validate one UVD buffer relocation
 *
 * @p:         command stream parser
 * @data0:     IB index of the dword holding the buffer offset
 * @data1:     IB index of the dword holding the relocation index
 * @buf_sizes: minimum buffer size per command (commands 0-3)
 *
 * Replaces the two IB dwords with the low and high half of the final GPU
 * address, then checks that the addressed range is large enough for the
 * command, that it does not cross a 256MB boundary and, for message
 * buffers (command 0), that it lies in the first 256MB segment; message
 * buffers are additionally parsed by radeon_uvd_cs_msg().
 *
 * "end" is exclusive, so the segment checks are done on the last byte
 * (end - 1); a buffer ending exactly on a 256MB boundary is valid.
 *
 * Returns 0 on success, -EINVAL on any validation failure or the error
 * returned by radeon_uvd_cs_msg().
 */
static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
			       int data0, int data1,
			       unsigned buf_sizes[])
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_reloc *reloc;
	unsigned idx, cmd, offset;
	uint64_t start, end;
	int r;

	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	offset = radeon_get_ib_value(p, data0);
	idx = radeon_get_ib_value(p, data1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		return -EINVAL;
	}

	reloc = p->relocs_ptr[(idx / 4)];
	start = reloc->lobj.gpu_offset;
	end = start + radeon_bo_size(reloc->robj);
	start += offset;

	/* an offset past the buffer would make (end - start) wrap around */
	if (start > end) {
		DRM_ERROR("reloc offset 0x%X beyond end of buffer!\n", offset);
		return -EINVAL;
	}

	p->ib.ptr[data0] = start & 0xFFFFFFFF;
	p->ib.ptr[data1] = start >> 32;

	cmd = radeon_get_ib_value(p, p->idx) >> 1;

	if (cmd < 0x4) {
		if ((end - start) < buf_sizes[cmd]) {
			DRM_ERROR("buffer to small (%d / %d)!\n",
				  (unsigned)(end - start), buf_sizes[cmd]);
			return -EINVAL;
		}

	} else if (cmd != 0x100) {
		DRM_ERROR("invalid UVD command %X!\n", cmd);
		return -EINVAL;
	}

	if (cmd == 0) {
		if ((end - 1) & 0xFFFFFFFFF0000000) {
			DRM_ERROR("msg buffer %LX-%LX out of 256MB segment!\n",
				  start, end);
			return -EINVAL;
		}

		r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
		if (r)
			return r;
	}

	if ((start & 0xFFFFFFFFF0000000) != ((end - 1) & 0xFFFFFFFFF0000000)) {
		DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
			  start, end);
		return -EINVAL;
	}

	return 0;
}
/*
 * radeon_uvd_cs_reg - validate the register writes of one type 0 packet
 *
 * @p:         command stream parser, p->idx points at the packet header
 * @pkt:       the decoded packet
 * @data0:     updated with the IB index of the last DATA0 write
 * @data1:     updated with the IB index of the last DATA1 write
 * @buf_sizes: minimum buffer sizes, passed on to the relocation check
 *
 * Walks the pkt->count + 1 consecutive registers written by the packet.
 * Only the VCPU DATA0/DATA1/CMD registers and UVD_ENGINE_CNTL are
 * accepted; a CMD write triggers the relocation handling for the most
 * recently recorded DATA0/DATA1 positions.
 *
 * Returns 0 on success, -EINVAL for a forbidden register, or the error of
 * the relocation handling.
 */
static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
			     struct radeon_cs_packet *pkt,
			     int *data0, int *data1,
			     unsigned buf_sizes[])
{
	int n = 0;
	int ret;

	/* step over the packet header */
	p->idx++;

	while (n <= pkt->count) {
		unsigned reg = pkt->reg + n * 4;

		if (reg == UVD_GPCOM_VCPU_DATA0) {
			*data0 = p->idx;
		} else if (reg == UVD_GPCOM_VCPU_DATA1) {
			*data1 = p->idx;
		} else if (reg == UVD_GPCOM_VCPU_CMD) {
			ret = radeon_uvd_cs_reloc(p, *data0, *data1, buf_sizes);
			if (ret)
				return ret;
		} else if (reg != UVD_ENGINE_CNTL) {
			DRM_ERROR("Invalid reg 0x%X!\n", reg);
			return -EINVAL;
		}

		/* advance to the next payload dword */
		p->idx++;
		n++;
	}

	return 0;
}
int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
{
struct radeon_cs_packet pkt;
int r, data0 = 0, data1 = 0;
/* minimum buffer sizes */
unsigned buf_sizes[] = {
[0x00000000] = 2048,
[0x00000001] = 32 * 1024 * 1024,
[0x00000002] = 2048 * 1152 * 3,
[0x00000003] = 2048,
};
if (p->chunks[p->chunk_ib_idx].length_dw % 16) {
DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
p->chunks[p->chunk_ib_idx].length_dw);
return -EINVAL;
}
if (p->chunk_relocs_idx == -1) {
DRM_ERROR("No relocation chunk !\n");
return -EINVAL;
}
do {
r = radeon_cs_packet_parse(p, &pkt, p->idx);
if (r)
return r;
switch (pkt.type) {
case RADEON_PACKET_TYPE0:
r = radeon_uvd_cs_reg(p, &pkt, &data0,
&data1, buf_sizes);
if (r)
return r;
break;
case RADEON_PACKET_TYPE2:
p->idx += pkt.count + 2;
break;
default:
DRM_ERROR("Unknown packet type %d !\n", pkt.type);
return -EINVAL;
}
} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
return 0;
}
/*
 * radeon_uvd_send_msg - submit a message buffer to the UVD ring
 *
 * @rdev:  radeon device
 * @ring:  index of the ring to submit on
 * @bo:    buffer object holding the message
 * @fence: if not NULL, receives a reference to the fence of the submission
 *
 * Reserves @bo, validates it into VRAM inside the UVD segment and
 * schedules a 16 dword IB that passes the buffer address to the VCPU via
 * DATA0/DATA1 followed by command 0; the rest of the IB is padding.
 *
 * The reference to @bo held by the caller is always consumed, on success
 * as well as on failure, because the callers hand the buffer over and do
 * not touch it afterwards.
 *
 * Returns 0 on success or the error of the failing step.
 */
static int radeon_uvd_send_msg(struct radeon_device *rdev,
			       int ring, struct radeon_bo *bo,
			       struct radeon_fence **fence)
{
	struct ttm_validate_buffer tv;
	struct list_head head;
	struct radeon_ib ib;
	uint64_t addr;
	int i, r;

	memset(&tv, 0, sizeof(tv));
	tv.bo = &bo->tbo;

	INIT_LIST_HEAD(&head);
	list_add(&tv.head, &head);

	r = ttm_eu_reserve_buffers(&head);
	if (r)
		goto err_unref;

	radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM);
	radeon_uvd_force_into_uvd_segment(bo);

	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
	if (r)
		goto err_backoff;

	r = radeon_ib_get(rdev, ring, &ib, NULL, 16);
	if (r)
		goto err_backoff;

	addr = radeon_bo_gpu_offset(bo);
	ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
	ib.ptr[1] = addr;
	ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
	ib.ptr[3] = addr >> 32;
	ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
	ib.ptr[5] = 0;
	/* pad the IB up to its full 16 dwords */
	for (i = 6; i < 16; ++i)
		ib.ptr[i] = PACKET2(0);
	ib.length_dw = 16;

	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r)
		goto err_free_ib;

	ttm_eu_fence_buffer_objects(&head, ib.fence);

	if (fence)
		*fence = radeon_fence_ref(ib.fence);

	radeon_ib_free(rdev, &ib);
	radeon_bo_unref(&bo);
	return 0;

err_free_ib:
	/* the IB was obtained but never made it to the ring */
	radeon_ib_free(rdev, &ib);
err_backoff:
	ttm_eu_backoff_reservation(&head);
err_unref:
	/* the message buffer is ours to release on failure as well */
	radeon_bo_unref(&bo);
	return r;
}
/* multiple fence commands without any stream commands in between can
crash the vcpu so just try to emit a dummy create/destroy msg to
avoid this */
/*
 * radeon_uvd_get_create_msg - build and submit a dummy UVD create message
 *
 * @rdev:   radeon device
 * @ring:   index of the ring to submit on
 * @handle: session handle stored in the message
 * @fence:  if not NULL, receives the fence of the submission
 *
 * Allocates a VRAM buffer, fills it with a create message (type 0) for
 * @handle, zero-fills the remainder and hands the buffer over to
 * radeon_uvd_send_msg().
 *
 * The message is written as 1024 dwords, so the buffer is requested with
 * that many bytes instead of relying on the allocator rounding a smaller
 * request up.
 *
 * Returns 0 on success or the error of the failing step.
 */
int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
			      uint32_t handle, struct radeon_fence **fence)
{
	const int msg_dw = 1024;
	struct radeon_bo *bo;
	uint32_t *msg;
	int r, i;

	r = radeon_bo_create(rdev, msg_dw * sizeof(*msg), PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, NULL, &bo);
	if (r)
		return r;

	r = radeon_bo_reserve(bo, false);
	if (r) {
		radeon_bo_unref(&bo);
		return r;
	}

	r = radeon_bo_kmap(bo, (void **)&msg);
	if (r) {
		radeon_bo_unreserve(bo);
		radeon_bo_unref(&bo);
		return r;
	}

	/* stitch together an UVD create msg */
	msg[0] = 0x00000de4;
	msg[1] = 0x00000000;
	msg[2] = handle;
	msg[3] = 0x00000000;
	msg[4] = 0x00000000;
	msg[5] = 0x00000000;
	msg[6] = 0x00000000;
	msg[7] = 0x00000780;
	msg[8] = 0x00000440;
	msg[9] = 0x00000000;
	msg[10] = 0x01b37000;
	for (i = 11; i < msg_dw; ++i)
		msg[i] = 0x0;

	radeon_bo_kunmap(bo);
	radeon_bo_unreserve(bo);

	return radeon_uvd_send_msg(rdev, ring, bo, fence);
}
/*
 * radeon_uvd_get_destroy_msg - build and submit an UVD destroy message
 *
 * @rdev:   radeon device
 * @ring:   index of the ring to submit on
 * @handle: session handle to destroy
 * @fence:  if not NULL, receives the fence of the submission
 *
 * Allocates a VRAM buffer, fills it with a destroy message (type 2) for
 * @handle, zero-fills the remainder and hands the buffer over to
 * radeon_uvd_send_msg().
 *
 * The message is written as 1024 dwords, so the buffer is requested with
 * that many bytes instead of relying on the allocator rounding a smaller
 * request up.
 *
 * Returns 0 on success or the error of the failing step.
 */
int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
			       uint32_t handle, struct radeon_fence **fence)
{
	const int msg_dw = 1024;
	struct radeon_bo *bo;
	uint32_t *msg;
	int r, i;

	r = radeon_bo_create(rdev, msg_dw * sizeof(*msg), PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, NULL, &bo);
	if (r)
		return r;

	r = radeon_bo_reserve(bo, false);
	if (r) {
		radeon_bo_unref(&bo);
		return r;
	}

	r = radeon_bo_kmap(bo, (void **)&msg);
	if (r) {
		radeon_bo_unreserve(bo);
		radeon_bo_unref(&bo);
		return r;
	}

	/* stitch together an UVD destroy msg */
	msg[0] = 0x00000de4;
	msg[1] = 0x00000002;
	msg[2] = handle;
	msg[3] = 0x00000000;
	for (i = 4; i < msg_dw; ++i)
		msg[i] = 0x0;

	radeon_bo_kunmap(bo);
	radeon_bo_unreserve(bo);

	return radeon_uvd_send_msg(rdev, ring, bo, fence);
}

View File

@ -68,6 +68,105 @@ u32 rv770_get_xclk(struct radeon_device *rdev)
return reference_clock; return reference_clock;
} }
int rv770_uvd_resume(struct radeon_device *rdev)
{
uint64_t addr;
uint32_t chip_id, size;
int r;
r = radeon_uvd_resume(rdev);
if (r)
return r;
/* programm the VCPU memory controller bits 0-27 */
addr = rdev->uvd.gpu_addr >> 3;
size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
WREG32(UVD_VCPU_CACHE_SIZE0, size);
addr += size;
size = RADEON_UVD_STACK_SIZE >> 3;
WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
WREG32(UVD_VCPU_CACHE_SIZE1, size);
addr += size;
size = RADEON_UVD_HEAP_SIZE >> 3;
WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
WREG32(UVD_VCPU_CACHE_SIZE2, size);
/* bits 28-31 */
addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
/* bits 32-39 */
addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
/* tell firmware which hardware it is running on */
switch (rdev->family) {
default:
return -EINVAL;
case CHIP_RV710:
chip_id = 0x01000005;
break;
case CHIP_RV730:
chip_id = 0x01000006;
break;
case CHIP_RV740:
chip_id = 0x01000007;
break;
case CHIP_CYPRESS:
case CHIP_HEMLOCK:
chip_id = 0x01000008;
break;
case CHIP_JUNIPER:
chip_id = 0x01000009;
break;
case CHIP_REDWOOD:
chip_id = 0x0100000a;
break;
case CHIP_CEDAR:
chip_id = 0x0100000b;
break;
case CHIP_SUMO:
chip_id = 0x0100000c;
break;
case CHIP_SUMO2:
chip_id = 0x0100000d;
break;
case CHIP_PALM:
chip_id = 0x0100000e;
break;
case CHIP_CAYMAN:
chip_id = 0x0100000f;
break;
case CHIP_BARTS:
chip_id = 0x01000010;
break;
case CHIP_TURKS:
chip_id = 0x01000011;
break;
case CHIP_CAICOS:
chip_id = 0x01000012;
break;
case CHIP_TAHITI:
chip_id = 0x01000014;
break;
case CHIP_VERDE:
chip_id = 0x01000015;
break;
case CHIP_PITCAIRN:
chip_id = 0x01000016;
break;
case CHIP_ARUBA:
chip_id = 0x01000017;
break;
}
WREG32(UVD_VCPU_CHIP_ID, chip_id);
return 0;
}
u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
{ {
struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
@ -1040,6 +1139,17 @@ static int rv770_startup(struct radeon_device *rdev)
return r; return r;
} }
r = rv770_uvd_resume(rdev);
if (!r) {
r = radeon_fence_driver_start_ring(rdev,
R600_RING_TYPE_UVD_INDEX);
if (r)
dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
}
if (r)
rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
/* Enable IRQ */ /* Enable IRQ */
r = r600_irq_init(rdev); r = r600_irq_init(rdev);
if (r) { if (r) {
@ -1074,6 +1184,19 @@ static int rv770_startup(struct radeon_device *rdev)
if (r) if (r)
return r; return r;
ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
if (ring->ring_size) {
r = radeon_ring_init(rdev, ring, ring->ring_size,
R600_WB_UVD_RPTR_OFFSET,
UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
0, 0xfffff, RADEON_CP_PACKET2);
if (!r)
r = r600_uvd_init(rdev);
if (r)
DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
}
r = radeon_ib_pool_init(rdev); r = radeon_ib_pool_init(rdev);
if (r) { if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r); dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@ -1115,6 +1238,7 @@ int rv770_resume(struct radeon_device *rdev)
int rv770_suspend(struct radeon_device *rdev) int rv770_suspend(struct radeon_device *rdev)
{ {
r600_audio_fini(rdev); r600_audio_fini(rdev);
radeon_uvd_suspend(rdev);
r700_cp_stop(rdev); r700_cp_stop(rdev);
r600_dma_stop(rdev); r600_dma_stop(rdev);
r600_irq_suspend(rdev); r600_irq_suspend(rdev);
@ -1190,6 +1314,13 @@ int rv770_init(struct radeon_device *rdev)
rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL; rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
r = radeon_uvd_init(rdev);
if (!r) {
rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX],
4096);
}
rdev->ih.ring_obj = NULL; rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024); r600_ih_ring_init(rdev, 64 * 1024);
@ -1224,6 +1355,7 @@ void rv770_fini(struct radeon_device *rdev)
radeon_ib_pool_fini(rdev); radeon_ib_pool_fini(rdev);
radeon_irq_kms_fini(rdev); radeon_irq_kms_fini(rdev);
rv770_pcie_gart_fini(rdev); rv770_pcie_gart_fini(rdev);
radeon_uvd_fini(rdev);
r600_vram_scratch_fini(rdev); r600_vram_scratch_fini(rdev);
radeon_gem_fini(rdev); radeon_gem_fini(rdev);
radeon_fence_driver_fini(rdev); radeon_fence_driver_fini(rdev);

View File

@ -671,4 +671,18 @@
# define TARGET_LINK_SPEED_MASK (0xf << 0) # define TARGET_LINK_SPEED_MASK (0xf << 0)
# define SELECTABLE_DEEMPHASIS (1 << 6) # define SELECTABLE_DEEMPHASIS (1 << 6)
/* UVD */
#define UVD_LMI_EXT40_ADDR 0xf498
#define UVD_VCPU_CHIP_ID 0xf4d4
#define UVD_VCPU_CACHE_OFFSET0 0xf4d8
#define UVD_VCPU_CACHE_SIZE0 0xf4dc
#define UVD_VCPU_CACHE_OFFSET1 0xf4e0
#define UVD_VCPU_CACHE_SIZE1 0xf4e4
#define UVD_VCPU_CACHE_OFFSET2 0xf4e8
#define UVD_VCPU_CACHE_SIZE2 0xf4ec
#define UVD_LMI_ADDR_EXT 0xf594
#define UVD_RBC_RB_RPTR 0xf690
#define UVD_RBC_RB_WPTR 0xf694
#endif #endif

View File

@ -4333,6 +4333,16 @@ static int si_startup(struct radeon_device *rdev)
return r; return r;
} }
r = rv770_uvd_resume(rdev);
if (!r) {
r = radeon_fence_driver_start_ring(rdev,
R600_RING_TYPE_UVD_INDEX);
if (r)
dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
}
if (r)
rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
/* Enable IRQ */ /* Enable IRQ */
r = si_irq_init(rdev); r = si_irq_init(rdev);
if (r) { if (r) {
@ -4390,6 +4400,18 @@ static int si_startup(struct radeon_device *rdev)
if (r) if (r)
return r; return r;
ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
if (ring->ring_size) {
r = radeon_ring_init(rdev, ring, ring->ring_size,
R600_WB_UVD_RPTR_OFFSET,
UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
0, 0xfffff, RADEON_CP_PACKET2);
if (!r)
r = r600_uvd_init(rdev);
if (r)
DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
}
r = radeon_ib_pool_init(rdev); r = radeon_ib_pool_init(rdev);
if (r) { if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r); dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@ -4433,6 +4455,8 @@ int si_suspend(struct radeon_device *rdev)
radeon_vm_manager_fini(rdev); radeon_vm_manager_fini(rdev);
si_cp_enable(rdev, false); si_cp_enable(rdev, false);
cayman_dma_stop(rdev); cayman_dma_stop(rdev);
r600_uvd_rbc_stop(rdev);
radeon_uvd_suspend(rdev);
si_irq_suspend(rdev); si_irq_suspend(rdev);
radeon_wb_disable(rdev); radeon_wb_disable(rdev);
si_pcie_gart_disable(rdev); si_pcie_gart_disable(rdev);
@ -4518,6 +4542,13 @@ int si_init(struct radeon_device *rdev)
ring->ring_obj = NULL; ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 64 * 1024); r600_ring_init(rdev, ring, 64 * 1024);
r = radeon_uvd_init(rdev);
if (!r) {
ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 4096);
}
rdev->ih.ring_obj = NULL; rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024); r600_ih_ring_init(rdev, 64 * 1024);
@ -4566,6 +4597,7 @@ void si_fini(struct radeon_device *rdev)
radeon_vm_manager_fini(rdev); radeon_vm_manager_fini(rdev);
radeon_ib_pool_fini(rdev); radeon_ib_pool_fini(rdev);
radeon_irq_kms_fini(rdev); radeon_irq_kms_fini(rdev);
radeon_uvd_fini(rdev);
si_pcie_gart_fini(rdev); si_pcie_gart_fini(rdev);
r600_vram_scratch_fini(rdev); r600_vram_scratch_fini(rdev);
radeon_gem_fini(rdev); radeon_gem_fini(rdev);

View File

@ -799,6 +799,12 @@
# define THREAD_TRACE_FLUSH (54 << 0) # define THREAD_TRACE_FLUSH (54 << 0)
# define THREAD_TRACE_FINISH (55 << 0) # define THREAD_TRACE_FINISH (55 << 0)
/*
 * UVD
 *
 * Ring buffer pointer registers for the UVD ring. si_startup() hands both
 * to radeon_ring_init() together with R600_WB_UVD_RPTR_OFFSET when it
 * brings up R600_RING_TYPE_UVD_INDEX.
 */
#define UVD_RBC_RB_RPTR 0xF690
#define UVD_RBC_RB_WPTR 0xF694
/* /*
* PM4 * PM4
*/ */

View File

@ -918,6 +918,7 @@ struct drm_radeon_gem_va {
#define RADEON_CS_RING_GFX 0 #define RADEON_CS_RING_GFX 0
#define RADEON_CS_RING_COMPUTE 1 #define RADEON_CS_RING_COMPUTE 1
#define RADEON_CS_RING_DMA 2 #define RADEON_CS_RING_DMA 2
/*
 * UVD ring id, next in sequence after GFX (0), COMPUTE (1) and DMA (2).
 * NOTE(review): presumably mapped to R600_RING_TYPE_UVD_INDEX by the CS
 * parser -- confirm against the ioctl handling code.
 */
#define RADEON_CS_RING_UVD 3
/* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */ /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
/* 0 = normal, + = higher priority, - = lower priority */ /* 0 = normal, + = higher priority, - = lower priority */