From 54f961a628b737f66710eca0b0d95346645dd33e Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 13 Aug 2008 09:46:31 +1000 Subject: [PATCH 1/5] radeon: fix some hard lockups on r3/4/500s This patch should fix hard lockup and convert them in softlockup (ie you can ssh the box but the gpu is busted and we are waiting in loop for it to come back to reason). Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r300_cmdbuf.c | 117 ++++++++++++++++++++++----- drivers/gpu/drm/radeon/r300_reg.h | 5 +- drivers/gpu/drm/radeon/radeon_cp.c | 38 +++++---- drivers/gpu/drm/radeon/radeon_drv.h | 19 +++-- 4 files changed, 130 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c index 702df45320f7..4b3bd6303daf 100644 --- a/drivers/gpu/drm/radeon/r300_cmdbuf.c +++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c @@ -136,6 +136,18 @@ static int r300_emit_cliprects(drm_radeon_private_t *dev_priv, ADVANCE_RING(); } + /* flus cache and wait idle clean after cliprect change */ + BEGIN_RING(2); + OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); + OUT_RING(R300_RB3D_DC_FLUSH); + ADVANCE_RING(); + BEGIN_RING(2); + OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); + OUT_RING(RADEON_WAIT_3D_IDLECLEAN); + ADVANCE_RING(); + /* set flush flag */ + dev_priv->track_flush |= RADEON_FLUSH_EMITED; + return 0; } @@ -166,13 +178,13 @@ void r300_init_reg_flags(struct drm_device *dev) ADD_RANGE(0x21DC, 1); ADD_RANGE(R300_VAP_UNKNOWN_221C, 1); ADD_RANGE(R300_VAP_CLIP_X_0, 4); - ADD_RANGE(R300_VAP_PVS_WAITIDLE, 1); + ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1); ADD_RANGE(R300_VAP_UNKNOWN_2288, 1); ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2); ADD_RANGE(R300_VAP_PVS_CNTL_1, 3); ADD_RANGE(R300_GB_ENABLE, 1); ADD_RANGE(R300_GB_MSPOS0, 5); - ADD_RANGE(R300_TX_CNTL, 1); + ADD_RANGE(R300_TX_INVALTAGS, 1); ADD_RANGE(R300_TX_ENABLE, 1); ADD_RANGE(0x4200, 4); ADD_RANGE(0x4214, 1); @@ -388,15 +400,28 @@ static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv, if (sz * 16 > cmdbuf->bufsz) return -EINVAL; - BEGIN_RING(5 + sz * 4); - /* Wait for VAP to come to senses.. */ - /* there is no need to emit it multiple times, (only once before VAP is programmed, - but this optimization is for later */ - OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0); + /* VAP is very sensitive so we purge cache before we program it + * and we also flush its state before & after */ + BEGIN_RING(6); + OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); + OUT_RING(R300_RB3D_DC_FLUSH); + OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); + OUT_RING(RADEON_WAIT_3D_IDLECLEAN); + OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0)); + OUT_RING(0); + ADVANCE_RING(); + /* set flush flag */ + dev_priv->track_flush |= RADEON_FLUSH_EMITED; + + BEGIN_RING(3 + sz * 4); OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr); OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1)); OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4); + ADVANCE_RING(); + BEGIN_RING(2); + OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0)); + OUT_RING(0); ADVANCE_RING(); cmdbuf->buf += sz * 16; @@ -424,6 +449,15 @@ static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv, OUT_RING_TABLE((int *)cmdbuf->buf, 8); ADVANCE_RING(); + BEGIN_RING(4); + OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); + OUT_RING(R300_RB3D_DC_FLUSH); + OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); + OUT_RING(RADEON_WAIT_3D_IDLECLEAN); + ADVANCE_RING(); + /* set flush flag */ + dev_priv->track_flush |= RADEON_FLUSH_EMITED; + cmdbuf->buf += 8 * 4; cmdbuf->bufsz -= 8 * 4; @@ -613,11 +647,19 @@ static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv, case RADEON_CNTL_BITBLT_MULTI: return r300_emit_bitblt_multi(dev_priv, cmdbuf); - case RADEON_CP_INDX_BUFFER: /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */ + case RADEON_CP_INDX_BUFFER: + /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */ return r300_emit_indx_buffer(dev_priv, cmdbuf); - case RADEON_CP_3D_DRAW_IMMD_2: /* triggers drawing using in-packet vertex data */ - case RADEON_CP_3D_DRAW_VBUF_2: /* triggers drawing of vertex buffers setup elsewhere */ - case RADEON_CP_3D_DRAW_INDX_2: /* triggers drawing using indices to vertex buffer */ + case RADEON_CP_3D_DRAW_IMMD_2: + /* triggers drawing using in-packet vertex data */ + case RADEON_CP_3D_DRAW_VBUF_2: + /* triggers drawing of vertex buffers setup elsewhere */ + case RADEON_CP_3D_DRAW_INDX_2: + /* triggers drawing using indices to vertex buffer */ + /* whenever we send vertex we clear flush & purge */ + dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED | + RADEON_PURGE_EMITED); + break; case RADEON_WAIT_FOR_IDLE: case RADEON_CP_NOP: /* these packets are safe */ @@ -713,17 +755,53 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv, */ static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv) { + uint32_t cache_z, cache_3d, cache_2d; RING_LOCALS; + + cache_z = R300_ZC_FLUSH; + cache_2d = R300_RB2D_DC_FLUSH; + cache_3d = R300_RB3D_DC_FLUSH; + if (!(dev_priv->track_flush & RADEON_PURGE_EMITED)) { + /* we can purge, primitive where draw since last purge */ + cache_z |= R300_ZC_FREE; + cache_2d |= R300_RB2D_DC_FREE; + cache_3d |= R300_RB3D_DC_FREE; + } - BEGIN_RING(6); - OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); - OUT_RING(R300_RB3D_DSTCACHE_UNKNOWN_0A); + /* flush & purge zbuffer */ + BEGIN_RING(2); OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0)); - OUT_RING(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE| - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); - OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0)); - OUT_RING(0x0); + OUT_RING(cache_z); ADVANCE_RING(); + /* flush & purge 3d */ + BEGIN_RING(2); + OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); + OUT_RING(cache_3d); + ADVANCE_RING(); + /* flush & purge texture */ + BEGIN_RING(2); + OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0)); + OUT_RING(0); + ADVANCE_RING(); + /* FIXME: is this one really needed ? */ + BEGIN_RING(2); + OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0)); + OUT_RING(0); + ADVANCE_RING(); + BEGIN_RING(2); + OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); + OUT_RING(RADEON_WAIT_3D_IDLECLEAN); + ADVANCE_RING(); + /* flush & purge 2d through E2 as RB2D will trigger lockup */ + BEGIN_RING(4); + OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0)); + OUT_RING(cache_2d); + OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); + OUT_RING(RADEON_WAIT_2D_IDLECLEAN | + RADEON_WAIT_HOST_IDLECLEAN); + ADVANCE_RING(); + /* set flush & purge flags */ + dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED; } /** @@ -905,8 +983,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev, DRM_DEBUG("\n"); - /* See the comment above r300_emit_begin3d for why this call must be here, - * and what the cleanup gotos are for. */ + /* pacify */ r300_pacify(dev_priv); if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) { diff --git a/drivers/gpu/drm/radeon/r300_reg.h b/drivers/gpu/drm/radeon/r300_reg.h index a6802f26afc4..ee6f811599a3 100644 --- a/drivers/gpu/drm/radeon/r300_reg.h +++ b/drivers/gpu/drm/radeon/r300_reg.h @@ -317,7 +317,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and * avoids bugs caused by still running shaders reading bad data from memory. */ -#define R300_VAP_PVS_WAITIDLE 0x2284 /* GUESS */ +#define R300_VAP_PVS_STATE_FLUSH_REG 0x2284 /* Absolutely no clue what this register is about. */ #define R300_VAP_UNKNOWN_2288 0x2288 @@ -513,7 +513,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* gap */ /* Zero to flush caches. */ -#define R300_TX_CNTL 0x4100 +#define R300_TX_INVALTAGS 0x4100 #define R300_TX_FLUSH 0x0 /* The upper enable bits are guessed, based on fglrx reported limits. */ @@ -1362,6 +1362,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_RB3D_COLORPITCH2 0x4E40 /* GUESS */ #define R300_RB3D_COLORPITCH3 0x4E44 /* GUESS */ +#define R300_RB3D_AARESOLVE_CTL 0x4E88 /* gap */ /* Guess by Vladimir. diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c index f0de81a5689d..3331f88dcfb6 100644 --- a/drivers/gpu/drm/radeon/radeon_cp.c +++ b/drivers/gpu/drm/radeon/radeon_cp.c @@ -40,6 +40,7 @@ #define RADEON_FIFO_DEBUG 0 static int radeon_do_cleanup_cp(struct drm_device * dev); +static void radeon_do_cp_start(drm_radeon_private_t * dev_priv); static u32 R500_READ_MCIND(drm_radeon_private_t *dev_priv, int addr) { @@ -198,23 +199,8 @@ static int radeon_do_pixcache_flush(drm_radeon_private_t * dev_priv) DRM_UDELAY(1); } } else { - /* 3D */ - tmp = RADEON_READ(R300_RB3D_DSTCACHE_CTLSTAT); - tmp |= RADEON_RB3D_DC_FLUSH_ALL; - RADEON_WRITE(R300_RB3D_DSTCACHE_CTLSTAT, tmp); - - /* 2D */ - tmp = RADEON_READ(R300_DSTCACHE_CTLSTAT); - tmp |= RADEON_RB3D_DC_FLUSH_ALL; - RADEON_WRITE(R300_DSTCACHE_CTLSTAT, tmp); - - for (i = 0; i < dev_priv->usec_timeout; i++) { - if (!(RADEON_READ(R300_DSTCACHE_CTLSTAT) - & RADEON_RB3D_DC_BUSY)) { - return 0; - } - DRM_UDELAY(1); - } + /* don't flush or purge cache here or lockup */ + return 0; } #if RADEON_FIFO_DEBUG @@ -237,6 +223,9 @@ static int radeon_do_wait_for_fifo(drm_radeon_private_t * dev_priv, int entries) return 0; DRM_UDELAY(1); } + DRM_INFO("wait for fifo failed status : 0x%08X 0x%08X\n", + RADEON_READ(RADEON_RBBM_STATUS), + RADEON_READ(R300_VAP_CNTL_STATUS)); #if RADEON_FIFO_DEBUG DRM_ERROR("failed!\n"); @@ -263,6 +252,9 @@ static int radeon_do_wait_for_idle(drm_radeon_private_t * dev_priv) } DRM_UDELAY(1); } + DRM_INFO("wait idle failed status : 0x%08X 0x%08X\n", + RADEON_READ(RADEON_RBBM_STATUS), + RADEON_READ(R300_VAP_CNTL_STATUS)); #if RADEON_FIFO_DEBUG DRM_ERROR("failed!\n"); @@ -443,14 +435,20 @@ static void radeon_do_cp_start(drm_radeon_private_t * dev_priv) dev_priv->cp_running = 1; - BEGIN_RING(6); - + BEGIN_RING(8); + /* isync can only be written through cp on r5xx write it here */ + OUT_RING(CP_PACKET0(RADEON_ISYNC_CNTL, 0)); + OUT_RING(RADEON_ISYNC_ANY2D_IDLE3D | + RADEON_ISYNC_ANY3D_IDLE2D | + RADEON_ISYNC_WAIT_IDLEGUI | + RADEON_ISYNC_CPSCRATCH_IDLEGUI); RADEON_PURGE_CACHE(); RADEON_PURGE_ZCACHE(); RADEON_WAIT_UNTIL_IDLE(); - ADVANCE_RING(); COMMIT_RING(); + + dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED; } /* Reset the Command Processor. This will not flush any pending diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index 3f0eca957aa7..099381693175 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -220,6 +220,9 @@ struct radeon_virt_surface { struct drm_file *file_priv; }; +#define RADEON_FLUSH_EMITED (1 < 0) +#define RADEON_PURGE_EMITED (1 < 1) + typedef struct drm_radeon_private { drm_radeon_ring_buffer_t ring; drm_radeon_sarea_t *sarea_priv; @@ -311,6 +314,7 @@ typedef struct drm_radeon_private { unsigned long fb_aper_offset; int num_gb_pipes; + int track_flush; } drm_radeon_private_t; typedef struct drm_radeon_buf_priv { @@ -693,7 +697,6 @@ extern int r300_do_cp_cmdbuf(struct drm_device * dev, #define R300_ZB_ZCACHE_CTLSTAT 0x4f18 # define R300_ZC_FLUSH (1 << 0) # define R300_ZC_FREE (1 << 1) -# define R300_ZC_FLUSH_ALL 0x3 # define R300_ZC_BUSY (1 << 31) #define RADEON_RB3D_DSTCACHE_CTLSTAT 0x325c # define RADEON_RB3D_DC_FLUSH (3 << 0) @@ -701,6 +704,8 @@ extern int r300_do_cp_cmdbuf(struct drm_device * dev, # define RADEON_RB3D_DC_FLUSH_ALL 0xf # define RADEON_RB3D_DC_BUSY (1 << 31) #define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c +# define R300_RB3D_DC_FLUSH (2 << 0) +# define R300_RB3D_DC_FREE (2 << 2) # define R300_RB3D_DC_FINISH (1 << 4) #define RADEON_RB3D_ZSTENCILCNTL 0x1c2c # define RADEON_Z_TEST_MASK (7 << 4) @@ -1246,17 +1251,17 @@ do { \ OUT_RING(RADEON_RB3D_DC_FLUSH); \ } else { \ OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \ - OUT_RING(RADEON_RB3D_DC_FLUSH); \ + OUT_RING(R300_RB3D_DC_FLUSH); \ } \ } while (0) #define RADEON_PURGE_CACHE() do { \ if ((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV280) { \ OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \ - OUT_RING(RADEON_RB3D_DC_FLUSH_ALL); \ + OUT_RING(RADEON_RB3D_DC_FLUSH | RADEON_RB3D_DC_FREE); \ } else { \ OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \ - OUT_RING(RADEON_RB3D_DC_FLUSH_ALL); \ + OUT_RING(R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE); \ } \ } while (0) @@ -1273,10 +1278,10 @@ do { \ #define RADEON_PURGE_ZCACHE() do { \ if ((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV280) { \ OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); \ - OUT_RING(RADEON_RB3D_ZC_FLUSH_ALL); \ + OUT_RING(RADEON_RB3D_ZC_FLUSH | RADEON_RB3D_ZC_FREE); \ } else { \ - OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \ - OUT_RING(R300_ZC_FLUSH_ALL); \ + OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0)); \ + OUT_RING(R300_ZC_FLUSH | R300_ZC_FREE); \ } \ } while (0) From e2898c5fdd91f54c9c84fbf7d32edb8e4dfda574 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Wed, 13 Aug 2008 09:49:15 +1000 Subject: [PATCH 2/5] drm/radeon: r300_cmdbuf: Always emit INDX_BUFFER immediately after DRAW_INDEX DRAW_INDEX writes a vertex count to VAP_VF_CNTL. Docs say that behaviour is undefined (i.e. lockups happen) when this write is not followed by the right number of vertex indices. Thus we used to do the wrong thing when drawing across many cliprects was necessary, because we emitted a sequence DRAW_INDEX, DRAW_INDEX, INDX_BUFFER, INDX_BUFFER instead of DRAW_INDEX, INDX_BUFFER, DRAW_INDEX, INDX_BUFFER The latter is what we're doing now and which ought to be correct. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r300_cmdbuf.c | 80 ++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c index 4b3bd6303daf..b1fdfc61f27d 100644 --- a/drivers/gpu/drm/radeon/r300_cmdbuf.c +++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c @@ -577,22 +577,23 @@ static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv, return 0; } -static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv, - drm_radeon_kcmd_buffer_t *cmdbuf) +static __inline__ int r300_emit_draw_indx_2(drm_radeon_private_t *dev_priv, + drm_radeon_kcmd_buffer_t *cmdbuf) { - u32 *cmd = (u32 *) cmdbuf->buf; - int count, ret; + u32 *cmd; + int count; + int expected_count; RING_LOCALS; - count=(cmd[0]>>16) & 0x3fff; + cmd = (u32 *) cmdbuf->buf; + count = (cmd[0]>>16) & 0x3fff; + expected_count = cmd[1] >> 16; + if (!(cmd[1] & R300_VAP_VF_CNTL__INDEX_SIZE_32bit)) + expected_count = (expected_count+1)/2; - if ((cmd[1] & 0x8000ffff) != 0x80000810) { - DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]); - return -EINVAL; - } - ret = !radeon_check_offset(dev_priv, cmd[2]); - if (ret) { - DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]); + if (count && count != expected_count) { + DRM_ERROR("3D_DRAW_INDX_2: packet size %i, expected %i\n", + count, expected_count); return -EINVAL; } @@ -604,6 +605,50 @@ static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv, cmdbuf->buf += (count+2)*4; cmdbuf->bufsz -= (count+2)*4; + if (!count) { + drm_r300_cmd_header_t header; + + if (cmdbuf->bufsz < 4*4 + sizeof(header)) { + DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER, but stream is too short.\n"); + return -EINVAL; + } + + header.u = *(unsigned int *)cmdbuf->buf; + + cmdbuf->buf += sizeof(header); + cmdbuf->bufsz -= sizeof(header); + cmd = (u32 *) cmdbuf->buf; + + if (header.header.cmd_type != R300_CMD_PACKET3 || + header.packet3.packet != R300_CMD_PACKET3_RAW || + cmd[0] != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) { + DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER.\n"); + return -EINVAL; + } + + if ((cmd[1] & 0x8000ffff) != 0x80000810) { + DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]); + return -EINVAL; + } + if (!radeon_check_offset(dev_priv, cmd[2])) { + DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]); + return -EINVAL; + } + if (cmd[3] != expected_count) { + DRM_ERROR("INDX_BUFFER: buffer size %i, expected %i\n", + cmd[3], expected_count); + return -EINVAL; + } + + BEGIN_RING(4); + OUT_RING(cmd[0]); + OUT_RING_TABLE((int *)(cmdbuf->buf + 4), 3); + ADVANCE_RING(); + + cmdbuf->buf += 4*4; + cmdbuf->bufsz -= 4*4; + } + return 0; } @@ -648,18 +693,21 @@ static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv, return r300_emit_bitblt_multi(dev_priv, cmdbuf); case RADEON_CP_INDX_BUFFER: - /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */ - return r300_emit_indx_buffer(dev_priv, cmdbuf); + DRM_ERROR("packet3 INDX_BUFFER without preceding 3D_DRAW_INDX_2 is illegal.\n"); + return -EINVAL; case RADEON_CP_3D_DRAW_IMMD_2: /* triggers drawing using in-packet vertex data */ case RADEON_CP_3D_DRAW_VBUF_2: /* triggers drawing of vertex buffers setup elsewhere */ + dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED | + RADEON_PURGE_EMITED); + break; case RADEON_CP_3D_DRAW_INDX_2: /* triggers drawing using indices to vertex buffer */ /* whenever we send vertex we clear flush & purge */ dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED); - break; + return r300_emit_draw_indx_2(dev_priv, cmdbuf); case RADEON_WAIT_FOR_IDLE: case RADEON_CP_NOP: /* these packets are safe */ @@ -757,7 +805,7 @@ static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv) { uint32_t cache_z, cache_3d, cache_2d; RING_LOCALS; - + cache_z = R300_ZC_FLUSH; cache_2d = R300_RB2D_DC_FLUSH; cache_3d = R300_RB3D_DC_FLUSH; From 649ffc06a62bf487b78440669bdfeb637f1d675b Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Wed, 13 Aug 2008 09:50:12 +1000 Subject: [PATCH 3/5] r300: Fix cliprect emit This makes our handling of cliprects sane. drm_clip_rect always has exclusiv bottom-right corners, but the hardware expects inclusive bottom-right corner so we adjust this here. This complements Michel Daenzer's commit 57aea290e1e0a26d1e74df6cff777eb9f03 to Mesa. See also http://bugs.freedesktop.org/show_bug.cgi?id=16123 Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r300_cmdbuf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c index b1fdfc61f27d..4b27d9abb7bc 100644 --- a/drivers/gpu/drm/radeon/r300_cmdbuf.c +++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c @@ -77,6 +77,9 @@ static int r300_emit_cliprects(drm_radeon_private_t *dev_priv, return -EFAULT; } + box.x2--; /* Hardware expects inclusive bottom-right corner */ + box.y2--; + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) { box.x1 = (box.x1) & R300_CLIPRECT_MASK; @@ -95,8 +98,8 @@ static int r300_emit_cliprects(drm_radeon_private_t *dev_priv, R300_CLIPRECT_MASK; box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK; - } + OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) | (box.y1 << R300_CLIPRECT_Y_SHIFT)); OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) | From e5b4f19417b75a2d7c1e36934f60a3e836c4337e Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Sun, 24 Aug 2008 17:00:00 +1000 Subject: [PATCH 4/5] drm: don't call the vblank tasklet with irqs disabled. If a specific tasklet shares data with irq context, it needs to take a private irq-blocking spinlock within the tasklet itself. Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_irq.c | 20 ++++++++++++-------- drivers/gpu/drm/drm_lock.c | 12 +++++------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 089c015c01d1..53f0e5af1cc8 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -400,27 +400,31 @@ static void drm_locked_tasklet_func(unsigned long data) { struct drm_device *dev = (struct drm_device *)data; unsigned long irqflags; - + void (*tasklet_func)(struct drm_device *); + spin_lock_irqsave(&dev->tasklet_lock, irqflags); + tasklet_func = dev->locked_tasklet_func; + spin_unlock_irqrestore(&dev->tasklet_lock, irqflags); - if (!dev->locked_tasklet_func || + if (!tasklet_func || !drm_lock_take(&dev->lock, DRM_KERNEL_CONTEXT)) { - spin_unlock_irqrestore(&dev->tasklet_lock, irqflags); return; } dev->lock.lock_time = jiffies; atomic_inc(&dev->counts[_DRM_STAT_LOCKS]); - dev->locked_tasklet_func(dev); + spin_lock_irqsave(&dev->tasklet_lock, irqflags); + tasklet_func = dev->locked_tasklet_func; + dev->locked_tasklet_func = NULL; + spin_unlock_irqrestore(&dev->tasklet_lock, irqflags); + + if (tasklet_func != NULL) + tasklet_func(dev); drm_lock_free(&dev->lock, DRM_KERNEL_CONTEXT); - - dev->locked_tasklet_func = NULL; - - spin_unlock_irqrestore(&dev->tasklet_lock, irqflags); } /** diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index 0998723cde79..d78e0dc18245 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -150,6 +150,7 @@ int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_lock *lock = data; unsigned long irqflags; + void (*tasklet_func)(struct drm_device *); if (lock->context == DRM_KERNEL_CONTEXT) { DRM_ERROR("Process %d using kernel context %d\n", @@ -158,14 +159,11 @@ int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv) } spin_lock_irqsave(&dev->tasklet_lock, irqflags); - - if (dev->locked_tasklet_func) { - dev->locked_tasklet_func(dev); - - dev->locked_tasklet_func = NULL; - } - + tasklet_func = dev->locked_tasklet_func; + dev->locked_tasklet_func = NULL; spin_unlock_irqrestore(&dev->tasklet_lock, irqflags); + if (tasklet_func != NULL) + tasklet_func(dev); atomic_inc(&dev->counts[_DRM_STAT_UNLOCKS]); From 3e5fc80a404a24c858468030b63555cccfb3e79c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 24 Aug 2008 17:02:26 +1000 Subject: [PATCH 5/5] drm: don't set the signal blocker on the master process. There is a problem with debugging the X server and gdb crashes in the xkb startup code. This avoids the problem by allowing the master process to get signals. It should be safe as the signal blocker is mainly so that you can Ctrl-Z a 3D application without locking up the whole box. Ctrl-Z the X server isn't something many people do. Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_lock.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index d78e0dc18245..a4caf95485d7 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -105,14 +105,19 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv) ret ? "interrupted" : "has lock"); if (ret) return ret; - sigemptyset(&dev->sigmask); - sigaddset(&dev->sigmask, SIGSTOP); - sigaddset(&dev->sigmask, SIGTSTP); - sigaddset(&dev->sigmask, SIGTTIN); - sigaddset(&dev->sigmask, SIGTTOU); - dev->sigdata.context = lock->context; - dev->sigdata.lock = dev->lock.hw_lock; - block_all_signals(drm_notifier, &dev->sigdata, &dev->sigmask); + /* don't set the block all signals on the master process for now + * really probably not the correct answer but lets us debug xkb + * xserver for now */ + if (!file_priv->master) { + sigemptyset(&dev->sigmask); + sigaddset(&dev->sigmask, SIGSTOP); + sigaddset(&dev->sigmask, SIGTSTP); + sigaddset(&dev->sigmask, SIGTTIN); + sigaddset(&dev->sigmask, SIGTTOU); + dev->sigdata.context = lock->context; + dev->sigdata.lock = dev->lock.hw_lock; + block_all_signals(drm_notifier, &dev->sigdata, &dev->sigmask); + } if (dev->driver->dma_ready && (lock->flags & _DRM_LOCK_READY)) dev->driver->dma_ready(dev);