Merge tag 'drm-vc4-next-2016-11-16' of https://github.com/anholt/linux into drm-next

This pull request brings in fragment shader threading and ETC1 support for vc4.

-----BEGIN PGP SIGNATURE-----

iQIcBAABCgAGBQJYLM74AAoJELXWKTbR/J7oMhoP/iXpvkEMm5tujsiXLSJwi2yZ
Kt3h8lDIuF+p0i0lQDpUj9M5OfG8z0XJwLgmIJzh64uclG70tcfAuQ9WECC0h0ix
ZvV+g8OaLURu/kRpLf4MDwxMAVnd0zOxmJ3PugLjmdrVtMR7keF/iedJpwzxNNWi
PI0bZ8Vl2V8S5ayerDtDGTNs6yVP3/+SBKHsjPo2tj46FspMBWlzS7tI4YsyCdts
gHI+0ICtAK2Aj0imo01UEIriO5AzHxkFk/c848knUYgrK51H0zmqXxWPo2I1NHFG
wMHx/gUgasAv9EbSCjfyS/KP+YRKMeOER002/xSOuy3HFoAgOBHKZ54p3zPw9T44
17eDpLU2LUwJYoMSWYGGjeeDsmHCqTDY84K9qqkwhLx8saODtk99P5zXdoMxYgyt
a77ibarPAdJH1tC2WOo86yxQzcKaDzjzhhq4mqdBBDksZK0eVYfwDe1PssDZUQQe
QAAs+DkygXYI+80rRdKFFeu4ApZZu151zdDKviXE2J7fYT9EQUgRJRlnZJNuYnok
N7X0SVoqwGtxipZhIRlE348clEZ3abzdL6k1aUBL96O4tXu95n/BbbCXHXDYvOQf
USyhrhTWVY2tSc3OAoXkZJ6bR7Mj+iXoQxV25tZohHoXUFaBu3Bgz5lBhDWBoMeN
UK+TE4Wcfsr3EvA39plG
=dFfc
-----END PGP SIGNATURE-----
commit b7c0e47d98
@@ -61,23 +61,28 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
		if (ret < 0)
			return ret;
		args->value = V3D_READ(V3D_IDENT0);
		pm_runtime_put(&vc4->v3d->pdev->dev);
		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
		pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
		break;
	case DRM_VC4_PARAM_V3D_IDENT1:
		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
		if (ret < 0)
			return ret;
		args->value = V3D_READ(V3D_IDENT1);
		pm_runtime_put(&vc4->v3d->pdev->dev);
		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
		pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
		break;
	case DRM_VC4_PARAM_V3D_IDENT2:
		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
		if (ret < 0)
			return ret;
		args->value = V3D_READ(V3D_IDENT2);
		pm_runtime_put(&vc4->v3d->pdev->dev);
		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
		pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
		break;
	case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
	case DRM_VC4_PARAM_SUPPORTS_ETC1:
	case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
		args->value = true;
		break;
	default:

@@ -381,6 +381,8 @@ struct vc4_validated_shader_info {

	uint32_t num_uniform_addr_offsets;
	uint32_t *uniform_addr_offsets;

	bool is_threaded;
};

/**

@@ -544,14 +544,15 @@ vc4_cl_lookup_bos(struct drm_device *dev,

	handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
	if (!handles) {
		ret = -ENOMEM;
		DRM_ERROR("Failed to allocate incoming GEM handles\n");
		goto fail;
	}

	ret = copy_from_user(handles,
			     (void __user *)(uintptr_t)args->bo_handles,
			     exec->bo_count * sizeof(uint32_t));
	if (ret) {
	if (copy_from_user(handles,
			   (void __user *)(uintptr_t)args->bo_handles,
			   exec->bo_count * sizeof(uint32_t))) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in GEM handles\n");
		goto fail;
	}

@@ -708,8 +709,10 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
	}

	mutex_lock(&vc4->power_lock);
	if (--vc4->power_refcount == 0)
		pm_runtime_put(&vc4->v3d->pdev->dev);
	if (--vc4->power_refcount == 0) {
		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
		pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	kfree(exec);

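The copy_from_user() change in the vc4_cl_lookup_bos() hunk above matters because copy_from_user() returns the number of bytes it could not copy, not an errno; the old code could therefore hand a positive byte count back to userspace instead of -EFAULT. A minimal sketch of the corrected idiom, using a made-up example_copy_handles() helper and example_args struct rather than the driver's own types:

	#include <linux/types.h>
	#include <linux/uaccess.h>

	struct example_args {
		u32 count;
		u64 user_ptr;	/* userspace address of a u32 array */
	};

	static int example_copy_handles(struct example_args *args, u32 *handles)
	{
		/* copy_from_user() returns the number of bytes *not* copied
		 * (0 on success), so translate any failure into -EFAULT
		 * rather than returning the raw count.
		 */
		if (copy_from_user(handles,
				   (void __user *)(uintptr_t)args->user_ptr,
				   args->count * sizeof(u32)))
			return -EFAULT;

		return 0;
	}
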
@@ -222,6 +222,8 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
		return ret;
	}

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 40); /* a little over 2 frames. */
	pm_runtime_enable(dev);

	return 0;

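The autosuspend setup above pairs with the pm_runtime_mark_last_busy()/pm_runtime_put_autosuspend() calls in the earlier hunks: instead of dropping power the moment a reference is released, the V3D block stays up until 40 ms (a little over two frames at 60 Hz) pass with no further activity, which avoids power-cycling the GPU between frames. A minimal sketch of the pattern, assuming a generic struct device *dev rather than this driver's vc4->v3d->pdev->dev:

	#include <linux/device.h>
	#include <linux/pm_runtime.h>

	static int example_read_hw(struct device *dev)
	{
		int ret;

		/* Power up the block, or bump its usage count if it is
		 * already powered.
		 */
		ret = pm_runtime_get_sync(dev);
		if (ret < 0)
			return ret;

		/* ... access registers here ... */

		/* Record activity and drop our reference; the device only
		 * suspends once the autosuspend delay elapses with no new
		 * activity.
		 */
		pm_runtime_mark_last_busy(dev);
		pm_runtime_put_autosuspend(dev);

		return 0;
	}
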
@@ -644,6 +644,13 @@ reloc_tex(struct vc4_exec_info *exec,
		cpp = 1;
		break;
	case VC4_TEXTURE_TYPE_ETC1:
		/* ETC1 is arranged as 64-bit blocks, where each block is 4x4
		 * pixels.
		 */
		cpp = 8;
		width = (width + 3) >> 2;
		height = (height + 3) >> 2;
		break;
	case VC4_TEXTURE_TYPE_BW1:
	case VC4_TEXTURE_TYPE_A4:
	case VC4_TEXTURE_TYPE_A1:

@@ -782,11 +789,6 @@ validate_gl_shader_rec(struct drm_device *dev,
	exec->shader_rec_v += roundup(packet_size, 16);
	exec->shader_rec_size -= packet_size;

	if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) {
		DRM_ERROR("Multi-threaded fragment shaders not supported.\n");
		return -EINVAL;
	}

	for (i = 0; i < shader_reloc_count; i++) {
		if (src_handles[i] > exec->bo_count) {
			DRM_ERROR("Shader handle %d too big\n", src_handles[i]);

@@ -803,6 +805,18 @@ validate_gl_shader_rec(struct drm_device *dev,
			return -EINVAL;
	}

	if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
	    to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
		DRM_ERROR("Thread mode of CL and FS do not match\n");
		return -EINVAL;
	}

	if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
	    to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
		DRM_ERROR("cs and vs cannot be threaded\n");
		return -EINVAL;
	}

	for (i = 0; i < shader_reloc_count; i++) {
		struct vc4_validated_shader_info *validated_shader;
		uint32_t o = shader_reloc_offsets[i];

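For reference, the ETC1 case in reloc_tex() above sizes the texture in compressed blocks: each 64-bit (8-byte) block encodes 4x4 pixels, so width and height are rounded up to whole blocks before the usual size check. A small sketch of the resulting per-level size calculation (etc1_level_size() is an illustrative helper, not a function in the driver):

	#include <stdint.h>

	/* Bytes needed for one ETC1 mip level: 8 bytes per 4x4 block,
	 * with width and height rounded up to a whole number of blocks.
	 */
	static uint32_t etc1_level_size(uint32_t width, uint32_t height)
	{
		uint32_t blocks_wide = (width + 3) >> 2;
		uint32_t blocks_high = (height + 3) >> 2;

		return blocks_wide * blocks_high * 8;
	}

	/* Example: a 10x6 level is 3x2 blocks = 48 bytes. */
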
@@ -83,6 +83,13 @@ struct vc4_shader_validation_state {
	 * basic blocks.
	 */
	bool needs_uniform_address_for_loop;

	/* Set when we find an instruction writing the top half of the
	 * register files.  If we allowed writing the unusable regs in
	 * a threaded shader, then the other shader running on our
	 * QPU's clamp validation would be invalid.
	 */
	bool all_registers_used;
};

static uint32_t

@@ -118,6 +125,13 @@ raddr_add_a_to_live_reg_index(uint64_t inst)
		return ~0;
}

static bool
live_reg_is_upper_half(uint32_t lri)
{
	return (lri >= 16 && lri < 32) ||
		(lri >= 32 + 16 && lri < 32 + 32);
}

static bool
is_tmu_submit(uint32_t waddr)
{

@@ -390,6 +404,9 @@ check_reg_write(struct vc4_validated_shader_info *validated_shader,
		} else {
			validation_state->live_immediates[lri] = ~0;
		}

		if (live_reg_is_upper_half(lri))
			validation_state->all_registers_used = true;
	}

	switch (waddr) {

@@ -598,6 +615,11 @@ check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
		}
	}

	if ((raddr_a >= 16 && raddr_a < 32) ||
	    (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) {
		validation_state->all_registers_used = true;
	}

	return true;
}

@@ -608,9 +630,7 @@ static bool
vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
{
	uint32_t max_branch_target = 0;
	bool found_shader_end = false;
	int ip;
	int shader_end_ip = 0;
	int last_branch = -2;

	for (ip = 0; ip < validation_state->max_ip; ip++) {

@@ -621,8 +641,13 @@ vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
		uint32_t branch_target_ip;

		if (sig == QPU_SIG_PROG_END) {
			shader_end_ip = ip;
			found_shader_end = true;
			/* There are two delay slots after program end is
			 * signaled that are still executed, then we're
			 * finished.  validation_state->max_ip is the
			 * instruction after the last valid instruction in the
			 * program.
			 */
			validation_state->max_ip = ip + 3;
			continue;
		}

@@ -676,15 +701,9 @@ vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
		}
		set_bit(after_delay_ip, validation_state->branch_targets);
		max_branch_target = max(max_branch_target, after_delay_ip);

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (max_branch_target > shader_end_ip) {
	if (max_branch_target > validation_state->max_ip - 3) {
		DRM_ERROR("Branch landed after QPU_SIG_PROG_END");
		return false;
	}

@@ -756,6 +775,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
{
	bool found_shader_end = false;
	int shader_end_ip = 0;
	uint32_t last_thread_switch_ip = -3;
	uint32_t ip;
	struct vc4_validated_shader_info *validated_shader = NULL;
	struct vc4_shader_validation_state validation_state;

@@ -788,6 +808,17 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
		if (!vc4_handle_branch_target(&validation_state))
			goto fail;

		if (ip == last_thread_switch_ip + 3) {
			/* Reset r0-r3 live clamp data */
			int i;

			for (i = 64; i < LIVE_REG_COUNT; i++) {
				validation_state.live_min_clamp_offsets[i] = ~0;
				validation_state.live_max_clamp_regs[i] = false;
				validation_state.live_immediates[i] = ~0;
			}
		}

		switch (sig) {
		case QPU_SIG_NONE:
		case QPU_SIG_WAIT_FOR_SCOREBOARD:

@@ -797,6 +828,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
		case QPU_SIG_LOAD_TMU1:
		case QPU_SIG_PROG_END:
		case QPU_SIG_SMALL_IMM:
		case QPU_SIG_THREAD_SWITCH:
		case QPU_SIG_LAST_THREAD_SWITCH:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad write at ip %d\n", ip);

@@ -812,6 +845,18 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
				shader_end_ip = ip;
			}

			if (sig == QPU_SIG_THREAD_SWITCH ||
			    sig == QPU_SIG_LAST_THREAD_SWITCH) {
				validated_shader->is_threaded = true;

				if (ip < last_thread_switch_ip + 3) {
					DRM_ERROR("Thread switch too soon after "
						  "last switch at ip %d\n", ip);
					goto fail;
				}
				last_thread_switch_ip = ip;
			}

			break;

		case QPU_SIG_LOAD_IMM:

@@ -826,6 +871,13 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
			if (!check_branch(inst, validated_shader,
					  &validation_state, ip))
				goto fail;

			if (ip < last_thread_switch_ip + 3) {
				DRM_ERROR("Branch in thread switch at ip %d",
					  ip);
				goto fail;
			}

			break;
		default:
			DRM_ERROR("Unsupported QPU signal %d at "

@@ -847,6 +899,14 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
		goto fail;
	}

	/* Might corrupt other thread */
	if (validated_shader->is_threaded &&
	    validation_state.all_registers_used) {
		DRM_ERROR("Shader uses threading, but uses the upper "
			  "half of the registers, too\n");
		goto fail;
	}

	/* If we did a backwards branch and we haven't emitted a uniforms
	 * reset since then, we still need the uniforms stream to have the
	 * uniforms address available so that the backwards branch can do its

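Two of the validator rules above are easy to misread. First, a threaded fragment shader may only touch the lower half of each register file (ra0-ra15/rb0-rb15); the upper half belongs to the other thread sharing the QPU, which is what all_registers_used guards. Second, a thread switch has two delay slots, so the next switch (or a branch) is only legal three instructions later; the uint32_t last_thread_switch_ip = -3 initializer relies on unsigned wraparound so the very first switch always passes the ip < last_thread_switch_ip + 3 test. A toy, userspace-style sketch of just that spacing rule (is_switch[] is a made-up stand-in for decoding QPU_SIG_THREAD_SWITCH from a real instruction stream):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Toy model of the delay-slot rule: a thread switch at instruction N
	 * is followed by two delay slots (N+1, N+2), so the next switch may
	 * appear at N+3 at the earliest.  Initializing the "last switch" to
	 * (uint32_t)-3 makes last + 3 wrap to 0 in 32-bit unsigned
	 * arithmetic, so the first switch in a shader is always accepted --
	 * the same trick the validator uses.
	 */
	static bool check_switch_spacing(const bool *is_switch, uint32_t count)
	{
		uint32_t last_thread_switch_ip = (uint32_t)-3;
		uint32_t ip;

		for (ip = 0; ip < count; ip++) {
			if (!is_switch[ip])
				continue;

			if (ip < last_thread_switch_ip + 3) {
				fprintf(stderr, "switch too soon at ip %u\n", ip);
				return false;
			}
			last_thread_switch_ip = ip;
		}

		return true;
	}
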
@@ -286,6 +286,8 @@ struct drm_vc4_get_hang_state {
#define DRM_VC4_PARAM_V3D_IDENT1		1
#define DRM_VC4_PARAM_V3D_IDENT2		2
#define DRM_VC4_PARAM_SUPPORTS_BRANCHES		3
#define DRM_VC4_PARAM_SUPPORTS_ETC1		4
#define DRM_VC4_PARAM_SUPPORTS_THREADED_FS	5

struct drm_vc4_get_param {
	__u32 param;

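Userspace can probe the new parameters with the existing get-param ioctl before emitting threaded fragment shaders or ETC1 textures. A minimal sketch, assuming the ioctl is reachable as DRM_IOCTL_VC4_GET_PARAM from the same uapi header and that the fd handling here is illustrative rather than taken from any particular userspace driver:

	#include <string.h>
	#include <sys/ioctl.h>
	#include <drm/vc4_drm.h>

	/* Returns 1 if the kernel reports support for the given
	 * DRM_VC4_PARAM_* value, 0 if not (or if the ioctl fails, e.g. on an
	 * older kernel that does not know the param).
	 */
	static int vc4_has_param(int fd, __u32 param)
	{
		struct drm_vc4_get_param p;

		memset(&p, 0, sizeof(p));
		p.param = param;

		if (ioctl(fd, DRM_IOCTL_VC4_GET_PARAM, &p) != 0)
			return 0;

		return p.value != 0;
	}

	/* Usage: vc4_has_param(fd, DRM_VC4_PARAM_SUPPORTS_THREADED_FS) */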