drm/sched: fix the bug of time out calculation(v4)
issue:
in cleanup_job, cancel_delayed_work will cancel a TO timer even if its corresponding job is still running.

fix:
do not cancel the timer in cleanup_job; instead, do the cancelling only when the heading job is signaled, and if there is a "next" job, start_timeout again.

v2: further clean up the logic, and do the TDR timer cancelling if the signaled job is the last one in its scheduler.

v3: change the issue description; remove the cancel_delayed_work at the beginning of cleanup_job; restore the original implementation of drm_sched_job_begin.

v4: remove the kthread_should_park() check in the cleanup_job routine; we should clean up the signaled job asap.

TODO:
1) introduce pause/resume scheduler in job_timeout to serialize the handling of scheduler and job_timeout.
2) drop the bad job's del and insert in scheduler due to the above serialization (no race issue anymore with the serialization).

Tested-by: jingwen <jingwen.chen@amd.com>
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1630457207-13107-1-git-send-email-Monk.Liu@amd.com
parent 282abb5a1f
commit bcf26654a3
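For orientation before the hunks, here is a condensed paraphrase of drm_sched_get_cleanup_job() as it looks after this patch. It is a sketch, not the verbatim kernel source or standalone-compilable code; it only strings together the helpers that appear in the diff below (list_first_entry_or_null(), dma_fence_is_signaled(), cancel_delayed_work(), drm_sched_start_timeout()) to show where the TO timer is now cancelled and re-armed.

    /* Sketch of the post-patch cleanup path (simplified). */
    static struct drm_sched_job *
    drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
    {
    	struct drm_sched_job *job, *next;

    	spin_lock(&sched->job_list_lock);

    	job = list_first_entry_or_null(&sched->pending_list,
    				       struct drm_sched_job, list);

    	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
    		/* the head job really finished: remove it and only now
    		 * cancel its TO timer */
    		list_del_init(&job->list);
    		cancel_delayed_work(&sched->work_tdr);

    		next = list_first_entry_or_null(&sched->pending_list,
    						struct drm_sched_job, list);
    		if (next) {
    			/* propagate the timestamp and re-arm the timer
    			 * for the next pending job */
    			next->s_fence->scheduled.timestamp =
    				job->s_fence->finished.timestamp;
    			drm_sched_start_timeout(sched);
    		}
    	} else {
    		/* head job still running: leave its TO timer armed */
    		job = NULL;
    	}

    	spin_unlock(&sched->job_list_lock);
    	return job;
    }

The matching drm_sched_main() hunk simply drops the drm_sched_start_timeout() call after free_job(), since the timer restart now happens in the next-job branch above.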
drivers/gpu/drm/scheduler/sched_main.c

@@ -827,15 +827,6 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 {
 	struct drm_sched_job *job, *next;
 
-	/*
-	 * Don't destroy jobs while the timeout worker is running OR thread
-	 * is being parked and hence assumed to not touch pending_list
-	 */
-	if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
-	    !cancel_delayed_work(&sched->work_tdr)) ||
-	    kthread_should_park())
-		return NULL;
-
 	spin_lock(&sched->job_list_lock);
 
 	job = list_first_entry_or_null(&sched->pending_list,
@@ -844,17 +835,21 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
 		/* remove job from pending_list */
 		list_del_init(&job->list);
+
+		/* cancel this job's TO timer */
+		cancel_delayed_work(&sched->work_tdr);
 		/* make the scheduled timestamp more accurate */
 		next = list_first_entry_or_null(&sched->pending_list,
 						typeof(*next), list);
-		if (next)
+
+		if (next) {
 			next->s_fence->scheduled.timestamp =
 				job->s_fence->finished.timestamp;
-
+			/* start TO timer for next job */
+			drm_sched_start_timeout(sched);
+		}
 	} else {
 		job = NULL;
-		/* queue timeout for next job */
-		drm_sched_start_timeout(sched);
 	}
 
 	spin_unlock(&sched->job_list_lock);
@@ -942,11 +937,8 @@ static int drm_sched_main(void *param)
 					  (entity = drm_sched_select_entity(sched))) ||
 					 kthread_should_stop());
 
-		if (cleanup_job) {
+		if (cleanup_job)
 			sched->ops->free_job(cleanup_job);
-			/* queue timeout for next job */
-			drm_sched_start_timeout(sched);
-		}
 
 		if (!entity)
 			continue;