gpu: host1x: Cancel only job that actually got stuck

Host1x has no information about inter-job dependencies; that will only
become available once host1x gets a proper job scheduler implementation.
Currently a hung job causes other, unrelated jobs to be canceled as well,
which is a relic of the downstream driver and is irrelevant to upstream.
Let's cancel only the job that actually hung and leave the other jobs in
the queue untouched.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
This commit is contained in:
Dmitry Osipenko 2018-08-07 16:07:11 +03:00 committed by Thierry Reding
parent 6d6c815daa
commit e8bad65938

View File

@ -408,13 +408,11 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
} }
/* /*
* Walk the sync_queue, first incrementing with the CPU syncpts that * Increment with CPU the remaining syncpts of a partially executed job.
* are partially executed (the first buffer) or fully skipped while
* still in the current context (slots are also NOP-ed).
* *
* At the point contexts are interleaved, syncpt increments must be * Syncpt increments must be done inline with the pushbuffer from a
* done inline with the pushbuffer from a GATHER buffer to maintain * GATHER buffer to maintain the order (slots are modified to be a
* the order (slots are modified to be a GATHER of syncpt incrs). * GATHER of syncpt incrs).
* *
* Note: save in restart_addr the location where the timed out buffer * Note: save in restart_addr the location where the timed out buffer
* started in the PB, so we can start the refetch from there (with the * started in the PB, so we can start the refetch from there (with the
@ -422,20 +420,15 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
* properly for this buffer and resources are freed. * properly for this buffer and resources are freed.
*/ */
dev_dbg(dev, "%s: perform CPU incr on pending same ctx buffers\n", dev_dbg(dev, "%s: perform CPU incr on pending buffers\n", __func__);
__func__);
if (!list_empty(&cdma->sync_queue)) if (!list_empty(&cdma->sync_queue))
restart_addr = job->first_get; restart_addr = job->first_get;
else else
restart_addr = cdma->last_pos; restart_addr = cdma->last_pos;
/* do CPU increments as long as this context continues */ /* do CPU increments for the remaining syncpts */
list_for_each_entry_from(job, &cdma->sync_queue, list) { if (job) {
/* different context, gets us out of this loop */
if (job->client != cdma->timeout.client)
break;
/* won't need a timeout when replayed */ /* won't need a timeout when replayed */
job->timeout = 0; job->timeout = 0;
@ -448,20 +441,8 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
host1x_hw_cdma_timeout_cpu_incr(host1x, cdma, job->first_get, host1x_hw_cdma_timeout_cpu_incr(host1x, cdma, job->first_get,
syncpt_incrs, job->syncpt_end, syncpt_incrs, job->syncpt_end,
job->num_slots); job->num_slots);
syncpt_val += syncpt_incrs;
} }
/*
* The following sumbits from the same client may be dependent on the
* failed submit and therefore they may fail. Force a small timeout
* to make the queue cleanup faster.
*/
list_for_each_entry_from(job, &cdma->sync_queue, list)
if (job->client == cdma->timeout.client)
job->timeout = min_t(unsigned int, job->timeout, 500);
dev_dbg(dev, "%s: finished sync_queue modification\n", __func__); dev_dbg(dev, "%s: finished sync_queue modification\n", __func__);
/* roll back DMAGET and start up channel again */ /* roll back DMAGET and start up channel again */