mirror of
https://github.com/torvalds/linux.git
synced 2024-11-25 21:51:40 +00:00
drm/v3d: Create a CPU job extension for an indirect CSD job
A CPU job is a type of job that performs operations that require CPU intervention. An indirect CSD job is a job that, when executed in the queue, will map the indirect buffer, read the dispatch parameters, and submit a regular dispatch. Therefore, it is a job that needs CPU intervention. So, create a user extension for the CPU job that enables the creation of an indirect CSD. This user extension will allow the creation of a CSD job linked to a CPU job. The CPU job will wait for the indirect CSD job dependencies and, once they are signaled, it will update the CSD job parameters. Co-developed-by: Melissa Wen <mwen@igalia.com> Signed-off-by: Melissa Wen <mwen@igalia.com> Signed-off-by: Maíra Canal <mcanal@igalia.com> Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-14-mcanal@igalia.com
This commit is contained in:
parent
7c13132c40
commit
18b8413b25
@ -316,12 +316,41 @@ struct v3d_csd_job {
|
||||
struct drm_v3d_submit_csd args;
|
||||
};
|
||||
|
||||
enum v3d_cpu_job_type {};
|
||||
/* Kinds of work a CPU job can perform.
 *
 * Zero is deliberately left unused: the extension parser treats a zero
 * job_type as "no CPU job extension attached yet".
 */
enum v3d_cpu_job_type {
	V3D_CPU_JOB_TYPE_INDIRECT_CSD = 1,
};
|
||||
|
||||
struct v3d_indirect_csd_info {
|
||||
/* Indirect CSD */
|
||||
struct v3d_csd_job *job;
|
||||
|
||||
/* Clean cache job associated to the Indirect CSD job */
|
||||
struct v3d_job *clean_job;
|
||||
|
||||
/* Offset within the BO where the workgroup counts are stored */
|
||||
u32 offset;
|
||||
|
||||
/* Workgroups size */
|
||||
u32 wg_size;
|
||||
|
||||
/* Indices of the uniforms with the workgroup dispatch counts
|
||||
* in the uniform stream.
|
||||
*/
|
||||
u32 wg_uniform_offsets[3];
|
||||
|
||||
/* Indirect BO */
|
||||
struct drm_gem_object *indirect;
|
||||
|
||||
/* Context of the Indirect CSD job */
|
||||
struct ww_acquire_ctx acquire_ctx;
|
||||
};
|
||||
|
||||
struct v3d_cpu_job {
|
||||
struct v3d_job base;
|
||||
|
||||
enum v3d_cpu_job_type job_type;
|
||||
|
||||
struct v3d_indirect_csd_info indirect_csd;
|
||||
};
|
||||
|
||||
/* Signature of the per-job-type handlers stored in cpu_job_function[]. */
typedef void (*v3d_cpu_job_fn)(struct v3d_cpu_job *);
|
||||
|
@ -25,6 +25,8 @@
|
||||
#include "v3d_regs.h"
|
||||
#include "v3d_trace.h"
|
||||
|
||||
/* Left shift applied to a workgroup count when it is stored in CSD cfg[0],
 * cfg[1] or cfg[2].
 */
#define V3D_CSD_CFG012_WG_COUNT_SHIFT 16
|
||||
|
||||
static struct v3d_job *
|
||||
to_v3d_job(struct drm_sched_job *sched_job)
|
||||
{
|
||||
@ -268,7 +270,44 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
|
||||
return fence;
|
||||
}
|
||||
|
||||
static const v3d_cpu_job_fn cpu_job_function[] = { };
|
||||
static void
|
||||
v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job)
|
||||
{
|
||||
struct v3d_indirect_csd_info *indirect_csd = &job->indirect_csd;
|
||||
struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
|
||||
struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect);
|
||||
struct drm_v3d_submit_csd *args = &indirect_csd->job->args;
|
||||
u32 *wg_counts;
|
||||
|
||||
v3d_get_bo_vaddr(bo);
|
||||
v3d_get_bo_vaddr(indirect);
|
||||
|
||||
wg_counts = (uint32_t *)(bo->vaddr + indirect_csd->offset);
|
||||
|
||||
if (wg_counts[0] == 0 || wg_counts[1] == 0 || wg_counts[2] == 0)
|
||||
return;
|
||||
|
||||
args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
|
||||
args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
|
||||
args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
|
||||
args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) *
|
||||
(wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1;
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
/* 0xffffffff indicates that the uniform rewrite is not needed */
|
||||
if (indirect_csd->wg_uniform_offsets[i] != 0xffffffff) {
|
||||
u32 uniform_idx = indirect_csd->wg_uniform_offsets[i];
|
||||
((uint32_t *)indirect->vaddr)[uniform_idx] = wg_counts[i];
|
||||
}
|
||||
}
|
||||
|
||||
v3d_put_bo_vaddr(indirect);
|
||||
v3d_put_bo_vaddr(bo);
|
||||
}
|
||||
|
||||
static const v3d_cpu_job_fn cpu_job_function[] = {
|
||||
[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect,
|
||||
};
|
||||
|
||||
static struct dma_fence *
|
||||
v3d_cpu_job_run(struct drm_sched_job *sched_job)
|
||||
|
@ -391,6 +391,48 @@ v3d_get_multisync_submit_deps(struct drm_file *file_priv,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Get data for the indirect CSD job submission. */
|
||||
static int
|
||||
v3d_get_cpu_indirect_csd_params(struct drm_file *file_priv,
|
||||
struct drm_v3d_extension __user *ext,
|
||||
struct v3d_cpu_job *job)
|
||||
{
|
||||
struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
|
||||
struct v3d_dev *v3d = v3d_priv->v3d;
|
||||
struct drm_v3d_indirect_csd indirect_csd;
|
||||
struct v3d_indirect_csd_info *info = &job->indirect_csd;
|
||||
|
||||
if (!job) {
|
||||
DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (job->job_type) {
|
||||
DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (copy_from_user(&indirect_csd, ext, sizeof(indirect_csd)))
|
||||
return -EFAULT;
|
||||
|
||||
if (!v3d_has_csd(v3d)) {
|
||||
DRM_DEBUG("Attempting CSD submit on non-CSD hardware.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
job->job_type = V3D_CPU_JOB_TYPE_INDIRECT_CSD;
|
||||
info->offset = indirect_csd.offset;
|
||||
info->wg_size = indirect_csd.wg_size;
|
||||
memcpy(&info->wg_uniform_offsets, &indirect_csd.wg_uniform_offsets,
|
||||
sizeof(indirect_csd.wg_uniform_offsets));
|
||||
|
||||
info->indirect = drm_gem_object_lookup(file_priv, indirect_csd.indirect);
|
||||
|
||||
return v3d_setup_csd_jobs_and_bos(file_priv, v3d, &indirect_csd.submit,
|
||||
&info->job, &info->clean_job,
|
||||
NULL, &info->acquire_ctx);
|
||||
}
|
||||
|
||||
/* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
|
||||
* according to the extension id (name).
|
||||
*/
|
||||
@ -416,6 +458,9 @@ v3d_get_extensions(struct drm_file *file_priv,
|
||||
case DRM_V3D_EXT_ID_MULTI_SYNC:
|
||||
ret = v3d_get_multisync_submit_deps(file_priv, user_ext, se);
|
||||
break;
|
||||
case DRM_V3D_EXT_ID_CPU_INDIRECT_CSD:
|
||||
ret = v3d_get_cpu_indirect_csd_params(file_priv, user_ext, job);
|
||||
break;
|
||||
default:
|
||||
DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
|
||||
return -EINVAL;
|
||||
@ -790,7 +835,9 @@ fail:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const unsigned int cpu_job_bo_handle_count[] = { };
|
||||
static const unsigned int cpu_job_bo_handle_count[] = {
|
||||
[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = 1,
|
||||
};
|
||||
|
||||
/**
|
||||
* v3d_submit_cpu_ioctl() - Submits a CPU job to the V3D.
|
||||
@ -808,7 +855,10 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
|
||||
struct v3d_dev *v3d = to_v3d_dev(dev);
|
||||
struct drm_v3d_submit_cpu *args = data;
|
||||
struct v3d_submit_ext se = {0};
|
||||
struct v3d_submit_ext *out_se = NULL;
|
||||
struct v3d_cpu_job *cpu_job = NULL;
|
||||
struct v3d_csd_job *csd_job = NULL;
|
||||
struct v3d_job *clean_job = NULL;
|
||||
struct ww_acquire_ctx acquire_ctx;
|
||||
int ret;
|
||||
|
||||
@ -847,6 +897,9 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
|
||||
if (ret)
|
||||
goto fail;
|
||||
|
||||
clean_job = cpu_job->indirect_csd.clean_job;
|
||||
csd_job = cpu_job->indirect_csd.job;
|
||||
|
||||
if (args->bo_handle_count) {
|
||||
ret = v3d_lookup_bos(dev, file_priv, &cpu_job->base,
|
||||
args->bo_handles, args->bo_handle_count);
|
||||
@ -860,19 +913,66 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
|
||||
|
||||
mutex_lock(&v3d->sched_lock);
|
||||
v3d_push_job(&cpu_job->base);
|
||||
|
||||
switch (cpu_job->job_type) {
|
||||
case V3D_CPU_JOB_TYPE_INDIRECT_CSD:
|
||||
ret = drm_sched_job_add_dependency(&csd_job->base.base,
|
||||
dma_fence_get(cpu_job->base.done_fence));
|
||||
if (ret)
|
||||
goto fail_unreserve;
|
||||
|
||||
v3d_push_job(&csd_job->base);
|
||||
|
||||
ret = drm_sched_job_add_dependency(&clean_job->base,
|
||||
dma_fence_get(csd_job->base.done_fence));
|
||||
if (ret)
|
||||
goto fail_unreserve;
|
||||
|
||||
v3d_push_job(clean_job);
|
||||
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&v3d->sched_lock);
|
||||
|
||||
out_se = (cpu_job->job_type == V3D_CPU_JOB_TYPE_INDIRECT_CSD) ? NULL : &se;
|
||||
|
||||
v3d_attach_fences_and_unlock_reservation(file_priv,
|
||||
&cpu_job->base,
|
||||
&acquire_ctx, 0,
|
||||
NULL, cpu_job->base.done_fence);
|
||||
out_se, cpu_job->base.done_fence);
|
||||
|
||||
switch (cpu_job->job_type) {
|
||||
case V3D_CPU_JOB_TYPE_INDIRECT_CSD:
|
||||
v3d_attach_fences_and_unlock_reservation(file_priv,
|
||||
clean_job,
|
||||
&cpu_job->indirect_csd.acquire_ctx,
|
||||
0, &se, clean_job->done_fence);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
v3d_job_put(&cpu_job->base);
|
||||
v3d_job_put(&csd_job->base);
|
||||
v3d_job_put(clean_job);
|
||||
|
||||
return 0;
|
||||
|
||||
fail_unreserve:
|
||||
mutex_unlock(&v3d->sched_lock);
|
||||
|
||||
drm_gem_unlock_reservations(cpu_job->base.bo, cpu_job->base.bo_count,
|
||||
&acquire_ctx);
|
||||
|
||||
drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
|
||||
&cpu_job->indirect_csd.acquire_ctx);
|
||||
|
||||
fail:
|
||||
v3d_job_cleanup((void *)cpu_job);
|
||||
v3d_job_cleanup((void *)csd_job);
|
||||
v3d_job_cleanup(clean_job);
|
||||
v3d_put_multisync_post_deps(&se);
|
||||
|
||||
return ret;
|
||||
|
@ -71,7 +71,8 @@ extern "C" {
|
||||
struct drm_v3d_extension {
|
||||
__u64 next;
|
||||
__u32 id;
|
||||
#define DRM_V3D_EXT_ID_MULTI_SYNC 0x01
|
||||
#define DRM_V3D_EXT_ID_MULTI_SYNC 0x01
|
||||
#define DRM_V3D_EXT_ID_CPU_INDIRECT_CSD 0x02
|
||||
__u32 flags; /* mbz */
|
||||
};
|
||||
|
||||
@ -365,8 +366,46 @@ struct drm_v3d_submit_csd {
|
||||
__u32 pad;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_v3d_indirect_csd - ioctl extension for the CPU job to create an
|
||||
* indirect CSD
|
||||
*
|
||||
* When an extension of DRM_V3D_EXT_ID_CPU_INDIRECT_CSD id is defined, it
|
||||
* points to this extension to define a indirect CSD submission. It creates a
|
||||
* CPU job linked to a CSD job. The CPU job waits for the indirect CSD
|
||||
* dependencies and, once they are signaled, it updates the CSD job config
|
||||
* before allowing the CSD job execution.
|
||||
*/
|
||||
struct drm_v3d_indirect_csd {
|
||||
struct drm_v3d_extension base;
|
||||
|
||||
/* Indirect CSD */
|
||||
struct drm_v3d_submit_csd submit;
|
||||
|
||||
/* Handle of the indirect BO, that should be also attached to the
|
||||
* indirect CSD.
|
||||
*/
|
||||
__u32 indirect;
|
||||
|
||||
/* Offset within the BO where the workgroup counts are stored */
|
||||
__u32 offset;
|
||||
|
||||
/* Workgroups size */
|
||||
__u32 wg_size;
|
||||
|
||||
/* Indices of the uniforms with the workgroup dispatch counts
|
||||
* in the uniform stream. If the uniform rewrite is not needed,
|
||||
* the offset must be 0xffffffff.
|
||||
*/
|
||||
__u32 wg_uniform_offsets[3];
|
||||
};
|
||||
|
||||
struct drm_v3d_submit_cpu {
|
||||
/* Pointer to a u32 array of the BOs that are referenced by the job. */
|
||||
/* Pointer to a u32 array of the BOs that are referenced by the job.
|
||||
*
|
||||
* For DRM_V3D_EXT_ID_CPU_INDIRECT_CSD, it must contain only one BO,
|
||||
* that contains the workgroup counts.
|
||||
*/
|
||||
__u64 bo_handles;
|
||||
|
||||
/* Number of BO handles passed in (size is that times 4). */
|
||||
|
Loading…
Reference in New Issue
Block a user