diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index 6d033aecc8fc..64ac65d9268b 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -125,15 +125,22 @@ void hl_ctx_do_release(struct kref *ref) hl_ctx_fini(ctx); - if (ctx->hpriv) - hl_hpriv_put(ctx->hpriv); + if (ctx->hpriv) { + struct hl_fpriv *hpriv = ctx->hpriv; + + mutex_lock(&hpriv->ctx_lock); + hpriv->ctx = NULL; + mutex_unlock(&hpriv->ctx_lock); + + hl_hpriv_put(hpriv); + } kfree(ctx); } int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv) { - struct hl_ctx_mgr *mgr = &hpriv->ctx_mgr; + struct hl_ctx_mgr *ctx_mgr = &hpriv->ctx_mgr; struct hl_ctx *ctx; int rc; @@ -143,9 +150,9 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv) goto out_err; } - mutex_lock(&mgr->ctx_lock); - rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL); - mutex_unlock(&mgr->ctx_lock); + mutex_lock(&ctx_mgr->lock); + rc = idr_alloc(&ctx_mgr->handles, ctx, 1, 0, GFP_KERNEL); + mutex_unlock(&ctx_mgr->lock); if (rc < 0) { dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n"); @@ -170,9 +177,9 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv) return 0; remove_from_idr: - mutex_lock(&mgr->ctx_lock); - idr_remove(&mgr->ctx_handles, ctx->handle); - mutex_unlock(&mgr->ctx_lock); + mutex_lock(&ctx_mgr->lock); + idr_remove(&ctx_mgr->handles, ctx->handle); + mutex_unlock(&ctx_mgr->lock); free_ctx: kfree(ctx); out_err: @@ -269,6 +276,11 @@ err_hw_block_mem_fini: return rc; } +static int hl_ctx_get_unless_zero(struct hl_ctx *ctx) +{ + return kref_get_unless_zero(&ctx->refcount); +} + void hl_ctx_get(struct hl_ctx *ctx) { kref_get(&ctx->refcount); @@ -287,11 +299,15 @@ struct hl_ctx *hl_get_compute_ctx(struct hl_device *hdev) mutex_lock(&hdev->fpriv_list_lock); list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) { - /* There can only be a single user which has opened the compute device, so exit - * immediately once we find him - */ + mutex_lock(&hpriv->ctx_lock); ctx = hpriv->ctx; - hl_ctx_get(ctx); + if (ctx && !hl_ctx_get_unless_zero(ctx)) + ctx = NULL; + mutex_unlock(&hpriv->ctx_lock); + + /* There can only be a single user which has opened the compute device, so exit + * immediately once we find its context or if we see that it has been released + */ break; } @@ -383,37 +399,37 @@ int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr, /* * hl_ctx_mgr_init - initialize the context manager * - * @mgr: pointer to context manager structure + * @ctx_mgr: pointer to context manager structure * * This manager is an object inside the hpriv object of the user process. * The function is called when a user process opens the FD. */ -void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr) +void hl_ctx_mgr_init(struct hl_ctx_mgr *ctx_mgr) { - mutex_init(&mgr->ctx_lock); - idr_init(&mgr->ctx_handles); + mutex_init(&ctx_mgr->lock); + idr_init(&ctx_mgr->handles); } /* * hl_ctx_mgr_fini - finalize the context manager * * @hdev: pointer to device structure - * @mgr: pointer to context manager structure + * @ctx_mgr: pointer to context manager structure * * This function goes over all the contexts in the manager and frees them. * It is called when a process closes the FD. */ -void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr) +void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *ctx_mgr) { struct hl_ctx *ctx; struct idr *idp; u32 id; - idp = &mgr->ctx_handles; + idp = &ctx_mgr->handles; idr_for_each_entry(idp, ctx, id) kref_put(&ctx->refcount, hl_ctx_do_release); - idr_destroy(&mgr->ctx_handles); - mutex_destroy(&mgr->ctx_lock); + idr_destroy(&ctx_mgr->handles); + mutex_destroy(&ctx_mgr->lock); } diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index b4f14c6d3970..38e1ad432e51 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -245,6 +245,7 @@ static void hpriv_release(struct kref *ref) hl_debugfs_remove_file(hpriv); + mutex_destroy(&hpriv->ctx_lock); mutex_destroy(&hpriv->restore_phase_mutex); if ((!hdev->pldm) && (hdev->pdev) && diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 3023ecfc19c9..1ab64e8a05c6 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1638,12 +1638,12 @@ struct hl_ctx { /** * struct hl_ctx_mgr - for handling multiple contexts. - * @ctx_lock: protects ctx_handles. - * @ctx_handles: idr to hold all ctx handles. + * @lock: protects ctx_handles. + * @handles: idr to hold all ctx handles. */ struct hl_ctx_mgr { - struct mutex ctx_lock; - struct idr ctx_handles; + struct mutex lock; + struct idr handles; }; @@ -1998,6 +1998,8 @@ struct hl_notifier_event { * @dev_node: node in the device list of file private data * @refcount: number of related contexts. * @restore_phase_mutex: lock for context switch and restore phase. + * @ctx_lock: protects the pointer to current executing context pointer. TODO: remove for multiple + * ctx per process. */ struct hl_fpriv { struct hl_device *hdev; @@ -2011,6 +2013,7 @@ struct hl_fpriv { struct list_head dev_node; struct kref refcount; struct mutex restore_phase_mutex; + struct mutex ctx_lock; }; diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index e182637c2d93..e617cc394ff7 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -137,6 +137,7 @@ int hl_device_open(struct inode *inode, struct file *filp) mutex_init(&hpriv->notifier_event.lock); mutex_init(&hpriv->restore_phase_mutex); + mutex_init(&hpriv->ctx_lock); kref_init(&hpriv->refcount); nonseekable_open(inode, filp); @@ -209,6 +210,7 @@ out_err: hl_mem_mgr_fini(&hpriv->mem_mgr); hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); filp->private_data = NULL; + mutex_destroy(&hpriv->ctx_lock); mutex_destroy(&hpriv->restore_phase_mutex); mutex_destroy(&hpriv->notifier_event.lock); put_pid(hpriv->taskpid);