linux/drivers/gpu/drm/drm_syncobj.c

932 lines
22 KiB
C
Raw Normal View History

/*
* Copyright 2017 Red Hat
* Parts ported from amdgpu (fence wait code).
* Copyright 2016 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
*
*/
/**
* DOC: Overview
*
* DRM synchronisation objects (syncobj, see struct &drm_syncobj) are
* persistent objects that contain an optional fence. The fence can be updated
* with a new fence, or be NULL.
*
* syncobj's can be waited upon, where it will wait for the underlying
* fence.
*
* syncobj's can be export to fd's and back, these fd's are opaque and
* have no other use case, except passing the syncobj between processes.
*
* Their primary use-case is to implement Vulkan fences and semaphores.
*
* syncobj have a kref reference count, but also have an optional file.
* The file is only created once the syncobj is exported.
* The file takes a reference on the kref.
*/
#include <drm/drmP.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/sync_file.h>
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
#include <linux/sched/signal.h>
#include "drm_internal.h"
#include <drm/drm_syncobj.h>
struct syncobj_wait_entry {
struct list_head node;
struct task_struct *task;
struct dma_fence *fence;
struct dma_fence_cb fence_cb;
};
static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
struct syncobj_wait_entry *wait);
/**
* drm_syncobj_find - lookup and reference a sync object.
* @file_private: drm file private pointer
* @handle: sync object handle to lookup.
*
* Returns a reference to the syncobj pointed to by handle or NULL. The
* reference must be released by calling drm_syncobj_put().
*/
struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
u32 handle)
{
struct drm_syncobj *syncobj;
spin_lock(&file_private->syncobj_table_lock);
/* Check if we currently have a reference on the object */
syncobj = idr_find(&file_private->syncobj_idr, handle);
if (syncobj)
drm_syncobj_get(syncobj);
spin_unlock(&file_private->syncobj_table_lock);
return syncobj;
}
EXPORT_SYMBOL(drm_syncobj_find);
static void drm_syncobj_fence_add_wait(struct drm_syncobj *syncobj,
struct syncobj_wait_entry *wait)
{
if (wait->fence)
return;
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
spin_lock(&syncobj->lock);
/* We've already tried once to get a fence and failed. Now that we
* have the lock, try one more time just to be sure we don't add a
* callback when a fence has already been set.
*/
if (syncobj->fence)
wait->fence = dma_fence_get(
rcu_dereference_protected(syncobj->fence, 1));
else
list_add_tail(&wait->node, &syncobj->cb_list);
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
spin_unlock(&syncobj->lock);
drm: add syncobj timeline support v9 This patch is for VK_KHR_timeline_semaphore extension, semaphore is called syncobj in kernel side: This extension introduces a new type of syncobj that has an integer payload identifying a point in a timeline. Such timeline syncobjs support the following operations: * CPU query - A host operation that allows querying the payload of the timeline syncobj. * CPU wait - A host operation that allows a blocking wait for a timeline syncobj to reach a specified value. * Device wait - A device operation that allows waiting for a timeline syncobj to reach a specified value. * Device signal - A device operation that allows advancing the timeline syncobj to a specified value. v1: Since it's a timeline, that means the front time point(PT) always is signaled before the late PT. a. signal PT design: Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when PT[N] fence is signaled, the timeline will increase to value of PT[N]. b. wait PT design: Wait PT fence is signaled by reaching timeline point value, when timeline is increasing, will compare wait PTs value with new timeline value, if PT value is lower than timeline value, then wait PT will be signaled, otherwise keep in list. syncobj wait operation can wait on any point of timeline, so need a RB tree to order them. And wait PT could ahead of signal PT, we need a sumission fence to perform that. v2: 1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian) 2. move unexposed denitions to .c file. (Daniel Vetter) 3. split up the change to drm_syncobj_find_fence() in a separate patch. (Christian) 4. split up the change to drm_syncobj_replace_fence() in a separate patch. 5. drop the submission_fence implementation and instead use wait_event() for that. (Christian) 6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter) v3: 1. replace normal syncobj with timeline implemenation. (Vetter and Christian) a. normal syncobj signal op will create a signal PT to tail of signal pt list. b. normal syncobj wait op will create a wait pt with last signal point, and this wait PT is only signaled by related signal point PT. 2. many bug fix and clean up 3. stub fence moving is moved to other patch. v4: 1. fix RB tree loop with while(node=rb_first(...)). (Christian) 2. fix syncobj lifecycle. (Christian) 3. only enable_signaling when there is wait_pt. (Christian) 4. fix timeline path issues. 5. write a timeline test in libdrm v5: (Christian) 1. semaphore is called syncobj in kernel side. 2. don't need 'timeline' characters in some function name. 3. keep syncobj cb. v6: (Christian) 1. merge syncobj_timeline to syncobj structure. 2. simplify some check sentences. 3. some misc change. 4. fix CTS failed issue. v7: (Christian) 1. error handling when creating signal pt. 2. remove timeline naming in func. 3. export flags in find_fence. 4. allow reset timeline. v8: 1. use wait_event_interruptible without timeout 2. rename _TYPE_INDIVIDUAL to _TYPE_BINARY v9: 1. rename signal_pt->base to signal_pt->fence_array to avoid misleading 2. improve kerneldoc individual syncobj is tested by ./deqp-vk -n dEQP-VK*semaphore* timeline syncobj is tested by ./amdgpu_test -s 9 Signed-off-by: Chunming Zhou <david1.zhou@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Cc: Christian Konig <christian.koenig@amd.com> Cc: Dave Airlie <airlied@redhat.com> Cc: Daniel Rakos <Daniel.Rakos@amd.com> Cc: Daniel Vetter <daniel@ffwll.ch> Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Cc: Jason Ekstrand <jason@jlekstrand.net> Reviewed-by: Christian König <christian.koenig@amd.com> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/257258/
2018-10-18 06:18:36 +00:00
}
static void drm_syncobj_remove_wait(struct drm_syncobj *syncobj,
struct syncobj_wait_entry *wait)
drm: add syncobj timeline support v9 This patch is for VK_KHR_timeline_semaphore extension, semaphore is called syncobj in kernel side: This extension introduces a new type of syncobj that has an integer payload identifying a point in a timeline. Such timeline syncobjs support the following operations: * CPU query - A host operation that allows querying the payload of the timeline syncobj. * CPU wait - A host operation that allows a blocking wait for a timeline syncobj to reach a specified value. * Device wait - A device operation that allows waiting for a timeline syncobj to reach a specified value. * Device signal - A device operation that allows advancing the timeline syncobj to a specified value. v1: Since it's a timeline, that means the front time point(PT) always is signaled before the late PT. a. signal PT design: Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when PT[N] fence is signaled, the timeline will increase to value of PT[N]. b. wait PT design: Wait PT fence is signaled by reaching timeline point value, when timeline is increasing, will compare wait PTs value with new timeline value, if PT value is lower than timeline value, then wait PT will be signaled, otherwise keep in list. syncobj wait operation can wait on any point of timeline, so need a RB tree to order them. And wait PT could ahead of signal PT, we need a sumission fence to perform that. v2: 1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian) 2. move unexposed denitions to .c file. (Daniel Vetter) 3. split up the change to drm_syncobj_find_fence() in a separate patch. (Christian) 4. split up the change to drm_syncobj_replace_fence() in a separate patch. 5. drop the submission_fence implementation and instead use wait_event() for that. (Christian) 6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter) v3: 1. replace normal syncobj with timeline implemenation. (Vetter and Christian) a. normal syncobj signal op will create a signal PT to tail of signal pt list. b. normal syncobj wait op will create a wait pt with last signal point, and this wait PT is only signaled by related signal point PT. 2. many bug fix and clean up 3. stub fence moving is moved to other patch. v4: 1. fix RB tree loop with while(node=rb_first(...)). (Christian) 2. fix syncobj lifecycle. (Christian) 3. only enable_signaling when there is wait_pt. (Christian) 4. fix timeline path issues. 5. write a timeline test in libdrm v5: (Christian) 1. semaphore is called syncobj in kernel side. 2. don't need 'timeline' characters in some function name. 3. keep syncobj cb. v6: (Christian) 1. merge syncobj_timeline to syncobj structure. 2. simplify some check sentences. 3. some misc change. 4. fix CTS failed issue. v7: (Christian) 1. error handling when creating signal pt. 2. remove timeline naming in func. 3. export flags in find_fence. 4. allow reset timeline. v8: 1. use wait_event_interruptible without timeout 2. rename _TYPE_INDIVIDUAL to _TYPE_BINARY v9: 1. rename signal_pt->base to signal_pt->fence_array to avoid misleading 2. improve kerneldoc individual syncobj is tested by ./deqp-vk -n dEQP-VK*semaphore* timeline syncobj is tested by ./amdgpu_test -s 9 Signed-off-by: Chunming Zhou <david1.zhou@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Cc: Christian Konig <christian.koenig@amd.com> Cc: Dave Airlie <airlied@redhat.com> Cc: Daniel Rakos <Daniel.Rakos@amd.com> Cc: Daniel Vetter <daniel@ffwll.ch> Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Cc: Jason Ekstrand <jason@jlekstrand.net> Reviewed-by: Christian König <christian.koenig@amd.com> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/257258/
2018-10-18 06:18:36 +00:00
{
if (!wait->node.next)
return;
drm: add syncobj timeline support v9 This patch is for VK_KHR_timeline_semaphore extension, semaphore is called syncobj in kernel side: This extension introduces a new type of syncobj that has an integer payload identifying a point in a timeline. Such timeline syncobjs support the following operations: * CPU query - A host operation that allows querying the payload of the timeline syncobj. * CPU wait - A host operation that allows a blocking wait for a timeline syncobj to reach a specified value. * Device wait - A device operation that allows waiting for a timeline syncobj to reach a specified value. * Device signal - A device operation that allows advancing the timeline syncobj to a specified value. v1: Since it's a timeline, that means the front time point(PT) always is signaled before the late PT. a. signal PT design: Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when PT[N] fence is signaled, the timeline will increase to value of PT[N]. b. wait PT design: Wait PT fence is signaled by reaching timeline point value, when timeline is increasing, will compare wait PTs value with new timeline value, if PT value is lower than timeline value, then wait PT will be signaled, otherwise keep in list. syncobj wait operation can wait on any point of timeline, so need a RB tree to order them. And wait PT could ahead of signal PT, we need a sumission fence to perform that. v2: 1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian) 2. move unexposed denitions to .c file. (Daniel Vetter) 3. split up the change to drm_syncobj_find_fence() in a separate patch. (Christian) 4. split up the change to drm_syncobj_replace_fence() in a separate patch. 5. drop the submission_fence implementation and instead use wait_event() for that. (Christian) 6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter) v3: 1. replace normal syncobj with timeline implemenation. (Vetter and Christian) a. normal syncobj signal op will create a signal PT to tail of signal pt list. b. normal syncobj wait op will create a wait pt with last signal point, and this wait PT is only signaled by related signal point PT. 2. many bug fix and clean up 3. stub fence moving is moved to other patch. v4: 1. fix RB tree loop with while(node=rb_first(...)). (Christian) 2. fix syncobj lifecycle. (Christian) 3. only enable_signaling when there is wait_pt. (Christian) 4. fix timeline path issues. 5. write a timeline test in libdrm v5: (Christian) 1. semaphore is called syncobj in kernel side. 2. don't need 'timeline' characters in some function name. 3. keep syncobj cb. v6: (Christian) 1. merge syncobj_timeline to syncobj structure. 2. simplify some check sentences. 3. some misc change. 4. fix CTS failed issue. v7: (Christian) 1. error handling when creating signal pt. 2. remove timeline naming in func. 3. export flags in find_fence. 4. allow reset timeline. v8: 1. use wait_event_interruptible without timeout 2. rename _TYPE_INDIVIDUAL to _TYPE_BINARY v9: 1. rename signal_pt->base to signal_pt->fence_array to avoid misleading 2. improve kerneldoc individual syncobj is tested by ./deqp-vk -n dEQP-VK*semaphore* timeline syncobj is tested by ./amdgpu_test -s 9 Signed-off-by: Chunming Zhou <david1.zhou@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Cc: Christian Konig <christian.koenig@amd.com> Cc: Dave Airlie <airlied@redhat.com> Cc: Daniel Rakos <Daniel.Rakos@amd.com> Cc: Daniel Vetter <daniel@ffwll.ch> Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Cc: Jason Ekstrand <jason@jlekstrand.net> Reviewed-by: Christian König <christian.koenig@amd.com> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/257258/
2018-10-18 06:18:36 +00:00
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
spin_lock(&syncobj->lock);
list_del_init(&wait->node);
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
spin_unlock(&syncobj->lock);
drm: add syncobj timeline support v9 This patch is for VK_KHR_timeline_semaphore extension, semaphore is called syncobj in kernel side: This extension introduces a new type of syncobj that has an integer payload identifying a point in a timeline. Such timeline syncobjs support the following operations: * CPU query - A host operation that allows querying the payload of the timeline syncobj. * CPU wait - A host operation that allows a blocking wait for a timeline syncobj to reach a specified value. * Device wait - A device operation that allows waiting for a timeline syncobj to reach a specified value. * Device signal - A device operation that allows advancing the timeline syncobj to a specified value. v1: Since it's a timeline, that means the front time point(PT) always is signaled before the late PT. a. signal PT design: Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when PT[N] fence is signaled, the timeline will increase to value of PT[N]. b. wait PT design: Wait PT fence is signaled by reaching timeline point value, when timeline is increasing, will compare wait PTs value with new timeline value, if PT value is lower than timeline value, then wait PT will be signaled, otherwise keep in list. syncobj wait operation can wait on any point of timeline, so need a RB tree to order them. And wait PT could ahead of signal PT, we need a sumission fence to perform that. v2: 1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian) 2. move unexposed denitions to .c file. (Daniel Vetter) 3. split up the change to drm_syncobj_find_fence() in a separate patch. (Christian) 4. split up the change to drm_syncobj_replace_fence() in a separate patch. 5. drop the submission_fence implementation and instead use wait_event() for that. (Christian) 6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter) v3: 1. replace normal syncobj with timeline implemenation. (Vetter and Christian) a. normal syncobj signal op will create a signal PT to tail of signal pt list. b. normal syncobj wait op will create a wait pt with last signal point, and this wait PT is only signaled by related signal point PT. 2. many bug fix and clean up 3. stub fence moving is moved to other patch. v4: 1. fix RB tree loop with while(node=rb_first(...)). (Christian) 2. fix syncobj lifecycle. (Christian) 3. only enable_signaling when there is wait_pt. (Christian) 4. fix timeline path issues. 5. write a timeline test in libdrm v5: (Christian) 1. semaphore is called syncobj in kernel side. 2. don't need 'timeline' characters in some function name. 3. keep syncobj cb. v6: (Christian) 1. merge syncobj_timeline to syncobj structure. 2. simplify some check sentences. 3. some misc change. 4. fix CTS failed issue. v7: (Christian) 1. error handling when creating signal pt. 2. remove timeline naming in func. 3. export flags in find_fence. 4. allow reset timeline. v8: 1. use wait_event_interruptible without timeout 2. rename _TYPE_INDIVIDUAL to _TYPE_BINARY v9: 1. rename signal_pt->base to signal_pt->fence_array to avoid misleading 2. improve kerneldoc individual syncobj is tested by ./deqp-vk -n dEQP-VK*semaphore* timeline syncobj is tested by ./amdgpu_test -s 9 Signed-off-by: Chunming Zhou <david1.zhou@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Cc: Christian Konig <christian.koenig@amd.com> Cc: Dave Airlie <airlied@redhat.com> Cc: Daniel Rakos <Daniel.Rakos@amd.com> Cc: Daniel Vetter <daniel@ffwll.ch> Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Cc: Jason Ekstrand <jason@jlekstrand.net> Reviewed-by: Christian König <christian.koenig@amd.com> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/257258/
2018-10-18 06:18:36 +00:00
}
/**
* drm_syncobj_replace_fence - replace fence in a sync object.
* @syncobj: Sync object to replace fence in
* @fence: fence to install in sync file.
*
* This replaces the fence on a sync object.
*/
void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
struct dma_fence *fence)
{
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
struct dma_fence *old_fence;
struct syncobj_wait_entry *cur, *tmp;
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
if (fence)
dma_fence_get(fence);
spin_lock(&syncobj->lock);
old_fence = rcu_dereference_protected(syncobj->fence,
lockdep_is_held(&syncobj->lock));
rcu_assign_pointer(syncobj->fence, fence);
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
if (fence != old_fence) {
list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
list_del_init(&cur->node);
syncobj_wait_syncobj_func(syncobj, cur);
}
}
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
spin_unlock(&syncobj->lock);
dma_fence_put(old_fence);
}
EXPORT_SYMBOL(drm_syncobj_replace_fence);
/**
* drm_syncobj_assign_null_handle - assign a stub fence to the sync object
* @syncobj: sync object to assign the fence on
*
* Assign a already signaled stub fence to the sync object.
*/
static void drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
{
struct dma_fence *fence = dma_fence_get_stub();
drm_syncobj_replace_fence(syncobj, fence);
dma_fence_put(fence);
}
/**
* drm_syncobj_find_fence - lookup and reference the fence in a sync object
* @file_private: drm file private pointer
* @handle: sync object handle to lookup.
* @point: timeline point
* @flags: DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT or not
* @fence: out parameter for the fence
*
* This is just a convenience function that combines drm_syncobj_find() and
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
* drm_syncobj_fence_get().
*
* Returns 0 on success or a negative error value on failure. On success @fence
* contains a reference to the fence, which must be released by calling
* dma_fence_put().
*/
int drm_syncobj_find_fence(struct drm_file *file_private,
u32 handle, u64 point, u64 flags,
struct dma_fence **fence)
{
struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
int ret = 0;
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
if (!syncobj)
return -ENOENT;
*fence = drm_syncobj_fence_get(syncobj);
if (!*fence) {
ret = -EINVAL;
}
drm_syncobj_put(syncobj);
return ret;
}
EXPORT_SYMBOL(drm_syncobj_find_fence);
/**
* drm_syncobj_free - free a sync object.
* @kref: kref to free.
*
* Only to be called from kref_put in drm_syncobj_put.
*/
void drm_syncobj_free(struct kref *kref)
{
struct drm_syncobj *syncobj = container_of(kref,
struct drm_syncobj,
refcount);
drm_syncobj_replace_fence(syncobj, NULL);
kfree(syncobj);
}
EXPORT_SYMBOL(drm_syncobj_free);
/**
* drm_syncobj_create - create a new syncobj
* @out_syncobj: returned syncobj
* @flags: DRM_SYNCOBJ_* flags
* @fence: if non-NULL, the syncobj will represent this fence
*
* This is the first function to create a sync object. After creating, drivers
* probably want to make it available to userspace, either through
* drm_syncobj_get_handle() or drm_syncobj_get_fd().
*
* Returns 0 on success or a negative error value on failure.
*/
int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
struct dma_fence *fence)
{
struct drm_syncobj *syncobj;
syncobj = kzalloc(sizeof(struct drm_syncobj), GFP_KERNEL);
if (!syncobj)
return -ENOMEM;
kref_init(&syncobj->refcount);
INIT_LIST_HEAD(&syncobj->cb_list);
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
spin_lock_init(&syncobj->lock);
if (flags & DRM_SYNCOBJ_CREATE_SIGNALED)
drm_syncobj_assign_null_handle(syncobj);
if (fence)
drm_syncobj_replace_fence(syncobj, fence);
*out_syncobj = syncobj;
return 0;
}
EXPORT_SYMBOL(drm_syncobj_create);
/**
* drm_syncobj_get_handle - get a handle from a syncobj
* @file_private: drm file private pointer
* @syncobj: Sync object to export
* @handle: out parameter with the new handle
*
* Exports a sync object created with drm_syncobj_create() as a handle on
* @file_private to userspace.
*
* Returns 0 on success or a negative error value on failure.
*/
int drm_syncobj_get_handle(struct drm_file *file_private,
struct drm_syncobj *syncobj, u32 *handle)
{
int ret;
/* take a reference to put in the idr */
drm_syncobj_get(syncobj);
idr_preload(GFP_KERNEL);
spin_lock(&file_private->syncobj_table_lock);
ret = idr_alloc(&file_private->syncobj_idr, syncobj, 1, 0, GFP_NOWAIT);
spin_unlock(&file_private->syncobj_table_lock);
idr_preload_end();
if (ret < 0) {
drm_syncobj_put(syncobj);
return ret;
}
*handle = ret;
return 0;
}
EXPORT_SYMBOL(drm_syncobj_get_handle);
static int drm_syncobj_create_as_handle(struct drm_file *file_private,
u32 *handle, uint32_t flags)
{
int ret;
struct drm_syncobj *syncobj;
ret = drm_syncobj_create(&syncobj, flags, NULL);
if (ret)
return ret;
ret = drm_syncobj_get_handle(file_private, syncobj, handle);
drm_syncobj_put(syncobj);
return ret;
}
static int drm_syncobj_destroy(struct drm_file *file_private,
u32 handle)
{
struct drm_syncobj *syncobj;
spin_lock(&file_private->syncobj_table_lock);
syncobj = idr_remove(&file_private->syncobj_idr, handle);
spin_unlock(&file_private->syncobj_table_lock);
if (!syncobj)
return -EINVAL;
drm_syncobj_put(syncobj);
return 0;
}
static int drm_syncobj_file_release(struct inode *inode, struct file *file)
{
struct drm_syncobj *syncobj = file->private_data;
drm_syncobj_put(syncobj);
return 0;
}
static const struct file_operations drm_syncobj_file_fops = {
.release = drm_syncobj_file_release,
};
/**
* drm_syncobj_get_fd - get a file descriptor from a syncobj
* @syncobj: Sync object to export
* @p_fd: out parameter with the new file descriptor
*
* Exports a sync object created with drm_syncobj_create() as a file descriptor.
*
* Returns 0 on success or a negative error value on failure.
*/
int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd)
{
struct file *file;
int fd;
fd = get_unused_fd_flags(O_CLOEXEC);
if (fd < 0)
return fd;
file = anon_inode_getfile("syncobj_file",
&drm_syncobj_file_fops,
syncobj, 0);
if (IS_ERR(file)) {
put_unused_fd(fd);
return PTR_ERR(file);
}
drm_syncobj_get(syncobj);
fd_install(fd, file);
*p_fd = fd;
return 0;
}
EXPORT_SYMBOL(drm_syncobj_get_fd);
static int drm_syncobj_handle_to_fd(struct drm_file *file_private,
u32 handle, int *p_fd)
{
struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
int ret;
if (!syncobj)
return -EINVAL;
ret = drm_syncobj_get_fd(syncobj, p_fd);
drm_syncobj_put(syncobj);
return ret;
}
static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
int fd, u32 *handle)
{
struct drm_syncobj *syncobj;
struct file *file;
int ret;
file = fget(fd);
if (!file)
return -EINVAL;
if (file->f_op != &drm_syncobj_file_fops) {
fput(file);
return -EINVAL;
}
/* take a reference to put in the idr */
syncobj = file->private_data;
drm_syncobj_get(syncobj);
idr_preload(GFP_KERNEL);
spin_lock(&file_private->syncobj_table_lock);
ret = idr_alloc(&file_private->syncobj_idr, syncobj, 1, 0, GFP_NOWAIT);
spin_unlock(&file_private->syncobj_table_lock);
idr_preload_end();
if (ret > 0) {
*handle = ret;
ret = 0;
} else
drm_syncobj_put(syncobj);
fput(file);
return ret;
}
static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private,
int fd, int handle)
{
struct dma_fence *fence = sync_file_get_fence(fd);
struct drm_syncobj *syncobj;
if (!fence)
return -EINVAL;
syncobj = drm_syncobj_find(file_private, handle);
if (!syncobj) {
dma_fence_put(fence);
return -ENOENT;
}
drm_syncobj_replace_fence(syncobj, fence);
dma_fence_put(fence);
drm_syncobj_put(syncobj);
return 0;
}
static int drm_syncobj_export_sync_file(struct drm_file *file_private,
int handle, int *p_fd)
{
int ret;
struct dma_fence *fence;
struct sync_file *sync_file;
int fd = get_unused_fd_flags(O_CLOEXEC);
if (fd < 0)
return fd;
ret = drm_syncobj_find_fence(file_private, handle, 0, 0, &fence);
if (ret)
goto err_put_fd;
sync_file = sync_file_create(fence);
dma_fence_put(fence);
if (!sync_file) {
ret = -EINVAL;
goto err_put_fd;
}
fd_install(fd, sync_file->file);
*p_fd = fd;
return 0;
err_put_fd:
put_unused_fd(fd);
return ret;
}
/**
* drm_syncobj_open - initalizes syncobj file-private structures at devnode open time
* @file_private: drm file-private structure to set up
*
* Called at device open time, sets up the structure for handling refcounting
* of sync objects.
*/
void
drm_syncobj_open(struct drm_file *file_private)
{
idr_init_base(&file_private->syncobj_idr, 1);
spin_lock_init(&file_private->syncobj_table_lock);
}
static int
drm_syncobj_release_handle(int id, void *ptr, void *data)
{
struct drm_syncobj *syncobj = ptr;
drm_syncobj_put(syncobj);
return 0;
}
/**
* drm_syncobj_release - release file-private sync object resources
* @file_private: drm file-private structure to clean up
*
* Called at close time when the filp is going away.
*
* Releases any remaining references on objects by this filp.
*/
void
drm_syncobj_release(struct drm_file *file_private)
{
idr_for_each(&file_private->syncobj_idr,
&drm_syncobj_release_handle, file_private);
idr_destroy(&file_private->syncobj_idr);
}
int
drm_syncobj_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_private)
{
struct drm_syncobj_create *args = data;
if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
return -EOPNOTSUPP;
/* no valid flags yet */
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
if (args->flags & ~DRM_SYNCOBJ_CREATE_SIGNALED)
return -EINVAL;
return drm_syncobj_create_as_handle(file_private,
&args->handle, args->flags);
}
int
drm_syncobj_destroy_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_private)
{
struct drm_syncobj_destroy *args = data;
if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
return -EOPNOTSUPP;
/* make sure padding is empty */
if (args->pad)
return -EINVAL;
return drm_syncobj_destroy(file_private, args->handle);
}
int
drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_private)
{
struct drm_syncobj_handle *args = data;
if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
return -EOPNOTSUPP;
if (args->pad)
return -EINVAL;
if (args->flags != 0 &&
args->flags != DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE)
return -EINVAL;
if (args->flags & DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE)
return drm_syncobj_export_sync_file(file_private, args->handle,
&args->fd);
return drm_syncobj_handle_to_fd(file_private, args->handle,
&args->fd);
}
int
drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_private)
{
struct drm_syncobj_handle *args = data;
if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
return -EOPNOTSUPP;
if (args->pad)
return -EINVAL;
if (args->flags != 0 &&
args->flags != DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE)
return -EINVAL;
if (args->flags & DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE)
return drm_syncobj_import_sync_file_fence(file_private,
args->fd,
args->handle);
return drm_syncobj_fd_to_handle(file_private, args->fd,
&args->handle);
}
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
static void syncobj_wait_fence_func(struct dma_fence *fence,
struct dma_fence_cb *cb)
{
struct syncobj_wait_entry *wait =
container_of(cb, struct syncobj_wait_entry, fence_cb);
wake_up_process(wait->task);
}
static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
struct syncobj_wait_entry *wait)
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
{
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
/* This happens inside the syncobj lock */
wait->fence = dma_fence_get(rcu_dereference_protected(syncobj->fence,
lockdep_is_held(&syncobj->lock)));
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
wake_up_process(wait->task);
}
static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
uint32_t count,
uint32_t flags,
signed long timeout,
uint32_t *idx)
{
struct syncobj_wait_entry *entries;
struct dma_fence *fence;
uint32_t signaled_count, i;
entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
if (!entries)
return -ENOMEM;
/* Walk the list of sync objects and initialize entries. We do
* this up-front so that we can properly return -EINVAL if there is
* a syncobj with a missing fence and then never have the chance of
* returning -EINVAL again.
*/
signaled_count = 0;
for (i = 0; i < count; ++i) {
entries[i].task = current;
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
if (!entries[i].fence) {
if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
continue;
} else {
timeout = -EINVAL;
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
goto cleanup_entries;
}
}
if (dma_fence_is_signaled(entries[i].fence)) {
if (signaled_count == 0 && idx)
*idx = i;
signaled_count++;
}
}
if (signaled_count == count ||
(signaled_count > 0 &&
!(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)))
goto cleanup_entries;
/* There's a very annoying laxness in the dma_fence API here, in
* that backends are not required to automatically report when a
* fence is signaled prior to fence->ops->enable_signaling() being
* called. So here if we fail to match signaled_count, we need to
* fallthough and try a 0 timeout wait!
*/
if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
for (i = 0; i < count; ++i)
drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]);
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
}
do {
set_current_state(TASK_INTERRUPTIBLE);
signaled_count = 0;
for (i = 0; i < count; ++i) {
fence = entries[i].fence;
if (!fence)
continue;
if (dma_fence_is_signaled(fence) ||
(!entries[i].fence_cb.func &&
dma_fence_add_callback(fence,
&entries[i].fence_cb,
syncobj_wait_fence_func))) {
/* The fence has been signaled */
if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
signaled_count++;
} else {
if (idx)
*idx = i;
goto done_waiting;
}
}
}
if (signaled_count == count)
goto done_waiting;
if (timeout == 0) {
timeout = -ETIME;
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
goto done_waiting;
}
if (signal_pending(current)) {
timeout = -ERESTARTSYS;
goto done_waiting;
}
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
timeout = schedule_timeout(timeout);
} while (1);
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
done_waiting:
__set_current_state(TASK_RUNNING);
cleanup_entries:
for (i = 0; i < count; ++i) {
drm_syncobj_remove_wait(syncobjs[i], &entries[i]);
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
if (entries[i].fence_cb.func)
dma_fence_remove_callback(entries[i].fence,
&entries[i].fence_cb);
dma_fence_put(entries[i].fence);
}
kfree(entries);
return timeout;
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
}
/**
* drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute value
*
* @timeout_nsec: timeout nsec component in ns, 0 for poll
*
* Calculate the timeout in jiffies from an absolute time in sec/nsec.
*/
static signed long drm_timeout_abs_to_jiffies(int64_t timeout_nsec)
{
ktime_t abs_timeout, now;
u64 timeout_ns, timeout_jiffies64;
/* make 0 timeout means poll - absolute 0 doesn't seem valid */
if (timeout_nsec == 0)
return 0;
abs_timeout = ns_to_ktime(timeout_nsec);
now = ktime_get();
if (!ktime_after(abs_timeout, now))
return 0;
timeout_ns = ktime_to_ns(ktime_sub(abs_timeout, now));
timeout_jiffies64 = nsecs_to_jiffies64(timeout_ns);
/* clamp timeout to avoid infinite timeout */
if (timeout_jiffies64 >= MAX_SCHEDULE_TIMEOUT - 1)
return MAX_SCHEDULE_TIMEOUT - 1;
return timeout_jiffies64 + 1;
}
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
static int drm_syncobj_array_wait(struct drm_device *dev,
struct drm_file *file_private,
struct drm_syncobj_wait *wait,
struct drm_syncobj **syncobjs)
{
signed long timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec);
uint32_t first = ~0;
timeout = drm_syncobj_array_wait_timeout(syncobjs,
wait->count_handles,
wait->flags,
timeout, &first);
if (timeout < 0)
return timeout;
wait->first_signaled = first;
return 0;
}
static int drm_syncobj_array_find(struct drm_file *file_private,
void __user *user_handles,
uint32_t count_handles,
struct drm_syncobj ***syncobjs_out)
{
uint32_t i, *handles;
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
struct drm_syncobj **syncobjs;
int ret;
handles = kmalloc_array(count_handles, sizeof(*handles), GFP_KERNEL);
if (handles == NULL)
return -ENOMEM;
if (copy_from_user(handles, user_handles,
sizeof(uint32_t) * count_handles)) {
ret = -EFAULT;
goto err_free_handles;
}
syncobjs = kmalloc_array(count_handles, sizeof(*syncobjs), GFP_KERNEL);
if (syncobjs == NULL) {
ret = -ENOMEM;
goto err_free_handles;
}
for (i = 0; i < count_handles; i++) {
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
syncobjs[i] = drm_syncobj_find(file_private, handles[i]);
if (!syncobjs[i]) {
ret = -ENOENT;
goto err_put_syncobjs;
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
}
}
kfree(handles);
*syncobjs_out = syncobjs;
return 0;
err_put_syncobjs:
drm/syncobj: Allow wait for submit and signal behavior (v5) Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-08-25 17:52:24 +00:00
while (i-- > 0)
drm_syncobj_put(syncobjs[i]);
kfree(syncobjs);
err_free_handles:
kfree(handles);
return ret;
}
static void drm_syncobj_array_free(struct drm_syncobj **syncobjs,
uint32_t count)
{
uint32_t i;
for (i = 0; i < count; i++)
drm_syncobj_put(syncobjs[i]);
kfree(syncobjs);
}
int
drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_private)
{
struct drm_syncobj_wait *args = data;
struct drm_syncobj **syncobjs;
int ret = 0;
if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
return -EOPNOTSUPP;
if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
return -EINVAL;
if (args->count_handles == 0)
return -EINVAL;
ret = drm_syncobj_array_find(file_private,
u64_to_user_ptr(args->handles),
args->count_handles,
&syncobjs);
if (ret < 0)
return ret;
ret = drm_syncobj_array_wait(dev, file_private,
args, syncobjs);
drm_syncobj_array_free(syncobjs, args->count_handles);
return ret;
}
int
drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_private)
{
struct drm_syncobj_array *args = data;
struct drm_syncobj **syncobjs;
uint32_t i;
int ret;
if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
return -EOPNOTSUPP;
if (args->pad != 0)
return -EINVAL;
if (args->count_handles == 0)
return -EINVAL;
ret = drm_syncobj_array_find(file_private,
u64_to_user_ptr(args->handles),
args->count_handles,
&syncobjs);
if (ret < 0)
return ret;
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
for (i = 0; i < args->count_handles; i++)
drm_syncobj_replace_fence(syncobjs[i], NULL);
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
drm_syncobj_array_free(syncobjs, args->count_handles);
drm: Revert syncobj timeline changes. Daniel suggested I submit this, since we're still seeing regressions from it. This is a revert to before 48197bc564c7 ("drm: add syncobj timeline support v9") and its followon fixes. Fixes this on first V3D testcase execution: [ 48.767088] ============================================ [ 48.772410] WARNING: possible recursive locking detected [ 48.777739] 4.19.0-rc6+ #489 Not tainted [ 48.781668] -------------------------------------------- [ 48.786993] shader_runner/3284 is trying to acquire lock: [ 48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.800714] [ 48.800714] but task is already holding lock: [ 48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c [ 48.814862] [ 48.814862] other info that might help us debug this: [ 48.821410] Possible unsafe locking scenario: [ 48.821410] [ 48.827338] CPU0 [ 48.829788] ---- [ 48.832239] lock(&(&array->lock)->rlock); [ 48.836434] lock(&(&array->lock)->rlock); [ 48.840640] [ 48.840640] *** DEADLOCK *** [ 48.840640] [ 48.846582] May be due to missing lock nesting notation [ 130.763560] 1 lock held by cts-runner/3270: [ 130.767745] #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c [ 130.776461] stack backtrace: [ 130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486 [ 130.787706] Hardware name: Broadcom STB (Flattened Device Tree) [ 130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14) [ 130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4) [ 130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68) [ 130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c) [ 130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68) [ 130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c) [ 130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec) [ 130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c) [ 130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614) [ 130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0) [ 130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390) [ 130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac) [ 130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60) [ 130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28) Cc: Chunming Zhou <david1.zhou@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Eric Anholt <eric@anholt.net> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/261044/
2018-11-08 16:04:22 +00:00
return 0;
}
int
drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_private)
{
struct drm_syncobj_array *args = data;
struct drm_syncobj **syncobjs;
uint32_t i;
int ret;
if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
return -EOPNOTSUPP;
if (args->pad != 0)
return -EINVAL;
if (args->count_handles == 0)
return -EINVAL;
ret = drm_syncobj_array_find(file_private,
u64_to_user_ptr(args->handles),
args->count_handles,
&syncobjs);
if (ret < 0)
return ret;
for (i = 0; i < args->count_handles; i++)
drm_syncobj_assign_null_handle(syncobjs[i]);
drm_syncobj_array_free(syncobjs, args->count_handles);
return ret;
}