linux/drivers/gpu/drm/i915/intel_guc.c

546 lines
15 KiB
C
Raw Normal View History

/*
* Copyright © 2014-2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#include "intel_guc.h"
#include "intel_guc_ads.h"
#include "intel_guc_submission.h"
#include "i915_drv.h"
static void gen8_guc_raise_irq(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);
}
static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i)
{
GEM_BUG_ON(!guc->send_regs.base);
GEM_BUG_ON(!guc->send_regs.count);
GEM_BUG_ON(i >= guc->send_regs.count);
return _MMIO(guc->send_regs.base + 4 * i);
}
void intel_guc_init_send_regs(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
enum forcewake_domains fw_domains = 0;
unsigned int i;
guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0));
guc->send_regs.count = SOFT_SCRATCH_COUNT - 1;
for (i = 0; i < guc->send_regs.count; i++) {
fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
guc_send_reg(guc, i),
FW_REG_READ | FW_REG_WRITE);
}
guc->send_regs.fw_domains = fw_domains;
}
void intel_guc_init_early(struct intel_guc *guc)
{
intel_guc_fw_init_early(guc);
intel_guc_ct_init_early(&guc->ct);
drm/i915/guc: Fix lockdep due to log relay channel handling under struct_mutex This patch fixes lockdep issue due to circular locking dependency of struct_mutex, i_mutex_key, mmap_sem, relay_channels_mutex. For GuC log relay channel we create debugfs file that requires i_mutex_key lock and we are doing that under struct_mutex. So we introduced newer dependency as: &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem However, there is dependency from mmap_sem to struct_mutex. Hence we separate the relay create/destroy operation from under struct_mutex. Also added runtime check of relay buffer status. Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc6-CI-Patchwork_7614+ #1 Not tainted ------------------------------------------------------ debugfs_test/1388 is trying to acquire lock: (&dev->struct_mutex){+.+.}, at: [<00000000d5e1d915>] i915_mutex_lock_interruptible+0x47/0x130 [i915] but task is already holding lock: (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&mm->mmap_sem){++++}: _copy_to_user+0x1e/0x70 filldir+0x8c/0xf0 dcache_readdir+0xeb/0x160 iterate_dir+0xdc/0x140 SyS_getdents+0xa0/0x130 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #2 (&sb->s_type->i_mutex_key#3){++++}: start_creating+0x59/0x110 __debugfs_create_file+0x2e/0xe0 relay_create_buf_file+0x62/0x80 relay_late_setup_files+0x84/0x250 guc_log_late_setup+0x4f/0x110 [i915] i915_guc_log_register+0x32/0x40 [i915] i915_driver_load+0x7b6/0x1720 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #1 (relay_channels_mutex){+.+.}: relay_open+0x12c/0x2b0 intel_guc_log_runtime_create+0xab/0x230 [i915] intel_guc_init+0x81/0x120 [i915] intel_uc_init+0x29/0xa0 [i915] i915_gem_init+0x182/0x530 [i915] i915_driver_load+0xaa9/0x1720 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #0 (&dev->struct_mutex){+.+.}: __mutex_lock+0x81/0x9b0 i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_gem_fault+0x201/0x790 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x677/0xdc0 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x4c/0x60 other info that might help us debug this: Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&sb->s_type->i_mutex_key#3); lock(&mm->mmap_sem); lock(&dev->struct_mutex); *** DEADLOCK *** 1 lock held by debugfs_test/1388: #0: (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560 stack backtrace: CPU: 2 PID: 1388 Comm: debugfs_test Not tainted 4.15.0-rc6-CI-Patchwork_7614+ #1 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./J4205-ITX, BIOS P1.10 09/29/2016 Call Trace: dump_stack+0x5f/0x86 print_circular_bug.isra.18+0x1d0/0x2c0 __lock_acquire+0x14ae/0x1b60 ? lock_acquire+0xaf/0x200 lock_acquire+0xaf/0x200 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] __mutex_lock+0x81/0x9b0 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_mutex_lock_interruptible+0x47/0x130 [i915] ? __pm_runtime_resume+0x4f/0x80 i915_gem_fault+0x201/0x790 [i915] __do_fault+0x15/0x70 ? _raw_spin_unlock+0x29/0x40 __handle_mm_fault+0x677/0xdc0 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 ? page_fault+0x36/0x60 page_fault+0x4c/0x60 v2: Added lock protection to guc->log.runtime.relay_chan (Chris) Fixed locking inside guc_flush_logs uncovered by new lockdep. v3: Locking guc_read_update_log_buffer entirely with relay_lock. (Chris) Prepared intel_guc_init_early. Moved relay_lock inside relay_create relay_destroy, relay_file_create, guc_read_update_log_buffer. (Michal) Removed struct_mutex lock around guc_log_flush and removed usage of guc_log_has_relay() from runtime_create path as it needs struct_mutex lock. v4: Handle NULL relay sub buffer pointer earlier in read_update_log_buffer (Chris). Fixed comment suffix **/. (Michal) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104693 Testcase: igt/debugfs_test/read_all_entries # with enable_guc=1 and guc_log_level=1 Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Marta Lofstedt <marta.lofstedt@intel.com> Cc: Michal Winiarski <michal.winiarski@intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/1516808821-3638-3-git-send-email-sagar.a.kamble@intel.com
2018-01-24 15:46:58 +00:00
intel_guc_log_init_early(guc);
mutex_init(&guc->send_mutex);
guc->send = intel_guc_send_nop;
guc->notify = gen8_guc_raise_irq;
}
drm/i915/guc: Move GuC workqueue allocations outside of the mutex This gets rid of the following lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc2-CI-Patchwork_7428+ #1 Not tainted ------------------------------------------------------ debugfs_test/1351 is trying to acquire lock: (&dev->struct_mutex){+.+.}, at: [<000000009d90d1a3>] i915_mutex_lock_interruptible+0x47/0x130 [i915] but task is already holding lock: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #6 (&mm->mmap_sem){++++}: __might_fault+0x63/0x90 _copy_to_user+0x1e/0x70 filldir+0x8c/0xf0 dcache_readdir+0xeb/0x160 iterate_dir+0xe6/0x150 SyS_getdents+0xa0/0x130 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #5 (&sb->s_type->i_mutex_key#5){++++}: lockref_get+0x9/0x20 -> #4 ((completion)&req.done){+.+.}: wait_for_common+0x54/0x210 devtmpfs_create_node+0x130/0x150 device_add+0x5ad/0x5e0 device_create_groups_vargs+0xd4/0xe0 device_create+0x35/0x40 msr_device_create+0x22/0x40 cpuhp_invoke_callback+0xc5/0xbf0 cpuhp_thread_fun+0x167/0x210 smpboot_thread_fn+0x17f/0x270 kthread+0x173/0x1b0 ret_from_fork+0x24/0x30 -> #3 (cpuhp_state-up){+.+.}: cpuhp_issue_call+0x132/0x1c0 __cpuhp_setup_state_cpuslocked+0x12f/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_writeback_init+0x3a/0x5c start_kernel+0x393/0x3e2 secondary_startup_64+0xa5/0xb0 -> #2 (cpuhp_state_mutex){+.+.}: __mutex_lock+0x81/0x9b0 __cpuhp_setup_state_cpuslocked+0x4b/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_alloc_init+0x1f/0x26 start_kernel+0x139/0x3e2 secondary_startup_64+0xa5/0xb0 -> #1 (cpu_hotplug_lock.rw_sem){++++}: cpus_read_lock+0x34/0xa0 apply_workqueue_attrs+0xd/0x40 __alloc_workqueue_key+0x2c7/0x4e1 intel_guc_submission_init+0x10c/0x650 [i915] intel_uc_init_hw+0x29e/0x460 [i915] i915_gem_init_hw+0xca/0x290 [i915] i915_gem_init+0x115/0x3a0 [i915] i915_driver_load+0x9a8/0x16c0 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #0 (&dev->struct_mutex){+.+.}: lock_acquire+0xaf/0x200 __mutex_lock+0x81/0x9b0 i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 other info that might help us debug this: Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#5 --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&sb->s_type->i_mutex_key#5); lock(&mm->mmap_sem); lock(&dev->struct_mutex); *** DEADLOCK *** 1 lock held by debugfs_test/1351: #0: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 stack backtrace: CPU: 2 PID: 1351 Comm: debugfs_test Not tainted 4.15.0-rc2-CI-Patchwork_7428+ #1 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0057.2017.0119.1758 01/19/2017 Call Trace: dump_stack+0x5f/0x86 print_circular_bug+0x230/0x3b0 check_prev_add+0x439/0x7b0 ? lockdep_init_map_crosslock+0x20/0x20 ? unwind_get_return_address+0x16/0x30 ? __lock_acquire+0x1385/0x15a0 __lock_acquire+0x1385/0x15a0 lock_acquire+0xaf/0x200 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] __mutex_lock+0x81/0x9b0 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_mutex_lock_interruptible+0x47/0x130 [i915] ? __pm_runtime_resume+0x4f/0x80 i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 RIP: 0033:0x7f98d6f49116 RSP: 002b:00007ffd6ffc3278 EFLAGS: 00010283 RAX: 00007f98d39a2bc0 RBX: 0000000000000000 RCX: 0000000000001680 RDX: 0000000000001680 RSI: 00007ffd6ffc3400 RDI: 00007f98d39a2bc0 RBP: 00007ffd6ffc33a0 R08: 0000000000000000 R09: 00000000000005a0 R10: 000055e847c2a830 R11: 0000000000000002 R12: 0000000000000001 R13: 000055e847c1d040 R14: 00007ffd6ffc3400 R15: 00007f98d6752ba0 v2: Init preempt_work unconditionally (Chris) v3: Mention that we need the enable_guc=1 for lockdep splat (Chris) Testcase: igt/debugfs_test/read_all_entries # with i915.enable_guc=1 Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-2-michal.winiarski@intel.com
2017-12-13 22:13:47 +00:00
int intel_guc_init_wq(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
/*
* GuC log buffer flush work item has to do register access to
* send the ack to GuC and this work item, if not synced before
* suspend, can potentially get executed after the GFX device is
* suspended.
* By marking the WQ as freezable, we don't have to bother about
* flushing of this work item from the suspend hooks, the pending
* work item if any will be either executed before the suspend
* or scheduled later on resume. This way the handling of work
* item can be kept same between system suspend & rpm suspend.
*/
guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
WQ_HIGHPRI | WQ_FREEZABLE);
drm/i915/guc: Fix lockdep due to log relay channel handling under struct_mutex This patch fixes lockdep issue due to circular locking dependency of struct_mutex, i_mutex_key, mmap_sem, relay_channels_mutex. For GuC log relay channel we create debugfs file that requires i_mutex_key lock and we are doing that under struct_mutex. So we introduced newer dependency as: &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem However, there is dependency from mmap_sem to struct_mutex. Hence we separate the relay create/destroy operation from under struct_mutex. Also added runtime check of relay buffer status. Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc6-CI-Patchwork_7614+ #1 Not tainted ------------------------------------------------------ debugfs_test/1388 is trying to acquire lock: (&dev->struct_mutex){+.+.}, at: [<00000000d5e1d915>] i915_mutex_lock_interruptible+0x47/0x130 [i915] but task is already holding lock: (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&mm->mmap_sem){++++}: _copy_to_user+0x1e/0x70 filldir+0x8c/0xf0 dcache_readdir+0xeb/0x160 iterate_dir+0xdc/0x140 SyS_getdents+0xa0/0x130 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #2 (&sb->s_type->i_mutex_key#3){++++}: start_creating+0x59/0x110 __debugfs_create_file+0x2e/0xe0 relay_create_buf_file+0x62/0x80 relay_late_setup_files+0x84/0x250 guc_log_late_setup+0x4f/0x110 [i915] i915_guc_log_register+0x32/0x40 [i915] i915_driver_load+0x7b6/0x1720 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #1 (relay_channels_mutex){+.+.}: relay_open+0x12c/0x2b0 intel_guc_log_runtime_create+0xab/0x230 [i915] intel_guc_init+0x81/0x120 [i915] intel_uc_init+0x29/0xa0 [i915] i915_gem_init+0x182/0x530 [i915] i915_driver_load+0xaa9/0x1720 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #0 (&dev->struct_mutex){+.+.}: __mutex_lock+0x81/0x9b0 i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_gem_fault+0x201/0x790 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x677/0xdc0 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x4c/0x60 other info that might help us debug this: Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&sb->s_type->i_mutex_key#3); lock(&mm->mmap_sem); lock(&dev->struct_mutex); *** DEADLOCK *** 1 lock held by debugfs_test/1388: #0: (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560 stack backtrace: CPU: 2 PID: 1388 Comm: debugfs_test Not tainted 4.15.0-rc6-CI-Patchwork_7614+ #1 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./J4205-ITX, BIOS P1.10 09/29/2016 Call Trace: dump_stack+0x5f/0x86 print_circular_bug.isra.18+0x1d0/0x2c0 __lock_acquire+0x14ae/0x1b60 ? lock_acquire+0xaf/0x200 lock_acquire+0xaf/0x200 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] __mutex_lock+0x81/0x9b0 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_mutex_lock_interruptible+0x47/0x130 [i915] ? __pm_runtime_resume+0x4f/0x80 i915_gem_fault+0x201/0x790 [i915] __do_fault+0x15/0x70 ? _raw_spin_unlock+0x29/0x40 __handle_mm_fault+0x677/0xdc0 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 ? page_fault+0x36/0x60 page_fault+0x4c/0x60 v2: Added lock protection to guc->log.runtime.relay_chan (Chris) Fixed locking inside guc_flush_logs uncovered by new lockdep. v3: Locking guc_read_update_log_buffer entirely with relay_lock. (Chris) Prepared intel_guc_init_early. Moved relay_lock inside relay_create relay_destroy, relay_file_create, guc_read_update_log_buffer. (Michal) Removed struct_mutex lock around guc_log_flush and removed usage of guc_log_has_relay() from runtime_create path as it needs struct_mutex lock. v4: Handle NULL relay sub buffer pointer earlier in read_update_log_buffer (Chris). Fixed comment suffix **/. (Michal) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104693 Testcase: igt/debugfs_test/read_all_entries # with enable_guc=1 and guc_log_level=1 Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Marta Lofstedt <marta.lofstedt@intel.com> Cc: Michal Winiarski <michal.winiarski@intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/1516808821-3638-3-git-send-email-sagar.a.kamble@intel.com
2018-01-24 15:46:58 +00:00
if (!guc->log.runtime.flush_wq) {
DRM_ERROR("Couldn't allocate workqueue for GuC log\n");
drm/i915/guc: Move GuC workqueue allocations outside of the mutex This gets rid of the following lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc2-CI-Patchwork_7428+ #1 Not tainted ------------------------------------------------------ debugfs_test/1351 is trying to acquire lock: (&dev->struct_mutex){+.+.}, at: [<000000009d90d1a3>] i915_mutex_lock_interruptible+0x47/0x130 [i915] but task is already holding lock: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #6 (&mm->mmap_sem){++++}: __might_fault+0x63/0x90 _copy_to_user+0x1e/0x70 filldir+0x8c/0xf0 dcache_readdir+0xeb/0x160 iterate_dir+0xe6/0x150 SyS_getdents+0xa0/0x130 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #5 (&sb->s_type->i_mutex_key#5){++++}: lockref_get+0x9/0x20 -> #4 ((completion)&req.done){+.+.}: wait_for_common+0x54/0x210 devtmpfs_create_node+0x130/0x150 device_add+0x5ad/0x5e0 device_create_groups_vargs+0xd4/0xe0 device_create+0x35/0x40 msr_device_create+0x22/0x40 cpuhp_invoke_callback+0xc5/0xbf0 cpuhp_thread_fun+0x167/0x210 smpboot_thread_fn+0x17f/0x270 kthread+0x173/0x1b0 ret_from_fork+0x24/0x30 -> #3 (cpuhp_state-up){+.+.}: cpuhp_issue_call+0x132/0x1c0 __cpuhp_setup_state_cpuslocked+0x12f/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_writeback_init+0x3a/0x5c start_kernel+0x393/0x3e2 secondary_startup_64+0xa5/0xb0 -> #2 (cpuhp_state_mutex){+.+.}: __mutex_lock+0x81/0x9b0 __cpuhp_setup_state_cpuslocked+0x4b/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_alloc_init+0x1f/0x26 start_kernel+0x139/0x3e2 secondary_startup_64+0xa5/0xb0 -> #1 (cpu_hotplug_lock.rw_sem){++++}: cpus_read_lock+0x34/0xa0 apply_workqueue_attrs+0xd/0x40 __alloc_workqueue_key+0x2c7/0x4e1 intel_guc_submission_init+0x10c/0x650 [i915] intel_uc_init_hw+0x29e/0x460 [i915] i915_gem_init_hw+0xca/0x290 [i915] i915_gem_init+0x115/0x3a0 [i915] i915_driver_load+0x9a8/0x16c0 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #0 (&dev->struct_mutex){+.+.}: lock_acquire+0xaf/0x200 __mutex_lock+0x81/0x9b0 i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 other info that might help us debug this: Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#5 --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&sb->s_type->i_mutex_key#5); lock(&mm->mmap_sem); lock(&dev->struct_mutex); *** DEADLOCK *** 1 lock held by debugfs_test/1351: #0: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 stack backtrace: CPU: 2 PID: 1351 Comm: debugfs_test Not tainted 4.15.0-rc2-CI-Patchwork_7428+ #1 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0057.2017.0119.1758 01/19/2017 Call Trace: dump_stack+0x5f/0x86 print_circular_bug+0x230/0x3b0 check_prev_add+0x439/0x7b0 ? lockdep_init_map_crosslock+0x20/0x20 ? unwind_get_return_address+0x16/0x30 ? __lock_acquire+0x1385/0x15a0 __lock_acquire+0x1385/0x15a0 lock_acquire+0xaf/0x200 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] __mutex_lock+0x81/0x9b0 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_mutex_lock_interruptible+0x47/0x130 [i915] ? __pm_runtime_resume+0x4f/0x80 i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 RIP: 0033:0x7f98d6f49116 RSP: 002b:00007ffd6ffc3278 EFLAGS: 00010283 RAX: 00007f98d39a2bc0 RBX: 0000000000000000 RCX: 0000000000001680 RDX: 0000000000001680 RSI: 00007ffd6ffc3400 RDI: 00007f98d39a2bc0 RBP: 00007ffd6ffc33a0 R08: 0000000000000000 R09: 00000000000005a0 R10: 000055e847c2a830 R11: 0000000000000002 R12: 0000000000000001 R13: 000055e847c1d040 R14: 00007ffd6ffc3400 R15: 00007f98d6752ba0 v2: Init preempt_work unconditionally (Chris) v3: Mention that we need the enable_guc=1 for lockdep splat (Chris) Testcase: igt/debugfs_test/read_all_entries # with i915.enable_guc=1 Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-2-michal.winiarski@intel.com
2017-12-13 22:13:47 +00:00
return -ENOMEM;
drm/i915/guc: Fix lockdep due to log relay channel handling under struct_mutex This patch fixes lockdep issue due to circular locking dependency of struct_mutex, i_mutex_key, mmap_sem, relay_channels_mutex. For GuC log relay channel we create debugfs file that requires i_mutex_key lock and we are doing that under struct_mutex. So we introduced newer dependency as: &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem However, there is dependency from mmap_sem to struct_mutex. Hence we separate the relay create/destroy operation from under struct_mutex. Also added runtime check of relay buffer status. Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc6-CI-Patchwork_7614+ #1 Not tainted ------------------------------------------------------ debugfs_test/1388 is trying to acquire lock: (&dev->struct_mutex){+.+.}, at: [<00000000d5e1d915>] i915_mutex_lock_interruptible+0x47/0x130 [i915] but task is already holding lock: (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&mm->mmap_sem){++++}: _copy_to_user+0x1e/0x70 filldir+0x8c/0xf0 dcache_readdir+0xeb/0x160 iterate_dir+0xdc/0x140 SyS_getdents+0xa0/0x130 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #2 (&sb->s_type->i_mutex_key#3){++++}: start_creating+0x59/0x110 __debugfs_create_file+0x2e/0xe0 relay_create_buf_file+0x62/0x80 relay_late_setup_files+0x84/0x250 guc_log_late_setup+0x4f/0x110 [i915] i915_guc_log_register+0x32/0x40 [i915] i915_driver_load+0x7b6/0x1720 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #1 (relay_channels_mutex){+.+.}: relay_open+0x12c/0x2b0 intel_guc_log_runtime_create+0xab/0x230 [i915] intel_guc_init+0x81/0x120 [i915] intel_uc_init+0x29/0xa0 [i915] i915_gem_init+0x182/0x530 [i915] i915_driver_load+0xaa9/0x1720 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #0 (&dev->struct_mutex){+.+.}: __mutex_lock+0x81/0x9b0 i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_gem_fault+0x201/0x790 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x677/0xdc0 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x4c/0x60 other info that might help us debug this: Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&sb->s_type->i_mutex_key#3); lock(&mm->mmap_sem); lock(&dev->struct_mutex); *** DEADLOCK *** 1 lock held by debugfs_test/1388: #0: (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560 stack backtrace: CPU: 2 PID: 1388 Comm: debugfs_test Not tainted 4.15.0-rc6-CI-Patchwork_7614+ #1 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./J4205-ITX, BIOS P1.10 09/29/2016 Call Trace: dump_stack+0x5f/0x86 print_circular_bug.isra.18+0x1d0/0x2c0 __lock_acquire+0x14ae/0x1b60 ? lock_acquire+0xaf/0x200 lock_acquire+0xaf/0x200 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] __mutex_lock+0x81/0x9b0 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_mutex_lock_interruptible+0x47/0x130 [i915] ? __pm_runtime_resume+0x4f/0x80 i915_gem_fault+0x201/0x790 [i915] __do_fault+0x15/0x70 ? _raw_spin_unlock+0x29/0x40 __handle_mm_fault+0x677/0xdc0 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 ? page_fault+0x36/0x60 page_fault+0x4c/0x60 v2: Added lock protection to guc->log.runtime.relay_chan (Chris) Fixed locking inside guc_flush_logs uncovered by new lockdep. v3: Locking guc_read_update_log_buffer entirely with relay_lock. (Chris) Prepared intel_guc_init_early. Moved relay_lock inside relay_create relay_destroy, relay_file_create, guc_read_update_log_buffer. (Michal) Removed struct_mutex lock around guc_log_flush and removed usage of guc_log_has_relay() from runtime_create path as it needs struct_mutex lock. v4: Handle NULL relay sub buffer pointer earlier in read_update_log_buffer (Chris). Fixed comment suffix **/. (Michal) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104693 Testcase: igt/debugfs_test/read_all_entries # with enable_guc=1 and guc_log_level=1 Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Marta Lofstedt <marta.lofstedt@intel.com> Cc: Michal Winiarski <michal.winiarski@intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/1516808821-3638-3-git-send-email-sagar.a.kamble@intel.com
2018-01-24 15:46:58 +00:00
}
drm/i915/guc: Move GuC workqueue allocations outside of the mutex This gets rid of the following lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc2-CI-Patchwork_7428+ #1 Not tainted ------------------------------------------------------ debugfs_test/1351 is trying to acquire lock: (&dev->struct_mutex){+.+.}, at: [<000000009d90d1a3>] i915_mutex_lock_interruptible+0x47/0x130 [i915] but task is already holding lock: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #6 (&mm->mmap_sem){++++}: __might_fault+0x63/0x90 _copy_to_user+0x1e/0x70 filldir+0x8c/0xf0 dcache_readdir+0xeb/0x160 iterate_dir+0xe6/0x150 SyS_getdents+0xa0/0x130 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #5 (&sb->s_type->i_mutex_key#5){++++}: lockref_get+0x9/0x20 -> #4 ((completion)&req.done){+.+.}: wait_for_common+0x54/0x210 devtmpfs_create_node+0x130/0x150 device_add+0x5ad/0x5e0 device_create_groups_vargs+0xd4/0xe0 device_create+0x35/0x40 msr_device_create+0x22/0x40 cpuhp_invoke_callback+0xc5/0xbf0 cpuhp_thread_fun+0x167/0x210 smpboot_thread_fn+0x17f/0x270 kthread+0x173/0x1b0 ret_from_fork+0x24/0x30 -> #3 (cpuhp_state-up){+.+.}: cpuhp_issue_call+0x132/0x1c0 __cpuhp_setup_state_cpuslocked+0x12f/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_writeback_init+0x3a/0x5c start_kernel+0x393/0x3e2 secondary_startup_64+0xa5/0xb0 -> #2 (cpuhp_state_mutex){+.+.}: __mutex_lock+0x81/0x9b0 __cpuhp_setup_state_cpuslocked+0x4b/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_alloc_init+0x1f/0x26 start_kernel+0x139/0x3e2 secondary_startup_64+0xa5/0xb0 -> #1 (cpu_hotplug_lock.rw_sem){++++}: cpus_read_lock+0x34/0xa0 apply_workqueue_attrs+0xd/0x40 __alloc_workqueue_key+0x2c7/0x4e1 intel_guc_submission_init+0x10c/0x650 [i915] intel_uc_init_hw+0x29e/0x460 [i915] i915_gem_init_hw+0xca/0x290 [i915] i915_gem_init+0x115/0x3a0 [i915] i915_driver_load+0x9a8/0x16c0 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #0 (&dev->struct_mutex){+.+.}: lock_acquire+0xaf/0x200 __mutex_lock+0x81/0x9b0 i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 other info that might help us debug this: Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#5 --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&sb->s_type->i_mutex_key#5); lock(&mm->mmap_sem); lock(&dev->struct_mutex); *** DEADLOCK *** 1 lock held by debugfs_test/1351: #0: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 stack backtrace: CPU: 2 PID: 1351 Comm: debugfs_test Not tainted 4.15.0-rc2-CI-Patchwork_7428+ #1 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0057.2017.0119.1758 01/19/2017 Call Trace: dump_stack+0x5f/0x86 print_circular_bug+0x230/0x3b0 check_prev_add+0x439/0x7b0 ? lockdep_init_map_crosslock+0x20/0x20 ? unwind_get_return_address+0x16/0x30 ? __lock_acquire+0x1385/0x15a0 __lock_acquire+0x1385/0x15a0 lock_acquire+0xaf/0x200 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] __mutex_lock+0x81/0x9b0 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_mutex_lock_interruptible+0x47/0x130 [i915] ? __pm_runtime_resume+0x4f/0x80 i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 RIP: 0033:0x7f98d6f49116 RSP: 002b:00007ffd6ffc3278 EFLAGS: 00010283 RAX: 00007f98d39a2bc0 RBX: 0000000000000000 RCX: 0000000000001680 RDX: 0000000000001680 RSI: 00007ffd6ffc3400 RDI: 00007f98d39a2bc0 RBP: 00007ffd6ffc33a0 R08: 0000000000000000 R09: 00000000000005a0 R10: 000055e847c2a830 R11: 0000000000000002 R12: 0000000000000001 R13: 000055e847c1d040 R14: 00007ffd6ffc3400 R15: 00007f98d6752ba0 v2: Init preempt_work unconditionally (Chris) v3: Mention that we need the enable_guc=1 for lockdep splat (Chris) Testcase: igt/debugfs_test/read_all_entries # with i915.enable_guc=1 Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-2-michal.winiarski@intel.com
2017-12-13 22:13:47 +00:00
/*
* Even though both sending GuC action, and adding a new workitem to
* GuC workqueue are serialized (each with its own locking), since
* we're using mutliple engines, it's possible that we're going to
* issue a preempt request with two (or more - each for different
* engine) workitems in GuC queue. In this situation, GuC may submit
* all of them, which will make us very confused.
* Our preemption contexts may even already be complete - before we
* even had the chance to sent the preempt action to GuC!. Rather
* than introducing yet another lock, we can just use ordered workqueue
* to make sure we're always sending a single preemption request with a
* single workitem.
*/
if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) &&
USES_GUC_SUBMISSION(dev_priv)) {
guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt",
WQ_HIGHPRI);
if (!guc->preempt_wq) {
destroy_workqueue(guc->log.runtime.flush_wq);
drm/i915/guc: Fix lockdep due to log relay channel handling under struct_mutex This patch fixes lockdep issue due to circular locking dependency of struct_mutex, i_mutex_key, mmap_sem, relay_channels_mutex. For GuC log relay channel we create debugfs file that requires i_mutex_key lock and we are doing that under struct_mutex. So we introduced newer dependency as: &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem However, there is dependency from mmap_sem to struct_mutex. Hence we separate the relay create/destroy operation from under struct_mutex. Also added runtime check of relay buffer status. Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc6-CI-Patchwork_7614+ #1 Not tainted ------------------------------------------------------ debugfs_test/1388 is trying to acquire lock: (&dev->struct_mutex){+.+.}, at: [<00000000d5e1d915>] i915_mutex_lock_interruptible+0x47/0x130 [i915] but task is already holding lock: (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&mm->mmap_sem){++++}: _copy_to_user+0x1e/0x70 filldir+0x8c/0xf0 dcache_readdir+0xeb/0x160 iterate_dir+0xdc/0x140 SyS_getdents+0xa0/0x130 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #2 (&sb->s_type->i_mutex_key#3){++++}: start_creating+0x59/0x110 __debugfs_create_file+0x2e/0xe0 relay_create_buf_file+0x62/0x80 relay_late_setup_files+0x84/0x250 guc_log_late_setup+0x4f/0x110 [i915] i915_guc_log_register+0x32/0x40 [i915] i915_driver_load+0x7b6/0x1720 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #1 (relay_channels_mutex){+.+.}: relay_open+0x12c/0x2b0 intel_guc_log_runtime_create+0xab/0x230 [i915] intel_guc_init+0x81/0x120 [i915] intel_uc_init+0x29/0xa0 [i915] i915_gem_init+0x182/0x530 [i915] i915_driver_load+0xaa9/0x1720 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #0 (&dev->struct_mutex){+.+.}: __mutex_lock+0x81/0x9b0 i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_gem_fault+0x201/0x790 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x677/0xdc0 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x4c/0x60 other info that might help us debug this: Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&sb->s_type->i_mutex_key#3); lock(&mm->mmap_sem); lock(&dev->struct_mutex); *** DEADLOCK *** 1 lock held by debugfs_test/1388: #0: (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560 stack backtrace: CPU: 2 PID: 1388 Comm: debugfs_test Not tainted 4.15.0-rc6-CI-Patchwork_7614+ #1 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./J4205-ITX, BIOS P1.10 09/29/2016 Call Trace: dump_stack+0x5f/0x86 print_circular_bug.isra.18+0x1d0/0x2c0 __lock_acquire+0x14ae/0x1b60 ? lock_acquire+0xaf/0x200 lock_acquire+0xaf/0x200 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] __mutex_lock+0x81/0x9b0 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_mutex_lock_interruptible+0x47/0x130 [i915] ? __pm_runtime_resume+0x4f/0x80 i915_gem_fault+0x201/0x790 [i915] __do_fault+0x15/0x70 ? _raw_spin_unlock+0x29/0x40 __handle_mm_fault+0x677/0xdc0 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 ? page_fault+0x36/0x60 page_fault+0x4c/0x60 v2: Added lock protection to guc->log.runtime.relay_chan (Chris) Fixed locking inside guc_flush_logs uncovered by new lockdep. v3: Locking guc_read_update_log_buffer entirely with relay_lock. (Chris) Prepared intel_guc_init_early. Moved relay_lock inside relay_create relay_destroy, relay_file_create, guc_read_update_log_buffer. (Michal) Removed struct_mutex lock around guc_log_flush and removed usage of guc_log_has_relay() from runtime_create path as it needs struct_mutex lock. v4: Handle NULL relay sub buffer pointer earlier in read_update_log_buffer (Chris). Fixed comment suffix **/. (Michal) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104693 Testcase: igt/debugfs_test/read_all_entries # with enable_guc=1 and guc_log_level=1 Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Marta Lofstedt <marta.lofstedt@intel.com> Cc: Michal Winiarski <michal.winiarski@intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/1516808821-3638-3-git-send-email-sagar.a.kamble@intel.com
2018-01-24 15:46:58 +00:00
DRM_ERROR("Couldn't allocate workqueue for GuC "
"preemption\n");
drm/i915/guc: Move GuC workqueue allocations outside of the mutex This gets rid of the following lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc2-CI-Patchwork_7428+ #1 Not tainted ------------------------------------------------------ debugfs_test/1351 is trying to acquire lock: (&dev->struct_mutex){+.+.}, at: [<000000009d90d1a3>] i915_mutex_lock_interruptible+0x47/0x130 [i915] but task is already holding lock: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #6 (&mm->mmap_sem){++++}: __might_fault+0x63/0x90 _copy_to_user+0x1e/0x70 filldir+0x8c/0xf0 dcache_readdir+0xeb/0x160 iterate_dir+0xe6/0x150 SyS_getdents+0xa0/0x130 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #5 (&sb->s_type->i_mutex_key#5){++++}: lockref_get+0x9/0x20 -> #4 ((completion)&req.done){+.+.}: wait_for_common+0x54/0x210 devtmpfs_create_node+0x130/0x150 device_add+0x5ad/0x5e0 device_create_groups_vargs+0xd4/0xe0 device_create+0x35/0x40 msr_device_create+0x22/0x40 cpuhp_invoke_callback+0xc5/0xbf0 cpuhp_thread_fun+0x167/0x210 smpboot_thread_fn+0x17f/0x270 kthread+0x173/0x1b0 ret_from_fork+0x24/0x30 -> #3 (cpuhp_state-up){+.+.}: cpuhp_issue_call+0x132/0x1c0 __cpuhp_setup_state_cpuslocked+0x12f/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_writeback_init+0x3a/0x5c start_kernel+0x393/0x3e2 secondary_startup_64+0xa5/0xb0 -> #2 (cpuhp_state_mutex){+.+.}: __mutex_lock+0x81/0x9b0 __cpuhp_setup_state_cpuslocked+0x4b/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_alloc_init+0x1f/0x26 start_kernel+0x139/0x3e2 secondary_startup_64+0xa5/0xb0 -> #1 (cpu_hotplug_lock.rw_sem){++++}: cpus_read_lock+0x34/0xa0 apply_workqueue_attrs+0xd/0x40 __alloc_workqueue_key+0x2c7/0x4e1 intel_guc_submission_init+0x10c/0x650 [i915] intel_uc_init_hw+0x29e/0x460 [i915] i915_gem_init_hw+0xca/0x290 [i915] i915_gem_init+0x115/0x3a0 [i915] i915_driver_load+0x9a8/0x16c0 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #0 (&dev->struct_mutex){+.+.}: lock_acquire+0xaf/0x200 __mutex_lock+0x81/0x9b0 i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 other info that might help us debug this: Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#5 --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&sb->s_type->i_mutex_key#5); lock(&mm->mmap_sem); lock(&dev->struct_mutex); *** DEADLOCK *** 1 lock held by debugfs_test/1351: #0: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 stack backtrace: CPU: 2 PID: 1351 Comm: debugfs_test Not tainted 4.15.0-rc2-CI-Patchwork_7428+ #1 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0057.2017.0119.1758 01/19/2017 Call Trace: dump_stack+0x5f/0x86 print_circular_bug+0x230/0x3b0 check_prev_add+0x439/0x7b0 ? lockdep_init_map_crosslock+0x20/0x20 ? unwind_get_return_address+0x16/0x30 ? __lock_acquire+0x1385/0x15a0 __lock_acquire+0x1385/0x15a0 lock_acquire+0xaf/0x200 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] __mutex_lock+0x81/0x9b0 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_mutex_lock_interruptible+0x47/0x130 [i915] ? __pm_runtime_resume+0x4f/0x80 i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 RIP: 0033:0x7f98d6f49116 RSP: 002b:00007ffd6ffc3278 EFLAGS: 00010283 RAX: 00007f98d39a2bc0 RBX: 0000000000000000 RCX: 0000000000001680 RDX: 0000000000001680 RSI: 00007ffd6ffc3400 RDI: 00007f98d39a2bc0 RBP: 00007ffd6ffc33a0 R08: 0000000000000000 R09: 00000000000005a0 R10: 000055e847c2a830 R11: 0000000000000002 R12: 0000000000000001 R13: 000055e847c1d040 R14: 00007ffd6ffc3400 R15: 00007f98d6752ba0 v2: Init preempt_work unconditionally (Chris) v3: Mention that we need the enable_guc=1 for lockdep splat (Chris) Testcase: igt/debugfs_test/read_all_entries # with i915.enable_guc=1 Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-2-michal.winiarski@intel.com
2017-12-13 22:13:47 +00:00
return -ENOMEM;
}
}
return 0;
}
void intel_guc_fini_wq(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) &&
USES_GUC_SUBMISSION(dev_priv))
destroy_workqueue(guc->preempt_wq);
destroy_workqueue(guc->log.runtime.flush_wq);
}
drm/i915/guc: Move shared data allocation away from submission path We need shared data for actions (e.g. guc suspend/resume), and we're using those with GuC submission disabled. Let's introduce intel_guc_init and move shared data alloc there. This fixes GPF during module unload with HuC, but without GuC submission: BUG: unable to handle kernel NULL pointer dereference at 000000005aee7809 IP: intel_guc_suspend+0x34/0x140 [i915] PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: i915(O-) netconsole x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel mei_me i2c_i801 mei prime_numbers [last unloaded: i915] CPU: 2 PID: 2794 Comm: rmmod Tainted: G U W O 4.15.0-rc2+ #297 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0054.2016.0930.1102 09/30/2016 task: 0000000055945c61 task.stack: 00000000264ccb43 RIP: 0010:intel_guc_suspend+0x34/0x140 [i915] RSP: 0018:ffffc90000483df8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff880829180000 RCX: 0000000000000000 RDX: 0000000000000006 RSI: ffff880844c2c938 RDI: ffff880844c2c000 RBP: ffff880829180000 R08: 00000000a29c58c1 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffa040ba40 R13: ffffffffa040bab0 R14: ffff88084a195060 R15: 000055df3ef357a0 FS: 00007ff43c043740(0000) GS:ffff88084e200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000f9 CR3: 000000083f179005 CR4: 00000000003606e0 Call Trace: i915_gem_suspend+0x9d/0x130 [i915] ? i915_driver_unload+0x68/0x180 [i915] i915_driver_unload+0x70/0x180 [i915] i915_pci_remove+0x15/0x20 [i915] pci_device_remove+0x36/0xb0 device_release_driver_internal+0x15f/0x220 driver_detach+0x3a/0x80 bus_remove_driver+0x58/0xd0 pci_unregister_driver+0x29/0x90 SyS_delete_module+0x150/0x1e0 entry_SYSCALL_64_fastpath+0x23/0x9a RIP: 0033:0x7ff43b51b5c7 RSP: 002b:00007ffe6825a758 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007ff43b51b5c7 RDX: 000000000000000a RSI: 0000000000000800 RDI: 000055df3ef35808 RBP: 0000000000000000 R08: 00007ffe682596d1 R09: 0000000000000000 R10: 00007ff43b594880 R11: 0000000000000206 R12: 000055df3ef357a0 R13: 00007ffe68259740 R14: 000055df3ef35260 R15: 000055df3ef357a0 Code: 00 00 02 74 03 31 c0 c3 53 48 89 fb 48 83 ec 10 e8 52 0f f8 ff 48 b8 01 05 00 00 02 00 00 00 48 89 44 24 04 48 8b 83 00 12 00 00 <f6> 80 f9 00 00 00 01 0f 84 a7 00 00 00 f6 80 98 00 00 00 01 0f RIP: intel_guc_suspend+0x34/0x140 [i915] RSP: ffffc90000483df8 CR2: 00000000000000f9 ---[ end trace 23a192a61d937a3e ]--- Fixes: b8e5eb960b28 ("drm/i915/guc: Allocate separate shared data object for GuC communication") Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-1-michal.winiarski@intel.com
2017-12-13 22:13:46 +00:00
static int guc_shared_data_create(struct intel_guc *guc)
{
struct i915_vma *vma;
void *vaddr;
vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
if (IS_ERR(vma))
return PTR_ERR(vma);
vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
if (IS_ERR(vaddr)) {
i915_vma_unpin_and_release(&vma);
return PTR_ERR(vaddr);
}
guc->shared_data = vma;
guc->shared_data_vaddr = vaddr;
return 0;
}
static void guc_shared_data_destroy(struct intel_guc *guc)
{
i915_gem_object_unpin_map(guc->shared_data->obj);
i915_vma_unpin_and_release(&guc->shared_data);
}
int intel_guc_init(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
int ret;
ret = guc_shared_data_create(guc);
if (ret)
return ret;
GEM_BUG_ON(!guc->shared_data);
ret = intel_guc_log_create(guc);
if (ret)
goto err_shared;
ret = intel_guc_ads_create(guc);
if (ret)
goto err_log;
GEM_BUG_ON(!guc->ads_vma);
drm/i915/guc: Move shared data allocation away from submission path We need shared data for actions (e.g. guc suspend/resume), and we're using those with GuC submission disabled. Let's introduce intel_guc_init and move shared data alloc there. This fixes GPF during module unload with HuC, but without GuC submission: BUG: unable to handle kernel NULL pointer dereference at 000000005aee7809 IP: intel_guc_suspend+0x34/0x140 [i915] PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: i915(O-) netconsole x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel mei_me i2c_i801 mei prime_numbers [last unloaded: i915] CPU: 2 PID: 2794 Comm: rmmod Tainted: G U W O 4.15.0-rc2+ #297 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0054.2016.0930.1102 09/30/2016 task: 0000000055945c61 task.stack: 00000000264ccb43 RIP: 0010:intel_guc_suspend+0x34/0x140 [i915] RSP: 0018:ffffc90000483df8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff880829180000 RCX: 0000000000000000 RDX: 0000000000000006 RSI: ffff880844c2c938 RDI: ffff880844c2c000 RBP: ffff880829180000 R08: 00000000a29c58c1 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffa040ba40 R13: ffffffffa040bab0 R14: ffff88084a195060 R15: 000055df3ef357a0 FS: 00007ff43c043740(0000) GS:ffff88084e200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000f9 CR3: 000000083f179005 CR4: 00000000003606e0 Call Trace: i915_gem_suspend+0x9d/0x130 [i915] ? i915_driver_unload+0x68/0x180 [i915] i915_driver_unload+0x70/0x180 [i915] i915_pci_remove+0x15/0x20 [i915] pci_device_remove+0x36/0xb0 device_release_driver_internal+0x15f/0x220 driver_detach+0x3a/0x80 bus_remove_driver+0x58/0xd0 pci_unregister_driver+0x29/0x90 SyS_delete_module+0x150/0x1e0 entry_SYSCALL_64_fastpath+0x23/0x9a RIP: 0033:0x7ff43b51b5c7 RSP: 002b:00007ffe6825a758 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007ff43b51b5c7 RDX: 000000000000000a RSI: 0000000000000800 RDI: 000055df3ef35808 RBP: 0000000000000000 R08: 00007ffe682596d1 R09: 0000000000000000 R10: 00007ff43b594880 R11: 0000000000000206 R12: 000055df3ef357a0 R13: 00007ffe68259740 R14: 000055df3ef35260 R15: 000055df3ef357a0 Code: 00 00 02 74 03 31 c0 c3 53 48 89 fb 48 83 ec 10 e8 52 0f f8 ff 48 b8 01 05 00 00 02 00 00 00 48 89 44 24 04 48 8b 83 00 12 00 00 <f6> 80 f9 00 00 00 01 0f 84 a7 00 00 00 f6 80 98 00 00 00 01 0f RIP: intel_guc_suspend+0x34/0x140 [i915] RSP: ffffc90000483df8 CR2: 00000000000000f9 ---[ end trace 23a192a61d937a3e ]--- Fixes: b8e5eb960b28 ("drm/i915/guc: Allocate separate shared data object for GuC communication") Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-1-michal.winiarski@intel.com
2017-12-13 22:13:46 +00:00
/* We need to notify the guc whenever we change the GGTT */
i915_ggtt_enable_guc(dev_priv);
return 0;
err_log:
intel_guc_log_destroy(guc);
err_shared:
guc_shared_data_destroy(guc);
return ret;
drm/i915/guc: Move shared data allocation away from submission path We need shared data for actions (e.g. guc suspend/resume), and we're using those with GuC submission disabled. Let's introduce intel_guc_init and move shared data alloc there. This fixes GPF during module unload with HuC, but without GuC submission: BUG: unable to handle kernel NULL pointer dereference at 000000005aee7809 IP: intel_guc_suspend+0x34/0x140 [i915] PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: i915(O-) netconsole x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel mei_me i2c_i801 mei prime_numbers [last unloaded: i915] CPU: 2 PID: 2794 Comm: rmmod Tainted: G U W O 4.15.0-rc2+ #297 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0054.2016.0930.1102 09/30/2016 task: 0000000055945c61 task.stack: 00000000264ccb43 RIP: 0010:intel_guc_suspend+0x34/0x140 [i915] RSP: 0018:ffffc90000483df8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff880829180000 RCX: 0000000000000000 RDX: 0000000000000006 RSI: ffff880844c2c938 RDI: ffff880844c2c000 RBP: ffff880829180000 R08: 00000000a29c58c1 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffa040ba40 R13: ffffffffa040bab0 R14: ffff88084a195060 R15: 000055df3ef357a0 FS: 00007ff43c043740(0000) GS:ffff88084e200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000f9 CR3: 000000083f179005 CR4: 00000000003606e0 Call Trace: i915_gem_suspend+0x9d/0x130 [i915] ? i915_driver_unload+0x68/0x180 [i915] i915_driver_unload+0x70/0x180 [i915] i915_pci_remove+0x15/0x20 [i915] pci_device_remove+0x36/0xb0 device_release_driver_internal+0x15f/0x220 driver_detach+0x3a/0x80 bus_remove_driver+0x58/0xd0 pci_unregister_driver+0x29/0x90 SyS_delete_module+0x150/0x1e0 entry_SYSCALL_64_fastpath+0x23/0x9a RIP: 0033:0x7ff43b51b5c7 RSP: 002b:00007ffe6825a758 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007ff43b51b5c7 RDX: 000000000000000a RSI: 0000000000000800 RDI: 000055df3ef35808 RBP: 0000000000000000 R08: 00007ffe682596d1 R09: 0000000000000000 R10: 00007ff43b594880 R11: 0000000000000206 R12: 000055df3ef357a0 R13: 00007ffe68259740 R14: 000055df3ef35260 R15: 000055df3ef357a0 Code: 00 00 02 74 03 31 c0 c3 53 48 89 fb 48 83 ec 10 e8 52 0f f8 ff 48 b8 01 05 00 00 02 00 00 00 48 89 44 24 04 48 8b 83 00 12 00 00 <f6> 80 f9 00 00 00 01 0f 84 a7 00 00 00 f6 80 98 00 00 00 01 0f RIP: intel_guc_suspend+0x34/0x140 [i915] RSP: ffffc90000483df8 CR2: 00000000000000f9 ---[ end trace 23a192a61d937a3e ]--- Fixes: b8e5eb960b28 ("drm/i915/guc: Allocate separate shared data object for GuC communication") Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-1-michal.winiarski@intel.com
2017-12-13 22:13:46 +00:00
}
void intel_guc_fini(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
i915_ggtt_disable_guc(dev_priv);
intel_guc_ads_destroy(guc);
intel_guc_log_destroy(guc);
drm/i915/guc: Move shared data allocation away from submission path We need shared data for actions (e.g. guc suspend/resume), and we're using those with GuC submission disabled. Let's introduce intel_guc_init and move shared data alloc there. This fixes GPF during module unload with HuC, but without GuC submission: BUG: unable to handle kernel NULL pointer dereference at 000000005aee7809 IP: intel_guc_suspend+0x34/0x140 [i915] PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: i915(O-) netconsole x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel mei_me i2c_i801 mei prime_numbers [last unloaded: i915] CPU: 2 PID: 2794 Comm: rmmod Tainted: G U W O 4.15.0-rc2+ #297 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0054.2016.0930.1102 09/30/2016 task: 0000000055945c61 task.stack: 00000000264ccb43 RIP: 0010:intel_guc_suspend+0x34/0x140 [i915] RSP: 0018:ffffc90000483df8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff880829180000 RCX: 0000000000000000 RDX: 0000000000000006 RSI: ffff880844c2c938 RDI: ffff880844c2c000 RBP: ffff880829180000 R08: 00000000a29c58c1 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffa040ba40 R13: ffffffffa040bab0 R14: ffff88084a195060 R15: 000055df3ef357a0 FS: 00007ff43c043740(0000) GS:ffff88084e200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000f9 CR3: 000000083f179005 CR4: 00000000003606e0 Call Trace: i915_gem_suspend+0x9d/0x130 [i915] ? i915_driver_unload+0x68/0x180 [i915] i915_driver_unload+0x70/0x180 [i915] i915_pci_remove+0x15/0x20 [i915] pci_device_remove+0x36/0xb0 device_release_driver_internal+0x15f/0x220 driver_detach+0x3a/0x80 bus_remove_driver+0x58/0xd0 pci_unregister_driver+0x29/0x90 SyS_delete_module+0x150/0x1e0 entry_SYSCALL_64_fastpath+0x23/0x9a RIP: 0033:0x7ff43b51b5c7 RSP: 002b:00007ffe6825a758 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007ff43b51b5c7 RDX: 000000000000000a RSI: 0000000000000800 RDI: 000055df3ef35808 RBP: 0000000000000000 R08: 00007ffe682596d1 R09: 0000000000000000 R10: 00007ff43b594880 R11: 0000000000000206 R12: 000055df3ef357a0 R13: 00007ffe68259740 R14: 000055df3ef35260 R15: 000055df3ef357a0 Code: 00 00 02 74 03 31 c0 c3 53 48 89 fb 48 83 ec 10 e8 52 0f f8 ff 48 b8 01 05 00 00 02 00 00 00 48 89 44 24 04 48 8b 83 00 12 00 00 <f6> 80 f9 00 00 00 01 0f 84 a7 00 00 00 f6 80 98 00 00 00 01 0f RIP: intel_guc_suspend+0x34/0x140 [i915] RSP: ffffc90000483df8 CR2: 00000000000000f9 ---[ end trace 23a192a61d937a3e ]--- Fixes: b8e5eb960b28 ("drm/i915/guc: Allocate separate shared data object for GuC communication") Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-1-michal.winiarski@intel.com
2017-12-13 22:13:46 +00:00
guc_shared_data_destroy(guc);
}
static u32 get_gt_type(struct drm_i915_private *dev_priv)
{
/* XXX: GT type based on PCI device ID? field seems unused by fw */
return 0;
}
static u32 get_core_family(struct drm_i915_private *dev_priv)
{
u32 gen = INTEL_GEN(dev_priv);
switch (gen) {
case 9:
return GUC_CORE_FAMILY_GEN9;
default:
MISSING_CASE(gen);
return GUC_CORE_FAMILY_UNKNOWN;
}
}
static u32 get_log_verbosity_flags(void)
{
if (i915_modparams.guc_log_level > 0) {
u32 verbosity = i915_modparams.guc_log_level - 1;
GEM_BUG_ON(verbosity > GUC_LOG_VERBOSITY_MAX);
return verbosity << GUC_LOG_VERBOSITY_SHIFT;
}
GEM_BUG_ON(i915_modparams.enable_guc < 0);
return GUC_LOG_DISABLED;
}
/*
* Initialise the GuC parameter block before starting the firmware
* transfer. These parameters are read by the firmware on startup
* and cannot be changed thereafter.
*/
void intel_guc_init_params(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
u32 params[GUC_CTL_MAX_DWORDS];
int i;
memset(params, 0, sizeof(params));
params[GUC_CTL_DEVICE_INFO] |=
(get_gt_type(dev_priv) << GUC_CTL_GT_TYPE_SHIFT) |
(get_core_family(dev_priv) << GUC_CTL_CORE_FAMILY_SHIFT);
/*
* GuC ARAT increment is 10 ns. GuC default scheduler quantum is one
* second. This ARAR is calculated by:
* Scheduler-Quantum-in-ns / ARAT-increment-in-ns = 1000000000 / 10
*/
params[GUC_CTL_ARAT_HIGH] = 0;
params[GUC_CTL_ARAT_LOW] = 100000000;
params[GUC_CTL_WA] |= GUC_CTL_WA_UK_BY_DRIVER;
params[GUC_CTL_FEATURE] |= GUC_CTL_DISABLE_SCHEDULER |
GUC_CTL_VCS2_ENABLED;
params[GUC_CTL_LOG_PARAMS] = guc->log.flags;
params[GUC_CTL_DEBUG] = get_log_verbosity_flags();
/* If GuC submission is enabled, set up additional parameters here */
if (USES_GUC_SUBMISSION(dev_priv)) {
u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT;
u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool);
u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16;
params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT;
params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED;
pgs >>= PAGE_SHIFT;
params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) |
(ctx_in_16 << GUC_CTL_CTXNUM_IN16_SHIFT);
params[GUC_CTL_FEATURE] |= GUC_CTL_KERNEL_SUBMISSIONS;
/* Unmask this bit to enable the GuC's internal scheduler */
params[GUC_CTL_FEATURE] &= ~GUC_CTL_DISABLE_SCHEDULER;
}
/*
* All SOFT_SCRATCH registers are in FORCEWAKE_BLITTER domain and
* they are power context saved so it's ok to release forcewake
* when we are done here and take it again at xfer time.
*/
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_BLITTER);
I915_WRITE(SOFT_SCRATCH(0), 0);
for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
I915_WRITE(SOFT_SCRATCH(1 + i), params[i]);
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_BLITTER);
}
int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len)
{
WARN(1, "Unexpected send: action=%#x\n", *action);
return -ENODEV;
}
/*
* This function implements the MMIO based host to GuC interface.
*/
int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
u32 status;
int i;
int ret;
GEM_BUG_ON(!len);
GEM_BUG_ON(len > guc->send_regs.count);
/* If CT is available, we expect to use MMIO only during init/fini */
GEM_BUG_ON(HAS_GUC_CT(dev_priv) &&
*action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER &&
*action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER);
mutex_lock(&guc->send_mutex);
intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains);
for (i = 0; i < len; i++)
I915_WRITE(guc_send_reg(guc, i), action[i]);
POSTING_READ(guc_send_reg(guc, i - 1));
intel_guc_notify(guc);
/*
* No GuC command should ever take longer than 10ms.
* Fast commands should still complete in 10us.
*/
ret = __intel_wait_for_register_fw(dev_priv,
guc_send_reg(guc, 0),
INTEL_GUC_RECV_MASK,
INTEL_GUC_RECV_MASK,
10, 10, &status);
if (status != INTEL_GUC_STATUS_SUCCESS) {
/*
* Either the GuC explicitly returned an error (which
* we convert to -EIO here) or no response at all was
* received within the timeout limit (-ETIMEDOUT)
*/
if (ret != -ETIMEDOUT)
ret = -EIO;
DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;"
" ret=%d status=0x%08X response=0x%08X\n",
action[0], ret, status, I915_READ(SOFT_SCRATCH(15)));
}
intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains);
mutex_unlock(&guc->send_mutex);
return ret;
}
void intel_guc_to_host_event_handler(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
u32 msg, flush;
/*
* Sample the log buffer flush related bits & clear them out now
* itself from the message identity register to minimize the
* probability of losing a flush interrupt, when there are back
* to back flush interrupts.
* There can be a new flush interrupt, for different log buffer
* type (like for ISR), whilst Host is handling one (for DPC).
* Since same bit is used in message register for ISR & DPC, it
* could happen that GuC sets the bit for 2nd interrupt but Host
* clears out the bit on handling the 1st interrupt.
*/
msg = I915_READ(SOFT_SCRATCH(15));
flush = msg & (INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED |
INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER);
if (flush) {
/* Clear the message bits that are handled */
I915_WRITE(SOFT_SCRATCH(15), msg & ~flush);
/* Handle flush interrupt in bottom half */
queue_work(guc->log.runtime.flush_wq,
&guc->log.runtime.flush_work);
guc->log.flush_interrupt_count++;
} else {
/*
* Not clearing of unhandled event bits won't result in
* re-triggering of the interrupt.
*/
}
}
int intel_guc_sample_forcewake(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
u32 action[2];
action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE;
/* WaRsDisableCoarsePowerGating:skl,cnl */
if (!HAS_RC6(dev_priv) || NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
action[1] = 0;
else
/* bit 0 and 1 are for Render and Media domain separately */
action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA;
return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
/**
* intel_guc_auth_huc() - Send action to GuC to authenticate HuC ucode
* @guc: intel_guc structure
* @rsa_offset: rsa offset w.r.t ggtt base of huc vma
*
* Triggers a HuC firmware authentication request to the GuC via intel_guc_send
* INTEL_GUC_ACTION_AUTHENTICATE_HUC interface. This function is invoked by
* intel_huc_auth().
*
* Return: non-zero code on error
*/
int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset)
{
u32 action[] = {
INTEL_GUC_ACTION_AUTHENTICATE_HUC,
rsa_offset
};
return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
/**
* intel_guc_suspend() - notify GuC entering suspend state
* @guc: the guc
*/
int intel_guc_suspend(struct intel_guc *guc)
{
u32 data[] = {
INTEL_GUC_ACTION_ENTER_S_STATE,
GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */
guc_ggtt_offset(guc->shared_data)
};
return intel_guc_send(guc, data, ARRAY_SIZE(data));
}
drm/i915/guc: Add support for reset engine using GuC commands This patch adds per engine reset and recovery (TDR) support when GuC is used to submit workloads to GPU. In the case of i915 directly submission to ELSP, driver manages hang detection, recovery and resubmission. With GuC submission these tasks are shared between driver and GuC. i915 is still responsible for detecting a hang, and when it does it only requests GuC to reset that Engine. GuC internally manages acquiring forcewake and idling the engine before resetting it. Once the reset is successful, i915 takes over again and handles the resubmission. The scheduler in i915 knows which requests are pending so after resetting a engine, pending workloads/requests are resubmitted again. v2: s/i915_guc_request_engine_reset/i915_guc_reset_engine/ to match the non-guc function names. v3: Removed debug message about engine restarting from which request, since the new baseline do it regardless of submission mode. (Chris) v4: Rebase. v5: Do not pass unnecessary reporting flags to the fw (Jeff); tasklet_schedule(&execlists->irq_tasklet) handles the resubmit; rebase. v6: Rename the existing reset engine function and share a similar interface between guc and non-guc paths (Chris). Signed-off-by: Michel Thierry <michel.thierry@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20171031225309.10888-1-michel.thierry@intel.com Reviewed-by: Jeff McGee <jeff.mcgee@intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
2017-10-31 22:53:09 +00:00
/**
* intel_guc_reset_engine() - ask GuC to reset an engine
* @guc: intel_guc structure
* @engine: engine to be reset
*/
int intel_guc_reset_engine(struct intel_guc *guc,
struct intel_engine_cs *engine)
{
u32 data[7];
GEM_BUG_ON(!guc->execbuf_client);
data[0] = INTEL_GUC_ACTION_REQUEST_ENGINE_RESET;
data[1] = engine->guc_id;
data[2] = 0;
data[3] = 0;
data[4] = 0;
data[5] = guc->execbuf_client->stage_id;
data[6] = guc_ggtt_offset(guc->shared_data);
return intel_guc_send(guc, data, ARRAY_SIZE(data));
}
/**
* intel_guc_resume() - notify GuC resuming from suspend state
* @guc: the guc
*/
int intel_guc_resume(struct intel_guc *guc)
{
u32 data[] = {
INTEL_GUC_ACTION_EXIT_S_STATE,
GUC_POWER_D0,
guc_ggtt_offset(guc->shared_data)
};
return intel_guc_send(guc, data, ARRAY_SIZE(data));
}
/**
* intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage
* @guc: the guc
* @size: size of area to allocate (both virtual space and memory)
*
* This is a wrapper to create an object for use with the GuC. In order to
* use it inside the GuC, an object needs to be pinned lifetime, so we allocate
* both some backing storage and a range inside the Global GTT. We must pin
* it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that
* range is reserved inside GuC.
*
* Return: A i915_vma if successful, otherwise an ERR_PTR.
*/
struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
int ret;
obj = i915_gem_object_create(dev_priv, size);
if (IS_ERR(obj))
return ERR_CAST(obj);
vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
if (IS_ERR(vma))
goto err;
ret = i915_vma_pin(vma, 0, PAGE_SIZE,
PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
if (ret) {
vma = ERR_PTR(ret);
goto err;
}
return vma;
err:
i915_gem_object_put(obj);
return vma;
}
u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv)
{
u32 wopcm_size = GUC_WOPCM_TOP;
/* On BXT, the top of WOPCM is reserved for RC6 context */
if (IS_GEN9_LP(dev_priv))
wopcm_size -= BXT_GUC_WOPCM_RC6_RESERVED;
return wopcm_size;
}