linux/drivers/gpu/drm/i915/gt/uc/intel_uc.c

634 lines
15 KiB
C
Raw Normal View History

// SPDX-License-Identifier: MIT
/*
* Copyright © 2016-2019 Intel Corporation
*/
#include "gt/intel_gt.h"
#include "gt/intel_reset.h"
drm/i915/guc: Move shared data allocation away from submission path We need shared data for actions (e.g. guc suspend/resume), and we're using those with GuC submission disabled. Let's introduce intel_guc_init and move shared data alloc there. This fixes GPF during module unload with HuC, but without GuC submission: BUG: unable to handle kernel NULL pointer dereference at 000000005aee7809 IP: intel_guc_suspend+0x34/0x140 [i915] PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: i915(O-) netconsole x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel mei_me i2c_i801 mei prime_numbers [last unloaded: i915] CPU: 2 PID: 2794 Comm: rmmod Tainted: G U W O 4.15.0-rc2+ #297 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0054.2016.0930.1102 09/30/2016 task: 0000000055945c61 task.stack: 00000000264ccb43 RIP: 0010:intel_guc_suspend+0x34/0x140 [i915] RSP: 0018:ffffc90000483df8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff880829180000 RCX: 0000000000000000 RDX: 0000000000000006 RSI: ffff880844c2c938 RDI: ffff880844c2c000 RBP: ffff880829180000 R08: 00000000a29c58c1 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffa040ba40 R13: ffffffffa040bab0 R14: ffff88084a195060 R15: 000055df3ef357a0 FS: 00007ff43c043740(0000) GS:ffff88084e200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000f9 CR3: 000000083f179005 CR4: 00000000003606e0 Call Trace: i915_gem_suspend+0x9d/0x130 [i915] ? i915_driver_unload+0x68/0x180 [i915] i915_driver_unload+0x70/0x180 [i915] i915_pci_remove+0x15/0x20 [i915] pci_device_remove+0x36/0xb0 device_release_driver_internal+0x15f/0x220 driver_detach+0x3a/0x80 bus_remove_driver+0x58/0xd0 pci_unregister_driver+0x29/0x90 SyS_delete_module+0x150/0x1e0 entry_SYSCALL_64_fastpath+0x23/0x9a RIP: 0033:0x7ff43b51b5c7 RSP: 002b:00007ffe6825a758 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007ff43b51b5c7 RDX: 000000000000000a RSI: 0000000000000800 RDI: 000055df3ef35808 RBP: 0000000000000000 R08: 00007ffe682596d1 R09: 0000000000000000 R10: 00007ff43b594880 R11: 0000000000000206 R12: 000055df3ef357a0 R13: 00007ffe68259740 R14: 000055df3ef35260 R15: 000055df3ef357a0 Code: 00 00 02 74 03 31 c0 c3 53 48 89 fb 48 83 ec 10 e8 52 0f f8 ff 48 b8 01 05 00 00 02 00 00 00 48 89 44 24 04 48 8b 83 00 12 00 00 <f6> 80 f9 00 00 00 01 0f 84 a7 00 00 00 f6 80 98 00 00 00 01 0f RIP: intel_guc_suspend+0x34/0x140 [i915] RSP: ffffc90000483df8 CR2: 00000000000000f9 ---[ end trace 23a192a61d937a3e ]--- Fixes: b8e5eb960b28 ("drm/i915/guc: Allocate separate shared data object for GuC communication") Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-1-michal.winiarski@intel.com
2017-12-13 22:13:46 +00:00
#include "intel_guc.h"
#include "intel_guc_ads.h"
#include "intel_guc_submission.h"
#include "intel_uc.h"
#include "i915_drv.h"
/* Reset GuC providing us with fresh state for both GuC and HuC.
*/
static int __intel_uc_reset_hw(struct intel_uc *uc)
{
struct intel_gt *gt = uc_to_gt(uc);
int ret;
u32 guc_status;
ret = i915_inject_load_error(gt->i915, -ENXIO);
if (ret)
return ret;
ret = intel_reset_guc(gt);
if (ret) {
DRM_ERROR("Failed to reset GuC, ret = %d\n", ret);
return ret;
}
guc_status = intel_uncore_read(gt->uncore, GUC_STATUS);
WARN(!(guc_status & GS_MIA_IN_RESET),
"GuC status: 0x%x, MIA core expected to be in reset\n",
guc_status);
return ret;
}
static void __confirm_options(struct intel_uc *uc)
{
struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
DRM_DEV_DEBUG_DRIVER(i915->drm.dev,
"enable_guc=%d (guc:%s submission:%s huc:%s)\n",
i915_modparams.enable_guc,
yesno(intel_uc_uses_guc(uc)),
yesno(intel_uc_uses_guc_submission(uc)),
yesno(intel_uc_uses_huc(uc)));
if (i915_modparams.enable_guc == -1)
return;
if (i915_modparams.enable_guc == 0) {
GEM_BUG_ON(intel_uc_uses_guc(uc));
GEM_BUG_ON(intel_uc_uses_guc_submission(uc));
GEM_BUG_ON(intel_uc_uses_huc(uc));
return;
}
if (!intel_uc_supports_guc(uc))
dev_info(i915->drm.dev,
"Incompatible option enable_guc=%d - %s\n",
i915_modparams.enable_guc, "GuC is not supported!");
if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC &&
!intel_uc_supports_huc(uc))
dev_info(i915->drm.dev,
"Incompatible option enable_guc=%d - %s\n",
i915_modparams.enable_guc, "HuC is not supported!");
if (i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION &&
!intel_uc_supports_guc_submission(uc))
dev_info(i915->drm.dev,
"Incompatible option enable_guc=%d - %s\n",
i915_modparams.enable_guc, "GuC submission is N/A");
if (i915_modparams.enable_guc & ~(ENABLE_GUC_SUBMISSION |
ENABLE_GUC_LOAD_HUC))
dev_info(i915->drm.dev,
"Incompatible option enable_guc=%d - %s\n",
i915_modparams.enable_guc, "undocumented flag");
}
void intel_uc_init_early(struct intel_uc *uc)
{
intel_guc_init_early(&uc->guc);
intel_huc_init_early(&uc->huc);
__confirm_options(uc);
}
void intel_uc_driver_late_release(struct intel_uc *uc)
{
}
/**
* intel_uc_init_mmio - setup uC MMIO access
* @uc: the intel_uc structure
*
* Setup minimal state necessary for MMIO accesses later in the
* initialization sequence.
*/
void intel_uc_init_mmio(struct intel_uc *uc)
{
intel_guc_init_send_regs(&uc->guc);
}
drm/i915/uc: Move GuC error log to uc and release it on fini When we fail to load GuC and want to abort probe, we hit: <7> [229.915779] i915 0000:00:02.0: [drm:intel_uc_init_hw [i915]] GuC initialization failed -6 <7> [229.915813] i915 0000:00:02.0: [drm:i915_gem_init_hw [i915]] Enabling uc failed (-6) <4> [229.953354] ------------[ cut here ]------------ <4> [229.953355] WARN_ON(dev_priv->mm.shrink_count) <4> [229.953406] WARNING: CPU: 9 PID: 3287 at drivers/gpu/drm/i915/i915_gem.c:1684 i915_gem_cleanup_early+0xfc/0x110 [i915] <4> [229.953464] Call Trace: <4> [229.953489] i915_driver_late_release+0x19/0x60 [i915] <4> [229.953514] i915_driver_probe+0xb82/0x18a0 [i915] <4> [229.953519] ? __pm_runtime_resume+0x4f/0x80 <4> [229.953545] i915_pci_probe+0x43/0x1b0 [i915] ... <4> [229.962951] ------------[ cut here ]------------ <4> [229.962956] DEBUG_LOCKS_WARN_ON(lock->magic != lock) <4> [229.962959] WARNING: CPU: 8 PID: 2395 at kernel/locking/mutex.c:912 __mutex_lock+0x750/0x9b0 <4> [229.963091] Call Trace: <4> [229.963129] ? i915_vma_destroy+0x86/0x350 [i915] <4> [229.963166] ? i915_vma_destroy+0x86/0x350 [i915] <4> [229.963201] i915_vma_destroy+0x86/0x350 [i915] <4> [229.963236] __i915_gem_free_objects+0xb8/0x510 [i915] <4> [229.963270] __i915_gem_free_work+0x5a/0x90 [i915] <4> [229.963275] process_one_work+0x245/0x610 as since commit 6f76098fe0f3 ("drm/i915/uc: Move uC early functions inside the GT ones") we cleanup uc after gem. Move captured GuC load error log to uc struct and release it in intel_uc_fini() instead of intel_uc_driver_late_release() Note that intel_uc_driver_late_release() is now empty, but we can leave it as a placeholder for future code. Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190802184055.31988-5-michal.wajdeczko@intel.com
2019-08-02 18:40:53 +00:00
static void __uc_capture_load_err_log(struct intel_uc *uc)
{
drm/i915/uc: Move GuC error log to uc and release it on fini When we fail to load GuC and want to abort probe, we hit: <7> [229.915779] i915 0000:00:02.0: [drm:intel_uc_init_hw [i915]] GuC initialization failed -6 <7> [229.915813] i915 0000:00:02.0: [drm:i915_gem_init_hw [i915]] Enabling uc failed (-6) <4> [229.953354] ------------[ cut here ]------------ <4> [229.953355] WARN_ON(dev_priv->mm.shrink_count) <4> [229.953406] WARNING: CPU: 9 PID: 3287 at drivers/gpu/drm/i915/i915_gem.c:1684 i915_gem_cleanup_early+0xfc/0x110 [i915] <4> [229.953464] Call Trace: <4> [229.953489] i915_driver_late_release+0x19/0x60 [i915] <4> [229.953514] i915_driver_probe+0xb82/0x18a0 [i915] <4> [229.953519] ? __pm_runtime_resume+0x4f/0x80 <4> [229.953545] i915_pci_probe+0x43/0x1b0 [i915] ... <4> [229.962951] ------------[ cut here ]------------ <4> [229.962956] DEBUG_LOCKS_WARN_ON(lock->magic != lock) <4> [229.962959] WARNING: CPU: 8 PID: 2395 at kernel/locking/mutex.c:912 __mutex_lock+0x750/0x9b0 <4> [229.963091] Call Trace: <4> [229.963129] ? i915_vma_destroy+0x86/0x350 [i915] <4> [229.963166] ? i915_vma_destroy+0x86/0x350 [i915] <4> [229.963201] i915_vma_destroy+0x86/0x350 [i915] <4> [229.963236] __i915_gem_free_objects+0xb8/0x510 [i915] <4> [229.963270] __i915_gem_free_work+0x5a/0x90 [i915] <4> [229.963275] process_one_work+0x245/0x610 as since commit 6f76098fe0f3 ("drm/i915/uc: Move uC early functions inside the GT ones") we cleanup uc after gem. Move captured GuC load error log to uc struct and release it in intel_uc_fini() instead of intel_uc_driver_late_release() Note that intel_uc_driver_late_release() is now empty, but we can leave it as a placeholder for future code. Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190802184055.31988-5-michal.wajdeczko@intel.com
2019-08-02 18:40:53 +00:00
struct intel_guc *guc = &uc->guc;
drm/i915/uc: Move GuC error log to uc and release it on fini When we fail to load GuC and want to abort probe, we hit: <7> [229.915779] i915 0000:00:02.0: [drm:intel_uc_init_hw [i915]] GuC initialization failed -6 <7> [229.915813] i915 0000:00:02.0: [drm:i915_gem_init_hw [i915]] Enabling uc failed (-6) <4> [229.953354] ------------[ cut here ]------------ <4> [229.953355] WARN_ON(dev_priv->mm.shrink_count) <4> [229.953406] WARNING: CPU: 9 PID: 3287 at drivers/gpu/drm/i915/i915_gem.c:1684 i915_gem_cleanup_early+0xfc/0x110 [i915] <4> [229.953464] Call Trace: <4> [229.953489] i915_driver_late_release+0x19/0x60 [i915] <4> [229.953514] i915_driver_probe+0xb82/0x18a0 [i915] <4> [229.953519] ? __pm_runtime_resume+0x4f/0x80 <4> [229.953545] i915_pci_probe+0x43/0x1b0 [i915] ... <4> [229.962951] ------------[ cut here ]------------ <4> [229.962956] DEBUG_LOCKS_WARN_ON(lock->magic != lock) <4> [229.962959] WARNING: CPU: 8 PID: 2395 at kernel/locking/mutex.c:912 __mutex_lock+0x750/0x9b0 <4> [229.963091] Call Trace: <4> [229.963129] ? i915_vma_destroy+0x86/0x350 [i915] <4> [229.963166] ? i915_vma_destroy+0x86/0x350 [i915] <4> [229.963201] i915_vma_destroy+0x86/0x350 [i915] <4> [229.963236] __i915_gem_free_objects+0xb8/0x510 [i915] <4> [229.963270] __i915_gem_free_work+0x5a/0x90 [i915] <4> [229.963275] process_one_work+0x245/0x610 as since commit 6f76098fe0f3 ("drm/i915/uc: Move uC early functions inside the GT ones") we cleanup uc after gem. Move captured GuC load error log to uc struct and release it in intel_uc_fini() instead of intel_uc_driver_late_release() Note that intel_uc_driver_late_release() is now empty, but we can leave it as a placeholder for future code. Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190802184055.31988-5-michal.wajdeczko@intel.com
2019-08-02 18:40:53 +00:00
if (guc->log.vma && !uc->load_err_log)
uc->load_err_log = i915_gem_object_get(guc->log.vma->obj);
}
drm/i915/uc: Move GuC error log to uc and release it on fini When we fail to load GuC and want to abort probe, we hit: <7> [229.915779] i915 0000:00:02.0: [drm:intel_uc_init_hw [i915]] GuC initialization failed -6 <7> [229.915813] i915 0000:00:02.0: [drm:i915_gem_init_hw [i915]] Enabling uc failed (-6) <4> [229.953354] ------------[ cut here ]------------ <4> [229.953355] WARN_ON(dev_priv->mm.shrink_count) <4> [229.953406] WARNING: CPU: 9 PID: 3287 at drivers/gpu/drm/i915/i915_gem.c:1684 i915_gem_cleanup_early+0xfc/0x110 [i915] <4> [229.953464] Call Trace: <4> [229.953489] i915_driver_late_release+0x19/0x60 [i915] <4> [229.953514] i915_driver_probe+0xb82/0x18a0 [i915] <4> [229.953519] ? __pm_runtime_resume+0x4f/0x80 <4> [229.953545] i915_pci_probe+0x43/0x1b0 [i915] ... <4> [229.962951] ------------[ cut here ]------------ <4> [229.962956] DEBUG_LOCKS_WARN_ON(lock->magic != lock) <4> [229.962959] WARNING: CPU: 8 PID: 2395 at kernel/locking/mutex.c:912 __mutex_lock+0x750/0x9b0 <4> [229.963091] Call Trace: <4> [229.963129] ? i915_vma_destroy+0x86/0x350 [i915] <4> [229.963166] ? i915_vma_destroy+0x86/0x350 [i915] <4> [229.963201] i915_vma_destroy+0x86/0x350 [i915] <4> [229.963236] __i915_gem_free_objects+0xb8/0x510 [i915] <4> [229.963270] __i915_gem_free_work+0x5a/0x90 [i915] <4> [229.963275] process_one_work+0x245/0x610 as since commit 6f76098fe0f3 ("drm/i915/uc: Move uC early functions inside the GT ones") we cleanup uc after gem. Move captured GuC load error log to uc struct and release it in intel_uc_fini() instead of intel_uc_driver_late_release() Note that intel_uc_driver_late_release() is now empty, but we can leave it as a placeholder for future code. Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190802184055.31988-5-michal.wajdeczko@intel.com
2019-08-02 18:40:53 +00:00
static void __uc_free_load_err_log(struct intel_uc *uc)
{
drm/i915/uc: Move GuC error log to uc and release it on fini When we fail to load GuC and want to abort probe, we hit: <7> [229.915779] i915 0000:00:02.0: [drm:intel_uc_init_hw [i915]] GuC initialization failed -6 <7> [229.915813] i915 0000:00:02.0: [drm:i915_gem_init_hw [i915]] Enabling uc failed (-6) <4> [229.953354] ------------[ cut here ]------------ <4> [229.953355] WARN_ON(dev_priv->mm.shrink_count) <4> [229.953406] WARNING: CPU: 9 PID: 3287 at drivers/gpu/drm/i915/i915_gem.c:1684 i915_gem_cleanup_early+0xfc/0x110 [i915] <4> [229.953464] Call Trace: <4> [229.953489] i915_driver_late_release+0x19/0x60 [i915] <4> [229.953514] i915_driver_probe+0xb82/0x18a0 [i915] <4> [229.953519] ? __pm_runtime_resume+0x4f/0x80 <4> [229.953545] i915_pci_probe+0x43/0x1b0 [i915] ... <4> [229.962951] ------------[ cut here ]------------ <4> [229.962956] DEBUG_LOCKS_WARN_ON(lock->magic != lock) <4> [229.962959] WARNING: CPU: 8 PID: 2395 at kernel/locking/mutex.c:912 __mutex_lock+0x750/0x9b0 <4> [229.963091] Call Trace: <4> [229.963129] ? i915_vma_destroy+0x86/0x350 [i915] <4> [229.963166] ? i915_vma_destroy+0x86/0x350 [i915] <4> [229.963201] i915_vma_destroy+0x86/0x350 [i915] <4> [229.963236] __i915_gem_free_objects+0xb8/0x510 [i915] <4> [229.963270] __i915_gem_free_work+0x5a/0x90 [i915] <4> [229.963275] process_one_work+0x245/0x610 as since commit 6f76098fe0f3 ("drm/i915/uc: Move uC early functions inside the GT ones") we cleanup uc after gem. Move captured GuC load error log to uc struct and release it in intel_uc_fini() instead of intel_uc_driver_late_release() Note that intel_uc_driver_late_release() is now empty, but we can leave it as a placeholder for future code. Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190802184055.31988-5-michal.wajdeczko@intel.com
2019-08-02 18:40:53 +00:00
struct drm_i915_gem_object *log = fetch_and_zero(&uc->load_err_log);
if (log)
i915_gem_object_put(log);
}
/*
* Events triggered while CT buffers are disabled are logged in the SCRATCH_15
* register using the same bits used in the CT message payload. Since our
* communication channel with guc is turned off at this point, we can save the
* message and handle it after we turn it back on.
*/
static void guc_clear_mmio_msg(struct intel_guc *guc)
{
intel_uncore_write(guc_to_gt(guc)->uncore, SOFT_SCRATCH(15), 0);
}
static void guc_get_mmio_msg(struct intel_guc *guc)
{
u32 val;
spin_lock_irq(&guc->irq_lock);
val = intel_uncore_read(guc_to_gt(guc)->uncore, SOFT_SCRATCH(15));
guc->mmio_msg |= val & guc->msg_enabled_mask;
/*
* clear all events, including the ones we're not currently servicing,
* to make sure we don't try to process a stale message if we enable
* handling of more events later.
*/
guc_clear_mmio_msg(guc);
spin_unlock_irq(&guc->irq_lock);
}
static void guc_handle_mmio_msg(struct intel_guc *guc)
{
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
/* we need communication to be enabled to reply to GuC */
GEM_BUG_ON(guc->handler == intel_guc_to_host_event_handler_nop);
if (!guc->mmio_msg)
return;
spin_lock_irq(&i915->irq_lock);
intel_guc_to_host_process_recv_msg(guc, &guc->mmio_msg, 1);
spin_unlock_irq(&i915->irq_lock);
guc->mmio_msg = 0;
}
static void guc_reset_interrupts(struct intel_guc *guc)
{
guc->interrupts.reset(guc);
}
static void guc_enable_interrupts(struct intel_guc *guc)
{
guc->interrupts.enable(guc);
}
static void guc_disable_interrupts(struct intel_guc *guc)
{
guc->interrupts.disable(guc);
}
static inline bool guc_communication_enabled(struct intel_guc *guc)
{
return guc->send != intel_guc_send_nop;
}
static int guc_enable_communication(struct intel_guc *guc)
{
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
int ret;
GEM_BUG_ON(guc_communication_enabled(guc));
ret = i915_inject_load_error(i915, -ENXIO);
if (ret)
return ret;
ret = intel_guc_ct_enable(&guc->ct);
if (ret)
return ret;
guc->send = intel_guc_send_ct;
guc->handler = intel_guc_to_host_event_handler_ct;
/* check for mmio messages received before/during the CT enable */
guc_get_mmio_msg(guc);
guc_handle_mmio_msg(guc);
guc_enable_interrupts(guc);
/* check for CT messages received before we enabled interrupts */
spin_lock_irq(&i915->irq_lock);
intel_guc_to_host_event_handler_ct(guc);
spin_unlock_irq(&i915->irq_lock);
DRM_INFO("GuC communication enabled\n");
return 0;
}
static void __guc_stop_communication(struct intel_guc *guc)
{
/*
* Events generated during or after CT disable are logged by guc in
* via mmio. Make sure the register is clear before disabling CT since
* all events we cared about have already been processed via CT.
*/
guc_clear_mmio_msg(guc);
guc_disable_interrupts(guc);
guc->send = intel_guc_send_nop;
guc->handler = intel_guc_to_host_event_handler_nop;
}
static void guc_stop_communication(struct intel_guc *guc)
{
intel_guc_ct_stop(&guc->ct);
__guc_stop_communication(guc);
DRM_INFO("GuC communication stopped\n");
}
static void guc_disable_communication(struct intel_guc *guc)
{
__guc_stop_communication(guc);
intel_guc_ct_disable(&guc->ct);
/*
* Check for messages received during/after the CT disable. We do not
* expect any messages to have arrived via CT between the interrupt
* disable and the CT disable because GuC should've been idle until we
* triggered the CT disable protocol.
*/
guc_get_mmio_msg(guc);
DRM_INFO("GuC communication disabled\n");
}
void intel_uc_fetch_firmwares(struct intel_uc *uc)
drm/i915/guc: Move GuC workqueue allocations outside of the mutex This gets rid of the following lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc2-CI-Patchwork_7428+ #1 Not tainted ------------------------------------------------------ debugfs_test/1351 is trying to acquire lock: (&dev->struct_mutex){+.+.}, at: [<000000009d90d1a3>] i915_mutex_lock_interruptible+0x47/0x130 [i915] but task is already holding lock: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #6 (&mm->mmap_sem){++++}: __might_fault+0x63/0x90 _copy_to_user+0x1e/0x70 filldir+0x8c/0xf0 dcache_readdir+0xeb/0x160 iterate_dir+0xe6/0x150 SyS_getdents+0xa0/0x130 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #5 (&sb->s_type->i_mutex_key#5){++++}: lockref_get+0x9/0x20 -> #4 ((completion)&req.done){+.+.}: wait_for_common+0x54/0x210 devtmpfs_create_node+0x130/0x150 device_add+0x5ad/0x5e0 device_create_groups_vargs+0xd4/0xe0 device_create+0x35/0x40 msr_device_create+0x22/0x40 cpuhp_invoke_callback+0xc5/0xbf0 cpuhp_thread_fun+0x167/0x210 smpboot_thread_fn+0x17f/0x270 kthread+0x173/0x1b0 ret_from_fork+0x24/0x30 -> #3 (cpuhp_state-up){+.+.}: cpuhp_issue_call+0x132/0x1c0 __cpuhp_setup_state_cpuslocked+0x12f/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_writeback_init+0x3a/0x5c start_kernel+0x393/0x3e2 secondary_startup_64+0xa5/0xb0 -> #2 (cpuhp_state_mutex){+.+.}: __mutex_lock+0x81/0x9b0 __cpuhp_setup_state_cpuslocked+0x4b/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_alloc_init+0x1f/0x26 start_kernel+0x139/0x3e2 secondary_startup_64+0xa5/0xb0 -> #1 (cpu_hotplug_lock.rw_sem){++++}: cpus_read_lock+0x34/0xa0 apply_workqueue_attrs+0xd/0x40 __alloc_workqueue_key+0x2c7/0x4e1 intel_guc_submission_init+0x10c/0x650 [i915] intel_uc_init_hw+0x29e/0x460 [i915] i915_gem_init_hw+0xca/0x290 [i915] i915_gem_init+0x115/0x3a0 [i915] i915_driver_load+0x9a8/0x16c0 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #0 (&dev->struct_mutex){+.+.}: lock_acquire+0xaf/0x200 __mutex_lock+0x81/0x9b0 i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 other info that might help us debug this: Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#5 --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&sb->s_type->i_mutex_key#5); lock(&mm->mmap_sem); lock(&dev->struct_mutex); *** DEADLOCK *** 1 lock held by debugfs_test/1351: #0: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 stack backtrace: CPU: 2 PID: 1351 Comm: debugfs_test Not tainted 4.15.0-rc2-CI-Patchwork_7428+ #1 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0057.2017.0119.1758 01/19/2017 Call Trace: dump_stack+0x5f/0x86 print_circular_bug+0x230/0x3b0 check_prev_add+0x439/0x7b0 ? lockdep_init_map_crosslock+0x20/0x20 ? unwind_get_return_address+0x16/0x30 ? __lock_acquire+0x1385/0x15a0 __lock_acquire+0x1385/0x15a0 lock_acquire+0xaf/0x200 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] __mutex_lock+0x81/0x9b0 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_mutex_lock_interruptible+0x47/0x130 [i915] ? __pm_runtime_resume+0x4f/0x80 i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 RIP: 0033:0x7f98d6f49116 RSP: 002b:00007ffd6ffc3278 EFLAGS: 00010283 RAX: 00007f98d39a2bc0 RBX: 0000000000000000 RCX: 0000000000001680 RDX: 0000000000001680 RSI: 00007ffd6ffc3400 RDI: 00007f98d39a2bc0 RBP: 00007ffd6ffc33a0 R08: 0000000000000000 R09: 00000000000005a0 R10: 000055e847c2a830 R11: 0000000000000002 R12: 0000000000000001 R13: 000055e847c1d040 R14: 00007ffd6ffc3400 R15: 00007f98d6752ba0 v2: Init preempt_work unconditionally (Chris) v3: Mention that we need the enable_guc=1 for lockdep splat (Chris) Testcase: igt/debugfs_test/read_all_entries # with i915.enable_guc=1 Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-2-michal.winiarski@intel.com
2017-12-13 22:13:47 +00:00
{
struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
int err;
if (!intel_uc_uses_guc(uc))
return;
err = intel_uc_fw_fetch(&uc->guc.fw, i915);
if (err)
return;
if (intel_uc_uses_huc(uc))
intel_uc_fw_fetch(&uc->huc.fw, i915);
drm/i915/guc: Move GuC workqueue allocations outside of the mutex This gets rid of the following lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc2-CI-Patchwork_7428+ #1 Not tainted ------------------------------------------------------ debugfs_test/1351 is trying to acquire lock: (&dev->struct_mutex){+.+.}, at: [<000000009d90d1a3>] i915_mutex_lock_interruptible+0x47/0x130 [i915] but task is already holding lock: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #6 (&mm->mmap_sem){++++}: __might_fault+0x63/0x90 _copy_to_user+0x1e/0x70 filldir+0x8c/0xf0 dcache_readdir+0xeb/0x160 iterate_dir+0xe6/0x150 SyS_getdents+0xa0/0x130 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #5 (&sb->s_type->i_mutex_key#5){++++}: lockref_get+0x9/0x20 -> #4 ((completion)&req.done){+.+.}: wait_for_common+0x54/0x210 devtmpfs_create_node+0x130/0x150 device_add+0x5ad/0x5e0 device_create_groups_vargs+0xd4/0xe0 device_create+0x35/0x40 msr_device_create+0x22/0x40 cpuhp_invoke_callback+0xc5/0xbf0 cpuhp_thread_fun+0x167/0x210 smpboot_thread_fn+0x17f/0x270 kthread+0x173/0x1b0 ret_from_fork+0x24/0x30 -> #3 (cpuhp_state-up){+.+.}: cpuhp_issue_call+0x132/0x1c0 __cpuhp_setup_state_cpuslocked+0x12f/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_writeback_init+0x3a/0x5c start_kernel+0x393/0x3e2 secondary_startup_64+0xa5/0xb0 -> #2 (cpuhp_state_mutex){+.+.}: __mutex_lock+0x81/0x9b0 __cpuhp_setup_state_cpuslocked+0x4b/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_alloc_init+0x1f/0x26 start_kernel+0x139/0x3e2 secondary_startup_64+0xa5/0xb0 -> #1 (cpu_hotplug_lock.rw_sem){++++}: cpus_read_lock+0x34/0xa0 apply_workqueue_attrs+0xd/0x40 __alloc_workqueue_key+0x2c7/0x4e1 intel_guc_submission_init+0x10c/0x650 [i915] intel_uc_init_hw+0x29e/0x460 [i915] i915_gem_init_hw+0xca/0x290 [i915] i915_gem_init+0x115/0x3a0 [i915] i915_driver_load+0x9a8/0x16c0 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #0 (&dev->struct_mutex){+.+.}: lock_acquire+0xaf/0x200 __mutex_lock+0x81/0x9b0 i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 other info that might help us debug this: Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#5 --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&sb->s_type->i_mutex_key#5); lock(&mm->mmap_sem); lock(&dev->struct_mutex); *** DEADLOCK *** 1 lock held by debugfs_test/1351: #0: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 stack backtrace: CPU: 2 PID: 1351 Comm: debugfs_test Not tainted 4.15.0-rc2-CI-Patchwork_7428+ #1 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0057.2017.0119.1758 01/19/2017 Call Trace: dump_stack+0x5f/0x86 print_circular_bug+0x230/0x3b0 check_prev_add+0x439/0x7b0 ? lockdep_init_map_crosslock+0x20/0x20 ? unwind_get_return_address+0x16/0x30 ? __lock_acquire+0x1385/0x15a0 __lock_acquire+0x1385/0x15a0 lock_acquire+0xaf/0x200 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] __mutex_lock+0x81/0x9b0 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_mutex_lock_interruptible+0x47/0x130 [i915] ? __pm_runtime_resume+0x4f/0x80 i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 RIP: 0033:0x7f98d6f49116 RSP: 002b:00007ffd6ffc3278 EFLAGS: 00010283 RAX: 00007f98d39a2bc0 RBX: 0000000000000000 RCX: 0000000000001680 RDX: 0000000000001680 RSI: 00007ffd6ffc3400 RDI: 00007f98d39a2bc0 RBP: 00007ffd6ffc33a0 R08: 0000000000000000 R09: 00000000000005a0 R10: 000055e847c2a830 R11: 0000000000000002 R12: 0000000000000001 R13: 000055e847c1d040 R14: 00007ffd6ffc3400 R15: 00007f98d6752ba0 v2: Init preempt_work unconditionally (Chris) v3: Mention that we need the enable_guc=1 for lockdep splat (Chris) Testcase: igt/debugfs_test/read_all_entries # with i915.enable_guc=1 Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-2-michal.winiarski@intel.com
2017-12-13 22:13:47 +00:00
}
void intel_uc_cleanup_firmwares(struct intel_uc *uc)
drm/i915/guc: Move GuC workqueue allocations outside of the mutex This gets rid of the following lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc2-CI-Patchwork_7428+ #1 Not tainted ------------------------------------------------------ debugfs_test/1351 is trying to acquire lock: (&dev->struct_mutex){+.+.}, at: [<000000009d90d1a3>] i915_mutex_lock_interruptible+0x47/0x130 [i915] but task is already holding lock: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #6 (&mm->mmap_sem){++++}: __might_fault+0x63/0x90 _copy_to_user+0x1e/0x70 filldir+0x8c/0xf0 dcache_readdir+0xeb/0x160 iterate_dir+0xe6/0x150 SyS_getdents+0xa0/0x130 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #5 (&sb->s_type->i_mutex_key#5){++++}: lockref_get+0x9/0x20 -> #4 ((completion)&req.done){+.+.}: wait_for_common+0x54/0x210 devtmpfs_create_node+0x130/0x150 device_add+0x5ad/0x5e0 device_create_groups_vargs+0xd4/0xe0 device_create+0x35/0x40 msr_device_create+0x22/0x40 cpuhp_invoke_callback+0xc5/0xbf0 cpuhp_thread_fun+0x167/0x210 smpboot_thread_fn+0x17f/0x270 kthread+0x173/0x1b0 ret_from_fork+0x24/0x30 -> #3 (cpuhp_state-up){+.+.}: cpuhp_issue_call+0x132/0x1c0 __cpuhp_setup_state_cpuslocked+0x12f/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_writeback_init+0x3a/0x5c start_kernel+0x393/0x3e2 secondary_startup_64+0xa5/0xb0 -> #2 (cpuhp_state_mutex){+.+.}: __mutex_lock+0x81/0x9b0 __cpuhp_setup_state_cpuslocked+0x4b/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_alloc_init+0x1f/0x26 start_kernel+0x139/0x3e2 secondary_startup_64+0xa5/0xb0 -> #1 (cpu_hotplug_lock.rw_sem){++++}: cpus_read_lock+0x34/0xa0 apply_workqueue_attrs+0xd/0x40 __alloc_workqueue_key+0x2c7/0x4e1 intel_guc_submission_init+0x10c/0x650 [i915] intel_uc_init_hw+0x29e/0x460 [i915] i915_gem_init_hw+0xca/0x290 [i915] i915_gem_init+0x115/0x3a0 [i915] i915_driver_load+0x9a8/0x16c0 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #0 (&dev->struct_mutex){+.+.}: lock_acquire+0xaf/0x200 __mutex_lock+0x81/0x9b0 i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 other info that might help us debug this: Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#5 --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&sb->s_type->i_mutex_key#5); lock(&mm->mmap_sem); lock(&dev->struct_mutex); *** DEADLOCK *** 1 lock held by debugfs_test/1351: #0: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 stack backtrace: CPU: 2 PID: 1351 Comm: debugfs_test Not tainted 4.15.0-rc2-CI-Patchwork_7428+ #1 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0057.2017.0119.1758 01/19/2017 Call Trace: dump_stack+0x5f/0x86 print_circular_bug+0x230/0x3b0 check_prev_add+0x439/0x7b0 ? lockdep_init_map_crosslock+0x20/0x20 ? unwind_get_return_address+0x16/0x30 ? __lock_acquire+0x1385/0x15a0 __lock_acquire+0x1385/0x15a0 lock_acquire+0xaf/0x200 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] __mutex_lock+0x81/0x9b0 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_mutex_lock_interruptible+0x47/0x130 [i915] ? __pm_runtime_resume+0x4f/0x80 i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 RIP: 0033:0x7f98d6f49116 RSP: 002b:00007ffd6ffc3278 EFLAGS: 00010283 RAX: 00007f98d39a2bc0 RBX: 0000000000000000 RCX: 0000000000001680 RDX: 0000000000001680 RSI: 00007ffd6ffc3400 RDI: 00007f98d39a2bc0 RBP: 00007ffd6ffc33a0 R08: 0000000000000000 R09: 00000000000005a0 R10: 000055e847c2a830 R11: 0000000000000002 R12: 0000000000000001 R13: 000055e847c1d040 R14: 00007ffd6ffc3400 R15: 00007f98d6752ba0 v2: Init preempt_work unconditionally (Chris) v3: Mention that we need the enable_guc=1 for lockdep splat (Chris) Testcase: igt/debugfs_test/read_all_entries # with i915.enable_guc=1 Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-2-michal.winiarski@intel.com
2017-12-13 22:13:47 +00:00
{
if (!intel_uc_uses_guc(uc))
drm/i915/guc: Move GuC workqueue allocations outside of the mutex This gets rid of the following lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc2-CI-Patchwork_7428+ #1 Not tainted ------------------------------------------------------ debugfs_test/1351 is trying to acquire lock: (&dev->struct_mutex){+.+.}, at: [<000000009d90d1a3>] i915_mutex_lock_interruptible+0x47/0x130 [i915] but task is already holding lock: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #6 (&mm->mmap_sem){++++}: __might_fault+0x63/0x90 _copy_to_user+0x1e/0x70 filldir+0x8c/0xf0 dcache_readdir+0xeb/0x160 iterate_dir+0xe6/0x150 SyS_getdents+0xa0/0x130 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #5 (&sb->s_type->i_mutex_key#5){++++}: lockref_get+0x9/0x20 -> #4 ((completion)&req.done){+.+.}: wait_for_common+0x54/0x210 devtmpfs_create_node+0x130/0x150 device_add+0x5ad/0x5e0 device_create_groups_vargs+0xd4/0xe0 device_create+0x35/0x40 msr_device_create+0x22/0x40 cpuhp_invoke_callback+0xc5/0xbf0 cpuhp_thread_fun+0x167/0x210 smpboot_thread_fn+0x17f/0x270 kthread+0x173/0x1b0 ret_from_fork+0x24/0x30 -> #3 (cpuhp_state-up){+.+.}: cpuhp_issue_call+0x132/0x1c0 __cpuhp_setup_state_cpuslocked+0x12f/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_writeback_init+0x3a/0x5c start_kernel+0x393/0x3e2 secondary_startup_64+0xa5/0xb0 -> #2 (cpuhp_state_mutex){+.+.}: __mutex_lock+0x81/0x9b0 __cpuhp_setup_state_cpuslocked+0x4b/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_alloc_init+0x1f/0x26 start_kernel+0x139/0x3e2 secondary_startup_64+0xa5/0xb0 -> #1 (cpu_hotplug_lock.rw_sem){++++}: cpus_read_lock+0x34/0xa0 apply_workqueue_attrs+0xd/0x40 __alloc_workqueue_key+0x2c7/0x4e1 intel_guc_submission_init+0x10c/0x650 [i915] intel_uc_init_hw+0x29e/0x460 [i915] i915_gem_init_hw+0xca/0x290 [i915] i915_gem_init+0x115/0x3a0 [i915] i915_driver_load+0x9a8/0x16c0 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #0 (&dev->struct_mutex){+.+.}: lock_acquire+0xaf/0x200 __mutex_lock+0x81/0x9b0 i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 other info that might help us debug this: Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#5 --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&sb->s_type->i_mutex_key#5); lock(&mm->mmap_sem); lock(&dev->struct_mutex); *** DEADLOCK *** 1 lock held by debugfs_test/1351: #0: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 stack backtrace: CPU: 2 PID: 1351 Comm: debugfs_test Not tainted 4.15.0-rc2-CI-Patchwork_7428+ #1 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0057.2017.0119.1758 01/19/2017 Call Trace: dump_stack+0x5f/0x86 print_circular_bug+0x230/0x3b0 check_prev_add+0x439/0x7b0 ? lockdep_init_map_crosslock+0x20/0x20 ? unwind_get_return_address+0x16/0x30 ? __lock_acquire+0x1385/0x15a0 __lock_acquire+0x1385/0x15a0 lock_acquire+0xaf/0x200 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] __mutex_lock+0x81/0x9b0 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_mutex_lock_interruptible+0x47/0x130 [i915] ? __pm_runtime_resume+0x4f/0x80 i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 RIP: 0033:0x7f98d6f49116 RSP: 002b:00007ffd6ffc3278 EFLAGS: 00010283 RAX: 00007f98d39a2bc0 RBX: 0000000000000000 RCX: 0000000000001680 RDX: 0000000000001680 RSI: 00007ffd6ffc3400 RDI: 00007f98d39a2bc0 RBP: 00007ffd6ffc33a0 R08: 0000000000000000 R09: 00000000000005a0 R10: 000055e847c2a830 R11: 0000000000000002 R12: 0000000000000001 R13: 000055e847c1d040 R14: 00007ffd6ffc3400 R15: 00007f98d6752ba0 v2: Init preempt_work unconditionally (Chris) v3: Mention that we need the enable_guc=1 for lockdep splat (Chris) Testcase: igt/debugfs_test/read_all_entries # with i915.enable_guc=1 Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-2-michal.winiarski@intel.com
2017-12-13 22:13:47 +00:00
return;
if (intel_uc_uses_huc(uc))
intel_uc_fw_cleanup_fetch(&uc->huc.fw);
intel_uc_fw_cleanup_fetch(&uc->guc.fw);
drm/i915/guc: Move GuC workqueue allocations outside of the mutex This gets rid of the following lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc2-CI-Patchwork_7428+ #1 Not tainted ------------------------------------------------------ debugfs_test/1351 is trying to acquire lock: (&dev->struct_mutex){+.+.}, at: [<000000009d90d1a3>] i915_mutex_lock_interruptible+0x47/0x130 [i915] but task is already holding lock: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #6 (&mm->mmap_sem){++++}: __might_fault+0x63/0x90 _copy_to_user+0x1e/0x70 filldir+0x8c/0xf0 dcache_readdir+0xeb/0x160 iterate_dir+0xe6/0x150 SyS_getdents+0xa0/0x130 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #5 (&sb->s_type->i_mutex_key#5){++++}: lockref_get+0x9/0x20 -> #4 ((completion)&req.done){+.+.}: wait_for_common+0x54/0x210 devtmpfs_create_node+0x130/0x150 device_add+0x5ad/0x5e0 device_create_groups_vargs+0xd4/0xe0 device_create+0x35/0x40 msr_device_create+0x22/0x40 cpuhp_invoke_callback+0xc5/0xbf0 cpuhp_thread_fun+0x167/0x210 smpboot_thread_fn+0x17f/0x270 kthread+0x173/0x1b0 ret_from_fork+0x24/0x30 -> #3 (cpuhp_state-up){+.+.}: cpuhp_issue_call+0x132/0x1c0 __cpuhp_setup_state_cpuslocked+0x12f/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_writeback_init+0x3a/0x5c start_kernel+0x393/0x3e2 secondary_startup_64+0xa5/0xb0 -> #2 (cpuhp_state_mutex){+.+.}: __mutex_lock+0x81/0x9b0 __cpuhp_setup_state_cpuslocked+0x4b/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_alloc_init+0x1f/0x26 start_kernel+0x139/0x3e2 secondary_startup_64+0xa5/0xb0 -> #1 (cpu_hotplug_lock.rw_sem){++++}: cpus_read_lock+0x34/0xa0 apply_workqueue_attrs+0xd/0x40 __alloc_workqueue_key+0x2c7/0x4e1 intel_guc_submission_init+0x10c/0x650 [i915] intel_uc_init_hw+0x29e/0x460 [i915] i915_gem_init_hw+0xca/0x290 [i915] i915_gem_init+0x115/0x3a0 [i915] i915_driver_load+0x9a8/0x16c0 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #0 (&dev->struct_mutex){+.+.}: lock_acquire+0xaf/0x200 __mutex_lock+0x81/0x9b0 i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 other info that might help us debug this: Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#5 --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&sb->s_type->i_mutex_key#5); lock(&mm->mmap_sem); lock(&dev->struct_mutex); *** DEADLOCK *** 1 lock held by debugfs_test/1351: #0: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 stack backtrace: CPU: 2 PID: 1351 Comm: debugfs_test Not tainted 4.15.0-rc2-CI-Patchwork_7428+ #1 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0057.2017.0119.1758 01/19/2017 Call Trace: dump_stack+0x5f/0x86 print_circular_bug+0x230/0x3b0 check_prev_add+0x439/0x7b0 ? lockdep_init_map_crosslock+0x20/0x20 ? unwind_get_return_address+0x16/0x30 ? __lock_acquire+0x1385/0x15a0 __lock_acquire+0x1385/0x15a0 lock_acquire+0xaf/0x200 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] __mutex_lock+0x81/0x9b0 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_mutex_lock_interruptible+0x47/0x130 [i915] ? __pm_runtime_resume+0x4f/0x80 i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 RIP: 0033:0x7f98d6f49116 RSP: 002b:00007ffd6ffc3278 EFLAGS: 00010283 RAX: 00007f98d39a2bc0 RBX: 0000000000000000 RCX: 0000000000001680 RDX: 0000000000001680 RSI: 00007ffd6ffc3400 RDI: 00007f98d39a2bc0 RBP: 00007ffd6ffc33a0 R08: 0000000000000000 R09: 00000000000005a0 R10: 000055e847c2a830 R11: 0000000000000002 R12: 0000000000000001 R13: 000055e847c1d040 R14: 00007ffd6ffc3400 R15: 00007f98d6752ba0 v2: Init preempt_work unconditionally (Chris) v3: Mention that we need the enable_guc=1 for lockdep splat (Chris) Testcase: igt/debugfs_test/read_all_entries # with i915.enable_guc=1 Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-2-michal.winiarski@intel.com
2017-12-13 22:13:47 +00:00
}
void intel_uc_init(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
struct intel_huc *huc = &uc->huc;
int ret;
if (!intel_uc_uses_guc(uc))
return;
/* XXX: GuC submission is unavailable for now */
GEM_BUG_ON(intel_uc_supports_guc_submission(uc));
drm/i915/guc: Move shared data allocation away from submission path We need shared data for actions (e.g. guc suspend/resume), and we're using those with GuC submission disabled. Let's introduce intel_guc_init and move shared data alloc there. This fixes GPF during module unload with HuC, but without GuC submission: BUG: unable to handle kernel NULL pointer dereference at 000000005aee7809 IP: intel_guc_suspend+0x34/0x140 [i915] PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: i915(O-) netconsole x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel mei_me i2c_i801 mei prime_numbers [last unloaded: i915] CPU: 2 PID: 2794 Comm: rmmod Tainted: G U W O 4.15.0-rc2+ #297 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0054.2016.0930.1102 09/30/2016 task: 0000000055945c61 task.stack: 00000000264ccb43 RIP: 0010:intel_guc_suspend+0x34/0x140 [i915] RSP: 0018:ffffc90000483df8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff880829180000 RCX: 0000000000000000 RDX: 0000000000000006 RSI: ffff880844c2c938 RDI: ffff880844c2c000 RBP: ffff880829180000 R08: 00000000a29c58c1 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffa040ba40 R13: ffffffffa040bab0 R14: ffff88084a195060 R15: 000055df3ef357a0 FS: 00007ff43c043740(0000) GS:ffff88084e200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000f9 CR3: 000000083f179005 CR4: 00000000003606e0 Call Trace: i915_gem_suspend+0x9d/0x130 [i915] ? i915_driver_unload+0x68/0x180 [i915] i915_driver_unload+0x70/0x180 [i915] i915_pci_remove+0x15/0x20 [i915] pci_device_remove+0x36/0xb0 device_release_driver_internal+0x15f/0x220 driver_detach+0x3a/0x80 bus_remove_driver+0x58/0xd0 pci_unregister_driver+0x29/0x90 SyS_delete_module+0x150/0x1e0 entry_SYSCALL_64_fastpath+0x23/0x9a RIP: 0033:0x7ff43b51b5c7 RSP: 002b:00007ffe6825a758 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007ff43b51b5c7 RDX: 000000000000000a RSI: 0000000000000800 RDI: 000055df3ef35808 RBP: 0000000000000000 R08: 00007ffe682596d1 R09: 0000000000000000 R10: 00007ff43b594880 R11: 0000000000000206 R12: 000055df3ef357a0 R13: 00007ffe68259740 R14: 000055df3ef35260 R15: 000055df3ef357a0 Code: 00 00 02 74 03 31 c0 c3 53 48 89 fb 48 83 ec 10 e8 52 0f f8 ff 48 b8 01 05 00 00 02 00 00 00 48 89 44 24 04 48 8b 83 00 12 00 00 <f6> 80 f9 00 00 00 01 0f 84 a7 00 00 00 f6 80 98 00 00 00 01 0f RIP: intel_guc_suspend+0x34/0x140 [i915] RSP: ffffc90000483df8 CR2: 00000000000000f9 ---[ end trace 23a192a61d937a3e ]--- Fixes: b8e5eb960b28 ("drm/i915/guc: Allocate separate shared data object for GuC communication") Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-1-michal.winiarski@intel.com
2017-12-13 22:13:46 +00:00
ret = intel_guc_init(guc);
if (ret) {
intel_uc_fw_cleanup_fetch(&huc->fw);
return;
}
if (intel_uc_uses_huc(uc))
intel_huc_init(huc);
}
void intel_uc_fini(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
if (!intel_uc_uses_guc(uc))
return;
if (intel_uc_uses_huc(uc))
intel_huc_fini(&uc->huc);
intel_guc_fini(guc);
drm/i915/uc: Move GuC error log to uc and release it on fini When we fail to load GuC and want to abort probe, we hit: <7> [229.915779] i915 0000:00:02.0: [drm:intel_uc_init_hw [i915]] GuC initialization failed -6 <7> [229.915813] i915 0000:00:02.0: [drm:i915_gem_init_hw [i915]] Enabling uc failed (-6) <4> [229.953354] ------------[ cut here ]------------ <4> [229.953355] WARN_ON(dev_priv->mm.shrink_count) <4> [229.953406] WARNING: CPU: 9 PID: 3287 at drivers/gpu/drm/i915/i915_gem.c:1684 i915_gem_cleanup_early+0xfc/0x110 [i915] <4> [229.953464] Call Trace: <4> [229.953489] i915_driver_late_release+0x19/0x60 [i915] <4> [229.953514] i915_driver_probe+0xb82/0x18a0 [i915] <4> [229.953519] ? __pm_runtime_resume+0x4f/0x80 <4> [229.953545] i915_pci_probe+0x43/0x1b0 [i915] ... <4> [229.962951] ------------[ cut here ]------------ <4> [229.962956] DEBUG_LOCKS_WARN_ON(lock->magic != lock) <4> [229.962959] WARNING: CPU: 8 PID: 2395 at kernel/locking/mutex.c:912 __mutex_lock+0x750/0x9b0 <4> [229.963091] Call Trace: <4> [229.963129] ? i915_vma_destroy+0x86/0x350 [i915] <4> [229.963166] ? i915_vma_destroy+0x86/0x350 [i915] <4> [229.963201] i915_vma_destroy+0x86/0x350 [i915] <4> [229.963236] __i915_gem_free_objects+0xb8/0x510 [i915] <4> [229.963270] __i915_gem_free_work+0x5a/0x90 [i915] <4> [229.963275] process_one_work+0x245/0x610 as since commit 6f76098fe0f3 ("drm/i915/uc: Move uC early functions inside the GT ones") we cleanup uc after gem. Move captured GuC load error log to uc struct and release it in intel_uc_fini() instead of intel_uc_driver_late_release() Note that intel_uc_driver_late_release() is now empty, but we can leave it as a placeholder for future code. Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190802184055.31988-5-michal.wajdeczko@intel.com
2019-08-02 18:40:53 +00:00
__uc_free_load_err_log(uc);
}
static int __uc_sanitize(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
struct intel_huc *huc = &uc->huc;
GEM_BUG_ON(!intel_uc_supports_guc(uc));
intel_huc_sanitize(huc);
intel_guc_sanitize(guc);
return __intel_uc_reset_hw(uc);
}
void intel_uc_sanitize(struct intel_uc *uc)
{
if (!intel_uc_supports_guc(uc))
return;
__uc_sanitize(uc);
}
/* Initialize and verify the uC regs related to uC positioning in WOPCM */
static int uc_init_wopcm(struct intel_uc *uc)
{
struct intel_gt *gt = uc_to_gt(uc);
struct intel_uncore *uncore = gt->uncore;
u32 base = intel_wopcm_guc_base(&gt->i915->wopcm);
u32 size = intel_wopcm_guc_size(&gt->i915->wopcm);
u32 huc_agent = intel_uc_uses_huc(uc) ? HUC_LOADING_AGENT_GUC : 0;
u32 mask;
int err;
if (unlikely(!base || !size)) {
i915_probe_error(gt->i915, "Unsuccessful WOPCM partitioning\n");
return -E2BIG;
}
GEM_BUG_ON(!intel_uc_supports_guc(uc));
GEM_BUG_ON(!(base & GUC_WOPCM_OFFSET_MASK));
GEM_BUG_ON(base & ~GUC_WOPCM_OFFSET_MASK);
GEM_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK));
GEM_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK);
err = i915_inject_load_error(gt->i915, -ENXIO);
if (err)
return err;
mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
err = intel_uncore_write_and_verify(uncore, GUC_WOPCM_SIZE, size, mask,
size | GUC_WOPCM_SIZE_LOCKED);
if (err)
goto err_out;
mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent;
err = intel_uncore_write_and_verify(uncore, DMA_GUC_WOPCM_OFFSET,
base | huc_agent, mask,
base | huc_agent |
GUC_WOPCM_OFFSET_VALID);
if (err)
goto err_out;
return 0;
err_out:
i915_probe_error(gt->i915, "Failed to init uC WOPCM registers!\n");
i915_probe_error(gt->i915, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET",
i915_mmio_reg_offset(DMA_GUC_WOPCM_OFFSET),
intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET));
i915_probe_error(gt->i915, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE",
i915_mmio_reg_offset(GUC_WOPCM_SIZE),
intel_uncore_read(uncore, GUC_WOPCM_SIZE));
return err;
}
static bool uc_is_wopcm_locked(struct intel_uc *uc)
{
struct intel_gt *gt = uc_to_gt(uc);
struct intel_uncore *uncore = gt->uncore;
return (intel_uncore_read(uncore, GUC_WOPCM_SIZE) & GUC_WOPCM_SIZE_LOCKED) ||
(intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET) & GUC_WOPCM_OFFSET_VALID);
}
int intel_uc_init_hw(struct intel_uc *uc)
{
struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
struct intel_guc *guc = &uc->guc;
struct intel_huc *huc = &uc->huc;
int ret, attempts;
if (!intel_uc_supports_guc(uc))
return 0;
/*
* We can silently continue without GuC only if it was never enabled
* before on this system after reboot, otherwise we risk GPU hangs.
* To check if GuC was loaded before we look at WOPCM registers.
*/
if (!intel_uc_uses_guc(uc) && !uc_is_wopcm_locked(uc))
return 0;
if (!intel_uc_fw_is_available(&guc->fw)) {
ret = uc_is_wopcm_locked(uc) ||
intel_uc_fw_is_overridden(&guc->fw) ||
intel_uc_supports_guc_submission(uc) ?
intel_uc_fw_status_to_error(guc->fw.status) : 0;
goto err_out;
}
ret = uc_init_wopcm(uc);
if (ret)
goto err_out;
guc_reset_interrupts(guc);
/* WaEnableuKernelHeaderValidFix:skl */
/* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */
if (IS_GEN(i915, 9))
attempts = 3;
else
attempts = 1;
while (attempts--) {
/*
* Always reset the GuC just before (re)loading, so
* that the state and timing are fairly predictable
*/
ret = __uc_sanitize(uc);
if (ret)
goto err_out;
intel_huc_fw_upload(huc);
intel_guc_ads_reset(guc);
intel_guc_write_params(guc);
ret = intel_guc_fw_upload(guc);
if (ret == 0)
break;
DRM_DEBUG_DRIVER("GuC fw load failed: %d; will reset and "
"retry %d more time(s)\n", ret, attempts);
}
/* Did we succeded or run out of retries? */
if (ret)
goto err_log_capture;
ret = guc_enable_communication(guc);
if (ret)
goto err_log_capture;
intel_huc_auth(huc);
ret = intel_guc_sample_forcewake(guc);
if (ret)
goto err_communication;
if (intel_uc_supports_guc_submission(uc)) {
ret = intel_guc_submission_enable(guc);
if (ret)
goto err_communication;
}
dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path,
guc->fw.major_ver_found, guc->fw.minor_ver_found,
"submission",
enableddisabled(intel_uc_supports_guc_submission(uc)));
if (intel_uc_uses_huc(uc)) {
dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC),
huc->fw.path,
huc->fw.major_ver_found, huc->fw.minor_ver_found,
"authenticated",
yesno(intel_huc_is_authenticated(huc)));
}
return 0;
/*
* We've failed to load the firmware :(
*/
err_communication:
guc_disable_communication(guc);
err_log_capture:
drm/i915/uc: Move GuC error log to uc and release it on fini When we fail to load GuC and want to abort probe, we hit: <7> [229.915779] i915 0000:00:02.0: [drm:intel_uc_init_hw [i915]] GuC initialization failed -6 <7> [229.915813] i915 0000:00:02.0: [drm:i915_gem_init_hw [i915]] Enabling uc failed (-6) <4> [229.953354] ------------[ cut here ]------------ <4> [229.953355] WARN_ON(dev_priv->mm.shrink_count) <4> [229.953406] WARNING: CPU: 9 PID: 3287 at drivers/gpu/drm/i915/i915_gem.c:1684 i915_gem_cleanup_early+0xfc/0x110 [i915] <4> [229.953464] Call Trace: <4> [229.953489] i915_driver_late_release+0x19/0x60 [i915] <4> [229.953514] i915_driver_probe+0xb82/0x18a0 [i915] <4> [229.953519] ? __pm_runtime_resume+0x4f/0x80 <4> [229.953545] i915_pci_probe+0x43/0x1b0 [i915] ... <4> [229.962951] ------------[ cut here ]------------ <4> [229.962956] DEBUG_LOCKS_WARN_ON(lock->magic != lock) <4> [229.962959] WARNING: CPU: 8 PID: 2395 at kernel/locking/mutex.c:912 __mutex_lock+0x750/0x9b0 <4> [229.963091] Call Trace: <4> [229.963129] ? i915_vma_destroy+0x86/0x350 [i915] <4> [229.963166] ? i915_vma_destroy+0x86/0x350 [i915] <4> [229.963201] i915_vma_destroy+0x86/0x350 [i915] <4> [229.963236] __i915_gem_free_objects+0xb8/0x510 [i915] <4> [229.963270] __i915_gem_free_work+0x5a/0x90 [i915] <4> [229.963275] process_one_work+0x245/0x610 as since commit 6f76098fe0f3 ("drm/i915/uc: Move uC early functions inside the GT ones") we cleanup uc after gem. Move captured GuC load error log to uc struct and release it in intel_uc_fini() instead of intel_uc_driver_late_release() Note that intel_uc_driver_late_release() is now empty, but we can leave it as a placeholder for future code. Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20190802184055.31988-5-michal.wajdeczko@intel.com
2019-08-02 18:40:53 +00:00
__uc_capture_load_err_log(uc);
err_out:
__uc_sanitize(uc);
if (!ret) {
dev_notice(i915->drm.dev, "GuC is uninitialized\n");
/* We want to run without GuC submission */
return 0;
}
i915_probe_error(i915, "GuC initialization failed %d\n", ret);
/* We want to keep KMS alive */
return -EIO;
}
void intel_uc_fini_hw(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
if (!intel_guc_is_running(guc))
return;
if (intel_uc_supports_guc_submission(uc))
intel_guc_submission_disable(guc);
if (guc_communication_enabled(guc))
guc_disable_communication(guc);
__uc_sanitize(uc);
}
/**
* intel_uc_reset_prepare - Prepare for reset
* @uc: the intel_uc structure
*
* Preparing for full gpu reset.
*/
void intel_uc_reset_prepare(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
if (!intel_guc_is_running(guc))
return;
guc_stop_communication(guc);
__uc_sanitize(uc);
}
void intel_uc_runtime_suspend(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
int err;
if (!intel_guc_is_running(guc))
drm/i915: Invert the GEM wakeref hierarchy In the current scheme, on submitting a request we take a single global GEM wakeref, which trickles down to wake up all GT power domains. This is undesirable as we would like to be able to localise our power management to the available power domains and to remove the global GEM operations from the heart of the driver. (The intent there is to push global GEM decisions to the boundary as used by the GEM user interface.) Now during request construction, each request is responsible via its logical context to acquire a wakeref on each power domain it intends to utilize. Currently, each request takes a wakeref on the engine(s) and the engines themselves take a chipset wakeref. This gives us a transition on each engine which we can extend if we want to insert more powermangement control (such as soft rc6). The global GEM operations that currently require a struct_mutex are reduced to listening to pm events from the chipset GT wakeref. As we reduce the struct_mutex requirement, these listeners should evaporate. Perhaps the biggest immediate change is that this removes the struct_mutex requirement around GT power management, allowing us greater flexibility in request construction. Another important knock-on effect, is that by tracking engine usage, we can insert a switch back to the kernel context on that engine immediately, avoiding any extra delay or inserting global synchronisation barriers. This makes tracking when an engine and its associated contexts are idle much easier -- important for when we forgo our assumed execution ordering and need idle barriers to unpin used contexts. In the process, it means we remove a large chunk of code whose only purpose was to switch back to the kernel context. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Imre Deak <imre.deak@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190424200717.1686-5-chris@chris-wilson.co.uk
2019-04-24 20:07:17 +00:00
return;
err = intel_guc_suspend(guc);
if (err)
DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err);
guc_disable_communication(guc);
}
void intel_uc_suspend(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
intel_wakeref_t wakeref;
if (!intel_guc_is_running(guc))
return;
with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref)
intel_uc_runtime_suspend(uc);
}
static int __uc_resume(struct intel_uc *uc, bool enable_communication)
{
struct intel_guc *guc = &uc->guc;
int err;
if (!intel_guc_is_running(guc))
return 0;
/* Make sure we enable communication if and only if it's disabled */
GEM_BUG_ON(enable_communication == guc_communication_enabled(guc));
if (enable_communication)
guc_enable_communication(guc);
err = intel_guc_resume(guc);
if (err) {
DRM_DEBUG_DRIVER("Failed to resume GuC, err=%d", err);
return err;
}
return 0;
}
int intel_uc_resume(struct intel_uc *uc)
{
/*
* When coming out of S3/S4 we sanitize and re-init the HW, so
* communication is already re-enabled at this point.
*/
return __uc_resume(uc, false);
}
int intel_uc_runtime_resume(struct intel_uc *uc)
{
/*
* During runtime resume we don't sanitize, so we need to re-init
* communication as well.
*/
return __uc_resume(uc, true);
}