From 83dcf232cc7919725a331359a300fb3929651b6e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 28 Aug 2024 11:43:42 +0100 Subject: [PATCH 1/7] drm/xe: prevent potential UAF in pf_provision_vf_ggtt() The node ptr can point to an already freed ptr, if we hit the path with an already allocated node. We later dereference that pointer with: xe_gt_assert(gt, !xe_ggtt_node_allocated(node)); which is a potential UAF. Fix this by not stashing the ptr for node. Also since it is likely a bad idea to leave config->ggtt_region pointing to a stale ptr, also set that to NULL by calling pf_release_vf_config_ggtt() instead of pf_release_ggtt(). Fixes: 34e804220f69 ("drm/xe: Make xe_ggtt_node struct independent") Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240828104341.180111-2-matthew.auld@intel.com (cherry picked from commit 89076b5a8b4e0a01040585e156a0b014cd472fd3) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index a95e546b7744..8250ef71e685 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -399,7 +399,7 @@ static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_confi static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct xe_ggtt_node *node = config->ggtt_region; + struct xe_ggtt_node *node; struct xe_tile *tile = gt_to_tile(gt); struct xe_ggtt *ggtt = tile->mem.ggtt; u64 alignment = pf_get_ggtt_alignment(gt); @@ -411,14 +411,14 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) size = round_up(size, alignment); - if (xe_ggtt_node_allocated(node)) { + if (xe_ggtt_node_allocated(config->ggtt_region)) { err = pf_distribute_config_ggtt(tile, vfid, 0, 0); if (unlikely(err)) return err; - pf_release_ggtt(tile, node); + pf_release_vf_config_ggtt(gt, config); } - xe_gt_assert(gt, !xe_ggtt_node_allocated(node)); + xe_gt_assert(gt, !xe_ggtt_node_allocated(config->ggtt_region)); if (!size) return 0; From ad92f52312614b0ef6eee07ee64f1e7661072a49 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 5 Sep 2024 10:02:15 -0400 Subject: [PATCH 2/7] drm/xe: Suppress missing outer rpm protection warning Do not raise a WARN if we are likely within suspending or resuming path. This is likely this false positive: rpm_status: 0000:03:00.0 status=RPM_SUSPENDING console: xe_bo_evict_all (called from suspend) xe_sched_job_create: dev=0000:03:00.0, ... xe_sched_job_exec: dev=0000:03:00.0, ... xe_pm_runtime_put: dev=0000:03:00.0, ... xe_sched_job_run: dev=0000:03:00.0, ... rpm_usage: 0000:03:00.0 flags-0 cnt-2 ... rpm_usage: 0000:03:00.0 flags-0 cnt-2 ... rpm_usage: 0000:03:00.0 flags-0 cnt-2 ... console: xe 0000:03:00.0: [drm] Missing outer runtime PM protection console: xe_guc_ct_send+0x15/0x50 [xe] console: guc_exec_queue_run_job+0x1509/0x3950 [xe] [snip] console: drm_sched_run_job_work+0x649/0xc20 At this point, BOs are getting evicted from VRAM with rpm usage-counter = 2, but rpm status = SUSPENDING. The xe->pm_callback_task won't be equal 'current' because this call is coming from a work queue. So, pm_runtime_get_if_active() will be called and return 0 because rpm status != ACTIVE (but equal SUSPENDING or RESUMING). v2: Still get the reference even on non suspending/resuming path (Jonathan, Brost). Cc: Matthew Brost Cc: Matthew Auld Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240905140215.56404-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit cb85e39dc5d1717fab82810984cce0e54712a3c2) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pm.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index e518557e0eec..9c59a30d7646 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -595,6 +595,18 @@ bool xe_pm_runtime_get_if_in_use(struct xe_device *xe) return pm_runtime_get_if_in_use(xe->drm.dev) > 0; } +/* + * Very unreliable! Should only be used to suppress the false positive case + * in the missing outer rpm protection warning. + */ +static bool xe_pm_suspending_or_resuming(struct xe_device *xe) +{ + struct device *dev = xe->drm.dev; + + return dev->power.runtime_status == RPM_SUSPENDING || + dev->power.runtime_status == RPM_RESUMING; +} + /** * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming * @xe: xe device instance @@ -611,8 +623,11 @@ void xe_pm_runtime_get_noresume(struct xe_device *xe) ref = xe_pm_runtime_get_if_in_use(xe); - if (drm_WARN(&xe->drm, !ref, "Missing outer runtime PM protection\n")) + if (!ref) { pm_runtime_get_noresume(xe->drm.dev); + drm_WARN(&xe->drm, !xe_pm_suspending_or_resuming(xe), + "Missing outer runtime PM protection\n"); + } } /** From 457ca96d04f599d2f95bd61144851f2181ccacc4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 9 Sep 2024 20:25:08 +0000 Subject: [PATCH 3/7] drm/xe: fix build warning with CONFIG_PM=n The 'runtime_status' field is an implementation detail of the power management code, so a device driver should not normally touch this: drivers/gpu/drm/xe/xe_pm.c: In function 'xe_pm_suspending_or_resuming': drivers/gpu/drm/xe/xe_pm.c:606:26: error: 'struct dev_pm_info' has no member named 'runtime_status' 606 | return dev->power.runtime_status == RPM_SUSPENDING || | ^ drivers/gpu/drm/xe/xe_pm.c:607:27: error: 'struct dev_pm_info' has no member named 'runtime_status' 607 | dev->power.runtime_status == RPM_RESUMING; | ^ drivers/gpu/drm/xe/xe_pm.c:608:1: error: control reaches end of non-void function [-Werror=return-type] Add an #ifdef check to avoid the build regression. Fixes: ad92f5231261 ("drm/xe: Suppress missing outer rpm protection warning") Reviewed-by: Rodrigo Vivi Signed-off-by: Arnd Bergmann Link: https://patchwork.freedesktop.org/patch/msgid/20240909202521.1018439-1-arnd@kernel.org Signed-off-by: Rodrigo Vivi (cherry picked from commit 1c129ed07de47684ff2471e32b52fa823533aa06) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 9c59a30d7646..a3d1509066f7 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -601,10 +601,14 @@ bool xe_pm_runtime_get_if_in_use(struct xe_device *xe) */ static bool xe_pm_suspending_or_resuming(struct xe_device *xe) { +#ifdef CONFIG_PM struct device *dev = xe->drm.dev; return dev->power.runtime_status == RPM_SUSPENDING || dev->power.runtime_status == RPM_RESUMING; +#else + return false; +#endif } /** From 2efba0c095419f93f8913f1cbae8bf3fb030db20 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Sun, 1 Sep 2024 07:42:27 +0300 Subject: [PATCH 4/7] drm/xe: fix missing 'xe_vm_put' Fix memleak caused by missing xe_vm_put Fixes: 852856e3b6f6 ("drm/xe: Use reserved copy engine for user binds on faulting devices") Signed-off-by: Dafna Hirschfeld Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240901044227.1177211-1-dhirschfeld@habana.ai Signed-off-by: Rodrigo Vivi (cherry picked from commit 249df8cbecf0ab4877eab66cae857748631831a9) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_exec_queue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 5a9cbc97f0be..7f28b7fc68d5 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -223,8 +223,10 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, gt->usm.reserved_bcs_instance, false); - if (!hwe) + if (!hwe) { + xe_vm_put(migrate_vm); return ERR_PTR(-EINVAL); + } q = xe_exec_queue_create(xe, migrate_vm, BIT(hwe->logical_instance), 1, hwe, From 70b4ab5489da0fe5b699a8466aa4f73ea304ae65 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sat, 7 Sep 2024 00:03:48 +0200 Subject: [PATCH 5/7] drm/xe: Don't keep stale pointer to bo->ggtt_node When we fail to map a BO in the GGTT, we release our GGTT node placeholder, but leave stale bo->ggtt_node pointer to it, which triggers an assert immediately followed by a crash, due to UAF: [ ] xe 0000:00:02.0: [drm] Assertion `bo->ggtt_node->base.size == bo->size` failed! [ ] WARNING: CPU: 4 PID: 126 at drivers/gpu/drm/xe/xe_ggtt.c:689 xe_ggtt_remove_bo+0x1d9/0x250 [xe] [ ] RIP: 0010:xe_ggtt_remove_bo+0x1d9/0x250 [xe] [ ] Call Trace: [ ] [ ] ? __warn+0x88/0x190 [ ] ? xe_ggtt_remove_bo+0x1d9/0x250 [xe] [ ] ? report_bug+0x1c3/0x1d0 [ ] ? handle_bug+0x42/0x70 [ ] ? exc_invalid_op+0x14/0x70 [ ] ? asm_exc_invalid_op+0x16/0x20 [ ] ? xe_ggtt_remove_bo+0x1d9/0x250 [xe] [ ] ? xe_ggtt_remove_bo+0x1d9/0x250 [xe] [ ] xe_ttm_bo_destroy+0x11f/0x260 [xe] [ ] ? ttm_bo_release+0x31c/0x350 [ttm] [ ] ? __mutex_unlock_slowpath+0x35/0x270 [ ] __xe_bo_create_locked+0x4a0/0x550 [xe] [ ] ? mark_held_locks+0x49/0x80 [ ] xe_bo_create_pin_map_at+0x37/0x200 [xe] [ ] xe_bo_create_pin_map+0x11/0x20 [xe] While around, for similar reason, also don't keep an error pointer if we fail to allocate ggtt_node placeholder. Fixes: 34e804220f69 ("drm/xe: Make xe_ggtt_node struct independent") Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240906220348.1836-1-michal.wajdeczko@intel.com (cherry picked from commit f2710d95724ebbfa35d6d4b82017eeab70994509) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_ggtt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index f3fca5565d32..2895f154654c 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -619,16 +619,19 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, bo->ggtt_node = xe_ggtt_node_init(ggtt); if (IS_ERR(bo->ggtt_node)) { err = PTR_ERR(bo->ggtt_node); + bo->ggtt_node = NULL; goto out; } mutex_lock(&ggtt->lock); err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size, alignment, 0, start, end, 0); - if (err) + if (err) { xe_ggtt_node_fini(bo->ggtt_node); - else + bo->ggtt_node = NULL; + } else { xe_ggtt_map_bo(ggtt, bo); + } mutex_unlock(&ggtt->lock); if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE) From da9a73b7b25eab574cb9c984fcce0b5e240bdd2c Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 4 Sep 2024 15:43:33 +0530 Subject: [PATCH 6/7] drm/xe/xe2hpg: Add Wa_15016589081 Wa_15016589081 applies to xe2_hpg renderCS V2(Gustavo) - rename bit macro Signed-off-by: Tejas Upadhyay Reviewed-by: Gustavo Sousa Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240904101333.2049655-1-tejas.upadhyay@intel.com Signed-off-by: Nirmoy Das (cherry picked from commit 9db969b36b2fbca13ad4088aff725ebd5e8142f5) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 0d1a4a9f4e11..660ff42e45a6 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -105,6 +105,7 @@ #define CHICKEN_RASTER_1 XE_REG_MCR(0x6204, XE_REG_OPTION_MASKED) #define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) +#define DIS_CLIP_NEGATIVE_BOUNDING_BOX REG_BIT(6) #define CHICKEN_RASTER_2 XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED) #define TBIMR_FAST_CLIP REG_BIT(5) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 28b7f95b6c2f..d424992514a4 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -733,6 +733,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_PARTIAL_AUTOSTRIP | DIS_AUTOSTRIP)) }, + { XE_RTP_NAME("15016589081"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) + }, {} }; From f1a4dceeb2bd4b4478e4f0c77dac55569d153fb3 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 5 Sep 2024 17:00:49 +0200 Subject: [PATCH 7/7] drm/xe: Fix missing conversion to xe_display_pm_runtime_resume This error path was missed when converting away from xe_display_pm_resume with second argument. Fixes: 66a0f6b9f5fc ("drm/xe/display: handle HPD polling in display runtime suspend/resume") Cc: Arun R Murthy Cc: Vinod Govindapillai Signed-off-by: Maarten Lankhorst Reviewed-by: Lucas De Marchi Reviewed-by: Vinod Govindapillai Link: https://patchwork.freedesktop.org/patch/msgid/20240905150052.174895-2-maarten.lankhorst@linux.intel.com (cherry picked from commit 474f64cb988a410db8a0b779d6afdaa2a7fc5759) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index a3d1509066f7..7cf2160fe040 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -416,7 +416,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_display_pm_suspend_late(xe); out: if (err) - xe_display_pm_resume(xe, true); + xe_display_pm_runtime_resume(xe); xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return err;