drm/xe/hw_engine_group: Introduce xe_hw_engine_group

A xe_hw_engine_group is a group of hw engines. Two hw engines belong to
the same xe_hw_engine_group if one hw engine cannot make progress while
the other is stuck on a page fault.

Typically, hw engines of the same group share some resources such as EUs,
but this really depends on the hardware configuration of the platforms.
The simple engines partitioning proposed here might be too conservative
but is intended to work for existing platforms. It can be optimized later
if more sets of independent engines are identified.

The hw engine groups are intended to be used in the context of faulting
long-running jobs submissions.

v2: Move to own files, improve error handling (Matt Brost)

v3: Fix build issue reported by CI, improve commit message (Matt Roper)

v4: Fix kernel doc

v5: Add switch case for XE_ENGINE_CLASS_OTHER

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-2-francois.dugast@intel.com
This commit is contained in:
Francois Dugast 2024-08-09 17:51:26 +02:00 committed by Matthew Brost
parent 852856e3b6
commit f784750c67
6 changed files with 176 additions and 0 deletions

View File

@ -66,6 +66,7 @@ xe-y += xe_bb.o \
xe_heci_gsc.o \
xe_hw_engine.o \
xe_hw_engine_class_sysfs.o \
xe_hw_engine_group.o \
xe_hw_fence.o \
xe_huc.o \
xe_irq.o \

View File

@ -23,6 +23,7 @@
#include "xe_gt_printk.h"
#include "xe_gt_mcr.h"
#include "xe_gt_topology.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
@ -790,6 +791,9 @@ int xe_hw_engines_init(struct xe_gt *gt)
}
hw_engine_setup_logical_mapping(gt);
err = xe_hw_engine_setup_groups(gt);
if (err)
return err;
return 0;
}

View File

@ -0,0 +1,102 @@
// SPDX-License-Identifier: MIT
/*
* Copyright © 2024 Intel Corporation
*/
#include <drm/drm_managed.h>
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_hw_engine_group.h"
static void
hw_engine_group_free(struct drm_device *drm, void *arg)
{
struct xe_hw_engine_group *group = arg;
kfree(group);
}
static struct xe_hw_engine_group *
hw_engine_group_alloc(struct xe_device *xe)
{
struct xe_hw_engine_group *group;
int err;
group = kzalloc(sizeof(*group), GFP_KERNEL);
if (!group)
return ERR_PTR(-ENOMEM);
init_rwsem(&group->mode_sem);
INIT_LIST_HEAD(&group->exec_queue_list);
err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group);
if (err)
return ERR_PTR(err);
return group;
}
/**
* xe_hw_engine_setup_groups() - Setup the hw engine groups for the gt
* @gt: The gt for which groups are setup
*
* Return: 0 on success, negative error code on error.
*/
int xe_hw_engine_setup_groups(struct xe_gt *gt)
{
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs;
struct xe_device *xe = gt_to_xe(gt);
int err;
group_rcs_ccs = hw_engine_group_alloc(xe);
if (IS_ERR(group_rcs_ccs)) {
err = PTR_ERR(group_rcs_ccs);
goto err_group_rcs_ccs;
}
group_bcs = hw_engine_group_alloc(xe);
if (IS_ERR(group_bcs)) {
err = PTR_ERR(group_bcs);
goto err_group_bcs;
}
group_vcs_vecs = hw_engine_group_alloc(xe);
if (IS_ERR(group_vcs_vecs)) {
err = PTR_ERR(group_vcs_vecs);
goto err_group_vcs_vecs;
}
for_each_hw_engine(hwe, gt, id) {
switch (hwe->class) {
case XE_ENGINE_CLASS_COPY:
hwe->hw_engine_group = group_bcs;
break;
case XE_ENGINE_CLASS_RENDER:
case XE_ENGINE_CLASS_COMPUTE:
hwe->hw_engine_group = group_rcs_ccs;
break;
case XE_ENGINE_CLASS_VIDEO_DECODE:
case XE_ENGINE_CLASS_VIDEO_ENHANCE:
hwe->hw_engine_group = group_vcs_vecs;
break;
case XE_ENGINE_CLASS_OTHER:
break;
default:
drm_warn(&xe->drm, "NOT POSSIBLE");
}
}
return 0;
err_group_vcs_vecs:
kfree(group_vcs_vecs);
err_group_bcs:
kfree(group_bcs);
err_group_rcs_ccs:
kfree(group_rcs_ccs);
return err;
}

View File

@ -0,0 +1,16 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2024 Intel Corporation
*/
#ifndef _XE_HW_ENGINE_GROUP_H_
#define _XE_HW_ENGINE_GROUP_H_
#include "xe_hw_engine_group_types.h"
struct drm_device;
struct xe_gt;
int xe_hw_engine_setup_groups(struct xe_gt *gt);
#endif

View File

@ -0,0 +1,51 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2024 Intel Corporation
*/
#ifndef _XE_HW_ENGINE_GROUP_TYPES_H_
#define _XE_HW_ENGINE_GROUP_TYPES_H_
#include "xe_force_wake_types.h"
#include "xe_lrc_types.h"
#include "xe_reg_sr_types.h"
/**
* enum xe_hw_engine_group_execution_mode - possible execution modes of a hw
* engine group
*
* @EXEC_MODE_LR: execution in long-running mode
* @EXEC_MODE_DMA_FENCE: execution in dma fence mode
*/
enum xe_hw_engine_group_execution_mode {
EXEC_MODE_LR,
EXEC_MODE_DMA_FENCE,
};
/**
* struct xe_hw_engine_group - Hardware engine group
*
* hw engines belong to the same group if they share hardware resources in a way
* that prevents them from making progress when one is stuck on a page fault.
*/
struct xe_hw_engine_group {
/**
* @exec_queue_list: list of exec queues attached to this
* xe_hw_engine_group
*/
struct list_head exec_queue_list;
/** @resume_work: worker to resume faulting LR exec queues */
struct work_struct resume_work;
/** @resume_wq: workqueue to resume faulting LR exec queues */
struct workqueue_struct *resume_wq;
/**
* @mode_sem: used to protect this group's hardware resources and ensure
* mutual exclusion between execution only in faulting LR mode and
* execution only in DMA_FENCE mode
*/
struct rw_semaphore mode_sem;
/** @cur_mode: current execution mode of this hw engine group */
enum xe_hw_engine_group_execution_mode cur_mode;
};
#endif

View File

@ -150,6 +150,8 @@ struct xe_hw_engine {
struct xe_hw_engine_class_intf *eclass;
/** @oa_unit: oa unit for this hw engine */
struct xe_oa_unit *oa_unit;
/** @hw_engine_group: the group of hw engines this one belongs to */
struct xe_hw_engine_group *hw_engine_group;
};
/**