drm/amdgpu: Add enforce_isolation sysfs attribute

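This commit adds a new sysfs attribute 'enforce_isolation' to control
the 'enforce_isolation' setting per GPU. The attribute can be read and
written; it takes one value per partition, separated by spaces (a single
value when the GPU is not partitioned), and each value must be 0
(disabled) or 1 (enabled).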

When 'enforce_isolation' is enabled, reserved VMIDs are allocated for
each ring. When it's disabled, the reserved VMIDs are freed.

The set function locks a mutex before changing the 'enforce_isolation'
flag and the VMIDs, and unlocks it afterwards. This ensures that these
operations are atomic and prevents race conditions and other concurrency
issues.

Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Suggested-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Srinivasan Shanmugam 2024-05-27 07:30:47 +05:30 committed by Alex Deucher
parent dba1a6cfc3
commit e189be9b2e
4 changed files with 107 additions and 0 deletions

View File

@ -1164,6 +1164,8 @@ struct amdgpu_device {
bool debug_enable_ras_aca;
bool enforce_isolation[MAX_XCP];
/* Serializes updates to enforce_isolation[] and the reserved VMIDs (cleaner shader isolation between GFX and compute processes) */
struct mutex enforce_isolation_mutex;
};
static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,

View File

@ -4065,6 +4065,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->pm.stable_pstate_ctx_lock);
mutex_init(&adev->benchmark_mutex);
mutex_init(&adev->gfx.reset_sem_mutex);
/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
mutex_init(&adev->enforce_isolation_mutex);
amdgpu_device_init_apu_flags(adev);

View File

@ -1391,6 +1391,88 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
return sysfs_emit(buf, "%s\n", supported_partition);
}
/**
 * amdgpu_gfx_get_enforce_isolation - sysfs show handler for enforce_isolation
 * @dev: device owning the attribute; its drvdata is the drm_device
 * @attr: attribute being read (unused)
 * @buf: sysfs output buffer
 *
 * Emits one enforce_isolation flag per partition, separated by single spaces
 * and terminated by a newline. When the device has no XCP manager only
 * enforce_isolation[0] is reported.
 *
 * Return: number of bytes written to @buf.
 */
static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
						struct device_attribute *attr,
						char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	ssize_t size = 0;
	int i;

	/* Non-partitioned device: a single flag followed by a newline. */
	if (!adev->xcp_mgr)
		return sysfs_emit(buf, "%u\n", adev->enforce_isolation[0]);

	for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
		size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]);
		/* Separator between values only, never after the last one. */
		if (i < (adev->xcp_mgr->num_xcps - 1))
			size += sysfs_emit_at(buf, size, " ");
	}

	/*
	 * Append the newline through sysfs_emit_at() rather than storing into
	 * buf[] directly, so the final byte is bounds-checked and the buffer
	 * stays NUL-terminated like every other write above.
	 */
	size += sysfs_emit_at(buf, size, "\n");

	return size;
}
/**
 * amdgpu_gfx_set_enforce_isolation - sysfs store handler for enforce_isolation
 * @dev: device owning the attribute; its drvdata is the drm_device
 * @attr: attribute being written (unused)
 * @buf: user input, space-separated integers, one per partition
 * @count: number of bytes in @buf
 *
 * Expects exactly num_xcps values when the device has an XCP manager and
 * exactly one value otherwise; every value must be 0 or 1. For each partition
 * whose setting changes, the reserved VMID on AMDGPU_GFXHUB(partition) is
 * allocated (0 -> 1) or freed (1 -> 0), and the new flag is stored. The update
 * loop runs under enforce_isolation_mutex.
 *
 * NOTE(review): parsing stops once the expected number of values has been
 * read, so any further tokens in @buf are not examined.
 *
 * Return: @count on success, -EINVAL on a wrong value count or a value other
 * than 0 or 1.
 */
static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
						struct device_attribute *attr,
						const char *buf, size_t count)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	long requested[MAX_XCP] = {0};
	const char *cursor = buf;
	int expected, parsed, idx;

	expected = adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1;

	/* Read up to @expected integers, hopping over one space per value. */
	for (parsed = 0; parsed < expected; ) {
		if (sscanf(cursor, "%ld", &requested[parsed]) <= 0)
			break;
		parsed++;

		cursor = strchr(cursor, ' ');
		if (!cursor)
			break;
		cursor++;
	}

	/* The caller must supply a value for every partition, no fewer. */
	if (parsed != expected)
		return -EINVAL;

	/* Reject the whole request before touching any state. */
	for (idx = 0; idx < parsed; idx++) {
		if (requested[idx] < 0 || requested[idx] > 1)
			return -EINVAL;
	}

	mutex_lock(&adev->enforce_isolation_mutex);
	for (idx = 0; idx < parsed; idx++) {
		bool enable = requested[idx] != 0;

		/* Only a change of state touches the reserved VMID. */
		if (adev->enforce_isolation[idx] != enable) {
			if (enable)
				amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(idx));
			else
				amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(idx));
		}
		adev->enforce_isolation[idx] = enable;
	}
	mutex_unlock(&adev->enforce_isolation_mutex);

	return count;
}
/*
 * "enforce_isolation" device attribute, mode 0644: reads go through
 * amdgpu_gfx_get_enforce_isolation(), writes through
 * amdgpu_gfx_set_enforce_isolation().
 */
static DEVICE_ATTR(enforce_isolation, 0644,
amdgpu_gfx_get_enforce_isolation,
amdgpu_gfx_set_enforce_isolation);
/*
 * "current_compute_partition" device attribute, mode 0644: reads go through
 * amdgpu_gfx_get_current_compute_partition(), writes through
 * amdgpu_gfx_set_compute_partition().
 */
static DEVICE_ATTR(current_compute_partition, 0644,
amdgpu_gfx_get_current_compute_partition,
amdgpu_gfx_set_compute_partition);
@ -1417,6 +1499,25 @@ void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev, &dev_attr_available_compute_partition);
}
/**
 * amdgpu_gfx_sysfs_isolation_shader_init - create the enforce_isolation file
 * @adev: amdgpu device whose underlying device receives the attribute
 *
 * Does nothing when amdgpu_sriov_vf() reports true for @adev.
 *
 * Return: 0 on success or when skipped, otherwise the error returned by
 * device_create_file().
 */
int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
{
	/* The attribute is not exposed when amdgpu_sriov_vf() is true. */
	if (amdgpu_sriov_vf(adev))
		return 0;

	return device_create_file(adev->dev, &dev_attr_enforce_isolation);
}
/**
 * amdgpu_gfx_sysfs_isolation_shader_fini - remove the enforce_isolation file
 * @adev: amdgpu device whose underlying device holds the attribute
 *
 * Mirrors amdgpu_gfx_sysfs_isolation_shader_init(): nothing is removed when
 * amdgpu_sriov_vf() reports true for @adev.
 */
void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev))
		return;

	device_remove_file(adev->dev, &dev_attr_enforce_isolation);
}
int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
unsigned int cleaner_shader_size)
{

View File

@ -561,6 +561,8 @@ void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev);
void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
unsigned int cleaner_shader_size,
const void *cleaner_shader_ptr);
/*
 * Create the enforce_isolation sysfs file unless amdgpu_sriov_vf(adev) is
 * true; returns 0 or the device_create_file() error.
 */
int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev);
/* Remove the enforce_isolation sysfs file unless amdgpu_sriov_vf(adev) is true. */
void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev);
static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
{