drm/amdgpu: add mutex to protect ras shared memory

Add mutex to protect ras shared memory.

v2:
  Add TA_RAS_COMMAND__TRIGGER_ERROR command call
  status check.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
YiPeng Chai 2024-04-12 13:46:03 +08:00 committed by Alex Deucher
parent 0e2c796b49
commit b3fb79cda5
3 changed files with 86 additions and 40 deletions

View File

@ -1591,6 +1591,68 @@ static void psp_ras_ta_check_status(struct psp_context *psp)
}
}
static int psp_ras_send_cmd(struct psp_context *psp,
enum ras_command cmd_id, void *in, void *out)
{
struct ta_ras_shared_memory *ras_cmd;
uint32_t cmd = cmd_id;
int ret = 0;
if (!in)
return -EINVAL;
mutex_lock(&psp->ras_context.mutex);
ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
switch (cmd) {
case TA_RAS_COMMAND__ENABLE_FEATURES:
case TA_RAS_COMMAND__DISABLE_FEATURES:
memcpy(&ras_cmd->ras_in_message,
in, sizeof(ras_cmd->ras_in_message));
break;
case TA_RAS_COMMAND__TRIGGER_ERROR:
memcpy(&ras_cmd->ras_in_message.trigger_error,
in, sizeof(ras_cmd->ras_in_message.trigger_error));
break;
case TA_RAS_COMMAND__QUERY_ADDRESS:
memcpy(&ras_cmd->ras_in_message.address,
in, sizeof(ras_cmd->ras_in_message.address));
break;
default:
dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd);
ret = -EINVAL;
goto err_out;
}
ras_cmd->cmd_id = cmd;
ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
switch (cmd) {
case TA_RAS_COMMAND__TRIGGER_ERROR:
if (ret || psp->cmd_buf_mem->resp.status)
ret = -EINVAL;
else if (out)
memcpy(out, &ras_cmd->ras_status, sizeof(ras_cmd->ras_status));
break;
case TA_RAS_COMMAND__QUERY_ADDRESS:
if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
ret = -EINVAL;
else if (out)
memcpy(out,
&ras_cmd->ras_out_message.address,
sizeof(ras_cmd->ras_out_message.address));
break;
default:
break;
}
err_out:
mutex_unlock(&psp->ras_context.mutex);
return ret;
}
int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
{
struct ta_ras_shared_memory *ras_cmd;
@ -1632,23 +1694,15 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable)
{
struct ta_ras_shared_memory *ras_cmd;
enum ras_command cmd_id;
int ret;
if (!psp->ras_context.context.initialized)
if (!psp->ras_context.context.initialized || !info)
return -EINVAL;
ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
if (enable)
ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES;
else
ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES;
ras_cmd->ras_in_message = *info;
ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
cmd_id = enable ?
TA_RAS_COMMAND__ENABLE_FEATURES : TA_RAS_COMMAND__DISABLE_FEATURES;
ret = psp_ras_send_cmd(psp, cmd_id, info, NULL);
if (ret)
return -EINVAL;
@ -1672,6 +1726,8 @@ int psp_ras_terminate(struct psp_context *psp)
psp->ras_context.context.initialized = false;
mutex_destroy(&psp->ras_context.mutex);
return ret;
}
@ -1756,9 +1812,10 @@ int psp_ras_initialize(struct psp_context *psp)
ret = psp_ta_load(psp, &psp->ras_context.context);
if (!ret && !ras_cmd->ras_status)
if (!ret && !ras_cmd->ras_status) {
psp->ras_context.context.initialized = true;
else {
mutex_init(&psp->ras_context.mutex);
} else {
if (ras_cmd->ras_status)
dev_warn(adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status);
@ -1772,12 +1829,12 @@ int psp_ras_initialize(struct psp_context *psp)
int psp_ras_trigger_error(struct psp_context *psp,
struct ta_ras_trigger_error_input *info, uint32_t instance_mask)
{
struct ta_ras_shared_memory *ras_cmd;
struct amdgpu_device *adev = psp->adev;
int ret;
uint32_t dev_mask;
uint32_t ras_status = 0;
if (!psp->ras_context.context.initialized)
if (!psp->ras_context.context.initialized || !info)
return -EINVAL;
switch (info->block_id) {
@ -1801,13 +1858,8 @@ int psp_ras_trigger_error(struct psp_context *psp,
dev_mask &= AMDGPU_RAS_INST_MASK;
info->sub_block_index |= dev_mask;
ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR;
ras_cmd->ras_in_message.trigger_error = *info;
ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
ret = psp_ras_send_cmd(psp,
TA_RAS_COMMAND__TRIGGER_ERROR, info, &ras_status);
if (ret)
return -EINVAL;
@ -1817,9 +1869,9 @@ int psp_ras_trigger_error(struct psp_context *psp,
if (amdgpu_ras_intr_triggered())
return 0;
if (ras_cmd->ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED)
if (ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED)
return -EACCES;
else if (ras_cmd->ras_status)
else if (ras_status)
return -EINVAL;
return 0;
@ -1829,25 +1881,16 @@ int psp_ras_query_address(struct psp_context *psp,
struct ta_ras_query_address_input *addr_in,
struct ta_ras_query_address_output *addr_out)
{
struct ta_ras_shared_memory *ras_cmd;
int ret;
if (!psp->ras_context.context.initialized)
if (!psp->ras_context.context.initialized ||
!addr_in || !addr_out)
return -EINVAL;
ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
ret = psp_ras_send_cmd(psp,
TA_RAS_COMMAND__QUERY_ADDRESS, addr_in, addr_out);
ras_cmd->cmd_id = TA_RAS_COMMAND__QUERY_ADDRESS;
ras_cmd->ras_in_message.address = *addr_in;
ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
return -EINVAL;
*addr_out = ras_cmd->ras_out_message.address;
return 0;
return ret;
}
// ras end

View File

@ -200,6 +200,7 @@ struct psp_xgmi_context {
struct psp_ras_context {
struct ta_context context;
struct amdgpu_ras *ras;
struct mutex mutex;
};
#define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942

View File

@ -348,6 +348,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
context->session_id = ta_id;
mutex_lock(&psp->ras_context.mutex);
ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len);
if (ret)
goto err_free_shared_buf;
@ -366,6 +367,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
ret = -EFAULT;
err_free_shared_buf:
mutex_unlock(&psp->ras_context.mutex);
kfree(shared_buf);
return ret;