habanalabs: read f/w's 2-nd sts and err registers

Maintain both STS1 and ERR1 registers used for status communication
with F/W.
Those are not maintained as we currently have less than 31
statuses/error defined and so LKD did not refer to those register.
The reason to read them now is to try to support future f/w versions
with current driver.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Ohad Sharabi 2021-05-02 15:45:21 +03:00 committed by Oded Gabbay
parent ea7d5e7b10
commit e67a60400f
4 changed files with 291 additions and 133 deletions

View File

@ -146,6 +146,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
u16 len, u32 timeout, u64 *result)
{
struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id];
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct cpucp_packet *pkt;
dma_addr_t pkt_dma_addr;
u32 tmp, expected_ack_val;
@ -180,8 +181,9 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
goto out;
}
if (hdev->asic_prop.fw_app_security_map &
CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
if (prop->fw_cpu_boot_dev_sts0_valid &&
(prop->fw_app_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN))
expected_ack_val = queue->pi;
else
expected_ack_val = CPUCP_PACKET_FENCE_VAL;
@ -344,24 +346,13 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)
return rc;
}
static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
u32 cpu_security_boot_status_reg)
static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
u32 sts_val)
{
u32 err_val, security_val;
bool err_exists = false;
/* Some of the firmware status codes are deprecated in newer f/w
* versions. In those versions, the errors are reported
* in different registers. Therefore, we need to check those
* registers and print the exact errors. Moreover, there
* may be multiple errors, so we need to report on each error
* separately. Some of the error codes might indicate a state
* that is not an error per-se, but it is an error in production
* environment
*/
err_val = RREG32(boot_err0_reg);
if (!(err_val & CPU_BOOT_ERR0_ENABLED))
return 0;
return false;
if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) {
dev_err(hdev->dev,
@ -432,6 +423,20 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL) {
dev_warn(hdev->dev,
"Device boot warning - Failed to load preboot primary image\n");
/* This is a warning so we don't want it to disable the
* device as we have a secondary preboot image
*/
err_val &= ~CPU_BOOT_ERR0_PRI_IMG_VER_FAIL;
}
if (err_val & CPU_BOOT_ERR0_SEC_IMG_VER_FAIL) {
dev_err(hdev->dev, "Device boot error - Failed to load preboot secondary image\n");
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_PLL_FAIL) {
dev_err(hdev->dev, "Device boot error - PLL failure\n");
err_exists = true;
@ -443,28 +448,89 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
err_val &= ~CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL;
}
security_val = RREG32(cpu_security_boot_status_reg);
if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
dev_dbg(hdev->dev, "Device security status %#x\n",
security_val);
if (sts_val & CPU_BOOT_DEV_STS0_ENABLED)
dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val);
if (!err_exists && (err_val & ~CPU_BOOT_ERR0_ENABLED)) {
dev_err(hdev->dev,
"Device boot error - unknown error 0x%08x\n",
err_val);
"Device boot error - unknown ERR0 error 0x%08x\n", err_val);
err_exists = true;
}
/* return error only if it's in the predefined mask */
if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) &
lower_32_bits(hdev->boot_error_status_mask)))
return true;
return false;
}
/* placeholder for ERR1 as no errors defined there yet */
static bool fw_report_boot_dev1(struct hl_device *hdev, u32 err_val,
u32 sts_val)
{
/*
* keep this variable to preserve the logic of the function.
* this way it would require less modifications when error will be
* added to DEV_ERR1
*/
bool err_exists = false;
if (!(err_val & CPU_BOOT_ERR1_ENABLED))
return false;
if (sts_val & CPU_BOOT_DEV_STS1_ENABLED)
dev_dbg(hdev->dev, "Device status1 %#x\n", sts_val);
if (!err_exists && (err_val & ~CPU_BOOT_ERR1_ENABLED)) {
dev_err(hdev->dev,
"Device boot error - unknown ERR1 error 0x%08x\n",
err_val);
err_exists = true;
}
/* return error only if it's in the predefined mask */
if (err_exists && ((err_val & ~CPU_BOOT_ERR1_ENABLED) &
upper_32_bits(hdev->boot_error_status_mask)))
return true;
return false;
}
static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
u32 boot_err1_reg, u32 cpu_boot_dev_status0_reg,
u32 cpu_boot_dev_status1_reg)
{
u32 err_val, status_val;
bool err_exists = false;
/* Some of the firmware status codes are deprecated in newer f/w
* versions. In those versions, the errors are reported
* in different registers. Therefore, we need to check those
* registers and print the exact errors. Moreover, there
* may be multiple errors, so we need to report on each error
* separately. Some of the error codes might indicate a state
* that is not an error per-se, but it is an error in production
* environment
*/
err_val = RREG32(boot_err0_reg);
status_val = RREG32(cpu_boot_dev_status0_reg);
err_exists = fw_report_boot_dev0(hdev, err_val, status_val);
err_val = RREG32(boot_err1_reg);
status_val = RREG32(cpu_boot_dev_status1_reg);
err_exists |= fw_report_boot_dev1(hdev, err_val, status_val);
if (err_exists)
return -EIO;
return 0;
}
int hl_fw_cpucp_info_get(struct hl_device *hdev,
u32 cpu_security_boot_status_reg,
u32 boot_err0_reg)
u32 sts_boot_dev_sts0_reg,
u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
u32 boot_err1_reg)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct cpucp_packet pkt = {};
@ -498,7 +564,8 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
goto out;
}
rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
rc = fw_read_errors(hdev, boot_err0_reg, boot_err1_reg,
sts_boot_dev_sts0_reg, sts_boot_dev_sts1_reg);
if (rc) {
dev_err(hdev->dev, "Errors in device boot\n");
goto out;
@ -516,9 +583,13 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
}
/* Read FW application security bits again */
if (hdev->asic_prop.fw_security_status_valid)
hdev->asic_prop.fw_app_security_map =
RREG32(cpu_security_boot_status_reg);
if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid)
hdev->asic_prop.fw_app_cpu_boot_dev_sts0 =
RREG32(sts_boot_dev_sts0_reg);
if (hdev->asic_prop.fw_cpu_boot_dev_sts1_valid)
hdev->asic_prop.fw_app_cpu_boot_dev_sts1 =
RREG32(sts_boot_dev_sts1_reg);
out:
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
@ -582,13 +653,15 @@ static int hl_fw_send_msi_info_msg(struct hl_device *hdev)
}
int hl_fw_cpucp_handshake(struct hl_device *hdev,
u32 cpu_security_boot_status_reg,
u32 boot_err0_reg)
u32 sts_boot_dev_sts0_reg,
u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
u32 boot_err1_reg)
{
int rc;
rc = hl_fw_cpucp_info_get(hdev, cpu_security_boot_status_reg,
boot_err0_reg);
rc = hl_fw_cpucp_info_get(hdev, sts_boot_dev_sts0_reg,
sts_boot_dev_sts1_reg, boot_err0_reg,
boot_err1_reg);
if (rc)
return rc;
@ -723,8 +796,8 @@ int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index,
bool dynamic_pll;
int fw_pll_idx;
dynamic_pll = prop->fw_security_status_valid &&
(prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN);
dynamic_pll = prop->fw_cpu_boot_dev_sts0_valid &&
(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_DYN_PLL_EN);
if (!dynamic_pll) {
/*
@ -867,8 +940,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
static int hl_fw_read_preboot_caps(struct hl_device *hdev,
u32 cpu_boot_status_reg,
u32 cpu_boot_caps_reg,
u32 boot_err0_reg, u32 timeout)
u32 sts_boot_dev_sts0_reg,
u32 sts_boot_dev_sts1_reg,
u32 boot_err0_reg, u32 boot_err1_reg,
u32 timeout)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u32 status;
@ -903,15 +978,20 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev,
* of reading specific errors
*/
if (status != -1)
fw_read_errors(hdev, boot_err0_reg,
cpu_boot_status_reg);
fw_read_errors(hdev, boot_err0_reg, boot_err1_reg,
sts_boot_dev_sts0_reg,
sts_boot_dev_sts1_reg);
return -EIO;
}
prop->fw_preboot_caps_map = RREG32(cpu_boot_caps_reg);
prop->fw_preboot_cpu_boot_dev_sts0 = RREG32(sts_boot_dev_sts0_reg);
prop->fw_preboot_cpu_boot_dev_sts1 = RREG32(sts_boot_dev_sts1_reg);
prop->dynamic_fw_load = !!(prop->fw_preboot_caps_map &
if (prop->fw_preboot_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_ENABLED)
prop->dynamic_fw_load = !!(prop->fw_preboot_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_FW_LD_COM_EN);
else
prop->dynamic_fw_load = 0;
/* initialize FW loader once we know what load protocol is used */
hdev->asic_funcs->init_firmware_loader(hdev);
@ -978,9 +1058,10 @@ static int hl_fw_static_read_device_fw_version(struct hl_device *hdev,
static void hl_fw_preboot_update_state(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u32 preboot_caps;
u32 cpu_boot_dev_sts0, cpu_boot_dev_sts1;
preboot_caps = prop->fw_preboot_caps_map;
cpu_boot_dev_sts0 = prop->fw_preboot_cpu_boot_dev_sts0;
cpu_boot_dev_sts1 = prop->fw_preboot_cpu_boot_dev_sts1;
/* We read security status multiple times during boot:
* 1. preboot - a. Check whether the security status bits are valid
@ -995,23 +1076,30 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
* Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set
* check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
*/
if (preboot_caps & CPU_BOOT_DEV_STS0_ENABLED) {
prop->fw_security_status_valid = 1;
if (cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_ENABLED) {
prop->fw_cpu_boot_dev_sts0_valid = 1;
/* FW security should be derived from PCI ID, we keep this
* check for backward compatibility
*/
if (preboot_caps & CPU_BOOT_DEV_STS0_SECURITY_EN)
if (cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_SECURITY_EN)
prop->fw_security_disabled = false;
if (preboot_caps & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
if (cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
prop->hard_reset_done_by_fw = true;
} else {
prop->fw_security_status_valid = 0;
prop->fw_cpu_boot_dev_sts0_valid = 0;
}
dev_dbg(hdev->dev, "Firmware preboot security status %#x\n",
preboot_caps);
/* place holder for STS1 as no statuses are defined yet */
prop->fw_cpu_boot_dev_sts1_valid =
!!(cpu_boot_dev_sts1 & CPU_BOOT_DEV_STS1_ENABLED);
dev_dbg(hdev->dev, "Firmware preboot boot device status0 %#x\n",
cpu_boot_dev_sts0);
dev_dbg(hdev->dev, "Firmware preboot boot device status1 %#x\n",
cpu_boot_dev_sts1);
dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
prop->hard_reset_done_by_fw ? "enabled" : "disabled");
@ -1020,9 +1108,7 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
prop->fw_security_disabled ? "disabled" : "enabled");
}
static int hl_fw_static_read_preboot_status(struct hl_device *hdev,
u32 cpu_boot_status_reg, u32 cpu_security_boot_status_reg,
u32 boot_err0_reg, u32 timeout)
static int hl_fw_static_read_preboot_status(struct hl_device *hdev)
{
int rc;
@ -1036,8 +1122,9 @@ static int hl_fw_static_read_preboot_status(struct hl_device *hdev,
}
int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 cpu_boot_caps_reg, u32 boot_err0_reg,
u32 timeout)
u32 sts_boot_dev_sts0_reg,
u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
u32 boot_err1_reg, u32 timeout)
{
int rc;
@ -1053,8 +1140,9 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
* read the boot caps register
*/
rc = hl_fw_read_preboot_caps(hdev, cpu_boot_status_reg,
cpu_boot_caps_reg, boot_err0_reg,
timeout);
sts_boot_dev_sts0_reg,
sts_boot_dev_sts1_reg, boot_err0_reg,
boot_err1_reg, timeout);
if (rc)
return rc;
@ -1062,9 +1150,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
if (hdev->asic_prop.dynamic_fw_load)
return 0;
return hl_fw_static_read_preboot_status(hdev, cpu_boot_status_reg,
cpu_boot_caps_reg, boot_err0_reg,
timeout);
return hl_fw_static_read_preboot_status(hdev);
}
/* associate string with COMM status */
@ -1610,30 +1696,37 @@ static int hl_fw_dynamic_copy_image(struct hl_device *hdev,
* is loaded
*
* @hdev: pointer to the habanalabs device structure
* @cpu_security_boot_status_reg: register holding security status props
* @cpu_boot_dev_sts0_reg: register holding CPU boot dev status 0
* @cpu_boot_dev_sts1_reg: register holding CPU boot dev status 1
*
* @return 0 on success, otherwise non-zero error code
*/
static void hl_fw_boot_fit_update_state(struct hl_device *hdev,
u32 cpu_security_boot_status_reg)
u32 cpu_boot_dev_sts0_reg,
u32 cpu_boot_dev_sts1_reg)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
/* Clear reset status since we need to read it again from boot CPU */
prop->hard_reset_done_by_fw = false;
/* Read boot_cpu security bits */
if (prop->fw_security_status_valid) {
prop->fw_boot_cpu_security_map =
RREG32(cpu_security_boot_status_reg);
/* Read boot_cpu status bits */
if (prop->fw_cpu_boot_dev_sts0_valid) {
prop->fw_bootfit_cpu_boot_dev_sts0 = RREG32(cpu_boot_dev_sts0_reg);
if (prop->fw_boot_cpu_security_map &
if (prop->fw_bootfit_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
prop->hard_reset_done_by_fw = true;
dev_dbg(hdev->dev,
"Firmware boot CPU security status %#x\n",
prop->fw_boot_cpu_security_map);
dev_dbg(hdev->dev, "Firmware boot CPU status0 %#x\n",
prop->fw_bootfit_cpu_boot_dev_sts0);
}
if (prop->fw_cpu_boot_dev_sts1_valid) {
prop->fw_bootfit_cpu_boot_dev_sts1 = RREG32(cpu_boot_dev_sts1_reg);
dev_dbg(hdev->dev, "Firmware boot CPU status1 %#x\n",
prop->fw_bootfit_cpu_boot_dev_sts1);
}
dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
@ -1697,7 +1790,8 @@ static int hl_fw_dynamic_load_image(struct hl_device *hdev,
dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
hl_fw_boot_fit_update_state(hdev,
le32_to_cpu(dyn_regs->cpu_boot_status));
le32_to_cpu(dyn_regs->cpu_boot_dev_sts0),
le32_to_cpu(dyn_regs->cpu_boot_dev_sts1));
} else {
/* update state during preboot handshake */
hl_fw_preboot_update_state(hdev);
@ -1783,11 +1877,14 @@ static int hl_fw_dynamic_wait_for_linux_active(struct hl_device *hdev,
*
*
* @hdev: pointer to the habanalabs device structure
* @cpu_boot_dev_sts0_reg: register holding CPU boot dev status 0
* @cpu_boot_dev_sts1_reg: register holding CPU boot dev status 1
*
* @return 0 on success, otherwise non-zero error code
*/
static void hl_fw_linux_update_state(struct hl_device *hdev,
u32 cpu_boot_status_reg)
u32 cpu_boot_dev_sts0_reg,
u32 cpu_boot_dev_sts1_reg)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@ -1795,17 +1892,26 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
prop->hard_reset_done_by_fw = false;
/* Read FW application security bits */
if (prop->fw_security_status_valid) {
prop->fw_app_security_map =
RREG32(cpu_boot_status_reg);
if (prop->fw_cpu_boot_dev_sts0_valid) {
prop->fw_app_cpu_boot_dev_sts0 =
RREG32(cpu_boot_dev_sts0_reg);
if (prop->fw_app_security_map &
if (prop->fw_app_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
prop->hard_reset_done_by_fw = true;
dev_dbg(hdev->dev,
"Firmware application CPU security status %#x\n",
prop->fw_app_security_map);
"Firmware application CPU status0 %#x\n",
prop->fw_app_cpu_boot_dev_sts0);
}
if (prop->fw_cpu_boot_dev_sts1_valid) {
prop->fw_app_cpu_boot_dev_sts1 =
RREG32(cpu_boot_dev_sts1_reg);
dev_dbg(hdev->dev,
"Firmware application CPU status1 %#x\n",
prop->fw_app_cpu_boot_dev_sts1);
}
dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
@ -1900,13 +2006,16 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
if (rc)
goto protocol_err;
hl_fw_linux_update_state(hdev, le32_to_cpu(dyn_regs->cpu_boot_status));
hl_fw_linux_update_state(hdev, le32_to_cpu(dyn_regs->cpu_boot_dev_sts0),
le32_to_cpu(dyn_regs->cpu_boot_dev_sts1));
return 0;
protocol_err:
fw_read_errors(hdev, le32_to_cpu(dyn_regs->cpu_boot_err0),
le32_to_cpu(dyn_regs->cpu_boot_status));
le32_to_cpu(dyn_regs->cpu_boot_err1),
le32_to_cpu(dyn_regs->cpu_boot_dev_sts0),
le32_to_cpu(dyn_regs->cpu_boot_dev_sts1));
return rc;
}
@ -1922,8 +2031,9 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
struct fw_load_mgr *fw_loader)
{
u32 cpu_msg_status_reg, cpu_timeout, msg_to_cpu_reg, status;
u32 cpu_boot_status_reg, cpu_security_boot_status_reg;
u32 cpu_boot_dev_status0_reg, cpu_boot_dev_status1_reg;
struct static_fw_load_mgr *static_loader;
u32 cpu_boot_status_reg;
int rc;
if (!(hdev->fw_components & FW_TYPE_BOOT_CPU))
@ -1936,7 +2046,8 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
static_loader = &fw_loader->static_loader;
cpu_msg_status_reg = static_loader->cpu_cmd_status_to_host_reg;
msg_to_cpu_reg = static_loader->kmd_msg_to_cpu_reg;
cpu_security_boot_status_reg = static_loader->cpu_boot_dev_status_reg;
cpu_boot_dev_status0_reg = static_loader->cpu_boot_dev_status0_reg;
cpu_boot_dev_status1_reg = static_loader->cpu_boot_dev_status1_reg;
cpu_boot_status_reg = static_loader->cpu_boot_status_reg;
dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
@ -2002,7 +2113,8 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
hl_fw_static_read_device_fw_version(hdev, FW_COMP_BOOT_FIT);
/* update state according to boot stage */
hl_fw_boot_fit_update_state(hdev, cpu_security_boot_status_reg);
hl_fw_boot_fit_update_state(hdev, cpu_boot_dev_status0_reg,
cpu_boot_dev_status1_reg);
if (rc) {
detect_cpu_boot_status(hdev, status);
@ -2073,17 +2185,22 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
}
rc = fw_read_errors(hdev, fw_loader->static_loader.boot_err0_reg,
cpu_security_boot_status_reg);
fw_loader->static_loader.boot_err1_reg,
cpu_boot_dev_status0_reg,
cpu_boot_dev_status1_reg);
if (rc)
return rc;
hl_fw_linux_update_state(hdev, cpu_security_boot_status_reg);
hl_fw_linux_update_state(hdev, cpu_boot_dev_status0_reg,
cpu_boot_dev_status1_reg);
return 0;
out:
fw_read_errors(hdev, fw_loader->static_loader.boot_err0_reg,
cpu_security_boot_status_reg);
fw_loader->static_loader.boot_err1_reg,
cpu_boot_dev_status0_reg,
cpu_boot_dev_status1_reg);
return rc;
}

View File

@ -422,15 +422,24 @@ struct hl_mmu_properties {
* @cb_pool_cb_size: size of each CB in the CB pool.
* @max_pending_cs: maximum of concurrent pending command submissions
* @max_queues: maximum amount of queues in the system
* @fw_preboot_caps_map: bitmap representation of preboot cpu capabilities
* reported by FW, bit description can be found in
* CPU_BOOT_DEV_STS*
* @fw_boot_cpu_security_map: bitmap representation of boot cpu security status
* reported by FW, bit description can be found in
* CPU_BOOT_DEV_STS*
* @fw_app_security_map: bitmap representation of application security status
* reported by FW, bit description can be found in
* CPU_BOOT_DEV_STS*
* @fw_preboot_cpu_boot_dev_sts0: bitmap representation of preboot cpu
* capabilities reported by FW, bit description
* can be found in CPU_BOOT_DEV_STS0
* @fw_preboot_cpu_boot_dev_sts1: bitmap representation of preboot cpu
* capabilities reported by FW, bit description
* can be found in CPU_BOOT_DEV_STS1
* @fw_bootfit_cpu_boot_dev_sts0: bitmap representation of boot cpu security
* status reported by FW, bit description can be
* found in CPU_BOOT_DEV_STS0
* @fw_bootfit_cpu_boot_dev_sts1: bitmap representation of boot cpu security
* status reported by FW, bit description can be
* found in CPU_BOOT_DEV_STS1
* @fw_app_cpu_boot_dev_sts0: bitmap representation of application security
* status reported by FW, bit description can be
* found in CPU_BOOT_DEV_STS0
* @fw_app_cpu_boot_dev_sts1: bitmap representation of application security
* status reported by FW, bit description can be
* found in CPU_BOOT_DEV_STS1
* @collective_first_sob: first sync object available for collective use
* @collective_first_mon: first monitor available for collective use
* @sync_stream_first_sob: first sync object available for sync stream use
@ -445,8 +454,10 @@ struct hl_mmu_properties {
* @completion_queues_count: number of completion queues.
* @fw_security_disabled: true if security measures are disabled in firmware,
* false otherwise
* @fw_security_status_valid: security status bits are valid and can be fetched
* from BOOT_DEV_STS0
* @fw_cpu_boot_dev_sts0_valid: status bits are valid and can be fetched from
* BOOT_DEV_STS0
* @fw_cpu_boot_dev_sts1_valid: status bits are valid and can be fetched from
* BOOT_DEV_STS1
* @dram_supports_virtual_memory: is there an MMU towards the DRAM
* @hard_reset_done_by_fw: true if firmware is handling hard reset flow
* @num_functional_hbms: number of functional HBMs in each DCORE.
@ -497,9 +508,12 @@ struct asic_fixed_properties {
u32 cb_pool_cb_size;
u32 max_pending_cs;
u32 max_queues;
u32 fw_preboot_caps_map;
u32 fw_boot_cpu_security_map;
u32 fw_app_security_map;
u32 fw_preboot_cpu_boot_dev_sts0;
u32 fw_preboot_cpu_boot_dev_sts1;
u32 fw_bootfit_cpu_boot_dev_sts0;
u32 fw_bootfit_cpu_boot_dev_sts1;
u32 fw_app_cpu_boot_dev_sts0;
u32 fw_app_cpu_boot_dev_sts1;
u16 collective_first_sob;
u16 collective_first_mon;
u16 sync_stream_first_sob;
@ -512,7 +526,8 @@ struct asic_fixed_properties {
u8 tpc_enabled_mask;
u8 completion_queues_count;
u8 fw_security_disabled;
u8 fw_security_status_valid;
u8 fw_cpu_boot_dev_sts0_valid;
u8 fw_cpu_boot_dev_sts1_valid;
u8 dram_supports_virtual_memory;
u8 hard_reset_done_by_fw;
u8 num_functional_hbms;
@ -853,8 +868,10 @@ struct pci_mem_region {
* @kmd_msg_to_cpu_reg: register address for KDM->CPU messages
* @cpu_cmd_status_to_host_reg: register address for CPU command status response
* @cpu_boot_status_reg: boot status register
* @cpu_boot_dev_status_reg: boot device status register
* @boot_err0_reg: boot error register
* @cpu_boot_dev_status0_reg: boot device status register 0
* @cpu_boot_dev_status1_reg: boot device status register 1
* @boot_err0_reg: boot error register 0
* @boot_err1_reg: boot error register 1
* @preboot_version_offset_reg: SRAM offset to preboot version register
* @boot_fit_version_offset_reg: SRAM offset to boot fit version register
* @sram_offset_mask: mask for getting offset into the SRAM
@ -865,8 +882,10 @@ struct static_fw_load_mgr {
u32 kmd_msg_to_cpu_reg;
u32 cpu_cmd_status_to_host_reg;
u32 cpu_boot_status_reg;
u32 cpu_boot_dev_status_reg;
u32 cpu_boot_dev_status0_reg;
u32 cpu_boot_dev_status1_reg;
u32 boot_err0_reg;
u32 boot_err1_reg;
u32 preboot_version_offset_reg;
u32 boot_fit_version_offset_reg;
u32 sram_offset_mask;
@ -2514,11 +2533,13 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
void *vaddr);
int hl_fw_send_heartbeat(struct hl_device *hdev);
int hl_fw_cpucp_info_get(struct hl_device *hdev,
u32 cpu_security_boot_status_reg,
u32 boot_err0_reg);
u32 sts_boot_dev_sts0_reg,
u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
u32 boot_err1_reg);
int hl_fw_cpucp_handshake(struct hl_device *hdev,
u32 cpu_security_boot_status_reg,
u32 boot_err0_reg);
u32 sts_boot_dev_sts0_reg,
u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
u32 boot_err1_reg);
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
struct hl_info_pci_counters *counters);
@ -2531,8 +2552,9 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power);
int hl_fw_init_cpu(struct hl_device *hdev);
int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 cpu_boot_caps_reg, u32 boot_err0_reg,
u32 timeout);
u32 sts_boot_dev_sts0_reg,
u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
u32 boot_err1_reg, u32 timeout);
int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
bool is_wc[3]);

View File

@ -546,7 +546,8 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
prop->fw_security_status_valid = false;
prop->fw_cpu_boot_dev_sts0_valid = false;
prop->fw_cpu_boot_dev_sts1_valid = false;
prop->hard_reset_done_by_fw = false;
return 0;
@ -706,8 +707,10 @@ pci_init:
* version to determine whether we run with a security-enabled firmware
*/
rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
mmCPU_BOOT_DEV_STS0,
mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
mmCPU_BOOT_ERR1,
GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
if (rc) {
if (hdev->reset_on_preboot_fail)
hdev->asic_funcs->hw_fini(hdev, true);
@ -1925,8 +1928,8 @@ static void gaudi_init_scrambler_sram(struct hl_device *hdev)
if (!hdev->asic_prop.fw_security_disabled)
return;
if (hdev->asic_prop.fw_security_status_valid &&
(hdev->asic_prop.fw_app_security_map &
if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
(hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_SRAM_SCR_EN))
return;
@ -1997,8 +2000,8 @@ static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
if (!hdev->asic_prop.fw_security_disabled)
return;
if (hdev->asic_prop.fw_security_status_valid &&
(hdev->asic_prop.fw_boot_cpu_security_map &
if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
(hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_DRAM_SCR_EN))
return;
@ -2067,8 +2070,8 @@ static void gaudi_init_e2e(struct hl_device *hdev)
if (!hdev->asic_prop.fw_security_disabled)
return;
if (hdev->asic_prop.fw_security_status_valid &&
(hdev->asic_prop.fw_boot_cpu_security_map &
if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
(hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_E2E_CRED_EN))
return;
@ -2442,8 +2445,8 @@ static void gaudi_init_hbm_cred(struct hl_device *hdev)
if (!hdev->asic_prop.fw_security_disabled)
return;
if (hdev->asic_prop.fw_security_status_valid &&
(hdev->asic_prop.fw_boot_cpu_security_map &
if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
(hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_HBM_CRED_EN))
return;
@ -3768,8 +3771,10 @@ static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
static_loader->cpu_boot_dev_status_reg = mmCPU_BOOT_DEV_STS0;
static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
static_loader->sram_offset_mask = ~((u32)SRAM_BASE_ADDR);
@ -3884,8 +3889,10 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
}
/* update FW application security bits */
if (prop->fw_security_status_valid)
prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0);
if (prop->fw_cpu_boot_dev_sts0_valid)
prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
if (prop->fw_cpu_boot_dev_sts1_valid)
prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
return 0;
@ -7409,8 +7416,8 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
int err = 0;
if (hdev->asic_prop.fw_security_status_valid &&
(hdev->asic_prop.fw_app_security_map &
if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
(hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_HBM_ECC_EN)) {
if (!hbm_ecc_data) {
dev_err(hdev->dev, "No FW ECC data");
@ -7975,7 +7982,9 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
return 0;
rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
mmCPU_BOOT_ERR1);
if (rc)
return rc;

View File

@ -461,7 +461,8 @@ int goya_get_fixed_properties(struct hl_device *hdev)
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
prop->fw_security_status_valid = false;
prop->fw_cpu_boot_dev_sts0_valid = false;
prop->fw_cpu_boot_dev_sts1_valid = false;
prop->hard_reset_done_by_fw = false;
return 0;
@ -641,8 +642,10 @@ pci_init:
* version to determine whether we run with a security-enabled firmware
*/
rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
mmCPU_BOOT_DEV_STS0,
mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
mmCPU_BOOT_ERR1,
GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
if (rc) {
if (hdev->reset_on_preboot_fail)
hdev->asic_funcs->hw_fini(hdev, true);
@ -1297,8 +1300,11 @@ int goya_init_cpu_queues(struct hl_device *hdev)
}
/* update FW application security bits */
if (prop->fw_security_status_valid)
prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0);
if (prop->fw_cpu_boot_dev_sts0_valid)
prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
if (prop->fw_cpu_boot_dev_sts1_valid)
prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
goya->hw_cap_initialized |= HW_CAP_CPU_Q;
return 0;
@ -2470,8 +2476,10 @@ static void goya_init_static_firmware_loader(struct hl_device *hdev)
static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
static_loader->cpu_boot_dev_status_reg = mmCPU_BOOT_DEV_STS0;
static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
static_loader->sram_offset_mask = ~((u32)SRAM_BASE_ADDR);
@ -5245,7 +5253,9 @@ int goya_cpucp_info_get(struct hl_device *hdev)
if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
return 0;
rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
mmCPU_BOOT_ERR1);
if (rc)
return rc;