mirror of
https://github.com/torvalds/linux.git
synced 2025-01-01 15:51:46 +00:00
habanalabs: reset device upon fw read failure
Failure in reading the pre-boot version is not handled correctly; upon failure we need to reset the device in order to be able to reinstall the driver. Signed-off-by: farah kassabri <fkassabri@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
ba7e389c30
commit
eb10b897e4
@ -607,7 +607,9 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
|
||||
rc = hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
security_status = RREG32(cpu_security_boot_status_reg);
|
||||
|
||||
|
@ -927,7 +927,7 @@ struct hl_asic_funcs {
|
||||
void (*ctx_fini)(struct hl_ctx *ctx);
|
||||
int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
|
||||
u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
|
||||
void (*read_device_fw_version)(struct hl_device *hdev,
|
||||
int (*read_device_fw_version)(struct hl_device *hdev,
|
||||
enum hl_fw_component fwc);
|
||||
int (*load_firmware_to_device)(struct hl_device *hdev);
|
||||
int (*load_boot_fit_to_device)(struct hl_device *hdev);
|
||||
|
@ -390,8 +390,11 @@ int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
|
||||
rc = hl_fw_read_preboot_status(hdev, cpu_boot_status_reg,
|
||||
cpu_security_boot_status_reg, boot_err0_reg,
|
||||
preboot_ver_timeout);
|
||||
if (rc)
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to read preboot version\n");
|
||||
hdev->asic_funcs->hw_fini(hdev, true);
|
||||
goto unmap_pci_bars;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
|
@ -3603,7 +3603,7 @@ static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
|
||||
return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
|
||||
}
|
||||
|
||||
static void gaudi_read_device_fw_version(struct hl_device *hdev,
|
||||
static int gaudi_read_device_fw_version(struct hl_device *hdev,
|
||||
enum hl_fw_component fwc)
|
||||
{
|
||||
const char *name;
|
||||
@ -3623,7 +3623,7 @@ static void gaudi_read_device_fw_version(struct hl_device *hdev,
|
||||
break;
|
||||
default:
|
||||
dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
|
||||
return;
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
ver_off &= ~((u32)SRAM_BASE_ADDR);
|
||||
@ -3635,7 +3635,10 @@ static void gaudi_read_device_fw_version(struct hl_device *hdev,
|
||||
dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
|
||||
name, ver_off);
|
||||
strcpy(dest, "unavailable");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gaudi_init_cpu(struct hl_device *hdev)
|
||||
@ -3925,16 +3928,18 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
|
||||
|
||||
WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
|
||||
|
||||
gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
|
||||
HW_CAP_HBM | HW_CAP_PCI_DMA |
|
||||
HW_CAP_MME | HW_CAP_TPC_MASK |
|
||||
HW_CAP_HBM_DMA | HW_CAP_PLL |
|
||||
HW_CAP_NIC_MASK | HW_CAP_MMU |
|
||||
HW_CAP_SRAM_SCRAMBLER |
|
||||
HW_CAP_HBM_SCRAMBLER |
|
||||
HW_CAP_CLK_GATE);
|
||||
if (gaudi) {
|
||||
gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
|
||||
HW_CAP_HBM | HW_CAP_PCI_DMA |
|
||||
HW_CAP_MME | HW_CAP_TPC_MASK |
|
||||
HW_CAP_HBM_DMA | HW_CAP_PLL |
|
||||
HW_CAP_NIC_MASK | HW_CAP_MMU |
|
||||
HW_CAP_SRAM_SCRAMBLER |
|
||||
HW_CAP_HBM_SCRAMBLER |
|
||||
HW_CAP_CLK_GATE);
|
||||
|
||||
memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
|
||||
memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
|
||||
}
|
||||
}
|
||||
|
||||
static int gaudi_suspend(struct hl_device *hdev)
|
||||
|
@ -2341,7 +2341,7 @@ static int goya_load_boot_fit_to_device(struct hl_device *hdev)
|
||||
* FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
|
||||
* The version string should be located by that offset.
|
||||
*/
|
||||
static void goya_read_device_fw_version(struct hl_device *hdev,
|
||||
static int goya_read_device_fw_version(struct hl_device *hdev,
|
||||
enum hl_fw_component fwc)
|
||||
{
|
||||
const char *name;
|
||||
@ -2361,7 +2361,7 @@ static void goya_read_device_fw_version(struct hl_device *hdev,
|
||||
break;
|
||||
default:
|
||||
dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
|
||||
return;
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
ver_off &= ~((u32)SRAM_BASE_ADDR);
|
||||
@ -2373,7 +2373,11 @@ static void goya_read_device_fw_version(struct hl_device *hdev,
|
||||
dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
|
||||
name, ver_off);
|
||||
strcpy(dest, "unavailable");
|
||||
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int goya_init_cpu(struct hl_device *hdev)
|
||||
@ -2644,12 +2648,14 @@ static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
|
||||
WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
|
||||
0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);
|
||||
|
||||
goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
|
||||
HW_CAP_DDR_0 | HW_CAP_DDR_1 |
|
||||
HW_CAP_DMA | HW_CAP_MME |
|
||||
HW_CAP_MMU | HW_CAP_TPC_MBIST |
|
||||
HW_CAP_GOLDEN | HW_CAP_TPC);
|
||||
memset(goya->events_stat, 0, sizeof(goya->events_stat));
|
||||
if (goya) {
|
||||
goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
|
||||
HW_CAP_DDR_0 | HW_CAP_DDR_1 |
|
||||
HW_CAP_DMA | HW_CAP_MME |
|
||||
HW_CAP_MMU | HW_CAP_TPC_MBIST |
|
||||
HW_CAP_GOLDEN | HW_CAP_TPC);
|
||||
memset(goya->events_stat, 0, sizeof(goya->events_stat));
|
||||
}
|
||||
}
|
||||
|
||||
int goya_suspend(struct hl_device *hdev)
|
||||
|
Loading…
Reference in New Issue
Block a user