habanalabs: use variable poll interval for fw loading

Using a variable poll interval for fw loading allows us to support
much slower environments (emulation) while changing only a single
line in the code, instead of choosing a different interval in each
function that polls.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Ohad Sharabi 2021-10-26 15:33:23 +03:00 committed by Oded Gabbay
parent 8f82ff75df
commit f4e7906dbe
3 changed files with 27 additions and 16 deletions

View File

@ -15,8 +15,6 @@
#define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */
#define FW_CPU_STATUS_POLL_INTERVAL_USEC 10000
static char *extract_fw_ver_from_str(const char *fw_str)
{
char *str, *fw_ver, *whitespace;
@ -1102,7 +1100,7 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev,
(status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
FW_CPU_STATUS_POLL_INTERVAL_USEC,
hdev->fw_poll_interval_usec,
timeout);
if (rc) {
@ -1286,11 +1284,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
{
int rc;
/* pldm was added for cases in which we use preboot on pldm and want
* to load boot fit, but we can't wait for preboot because it runs
* very slowly
*/
if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm)
if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
return 0;
/*
@ -1436,7 +1430,7 @@ static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev,
le32_to_cpu(dyn_regs->cpu_cmd_status_to_host),
status,
FIELD_GET(COMMS_STATUS_STATUS_MASK, status) == expected_status,
FW_CPU_STATUS_POLL_INTERVAL_USEC,
hdev->fw_poll_interval_usec,
timeout);
if (rc) {
@ -2070,7 +2064,7 @@ static int hl_fw_dynamic_wait_for_boot_fit_active(struct hl_device *hdev,
status,
(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
(status == CPU_BOOT_STATUS_SRAM_AVAIL),
FW_CPU_STATUS_POLL_INTERVAL_USEC,
hdev->fw_poll_interval_usec,
dyn_loader->wait_for_bl_timeout);
if (rc) {
dev_err(hdev->dev, "failed to wait for boot\n");
@ -2097,7 +2091,7 @@ static int hl_fw_dynamic_wait_for_linux_active(struct hl_device *hdev,
le32_to_cpu(dyn_loader->comm_desc.cpu_dyn_regs.cpu_boot_status),
status,
(status == CPU_BOOT_STATUS_SRAM_AVAIL),
FW_CPU_STATUS_POLL_INTERVAL_USEC,
hdev->fw_poll_interval_usec,
fw_loader->cpu_timeout);
if (rc) {
dev_err(hdev->dev, "failed to wait for Linux\n");
@ -2296,6 +2290,15 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
goto protocol_err;
}
/*
* when testing FW load (without Linux) on PLDM we don't want to
* wait until boot fit is active as it may take several hours.
* instead, we load the bootfit and let it do all initializations in
* the background.
*/
if (hdev->pldm && !(hdev->fw_components & FW_TYPE_LINUX))
return 0;
rc = hl_fw_dynamic_wait_for_boot_fit_active(hdev, fw_loader);
if (rc)
goto protocol_err;
@ -2388,7 +2391,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
cpu_boot_status_reg,
status,
status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
FW_CPU_STATUS_POLL_INTERVAL_USEC,
hdev->fw_poll_interval_usec,
fw_loader->boot_fit_timeout);
if (rc) {
@ -2411,7 +2414,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
cpu_msg_status_reg,
status,
status == CPU_MSG_OK,
FW_CPU_STATUS_POLL_INTERVAL_USEC,
hdev->fw_poll_interval_usec,
fw_loader->boot_fit_timeout);
if (rc) {
@ -2440,7 +2443,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
(status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
(status == CPU_BOOT_STATUS_SRAM_AVAIL),
FW_CPU_STATUS_POLL_INTERVAL_USEC,
hdev->fw_poll_interval_usec,
cpu_timeout);
dev_dbg(hdev->dev, "uboot status = %d\n", status);
@ -2489,7 +2492,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
cpu_boot_status_reg,
status,
(status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
FW_CPU_STATUS_POLL_INTERVAL_USEC,
hdev->fw_poll_interval_usec,
cpu_timeout);
if (rc) {
@ -2509,7 +2512,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
cpu_boot_status_reg,
status,
(status == CPU_BOOT_STATUS_SRAM_AVAIL),
FW_CPU_STATUS_POLL_INTERVAL_USEC,
hdev->fw_poll_interval_usec,
cpu_timeout);
/* Clear message */

View File

@ -61,6 +61,9 @@
#define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */
#define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */
#define HL_FW_STATUS_POLL_INTERVAL_USEC 10000 /* 10ms */
#define HL_FW_STATUS_PLDM_POLL_INTERVAL_USEC 300000000 /* 300s */
#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */
#define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */
@ -2459,6 +2462,7 @@ struct multi_cs_data {
* @last_open_session_duration_jif: duration (jiffies) of the last device open
* session.
* @open_counter: number of successful device open operations.
* @fw_poll_interval_usec: FW status poll interval in usec.
* @in_reset: is device in reset flow.
* @curr_pll_profile: current PLL profile.
* @card_type: Various ASICs have several card types. This indicates the card
@ -2607,6 +2611,7 @@ struct hl_device {
u64 last_successful_open_jif;
u64 last_open_session_duration_jif;
u64 open_counter;
u64 fw_poll_interval_usec;
atomic_t in_reset;
enum hl_pll_frequency curr_pll_profile;
enum cpucp_card_types card_type;

View File

@ -345,6 +345,9 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
set_driver_behavior_per_device(hdev);
hdev->fw_poll_interval_usec = hdev->pldm ? HL_FW_STATUS_PLDM_POLL_INTERVAL_USEC :
HL_FW_STATUS_POLL_INTERVAL_USEC;
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;