net/mlx5: Fix use after free in mlx5_health_wait_pci_up

The device health recovery flow calls mlx5_health_wait_pci_up() which
queries the device for FW_RESET timeout after freeing the device
timeouts structure on mlx5_function_teardown(). Fix this bug by moving
timeouts structure init/cleanup to the device's init/uninit phases.
Since it is necessary to reset default software timeouts on function
reload, extract setting of defaults values from mlx5_tout_init() and
call mlx5_tout_set_def_val() directly from mlx5_function_setup().

Fixes: 5945e1adea ("net/mlx5: Read timeout values from init segment")
Reported by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Amir Tzin <amirtz@nvidia.com>
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
This commit is contained in:
Amir Tzin 2021-10-20 12:45:05 +03:00 committed by Saeed Mahameed
parent e219440da0
commit 76091b0fb6
3 changed files with 15 additions and 13 deletions

View File

@ -31,11 +31,11 @@ static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_type
dev->timeouts->to[type] = val;
}
static void tout_set_def_val(struct mlx5_core_dev *dev)
void mlx5_tout_set_def_val(struct mlx5_core_dev *dev)
{
int i;
for (i = MLX5_TO_FW_PRE_INIT_TIMEOUT_MS; i < MAX_TIMEOUT_TYPES; i++)
for (i = 0; i < MAX_TIMEOUT_TYPES; i++)
tout_set(dev, tout_def_sw_val[i], i);
}
@ -45,7 +45,6 @@ int mlx5_tout_init(struct mlx5_core_dev *dev)
if (!dev->timeouts)
return -ENOMEM;
tout_set_def_val(dev);
return 0;
}

View File

@ -34,6 +34,7 @@ int mlx5_tout_init(struct mlx5_core_dev *dev);
void mlx5_tout_cleanup(struct mlx5_core_dev *dev);
void mlx5_tout_query_iseg(struct mlx5_core_dev *dev);
int mlx5_tout_query_dtor(struct mlx5_core_dev *dev);
void mlx5_tout_set_def_val(struct mlx5_core_dev *dev);
u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type);
#define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS)

View File

@ -992,11 +992,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
if (mlx5_core_is_pf(dev))
pcie_print_link_status(dev->pdev);
err = mlx5_tout_init(dev);
if (err) {
mlx5_core_err(dev, "Failed initializing timeouts, aborting\n");
return err;
}
mlx5_tout_set_def_val(dev);
/* wait for firmware to accept initialization segments configurations
*/
@ -1005,13 +1001,13 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
if (err) {
mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n",
mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
goto err_tout_cleanup;
return err;
}
err = mlx5_cmd_init(dev);
if (err) {
mlx5_core_err(dev, "Failed initializing command interface, aborting\n");
goto err_tout_cleanup;
return err;
}
mlx5_tout_query_iseg(dev);
@ -1094,8 +1090,6 @@ err_disable_hca:
err_cmd_cleanup:
mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
mlx5_cmd_cleanup(dev);
err_tout_cleanup:
mlx5_tout_cleanup(dev);
return err;
}
@ -1114,7 +1108,6 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
mlx5_core_disable_hca(dev, 0);
mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
mlx5_cmd_cleanup(dev);
mlx5_tout_cleanup(dev);
return 0;
}
@ -1476,6 +1469,12 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
mlx5_debugfs_root);
INIT_LIST_HEAD(&priv->traps);
err = mlx5_tout_init(dev);
if (err) {
mlx5_core_err(dev, "Failed initializing timeouts, aborting\n");
goto err_timeout_init;
}
err = mlx5_health_init(dev);
if (err)
goto err_health_init;
@ -1501,6 +1500,8 @@ err_adev_init:
err_pagealloc_init:
mlx5_health_cleanup(dev);
err_health_init:
mlx5_tout_cleanup(dev);
err_timeout_init:
debugfs_remove(dev->priv.dbg_root);
mutex_destroy(&priv->pgdir_mutex);
mutex_destroy(&priv->alloc_mutex);
@ -1518,6 +1519,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
mlx5_adev_cleanup(dev);
mlx5_pagealloc_cleanup(dev);
mlx5_health_cleanup(dev);
mlx5_tout_cleanup(dev);
debugfs_remove_recursive(dev->priv.dbg_root);
mutex_destroy(&priv->pgdir_mutex);
mutex_destroy(&priv->alloc_mutex);