habanalabs: add new return code to device fd open

In order to be more informative during device open, we are adding a
new return code, -EAGAIN, that indicates the device is still going through
resource reclaiming and hence cannot be used yet.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Ofir Bitton 2022-02-28 16:08:20 +02:00 committed by Greg Kroah-Hartman
parent 050a6f349a
commit 4c3b9f6e3b
3 changed files with 20 additions and 1 deletions

View File

@ -107,6 +107,8 @@ static void hpriv_release(struct kref *ref)
hdev->is_compute_ctx_active = false;
mutex_unlock(&hdev->fpriv_list_lock);
hdev->compute_ctx_in_release = 0;
kfree(hpriv);
}
@ -150,6 +152,8 @@ static int hl_device_release(struct inode *inode, struct file *filp)
hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
hdev->compute_ctx_in_release = 1;
if (!hl_hpriv_put(hpriv))
dev_notice(hdev->dev,
"User process closed FD but device still in use\n");

View File

@ -2710,6 +2710,7 @@ struct hl_reset_info {
* cases where Linux was not loaded to device CPU
* @supports_wait_for_multi_cs: true if wait for multi CS is supported
* @is_compute_ctx_active: Whether there is an active compute context executing.
* @compute_ctx_in_release: true if the current compute context is being released.
*/
struct hl_device {
struct pci_dev *pdev;
@ -2828,6 +2829,7 @@ struct hl_device {
u8 supports_wait_for_multi_cs;
u8 stream_master_qid_arr_size;
u8 is_compute_ctx_active;
u8 compute_ctx_in_release;
/* Parameters for bring-up */
u64 nic_ports_mask;

View File

@ -150,7 +150,20 @@ int hl_device_open(struct inode *inode, struct file *filp)
dev_err_ratelimited(hdev->dev,
"Can't open %s because it is %s\n",
dev_name(hdev->dev), hdev->status[status]);
rc = -EPERM;
if (status == HL_DEVICE_STATUS_IN_RESET)
rc = -EAGAIN;
else
rc = -EPERM;
goto out_err;
}
if (hdev->compute_ctx_in_release) {
dev_dbg_ratelimited(hdev->dev,
"Can't open %s because another user is still releasing it\n",
dev_name(hdev->dev));
rc = -EAGAIN;
goto out_err;
}