// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/uaccess.h>
#include <linux/slab.h>

static void job_wq_completion(struct work_struct *work);
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		struct hl_ctx *ctx, u64 timeout_us, u64 seq);
static void cs_do_release(struct kref *ref);

static void hl_sob_reset(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	hdev->asic_funcs->reset_sob(hdev, hw_sob);
}

void hl_sob_reset_error(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
		hw_sob->q_idx, hw_sob->sob_id);
}

/**
 * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
 * @sob_base: sob base id
 * @sob_mask: sob user mask, each bit represents a sob offset from sob base
 * @mask: generated mask
 *
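 * Note: the generated mask is the bitwise complement of the user mask, so a
 * cleared bit in *mask marks a SOB that takes part in the monitor. As an
 * illustrative example derived from the code below: for sob_base = 10 and
 * sob_mask = 0x1, the offset inside the 8-SOB group is 10 & 0x7 = 2, hence
 * *mask = (u8)~(1 << 2) = 0xFB.
 *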
 * Return: 0 if given parameters are valid
 */
int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
{
	int i;

	if (sob_mask == 0)
		return -EINVAL;

	if (sob_mask == 0x1) {
		*mask = ~(1 << (sob_base & 0x7));
	} else {
		/* find msb in order to verify sob range is valid */
		for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
			if (BIT(i) & sob_mask)
				break;

		if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
			return -EINVAL;

		*mask = ~sob_mask;
	}

	return 0;
}

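/*
 * hl_fence_release() - the kref release callback of an hl_fence.
 *
 * Frees the hl_cs_compl object that embeds the fence. For sync-stream CS
 * types (signal/wait/collective wait) it also drops the reference this CS
 * took on its h/w SOB, which is what eventually resets the SOB.
 */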
static void hl_fence_release(struct kref *kref)
{
	struct hl_fence *fence =
		container_of(kref, struct hl_fence, refcount);
	struct hl_cs_compl *hl_cs_cmpl =
		container_of(fence, struct hl_cs_compl, base_fence);
	struct hl_device *hdev = hl_cs_cmpl->hdev;

	/* EBUSY means the CS was never submitted and hence we don't have
	 * an attached hw_sob object that we should handle here
	 */
	if (fence->error == -EBUSY)
		goto free;

	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
		(hl_cs_cmpl->type == CS_TYPE_WAIT) ||
		(hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {

		dev_dbg(hdev->dev,
			"CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
			hl_cs_cmpl->cs_seq,
			hl_cs_cmpl->type,
			hl_cs_cmpl->hw_sob->sob_id,
			hl_cs_cmpl->sob_val);

		/*
		 * A signal CS can get completion while the corresponding wait
		 * for signal CS is on its way to the PQ. The wait for signal CS
		 * will get stuck if the signal CS incremented the SOB to its
		 * max value and there are no pending (submitted) waits on this
		 * SOB.
		 * We do the following to avoid this situation:
		 * 1. The wait for signal CS must get a ref for the signal CS as
		 *    soon as possible in cs_ioctl_signal_wait() and put it
		 *    before being submitted to the PQ but after it incremented
		 *    the SOB refcnt in init_signal_wait_cs().
		 * 2. Signal/Wait for signal CS will decrement the SOB refcnt
		 *    here.
		 * These two measures guarantee that the wait for signal CS will
		 * reset the SOB upon completion rather than the signal CS and
		 * hence the above scenario is avoided.
		 */
		kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);

		if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
			hdev->asic_funcs->reset_sob_group(hdev,
					hl_cs_cmpl->sob_group);
	}

free:
	kfree(hl_cs_cmpl);
}

void hl_fence_put(struct hl_fence *fence)
{
	if (fence)
		kref_put(&fence->refcount, hl_fence_release);
}

void hl_fence_get(struct hl_fence *fence)
{
	if (fence)
		kref_get(&fence->refcount);
}

static void hl_fence_init(struct hl_fence *fence)
{
	kref_init(&fence->refcount);
	fence->error = 0;
	init_completion(&fence->completion);
}

void cs_get(struct hl_cs *cs)
{
	kref_get(&cs->refcount);
}

static int cs_get_unless_zero(struct hl_cs *cs)
{
	return kref_get_unless_zero(&cs->refcount);
}

static void cs_put(struct hl_cs *cs)
{
	kref_put(&cs->refcount, cs_do_release);
}

static void cs_job_do_release(struct kref *ref)
{
	struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);

	kfree(job);
}

static void cs_job_put(struct hl_cs_job *job)
{
	kref_put(&job->refcount, cs_job_do_release);
}

static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{
	/*
	 * Patched CB is created for external queues jobs, and for H/W queues
	 * jobs if the user CB was allocated by driver and MMU is disabled.
	 */
	return (job->queue_type == QUEUE_TYPE_EXT ||
			(job->queue_type == QUEUE_TYPE_HW &&
				job->is_kernel_allocated_cb &&
				!hdev->mmu_enable));
}

/*
 * cs_parser - parse the user command submission
 *
 * @hpriv: pointer to the private data of the fd
 * @job: pointer to the job that holds the command submission info
 *
 * The function parses the command submission of the user. It calls the
 * ASIC specific parser, which returns a list of memory blocks to send
 * to the device as different command buffers
 *
 */
static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_parser parser;
	int rc;

	parser.ctx_id = job->cs->ctx->asid;
	parser.cs_sequence = job->cs->sequence;
	parser.job_id = job->id;

	parser.hw_queue_id = job->hw_queue_id;
	parser.job_userptr_list = &job->userptr_list;
	parser.patched_cb = NULL;
	parser.user_cb = job->user_cb;
	parser.user_cb_size = job->user_cb_size;
	parser.queue_type = job->queue_type;
	parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
	job->patched_cb = NULL;

	rc = hdev->asic_funcs->cs_parser(hdev, &parser);

	if (is_cb_patched(hdev, job)) {
		if (!rc) {
			job->patched_cb = parser.patched_cb;
			job->job_cb_size = parser.patched_cb_size;
			job->contains_dma_pkt = parser.contains_dma_pkt;

			spin_lock(&job->patched_cb->lock);
			job->patched_cb->cs_cnt++;
			spin_unlock(&job->patched_cb->lock);
		}

		/*
		 * Whether the parsing worked or not, we don't need the
		 * original CB anymore because it was already parsed and
		 * won't be accessed again for this CS
		 */
		spin_lock(&job->user_cb->lock);
		job->user_cb->cs_cnt--;
		spin_unlock(&job->user_cb->lock);
		hl_cb_put(job->user_cb);
		job->user_cb = NULL;
	} else if (!rc) {
		job->job_cb_size = job->user_cb_size;
	}

	return rc;
}

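/*
 * complete_job() - tear down a single job once its execution is done.
 *
 * Releases the patched/user CBs and the userptr list of the job, removes it
 * from the CS job list and drops the job reference. For jobs on external or
 * H/W queues it also drops the CS reference that was taken when the job was
 * attached to the CS.
 */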
static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct hl_cs *cs = job->cs;

	if (is_cb_patched(hdev, job)) {
		hl_userptr_delete_list(hdev, &job->userptr_list);

		/*
		 * We might arrive here from rollback and patched CB wasn't
		 * created, so we need to check it's not NULL
		 */
		if (job->patched_cb) {
			spin_lock(&job->patched_cb->lock);
			job->patched_cb->cs_cnt--;
			spin_unlock(&job->patched_cb->lock);

			hl_cb_put(job->patched_cb);
		}
	}

	/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
	 * enabled, the user CB isn't released in cs_parser() and thus should be
	 * released here.
	 * This is also true for INT queue jobs which were allocated by the
	 * driver.
	 */
	if (job->is_kernel_allocated_cb &&
		((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) ||
				job->queue_type == QUEUE_TYPE_INT)) {
		spin_lock(&job->user_cb->lock);
		job->user_cb->cs_cnt--;
		spin_unlock(&job->user_cb->lock);

		hl_cb_put(job->user_cb);
	}

	/*
	 * This is the only place where there can be multiple threads
	 * modifying the list at the same time
	 */
	spin_lock(&cs->job_lock);
	list_del(&job->cs_node);
	spin_unlock(&cs->job_lock);

	hl_debugfs_remove_job(hdev, job);

	if (job->queue_type == QUEUE_TYPE_EXT ||
			job->queue_type == QUEUE_TYPE_HW)
		cs_put(cs);

	cs_job_put(job);
}

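/*
 * cs_do_release() - the kref release callback of a command submission.
 *
 * Runs when the last reference to the CS is dropped: completes any remaining
 * internal-queue jobs, updates the busy/idle accounting, removes the CS from
 * the mirror list, re-arms the TDR for the next pending CS, marks the fence
 * error (timeout/abort/never-submitted) and signals the fence completion.
 */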
static void cs_do_release(struct kref *ref)
{
	struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
	struct hl_device *hdev = cs->ctx->hdev;
	struct hl_cs_job *job, *tmp;

	cs->completed = true;

	/*
	 * Although reaching here means that all external jobs have finished
	 * (because each one of them took a refcnt to the CS), we still need
	 * to go over the internal jobs and complete them. Otherwise, we
	 * will have leaked memory and what's worse, the CS object (and
	 * potentially the CTX object) could be released, while the JOB
	 * still holds a pointer to them (but no reference).
	 */
	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		complete_job(hdev, job);

	if (!cs->submitted) {
		/* In case the wait for signal CS was submitted, the put occurs
		 * in init_signal_wait_cs() or collective_wait_init_cs()
		 * right before hanging on the PQ.
		 */
		if (cs->type == CS_TYPE_WAIT ||
				cs->type == CS_TYPE_COLLECTIVE_WAIT)
			hl_fence_put(cs->signal_fence);

		goto out;
	}

	hdev->asic_funcs->hw_queues_lock(hdev);

	hdev->cs_active_cnt--;
	if (!hdev->cs_active_cnt) {
		struct hl_device_idle_busy_ts *ts;

		ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
		ts->busy_to_idle_ts = ktime_get();

		if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
			hdev->idle_busy_ts_idx = 0;
	} else if (hdev->cs_active_cnt < 0) {
		dev_crit(hdev->dev, "CS active cnt %d is negative\n",
			hdev->cs_active_cnt);
	}

	hdev->asic_funcs->hw_queues_unlock(hdev);

	/* Need to update CI for internal queues */
	hl_int_hw_queue_update_ci(cs);

	/* remove CS from CS mirror list */
	spin_lock(&hdev->cs_mirror_lock);
	list_del_init(&cs->mirror_node);
	spin_unlock(&hdev->cs_mirror_lock);

	/* Don't cancel TDR in case this CS was timed out because we might be
	 * running from the TDR context
	 */
	if (!cs->timedout && hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
		struct hl_cs *next;

		if (cs->tdr_active)
			cancel_delayed_work_sync(&cs->work_tdr);

		spin_lock(&hdev->cs_mirror_lock);

		/* queue TDR for next CS */
		next = list_first_entry_or_null(&hdev->cs_mirror_list,
						struct hl_cs, mirror_node);

		if (next && !next->tdr_active) {
			next->tdr_active = true;
			schedule_delayed_work(&next->work_tdr,
						hdev->timeout_jiffies);
		}

		spin_unlock(&hdev->cs_mirror_lock);
	}

out:
	/* Must be called before hl_ctx_put because inside we use ctx to get
	 * the device
	 */
	hl_debugfs_remove_cs(cs);

	hl_ctx_put(cs->ctx);

	/* We need to mark an error for not submitted because in that case
	 * the hl fence release flow is different. Mainly, we don't need
	 * to handle hw_sob for signal/wait
	 */
	if (cs->timedout)
		cs->fence->error = -ETIMEDOUT;
	else if (cs->aborted)
		cs->fence->error = -EIO;
	else if (!cs->submitted)
		cs->fence->error = -EBUSY;

	complete_all(&cs->fence->completion);
	hl_fence_put(cs->fence);

	kfree(cs->jobs_in_queue_cnt);
	kfree(cs);
}

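/*
 * cs_timedout() - TDR (timeout detection and recovery) work for a CS.
 *
 * Invoked from the delayed work that was armed when the CS was submitted.
 * Marks the CS as timed out, drops the TDR reference and, depending on the
 * reset_on_lockup setting, either resets the device or flags that a reset is
 * needed.
 */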
static void cs_timedout(struct work_struct *work)
{
	struct hl_device *hdev;
	int rc;
	struct hl_cs *cs = container_of(work, struct hl_cs,
						work_tdr.work);
	rc = cs_get_unless_zero(cs);
	if (!rc)
		return;

	if ((!cs->submitted) || (cs->completed)) {
		cs_put(cs);
		return;
	}

	/* Mark the CS as timed out so we won't try to cancel its TDR */
	cs->timedout = true;

	hdev = cs->ctx->hdev;

	dev_err(hdev->dev,
		"Command submission %llu has not finished in time!\n",
		cs->sequence);

	cs_put(cs);

	if (hdev->reset_on_lockup)
		hl_device_reset(hdev, false, false);
	else
		hdev->needs_reset = true;
}

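/*
 * allocate_cs() - allocate a new command submission object.
 *
 * Allocates the hl_cs and its hl_cs_compl completion object, assigns the next
 * sequence number of the context and stores the new fence in the context's
 * cs_pending ring (indexed by cs_seq & (max_pending_cs - 1), which assumes
 * max_pending_cs is a power of two). If the slot is still occupied by an
 * unfinished CS, the submission is rejected with -EAGAIN.
 */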
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
			enum hl_cs_type cs_type, struct hl_cs **cs_new)
{
	struct hl_cs_counters_atomic *cntr;
	struct hl_fence *other = NULL;
	struct hl_cs_compl *cs_cmpl;
	struct hl_cs *cs;
	int rc;

	cntr = &hdev->aggregated_cs_counters;

	cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
	if (!cs)
		return -ENOMEM;

	cs->ctx = ctx;
	cs->submitted = false;
	cs->completed = false;
	cs->type = cs_type;
	INIT_LIST_HEAD(&cs->job_list);
	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
	kref_init(&cs->refcount);
	spin_lock_init(&cs->job_lock);

	cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
	if (!cs_cmpl) {
		rc = -ENOMEM;
		goto free_cs;
	}

	cs_cmpl->hdev = hdev;
	cs_cmpl->type = cs->type;
	spin_lock_init(&cs_cmpl->lock);
	cs->fence = &cs_cmpl->base_fence;

	spin_lock(&ctx->cs_lock);

	cs_cmpl->cs_seq = ctx->cs_sequence;
	other = ctx->cs_pending[cs_cmpl->cs_seq &
				(hdev->asic_prop.max_pending_cs - 1)];

	if (other && !completion_done(&other->completion)) {
		dev_dbg_ratelimited(hdev->dev,
			"Rejecting CS because of too many in-flight CS\n");
		atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
		atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
		rc = -EAGAIN;
		goto free_fence;
	}

	cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
			sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
	if (!cs->jobs_in_queue_cnt) {
		rc = -ENOMEM;
		goto free_fence;
	}

	/* init hl_fence */
	hl_fence_init(&cs_cmpl->base_fence);

	cs->sequence = cs_cmpl->cs_seq;

	ctx->cs_pending[cs_cmpl->cs_seq &
			(hdev->asic_prop.max_pending_cs - 1)] =
							&cs_cmpl->base_fence;
	ctx->cs_sequence++;

	hl_fence_get(&cs_cmpl->base_fence);

	hl_fence_put(other);

	spin_unlock(&ctx->cs_lock);

	*cs_new = cs;

	return 0;

free_fence:
	spin_unlock(&ctx->cs_lock);
	kfree(cs_cmpl);
free_cs:
	kfree(cs);
	return rc;
}

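/*
 * cs_rollback() and hl_cs_rollback_all() - abort handling.
 *
 * cs_rollback() completes all the jobs of a CS that will never run.
 * hl_cs_rollback_all() is called on device reset: it flushes the completion
 * workqueues and then aborts every CS that is still on the mirror list.
 */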
static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs_job *job, *tmp;

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		complete_job(hdev, job);
}

void hl_cs_rollback_all(struct hl_device *hdev)
{
	int i;
	struct hl_cs *cs, *tmp;

	/* flush all completions */
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		flush_workqueue(hdev->cq_wq[i]);

	/* Make sure we don't have leftovers in the H/W queues mirror list */
	list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
		cs_get(cs);
		cs->aborted = true;
		dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
					cs->ctx->asid, cs->sequence);
		cs_rollback(hdev, cs);
		cs_put(cs);
	}
}

static void job_wq_completion(struct work_struct *work)
{
	struct hl_cs_job *job = container_of(work, struct hl_cs_job,
						finish_work);
	struct hl_cs *cs = job->cs;
	struct hl_device *hdev = cs->ctx->hdev;

	/* job is no longer needed */
	complete_job(hdev, job);
}

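/*
 * validate_queue_index() - sanity check a user chunk's queue index.
 *
 * Verifies the queue index is within range, refers to a real (non-NA) queue
 * that isn't reserved for the driver, and derives whether the job's CB is
 * kernel-allocated based on the queue type and the chunk's
 * HL_CS_CHUNK_FLAGS_USER_ALLOC_CB flag.
 */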
static int validate_queue_index(struct hl_device *hdev,
				struct hl_cs_chunk *chunk,
				enum hl_queue_type *queue_type,
				bool *is_kernel_allocated_cb)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hw_queue_properties *hw_queue_prop;

	/* This must be checked here to prevent out-of-bounds access to
	 * hw_queues_props array
	 */
	if (chunk->queue_index >= asic->max_queues) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		return -EINVAL;
	}

	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];

	if (hw_queue_prop->type == QUEUE_TYPE_NA) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		return -EINVAL;
	}

	if (hw_queue_prop->driver_only) {
		dev_err(hdev->dev,
			"Queue index %d is restricted for the kernel driver\n",
			chunk->queue_index);
		return -EINVAL;
	}

	/* When hw queue type isn't QUEUE_TYPE_HW,
	 * USER_ALLOC_CB flag shall be treated as "don't care".
	 */
	if (hw_queue_prop->type == QUEUE_TYPE_HW) {
		if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
			if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
				dev_err(hdev->dev,
					"Queue index %d doesn't support user CB\n",
					chunk->queue_index);
				return -EINVAL;
			}

			*is_kernel_allocated_cb = false;
		} else {
			if (!(hw_queue_prop->cb_alloc_flags &
					CB_ALLOC_KERNEL)) {
				dev_err(hdev->dev,
					"Queue index %d doesn't support kernel CB\n",
					chunk->queue_index);
				return -EINVAL;
			}

			*is_kernel_allocated_cb = true;
		}
	} else {
		*is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
						& CB_ALLOC_KERNEL);
	}

	*queue_type = hw_queue_prop->type;
	return 0;
}

static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
					struct hl_cb_mgr *cb_mgr,
					struct hl_cs_chunk *chunk)
{
	struct hl_cb *cb;
	u32 cb_handle;

	cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);

	cb = hl_cb_get(hdev, cb_mgr, cb_handle);
	if (!cb) {
		dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
		return NULL;
	}

	if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
		dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
		goto release_cb;
	}

	spin_lock(&cb->lock);
	cb->cs_cnt++;
	spin_unlock(&cb->lock);

	return cb;

release_cb:
	hl_cb_put(cb);
	return NULL;
}

struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
{
	struct hl_cs_job *job;

	job = kzalloc(sizeof(*job), GFP_ATOMIC);
	if (!job)
		return NULL;

	kref_init(&job->refcount);
	job->queue_type = queue_type;
	job->is_kernel_allocated_cb = is_kernel_allocated_cb;

	if (is_cb_patched(hdev, job))
		INIT_LIST_HEAD(&job->userptr_list);

	if (job->queue_type == QUEUE_TYPE_EXT)
		INIT_WORK(&job->finish_work, job_wq_completion);

	return job;
}

static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
{
	if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
		return CS_TYPE_SIGNAL;
	else if (cs_type_flags & HL_CS_FLAGS_WAIT)
		return CS_TYPE_WAIT;
	else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
		return CS_TYPE_COLLECTIVE_WAIT;
	else
		return CS_TYPE_DEFAULT;
}

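/*
 * hl_cs_sanity_checks() - validate the CS ioctl arguments before any work is
 * done: the device must be operational, at most one CS type flag may be set,
 * sync-stream types require hardware support, and the chunk count must match
 * the CS type (non-zero for a default CS, exactly one otherwise).
 */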
static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	u32 cs_type_flags, num_chunks;
	enum hl_device_status status;
	enum hl_cs_type cs_type;

	if (!hl_device_operational(hdev, &status)) {
		dev_warn_ratelimited(hdev->dev,
			"Device is %s. Can't submit new CS\n",
			hdev->status[status]);
		return -EBUSY;
	}

	cs_type_flags = args->in.cs_flags & ~HL_CS_FLAGS_FORCE_RESTORE;

	if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
		dev_err(hdev->dev,
			"CS type flags are mutually exclusive, context %d\n",
			ctx->asid);
		return -EINVAL;
	}

	cs_type = hl_cs_get_cs_type(cs_type_flags);
	num_chunks = args->in.num_chunks_execute;

	if (unlikely((cs_type != CS_TYPE_DEFAULT) &&
					!hdev->supports_sync_stream)) {
		dev_err(hdev->dev, "Sync stream CS is not supported\n");
		return -EINVAL;
	}

	if (cs_type == CS_TYPE_DEFAULT) {
		if (!num_chunks) {
			dev_err(hdev->dev,
				"Got execute CS with 0 chunks, context %d\n",
				ctx->asid);
			return -EINVAL;
		}
	} else if (num_chunks != 1) {
		dev_err(hdev->dev,
			"Sync stream CS mandates one chunk only, context %d\n",
			ctx->asid);
		return -EINVAL;
	}

	return 0;
}

static int hl_cs_copy_chunk_array(struct hl_device *hdev,
					struct hl_cs_chunk **cs_chunk_array,
					void __user *chunks, u32 num_chunks)
{
	u32 size_to_copy;

	if (num_chunks > HL_MAX_JOBS_PER_CS) {
		dev_err(hdev->dev,
			"Number of chunks can NOT be larger than %d\n",
			HL_MAX_JOBS_PER_CS);
		return -EINVAL;
	}

	*cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
					GFP_ATOMIC);
	if (!*cs_chunk_array)
		return -ENOMEM;

	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
	if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
		kfree(*cs_chunk_array);
		return -EFAULT;
	}

	return 0;
}

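/*
 * cs_ioctl_default() - handle a regular (execute) command submission.
 *
 * Flow: copy the chunk array from user space, allocate a CS object, then for
 * every chunk create a job, resolve its CB, run the ASIC parser and queue it
 * on the CS. A CS that contains only internal-queue jobs is rejected, since
 * no completion would ever arrive for it. Finally the CS is handed to
 * hl_hw_queue_schedule_cs().
 */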
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
				u32 num_chunks, u64 *cs_seq)
{
	bool int_queues_only = true;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_chunk *cs_chunk_array;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
	int rc, i;

	cntr = &hdev->aggregated_cs_counters;
	*cs_seq = ULLONG_MAX;

	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
	if (rc)
		goto out;

	/* increment refcnt for context */
	hl_ctx_get(hdev, hpriv->ctx);

	rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs);
	if (rc) {
		hl_ctx_put(hpriv->ctx);
		goto free_cs_chunk_array;
	}

	*cs_seq = cs->sequence;

	hl_debugfs_add_cs(cs);

	/* Validate ALL the CS chunks before submitting the CS */
	for (i = 0 ; i < num_chunks ; i++) {
		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
		enum hl_queue_type queue_type;
		bool is_kernel_allocated_cb;

		rc = validate_queue_index(hdev, chunk, &queue_type,
					&is_kernel_allocated_cb);
		if (rc) {
			atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
			atomic64_inc(&cntr->parsing_drop_cnt);
			goto free_cs_object;
		}

		if (is_kernel_allocated_cb) {
			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
			if (!cb) {
				atomic64_inc(
					&hpriv->ctx->cs_counters.parsing_drop_cnt);
				atomic64_inc(&cntr->parsing_drop_cnt);
				rc = -EINVAL;
				goto free_cs_object;
			}
		} else {
			cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
		}

		if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
			int_queues_only = false;

		job = hl_cs_allocate_job(hdev, queue_type,
						is_kernel_allocated_cb);
		if (!job) {
			atomic64_inc(
				&hpriv->ctx->cs_counters.out_of_mem_drop_cnt);
			atomic64_inc(&cntr->out_of_mem_drop_cnt);
			dev_err(hdev->dev, "Failed to allocate a new job\n");
			rc = -ENOMEM;
			if (is_kernel_allocated_cb)
				goto release_cb;

			goto free_cs_object;
		}

		job->id = i + 1;
		job->cs = cs;
		job->user_cb = cb;
		job->user_cb_size = chunk->cb_size;
		job->hw_queue_id = chunk->queue_index;

		cs->jobs_in_queue_cnt[job->hw_queue_id]++;

		list_add_tail(&job->cs_node, &cs->job_list);

		/*
		 * Increment CS reference. When CS reference is 0, CS is
		 * done and can be signaled to user and free all its resources
		 * Only increment for JOB on external or H/W queues, because
		 * only for those JOBs we get completion
		 */
		if (job->queue_type == QUEUE_TYPE_EXT ||
				job->queue_type == QUEUE_TYPE_HW)
			cs_get(cs);

		hl_debugfs_add_job(hdev, job);

		rc = cs_parser(hpriv, job);
		if (rc) {
			atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
			atomic64_inc(&cntr->parsing_drop_cnt);
			dev_err(hdev->dev,
				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
				cs->ctx->asid, cs->sequence, job->id, rc);
			goto free_cs_object;
		}
	}

	if (int_queues_only) {
		atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
		atomic64_inc(&cntr->parsing_drop_cnt);
		dev_err(hdev->dev,
			"Reject CS %d.%llu because only internal queues jobs are present\n",
			cs->ctx->asid, cs->sequence);
		rc = -EINVAL;
		goto free_cs_object;
	}

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				cs->ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	rc = HL_CS_STATUS_SUCCESS;
	goto put_cs;

release_cb:
	spin_lock(&cb->lock);
	cb->cs_cnt--;
	spin_unlock(&cb->lock);
	hl_cb_put(cb);
free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_cs_chunk_array:
	kfree(cs_chunk_array);
out:
	return rc;
}

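/*
 * hl_cs_ctx_switch() - run the one-time context-switch / restore phase.
 *
 * The first CS of a context (or any CS with HL_CS_FLAGS_FORCE_RESTORE) must
 * first perform the ASIC context switch and submit the user's "restore"
 * chunks, and then wait for that restore CS to complete before the regular
 * execute phase is allowed. Other threads of the same context poll on
 * thread_ctx_switch_wait_token until the switch is done.
 */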
static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
				u64 *cs_seq)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	bool need_soft_reset = false;
	int rc = 0, do_ctx_switch;
	void __user *chunks;
	u32 num_chunks, tmp;
	long ret;

	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
		mutex_lock(&hpriv->restore_phase_mutex);

		if (do_ctx_switch) {
			rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
			if (rc) {
				dev_err_ratelimited(hdev->dev,
					"Failed to switch to context %d, rejecting CS! %d\n",
					ctx->asid, rc);
				/*
				 * If we timed out, or if the device is not
				 * IDLE while we want to do context-switch
				 * (-EBUSY), we need to soft-reset because
				 * QMAN is probably stuck. However, we can't
				 * call reset here directly because of
				 * deadlock, so we need to do it at the very
				 * end of this function
				 */
				if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
					need_soft_reset = true;
				mutex_unlock(&hpriv->restore_phase_mutex);
				goto out;
			}
		}

		hdev->asic_funcs->restore_phase_topology(hdev);

		chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
		num_chunks = args->in.num_chunks_restore;

		if (!num_chunks) {
			dev_dbg(hdev->dev,
				"Need to run restore phase but restore CS is empty\n");
			rc = 0;
		} else {
			rc = cs_ioctl_default(hpriv, chunks, num_chunks,
						cs_seq);
		}

		mutex_unlock(&hpriv->restore_phase_mutex);

		if (rc) {
			dev_err(hdev->dev,
				"Failed to submit restore CS for context %d (%d)\n",
				ctx->asid, rc);
			goto out;
		}

		/* Need to wait for restore completion before execution phase */
		if (num_chunks) {
wait_again:
			ret = _hl_cs_wait_ioctl(hdev, ctx,
					jiffies_to_usecs(hdev->timeout_jiffies),
					*cs_seq);
			if (ret <= 0) {
				if (ret == -ERESTARTSYS) {
					usleep_range(100, 200);
					goto wait_again;
				}

				dev_err(hdev->dev,
					"Restore CS for context %d failed to complete %ld\n",
					ctx->asid, ret);
				rc = -ENOEXEC;
				goto out;
			}
		}

		ctx->thread_ctx_switch_wait_token = 1;

	} else if (!ctx->thread_ctx_switch_wait_token) {
		rc = hl_poll_timeout_memory(hdev,
			&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
			100, jiffies_to_usecs(hdev->timeout_jiffies), false);

		if (rc == -ETIMEDOUT) {
			dev_err(hdev->dev,
				"context switch phase timeout (%d)\n", tmp);
			goto out;
		}
	}

out:
	if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
		hl_device_reset(hdev, false, false);

	return rc;
}

static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
		struct hl_cs_chunk *chunk, u64 *signal_seq)
{
	u64 *signal_seq_arr = NULL;
	u32 size_to_copy, signal_seq_arr_len;
	int rc = 0;

	signal_seq_arr_len = chunk->num_signal_seq_arr;

	/* currently only one signal seq is supported */
	if (signal_seq_arr_len != 1) {
		dev_err(hdev->dev,
			"Wait for signal CS supports only one signal CS seq\n");
		return -EINVAL;
	}

	signal_seq_arr = kmalloc_array(signal_seq_arr_len,
					sizeof(*signal_seq_arr),
					GFP_ATOMIC);
	if (!signal_seq_arr)
		return -ENOMEM;

	size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr);
	if (copy_from_user(signal_seq_arr,
				u64_to_user_ptr(chunk->signal_seq_arr),
				size_to_copy)) {
		dev_err(hdev->dev,
			"Failed to copy signal seq array from user\n");
		rc = -EFAULT;
		goto out;
	}

	/* currently it is guaranteed to have only one signal seq */
	*signal_seq = signal_seq_arr[0];

out:
	kfree(signal_seq_arr);

	return rc;
}

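/*
 * cs_ioctl_signal_wait_create_jobs() - build the single kernel job of a
 * signal/wait CS.
 *
 * Allocates a job and a driver-owned CB sized for the signal or wait packet,
 * marks that CB as the already-patched CB (so no parsing is needed), takes a
 * CS reference since a completion is expected for this job, and links the job
 * to the CS.
 */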
static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs, enum hl_queue_type q_type,
		u32 q_idx)
{
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;

	cntr = &hdev->aggregated_cs_counters;

	job = hl_cs_allocate_job(hdev, q_type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	if (cs->type == CS_TYPE_WAIT)
		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
	else
		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);

	cb = hl_cb_kernel_create(hdev, cb_size,
		q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	job->user_cb->cs_cnt++;
	job->user_cb_size = cb_size;
	job->hw_queue_id = q_idx;

	/*
	 * No need for parsing, the user CB is the patched CB.
	 * We call hl_cb_destroy() for two reasons - we don't need the CB in
	 * the CB idr anymore and to decrement its refcount as it was
	 * incremented inside hl_cb_kernel_create().
	 */
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	/* increment refcount as for external queues we get completion */
	cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

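/*
 * cs_ioctl_signal_wait() - handle a signal, wait or collective-wait CS.
 *
 * Validates that the single chunk targets a queue with sync-stream support,
 * and for the wait variants resolves the fence of the signal CS that is being
 * waited on (the caller-supplied sequence number) before the CS is built.
 */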
static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
				void __user *chunks, u32 num_chunks,
				u64 *cs_seq)
{
	struct hl_cs_chunk *cs_chunk_array, *chunk;
	struct hw_queue_properties *hw_queue_prop;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_compl *sig_waitcs_cmpl;
	u32 q_idx, collective_engine_id = 0;
	struct hl_fence *sig_fence = NULL;
	struct hl_ctx *ctx = hpriv->ctx;
	enum hl_queue_type q_type;
	struct hl_cs *cs;
	u64 signal_seq;
	int rc;

	*cs_seq = ULLONG_MAX;

	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
	if (rc)
		goto out;

	/* currently it is guaranteed to have only one chunk */
	chunk = &cs_chunk_array[0];

	if (chunk->queue_index >= hdev->asic_prop.max_queues) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	q_idx = chunk->queue_index;
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
	q_type = hw_queue_prop->type;

	if (!hw_queue_prop->supports_sync_stream) {
		dev_err(hdev->dev,
			"Queue index %d does not support sync stream operations\n",
			q_idx);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
		if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
			dev_err(hdev->dev,
				"Queue index %d is invalid\n", q_idx);
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		collective_engine_id = chunk->collective_engine_id;
	}

	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
		rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq);
		if (rc)
			goto free_cs_chunk_array;

		sig_fence = hl_ctx_get_fence(ctx, signal_seq);
		if (IS_ERR(sig_fence)) {
			dev_err(hdev->dev,
				"Failed to get signal CS with seq 0x%llx\n",
				signal_seq);
			rc = PTR_ERR(sig_fence);
			goto free_cs_chunk_array;
		}

		if (!sig_fence) {
			/* signal CS already finished */
			rc = 0;
			goto free_cs_chunk_array;
		}

		sig_waitcs_cmpl =
			container_of(sig_fence, struct hl_cs_compl, base_fence);

		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
			dev_err(hdev->dev,
				"CS seq 0x%llx is not of a signal CS\n",
				signal_seq);
			hl_fence_put(sig_fence);
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		if (completion_done(&sig_fence->completion)) {
			/* signal CS already finished */
			hl_fence_put(sig_fence);
			rc = 0;
			goto free_cs_chunk_array;
		}
	}

	/* increment refcnt for context */
	hl_ctx_get(hdev, ctx);

	rc = allocate_cs(hdev, ctx, cs_type, &cs);
	if (rc) {
		if (cs_type == CS_TYPE_WAIT ||
			cs_type == CS_TYPE_COLLECTIVE_WAIT)
			hl_fence_put(sig_fence);
		hl_ctx_put(ctx);
		goto free_cs_chunk_array;
	}

	/*
	 * Save the signal CS fence for later initialization right before
	 * hanging the wait CS on the queue.
	 */
	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT)
		cs->signal_fence = sig_fence;

	hl_debugfs_add_cs(cs);

	*cs_seq = cs->sequence;

	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
		rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
				q_idx);
	else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
		rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
				cs, q_idx, collective_engine_id);
	else
		rc = -EINVAL;

	if (rc)
		goto free_cs_object;

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	rc = HL_CS_STATUS_SUCCESS;
	goto put_cs;

free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_cs_chunk_array:
	kfree(cs_chunk_array);
out:
	return rc;
}

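/**
 * hl_cs_ioctl() - command submission IOCTL handler
 * @hpriv: pointer to the file private data
 * @data: pointer to the IOCTL arguments (union hl_cs_args)
 *
 * Performs sanity checks and a context switch if needed, then dispatches the
 * submission either to the signal/wait path or to the default execution path
 * according to the CS flags. The output status and CS sequence number are
 * written back to @data unless the return code is -EAGAIN.
 */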
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_cs_args *args = data;
	enum hl_cs_type cs_type;
	u64 cs_seq = ULONG_MAX;
	void __user *chunks;
	u32 num_chunks;
	int rc;

	rc = hl_cs_sanity_checks(hpriv, args);
	if (rc)
		goto out;

	rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
	if (rc)
		goto out;

	cs_type = hl_cs_get_cs_type(args->in.cs_flags &
					~HL_CS_FLAGS_FORCE_RESTORE);
	chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
	num_chunks = args->in.num_chunks_execute;

	switch (cs_type) {
	case CS_TYPE_SIGNAL:
	case CS_TYPE_WAIT:
	case CS_TYPE_COLLECTIVE_WAIT:
		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
						&cs_seq);
		break;
	default:
		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq);
		break;
	}

out:
	if (rc != -EAGAIN) {
		memset(args, 0, sizeof(*args));
		args->out.status = rc;
		args->out.seq = cs_seq;
	}

	return rc;
}

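/**
 * _hl_cs_wait_ioctl() - wait for a CS to complete
 * @hdev: habanalabs device structure
 * @ctx: context of the submitting process
 * @timeout_us: timeout in microseconds, or MAX_SCHEDULE_TIMEOUT to wait
 *              without a bound
 * @seq: sequence number of the CS to wait on
 *
 * Return: a positive value if the CS has completed (or its fence is already
 * gone), 0 if it is still in progress or the wait timed out, or a negative
 * error code (-ERESTARTSYS if a signal was received, -ETIMEDOUT or -EIO if
 * the fence reported an error, or the fence lookup error).
 */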
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		struct hl_ctx *ctx, u64 timeout_us, u64 seq)
{
	struct hl_fence *fence;
	unsigned long timeout;
	long rc;

	if (timeout_us == MAX_SCHEDULE_TIMEOUT)
		timeout = timeout_us;
	else
		timeout = usecs_to_jiffies(timeout_us);

	hl_ctx_get(hdev, ctx);

	fence = hl_ctx_get_fence(ctx, seq);
	if (IS_ERR(fence)) {
		rc = PTR_ERR(fence);
		if (rc == -EINVAL)
			dev_notice_ratelimited(hdev->dev,
				"Can't wait on CS %llu because current CS is at seq %llu\n",
				seq, ctx->cs_sequence);
	} else if (fence) {
		if (!timeout_us)
			rc = completion_done(&fence->completion);
		else
			rc = wait_for_completion_interruptible_timeout(
					&fence->completion, timeout);

		if (fence->error == -ETIMEDOUT)
			rc = -ETIMEDOUT;
		else if (fence->error == -EIO)
			rc = -EIO;

		hl_fence_put(fence);
	} else {
		dev_dbg(hdev->dev,
			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
			seq, ctx->cs_sequence);
		rc = 1;
	}

	hl_ctx_put(ctx);

	return rc;
}

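/**
 * hl_cs_wait_ioctl() - wait-for-CS IOCTL handler
 * @hpriv: pointer to the file private data
 * @data: pointer to the IOCTL arguments (union hl_wait_cs_args)
 *
 * Translates the result of _hl_cs_wait_ioctl() into a HL_WAIT_CS_STATUS_*
 * value that is reported back to the user in @data.
 *
 * Return: 0 if a wait status was reported, negative error code if the wait
 * was interrupted, timed out or aborted.
 */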
int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_wait_cs_args *args = data;
	u64 seq = args->in.seq;
	long rc;

	rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq);

	memset(args, 0, sizeof(*args));

	if (rc < 0) {
		if (rc == -ERESTARTSYS) {
			dev_err_ratelimited(hdev->dev,
				"user process got signal while waiting for CS handle %llu\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
			rc = -EINTR;
		} else if (rc == -ETIMEDOUT) {
			dev_err_ratelimited(hdev->dev,
				"CS %llu has timed-out while user process is waiting for it\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
		} else if (rc == -EIO) {
			dev_err_ratelimited(hdev->dev,
				"CS %llu has been aborted while user process is waiting for it\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_ABORTED;
		}
		return rc;
	}

	if (rc == 0)
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
	else
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;

	return 0;
}