commit 860e73b49c

Merge tag 'misc-habanalabs-next-2020-07-24' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next

Oded writes:

This tag contains the following changes for kernel 5.9-rc1:

- Remove rate limiters from GAUDI configuration (no longer needed)
- Set the maximum number of in-flight CS per ASIC type and increase that
  maximum for GAUDI
- Refactor the signal/wait command-submission code
- Calculate the trace frequency from the PLLs to show accurate profiling data
- Rephrase error messages to make them clearer to the common user
- Add statistics of dropped CS (a counter per possible drop reason)
- Get ECC information from the firmware
- Remove support for partial SoC reset in GAUDI
- Halt the device CPU only when a reset is certain to happen. Sometimes we
  abort the reset procedure, and in that case we can't leave the device CPU
  in halt mode
- Assign each CQ its own work queue to prevent a race between completions on
  different CQs
- Use the queue pi/ci to determine queue occupancy. This makes the code
  reusable between current and future ASICs
- Add more validations of user inputs
- Refactor the PCIe controller configuration to make the code reusable
  between current and future ASICs
- Update the firmware interface headers to the latest version
- Move all common code to a dedicated common sub-folder

-----BEGIN PGP SIGNATURE-----

iQFKBAABCgA0FiEE7TEboABC71LctBLFZR1NuKta54AFAl8bHekWHG9kZWQuZ2Fi
YmF5QGdtYWlsLmNvbQAKCRBlHU24q1rngCwsCACGVS+NVbDO2E42TOpTxRst26Nu
7V8s8HcICbdhfHbkJWnexsbPkjzRUvi1pXfklmafGtGtoTunwpb1AXx+oWeMqmnT
8IoxVlp0A5doP9uh8WOdKPypDNWem8fL+89y+89C8/ImAHMeeC4X+v2hiIIoRBDu
aoO3zgxPjLmNfcUGfUqcaLcar8w/EhVY/JIPjSc21PXouyUer7Jx9oe2MS1OkQ0h
FIYBRW4oycEqZSYCs9SLog/ltBt84Kzk/TsWweTcW3V3EpXyhFjq5yG9nBmmNB9z
U9wmJV4tpGOg/ehvy3MWMbCtv4xK/xS3R3pn25IJw79X2RWUKizBorgYlXu/
=ZOVe
-----END PGP SIGNATURE-----

* tag 'misc-habanalabs-next-2020-07-24' of git://people.freedesktop.org/~gabbayo/linux: (28 commits)
  habanalabs: Fix memory leak in error flow of context initialization
  habanalabs: use no flags on MMU cache invalidation
  habanalabs: enable device before hw_init()
  habanalabs: create internal CB pool
  habanalabs: update hl_boot_if.h from firmware
  habanalabs: create common folder
  habanalabs: check for DMA errors when clearing memory
  habanalabs: verify queue can contain all cs jobs
  habanalabs: Assign each CQ with its own work queue
  habanalabs: halt device CPU only upon certain reset
  habanalabs: remove unused hash
  habanalabs: use queue pi/ci in order to determine queue occupancy
  habanalabs: configure maximum queues per asic
  habanalabs: remove soft-reset support from GAUDI
  habanalabs: PCIe iATU refactoring
  habanalabs: Extract ECC information from FW
  habanalabs: Add dropped cs statistics info struct
  habanalabs: extract cpu boot status lookup
  habanalabs: rephrase error messages
  habanalabs: Increase queues depth
  ...
drivers/misc/habanalabs/Makefile

@@ -3,16 +3,15 @@
 # Makefile for HabanaLabs AI accelerators driver
 #
 
-obj-m := habanalabs.o
+obj-$(CONFIG_HABANA_AI) := habanalabs.o
 
-habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
-		command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \
-		command_submission.o mmu.o firmware_if.o pci.o
-
-habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o
+include $(src)/common/Makefile
+habanalabs-y += $(HL_COMMON_FILES)
 
 include $(src)/goya/Makefile
 habanalabs-y += $(HL_GOYA_FILES)
 
 include $(src)/gaudi/Makefile
 habanalabs-y += $(HL_GAUDI_FILES)
+
+habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o
drivers/misc/habanalabs/common/Makefile (new file, 9 lines)

@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+subdir-ccflags-y += -I$(src)/common
+
+HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
+		common/asid.o common/habanalabs_ioctl.o \
+		common/command_buffer.o common/hw_queue.o common/irq.o \
+		common/sysfs.o common/hwmon.o common/memory.o \
+		common/command_submission.o common/mmu.o common/firmware_if.o \
+		common/pci.o
drivers/misc/habanalabs/common/command_buffer.c

@@ -10,12 +10,18 @@
 
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/genalloc.h>
 
 static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
 {
+	if (cb->is_internal)
+		gen_pool_free(hdev->internal_cb_pool,
+				cb->kernel_address, cb->size);
+	else
 	hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
 			(void *) (uintptr_t) cb->kernel_address,
 			cb->bus_address);
 
 	kfree(cb);
 }
@@ -44,9 +50,10 @@ static void cb_release(struct kref *ref)
 }
 
 static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
-					int ctx_id)
+					int ctx_id, bool internal_cb)
 {
 	struct hl_cb *cb;
+	u32 cb_offset;
 	void *p;
 
 	/*
@@ -65,13 +72,25 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
 	if (!cb)
 		return NULL;
 
-	if (ctx_id == HL_KERNEL_ASID_ID)
+	if (internal_cb) {
+		p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size);
+		if (!p) {
+			kfree(cb);
+			return NULL;
+		}
+
+		cb_offset = p - hdev->internal_cb_pool_virt_addr;
+		cb->is_internal = true;
+		cb->bus_address = hdev->internal_cb_va_base + cb_offset;
+	} else if (ctx_id == HL_KERNEL_ASID_ID) {
 		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
 						&cb->bus_address, GFP_ATOMIC);
-	else
+	} else {
 		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
 						&cb->bus_address,
 						GFP_USER | __GFP_ZERO);
+	}
 
 	if (!p) {
 		dev_err(hdev->dev,
 			"failed to allocate %d of dma memory for CB\n",
@@ -87,7 +106,7 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
 }
 
 int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
-			u32 cb_size, u64 *handle, int ctx_id)
+			u32 cb_size, u64 *handle, int ctx_id, bool internal_cb)
 {
 	struct hl_cb *cb;
 	bool alloc_new_cb = true;
@@ -112,6 +131,7 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
 		goto out_err;
 	}
 
+	if (!internal_cb) {
 	/* Minimum allocation must be PAGE SIZE */
 	if (cb_size < PAGE_SIZE)
 		cb_size = PAGE_SIZE;
@@ -121,8 +141,8 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
 
 		spin_lock(&hdev->cb_pool_lock);
 		if (!list_empty(&hdev->cb_pool)) {
-			cb = list_first_entry(&hdev->cb_pool, typeof(*cb),
-					pool_list);
+			cb = list_first_entry(&hdev->cb_pool,
+					typeof(*cb), pool_list);
 			list_del(&cb->pool_list);
 			spin_unlock(&hdev->cb_pool_lock);
 			alloc_new_cb = false;
@@ -131,9 +151,10 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
 			dev_dbg(hdev->dev, "CB pool is empty\n");
 		}
 	}
+	}
 
 	if (alloc_new_cb) {
-		cb = hl_cb_alloc(hdev, cb_size, ctx_id);
+		cb = hl_cb_alloc(hdev, cb_size, ctx_id, internal_cb);
 		if (!cb) {
 			rc = -ENOMEM;
 			goto out_err;
@@ -230,7 +251,7 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
 	} else {
 		rc = hl_cb_create(hdev, &hpriv->cb_mgr,
 					args->in.cb_size, &handle,
-					hpriv->ctx->asid);
+					hpriv->ctx->asid, false);
 	}
 
 	memset(args, 0, sizeof(*args));
@@ -398,14 +419,15 @@ void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
 	idr_destroy(&mgr->cb_handles);
 }
 
-struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
+struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
+					bool internal_cb)
 {
 	u64 cb_handle;
 	struct hl_cb *cb;
 	int rc;
 
 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
-			HL_KERNEL_ASID_ID);
+			HL_KERNEL_ASID_ID, internal_cb);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to allocate CB for the kernel driver %d\n", rc);
@@ -437,7 +459,7 @@ int hl_cb_pool_init(struct hl_device *hdev)
 
 	for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
 		cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
-				HL_KERNEL_ASID_ID);
+				HL_KERNEL_ASID_ID, false);
 		if (cb) {
 			cb->is_pool = true;
 			list_add(&cb->pool_list, &hdev->cb_pool);
drivers/misc/habanalabs/common/command_submission.c

@@ -246,6 +246,18 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
 	kfree(job);
 }
 
+static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
+{
+	hdev->aggregated_cs_counters.device_in_reset_drop_cnt +=
+			ctx->cs_counters.device_in_reset_drop_cnt;
+	hdev->aggregated_cs_counters.out_of_mem_drop_cnt +=
+			ctx->cs_counters.out_of_mem_drop_cnt;
+	hdev->aggregated_cs_counters.parsing_drop_cnt +=
+			ctx->cs_counters.parsing_drop_cnt;
+	hdev->aggregated_cs_counters.queue_full_drop_cnt +=
+			ctx->cs_counters.queue_full_drop_cnt;
+}
+
 static void cs_do_release(struct kref *ref)
 {
 	struct hl_cs *cs = container_of(ref, struct hl_cs,
@@ -349,6 +361,9 @@ static void cs_do_release(struct kref *ref)
 		dma_fence_signal(cs->fence);
 	dma_fence_put(cs->fence);
 
+	cs_counters_aggregate(hdev, cs->ctx);
+
+	kfree(cs->jobs_in_queue_cnt);
 	kfree(cs);
 }
 
@@ -373,9 +388,9 @@ static void cs_timedout(struct work_struct *work)
 	hdev = cs->ctx->hdev;
 	ctx_asid = cs->ctx->asid;
 
-	/* TODO: add information about last signaled seq and last emitted seq */
-	dev_err(hdev->dev, "User %d command submission %llu got stuck!\n",
-		ctx_asid, cs->sequence);
+	dev_err(hdev->dev,
+		"Command submission %llu has not finished in time!\n",
+		cs->sequence);
 
 	cs_put(cs);
 
@@ -418,21 +433,29 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	spin_lock(&ctx->cs_lock);
 
 	cs_cmpl->cs_seq = ctx->cs_sequence;
-	other = ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)];
+	other = ctx->cs_pending[cs_cmpl->cs_seq &
+				(hdev->asic_prop.max_pending_cs - 1)];
 	if ((other) && (!dma_fence_is_signaled(other))) {
-		spin_unlock(&ctx->cs_lock);
 		dev_dbg(hdev->dev,
 			"Rejecting CS because of too many in-flights CS\n");
 		rc = -EAGAIN;
 		goto free_fence;
 	}
 
+	cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+			sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
+	if (!cs->jobs_in_queue_cnt) {
+		rc = -ENOMEM;
+		goto free_fence;
+	}
+
 	dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
 			ctx->asid, ctx->cs_sequence);
 
 	cs->sequence = cs_cmpl->cs_seq;
 
-	ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)] =
+	ctx->cs_pending[cs_cmpl->cs_seq &
+			(hdev->asic_prop.max_pending_cs - 1)] =
 							&cs_cmpl->base_fence;
 	ctx->cs_sequence++;
 
@@ -447,6 +470,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	return 0;
 
 free_fence:
+	spin_unlock(&ctx->cs_lock);
 	kfree(cs_cmpl);
 free_cs:
 	kfree(cs);
@@ -463,10 +487,12 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
 
 void hl_cs_rollback_all(struct hl_device *hdev)
 {
+	int i;
 	struct hl_cs *cs, *tmp;
 
 	/* flush all completions */
-	flush_workqueue(hdev->cq_wq);
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+		flush_workqueue(hdev->cq_wq[i]);
 
 	/* Make sure we don't have leftovers in the H/W queues mirror list */
 	list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
@@ -499,10 +525,18 @@ static int validate_queue_index(struct hl_device *hdev,
 	struct asic_fixed_properties *asic = &hdev->asic_prop;
 	struct hw_queue_properties *hw_queue_prop;
 
+	/* This must be checked here to prevent out-of-bounds access to
+	 * hw_queues_props array
+	 */
+	if (chunk->queue_index >= asic->max_queues) {
+		dev_err(hdev->dev, "Queue index %d is invalid\n",
+			chunk->queue_index);
+		return -EINVAL;
+	}
+
 	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
 
-	if ((chunk->queue_index >= HL_MAX_QUEUES) ||
-			(hw_queue_prop->type == QUEUE_TYPE_NA)) {
+	if (hw_queue_prop->type == QUEUE_TYPE_NA) {
 		dev_err(hdev->dev, "Queue index %d is invalid\n",
 			chunk->queue_index);
 		return -EINVAL;
@@ -630,12 +664,15 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 
 		rc = validate_queue_index(hdev, chunk, &queue_type,
 				&is_kernel_allocated_cb);
-		if (rc)
+		if (rc) {
+			hpriv->ctx->cs_counters.parsing_drop_cnt++;
 			goto free_cs_object;
+		}
 
 		if (is_kernel_allocated_cb) {
 			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
 			if (!cb) {
+				hpriv->ctx->cs_counters.parsing_drop_cnt++;
 				rc = -EINVAL;
 				goto free_cs_object;
 			}
@@ -649,6 +686,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		job = hl_cs_allocate_job(hdev, queue_type,
 					is_kernel_allocated_cb);
 		if (!job) {
+			hpriv->ctx->cs_counters.out_of_mem_drop_cnt++;
 			dev_err(hdev->dev, "Failed to allocate a new job\n");
 			rc = -ENOMEM;
 			if (is_kernel_allocated_cb)
@@ -681,6 +719,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 
 		rc = cs_parser(hpriv, job);
 		if (rc) {
+			hpriv->ctx->cs_counters.parsing_drop_cnt++;
 			dev_err(hdev->dev,
 				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
 				cs->ctx->asid, cs->sequence, job->id, rc);
@@ -689,6 +728,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 	}
 
 	if (int_queues_only) {
+		hpriv->ctx->cs_counters.parsing_drop_cnt++;
 		dev_err(hdev->dev,
 			"Reject CS %d.%llu because only internal queues jobs are present\n",
 			cs->ctx->asid, cs->sequence);
@@ -738,6 +778,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	struct hl_cs_job *job;
 	struct hl_cs *cs;
 	struct hl_cb *cb;
+	enum hl_queue_type q_type;
 	u64 *signal_seq_arr = NULL, signal_seq;
 	u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
 	int rc;
@@ -770,9 +811,10 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	chunk = &cs_chunk_array[0];
 	q_idx = chunk->queue_index;
 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
+	q_type = hw_queue_prop->type;
 
-	if ((q_idx >= HL_MAX_QUEUES) ||
-			(hw_queue_prop->type != QUEUE_TYPE_EXT)) {
+	if ((q_idx >= hdev->asic_prop.max_queues) ||
+			(!hw_queue_prop->supports_sync_stream)) {
 		dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
 		rc = -EINVAL;
 		goto free_cs_chunk_array;
@@ -869,25 +911,28 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
 	*cs_seq = cs->sequence;
 
-	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
+	job = hl_cs_allocate_job(hdev, q_type, true);
 	if (!job) {
+		ctx->cs_counters.out_of_mem_drop_cnt++;
 		dev_err(hdev->dev, "Failed to allocate a new job\n");
 		rc = -ENOMEM;
 		goto put_cs;
 	}
 
-	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
-	if (!cb) {
-		kfree(job);
-		rc = -EFAULT;
-		goto put_cs;
-	}
-
 	if (cs->type == CS_TYPE_WAIT)
 		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
 	else
 		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
 
+	cb = hl_cb_kernel_create(hdev, cb_size,
+			q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
+	if (!cb) {
+		ctx->cs_counters.out_of_mem_drop_cnt++;
+		kfree(job);
+		rc = -EFAULT;
+		goto put_cs;
+	}
+
 	job->id = 0;
 	job->cs = cs;
 	job->user_cb = cb;
@@ -1126,7 +1171,7 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
 		rc = PTR_ERR(fence);
 		if (rc == -EINVAL)
 			dev_notice_ratelimited(hdev->dev,
-				"Can't wait on seq %llu because current CS is at seq %llu\n",
+				"Can't wait on CS %llu because current CS is at seq %llu\n",
 				seq, ctx->cs_sequence);
 	} else if (fence) {
 		rc = dma_fence_wait_timeout(fence, true, timeout);
@@ -1159,15 +1204,21 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	memset(args, 0, sizeof(*args));
 
 	if (rc < 0) {
-		dev_err_ratelimited(hdev->dev,
-			"Error %ld on waiting for CS handle %llu\n",
-			rc, seq);
 		if (rc == -ERESTARTSYS) {
+			dev_err_ratelimited(hdev->dev,
+				"user process got signal while waiting for CS handle %llu\n",
+				seq);
 			args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
 			rc = -EINTR;
 		} else if (rc == -ETIMEDOUT) {
+			dev_err_ratelimited(hdev->dev,
+				"CS %llu has timed-out while user process is waiting for it\n",
+				seq);
 			args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
 		} else if (rc == -EIO) {
+			dev_err_ratelimited(hdev->dev,
+				"CS %llu has been aborted while user process is waiting for it\n",
+				seq);
 			args->out.status = HL_WAIT_CS_STATUS_ABORTED;
 		}
 		return rc;
drivers/misc/habanalabs/common/context.c

@@ -22,9 +22,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 	 * to this function unless the ref count is 0
 	 */
 
-	for (i = 0 ; i < HL_MAX_PENDING_CS ; i++)
+	for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
 		dma_fence_put(ctx->cs_pending[i]);
 
+	kfree(ctx->cs_pending);
+
 	if (ctx->asid != HL_KERNEL_ASID_ID) {
 		/* The engines are stopped as there is no executing CS, but the
 		 * Coresight might be still working by accessing addresses
@@ -110,8 +112,7 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
 		return;
 
 	dev_warn(hdev->dev,
-		"Context %d closed or terminated but its CS are executing\n",
-		ctx->asid);
+		"user process released device but its command submissions are still executing\n");
 }
 
 int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
@@ -126,34 +127,49 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 	spin_lock_init(&ctx->cs_lock);
 	atomic_set(&ctx->thread_ctx_switch_token, 1);
 	ctx->thread_ctx_switch_wait_token = 0;
+	ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
+				sizeof(struct dma_fence *),
+				GFP_KERNEL);
+	if (!ctx->cs_pending)
+		return -ENOMEM;
 
 	if (is_kernel_ctx) {
 		ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
 		rc = hl_mmu_ctx_init(ctx);
 		if (rc) {
 			dev_err(hdev->dev, "Failed to init mmu ctx module\n");
-			goto mem_ctx_err;
+			goto err_free_cs_pending;
 		}
 	} else {
 		ctx->asid = hl_asid_alloc(hdev);
 		if (!ctx->asid) {
 			dev_err(hdev->dev, "No free ASID, failed to create context\n");
-			return -ENOMEM;
+			rc = -ENOMEM;
+			goto err_free_cs_pending;
 		}
 
 		rc = hl_vm_ctx_init(ctx);
 		if (rc) {
 			dev_err(hdev->dev, "Failed to init mem ctx module\n");
 			rc = -ENOMEM;
-			goto mem_ctx_err;
+			goto err_asid_free;
+		}
+
+		rc = hdev->asic_funcs->ctx_init(ctx);
+		if (rc) {
+			dev_err(hdev->dev, "ctx_init failed\n");
+			goto err_vm_ctx_fini;
 		}
 	}
 
 	return 0;
 
-mem_ctx_err:
-	if (ctx->asid != HL_KERNEL_ASID_ID)
+err_vm_ctx_fini:
+	hl_vm_ctx_fini(ctx);
+err_asid_free:
 	hl_asid_free(hdev, ctx->asid);
+err_free_cs_pending:
+	kfree(ctx->cs_pending);
 
 	return rc;
 }
@@ -170,6 +186,7 @@ int hl_ctx_put(struct hl_ctx *ctx)
 
 struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
 {
+	struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
 	struct dma_fence *fence;
 
 	spin_lock(&ctx->cs_lock);
@@ -179,13 +196,13 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (seq + HL_MAX_PENDING_CS < ctx->cs_sequence) {
+	if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
 		spin_unlock(&ctx->cs_lock);
 		return NULL;
 	}
 
 	fence = dma_fence_get(
-			ctx->cs_pending[seq & (HL_MAX_PENDING_CS - 1)]);
+			ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
 	spin_unlock(&ctx->cs_lock);
 
 	return fence;
drivers/misc/habanalabs/common/device.c

@@ -249,7 +249,8 @@ static void device_cdev_sysfs_del(struct hl_device *hdev)
  */
 static int device_early_init(struct hl_device *hdev)
 {
-	int rc;
+	int i, rc;
+	char workq_name[32];
 
 	switch (hdev->asic_type) {
 	case ASIC_GOYA:
@@ -274,11 +275,24 @@ static int device_early_init(struct hl_device *hdev)
 	if (rc)
 		goto early_fini;
 
-	hdev->cq_wq = alloc_workqueue("hl-free-jobs", WQ_UNBOUND, 0);
+	if (hdev->asic_prop.completion_queues_count) {
+		hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
+				sizeof(*hdev->cq_wq),
+				GFP_ATOMIC);
+		if (!hdev->cq_wq) {
+			rc = -ENOMEM;
+			goto asid_fini;
+		}
+	}
+
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
+		snprintf(workq_name, 32, "hl-free-jobs-%u", i);
+		hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
 		if (hdev->cq_wq == NULL) {
 			dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
 			rc = -ENOMEM;
-			goto asid_fini;
+			goto free_cq_wq;
+		}
 	}
 
 	hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
@@ -321,7 +335,10 @@ free_chip_info:
 free_eq_wq:
 	destroy_workqueue(hdev->eq_wq);
 free_cq_wq:
-	destroy_workqueue(hdev->cq_wq);
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+		if (hdev->cq_wq[i])
+			destroy_workqueue(hdev->cq_wq[i]);
+	kfree(hdev->cq_wq);
 asid_fini:
 	hl_asid_fini(hdev);
 early_fini:
@@ -339,6 +356,8 @@ early_fini:
  */
 static void device_early_fini(struct hl_device *hdev)
 {
+	int i;
+
 	mutex_destroy(&hdev->mmu_cache_lock);
 	mutex_destroy(&hdev->debug_lock);
 	mutex_destroy(&hdev->send_cpu_message_lock);
@@ -351,7 +370,10 @@ static void device_early_fini(struct hl_device *hdev)
 	kfree(hdev->hl_chip_info);
 
 	destroy_workqueue(hdev->eq_wq);
-	destroy_workqueue(hdev->cq_wq);
+
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+		destroy_workqueue(hdev->cq_wq[i]);
+	kfree(hdev->cq_wq);
 
 	hl_asid_fini(hdev);
 
@@ -838,6 +860,22 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
 	if (rc)
 		return 0;
 
+	if (hard_reset) {
+		/* Disable PCI access from device F/W so he won't send
+		 * us additional interrupts. We disable MSI/MSI-X at
+		 * the halt_engines function and we can't have the F/W
+		 * sending us interrupts after that. We need to disable
+		 * the access here because if the device is marked
+		 * disable, the message won't be send. Also, in case
+		 * of heartbeat, the device CPU is marked as disable
+		 * so this message won't be sent
+		 */
+		if (hl_fw_send_pci_access_msg(hdev,
+				ARMCP_PACKET_DISABLE_PCI_ACCESS))
+			dev_warn(hdev->dev,
+				"Failed to disable PCI access by F/W\n");
+	}
+
 	/* This also blocks future CS/VM/JOB completion operations */
 	hdev->disabled = true;
 
@@ -995,6 +1033,12 @@ again:
 		}
 	}
 
+	/* Device is now enabled as part of the initialization requires
+	 * communication with the device firmware to get information that
+	 * is required for the initialization itself
+	 */
+	hdev->disabled = false;
+
 	rc = hdev->asic_funcs->hw_init(hdev);
 	if (rc) {
 		dev_err(hdev->dev,
@@ -1002,8 +1046,6 @@ again:
 		goto out_err;
 	}
 
-	hdev->disabled = false;
-
 	/* Check that the communication with the device is working */
 	rc = hdev->asic_funcs->test_queues(hdev);
 	if (rc) {
@@ -1144,15 +1186,18 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	 * because there the addresses of the completion queues are being
 	 * passed as arguments to request_irq
 	 */
+	if (cq_cnt) {
 		hdev->completion_queue = kcalloc(cq_cnt,
 				sizeof(*hdev->completion_queue),
 				GFP_KERNEL);
 
 		if (!hdev->completion_queue) {
-			dev_err(hdev->dev, "failed to allocate completion queues\n");
+			dev_err(hdev->dev,
+					"failed to allocate completion queues\n");
 			rc = -ENOMEM;
 			goto hw_queues_destroy;
 		}
+	}
 
 	for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
 		rc = hl_cq_init(hdev, &hdev->completion_queue[i],
@@ -1162,6 +1207,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 				"failed to initialize completion queue\n");
 			goto cq_fini;
 		}
+		hdev->completion_queue[i].cq_idx = i;
 	}
 
 	/*
@@ -1219,6 +1265,12 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	 */
 	add_cdev_sysfs_on_err = true;
 
+	/* Device is now enabled as part of the initialization requires
+	 * communication with the device firmware to get information that
+	 * is required for the initialization itself
+	 */
+	hdev->disabled = false;
+
 	rc = hdev->asic_funcs->hw_init(hdev);
 	if (rc) {
 		dev_err(hdev->dev, "failed to initialize the H/W\n");
@@ -1226,8 +1278,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 		goto out_disabled;
 	}
 
-	hdev->disabled = false;
-
 	/* Check that the communication with the device is working */
 	rc = hdev->asic_funcs->test_queues(hdev);
 	if (rc) {
drivers/misc/habanalabs/common/firmware_if.c

@@ -6,7 +6,7 @@
  */
 
 #include "habanalabs.h"
-#include "include/hl_boot_if.h"
+#include "include/common/hl_boot_if.h"
 
 #include <linux/firmware.h>
 #include <linux/genalloc.h>
@@ -289,7 +289,7 @@ int hl_fw_armcp_info_get(struct hl_device *hdev)
 			HL_ARMCP_INFO_TIMEOUT_USEC, &result);
 	if (rc) {
 		dev_err(hdev->dev,
-			"Failed to send ArmCP info pkt, error %d\n", rc);
+			"Failed to handle ArmCP info pkt, error %d\n", rc);
 		goto out;
 	}
 
@@ -340,7 +340,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
 
 	if (rc) {
 		dev_err(hdev->dev,
-			"Failed to send ArmCP EEPROM packet, error %d\n", rc);
+			"Failed to handle ArmCP EEPROM packet, error %d\n", rc);
 		goto out;
 	}
 
@@ -393,6 +393,53 @@ static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
 			"Device boot error - NIC F/W initialization failed\n");
 }
 
+static void hl_detect_cpu_boot_status(struct hl_device *hdev, u32 status)
+{
+	switch (status) {
+	case CPU_BOOT_STATUS_NA:
+		dev_err(hdev->dev,
+			"Device boot error - BTL did NOT run\n");
+		break;
+	case CPU_BOOT_STATUS_IN_WFE:
+		dev_err(hdev->dev,
+			"Device boot error - Stuck inside WFE loop\n");
+		break;
+	case CPU_BOOT_STATUS_IN_BTL:
+		dev_err(hdev->dev,
+			"Device boot error - Stuck in BTL\n");
+		break;
+	case CPU_BOOT_STATUS_IN_PREBOOT:
+		dev_err(hdev->dev,
+			"Device boot error - Stuck in Preboot\n");
+		break;
+	case CPU_BOOT_STATUS_IN_SPL:
+		dev_err(hdev->dev,
+			"Device boot error - Stuck in SPL\n");
+		break;
+	case CPU_BOOT_STATUS_IN_UBOOT:
+		dev_err(hdev->dev,
+			"Device boot error - Stuck in u-boot\n");
+		break;
+	case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
+		dev_err(hdev->dev,
+			"Device boot error - DRAM initialization failed\n");
+		break;
+	case CPU_BOOT_STATUS_UBOOT_NOT_READY:
+		dev_err(hdev->dev,
+			"Device boot error - u-boot stopped by user\n");
+		break;
+	case CPU_BOOT_STATUS_TS_INIT_FAIL:
+		dev_err(hdev->dev,
+			"Device boot error - Thermal Sensor initialization failed\n");
+		break;
+	default:
+		dev_err(hdev->dev,
+			"Device boot error - Invalid status code %d\n",
+			status);
+		break;
+	}
+}
+
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 			u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
 			u32 boot_err0_reg, bool skip_bmc,
@@ -466,50 +513,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 	 * versions but we keep them here for backward compatibility
 	 */
 	if (rc) {
-		switch (status) {
-		case CPU_BOOT_STATUS_NA:
-			dev_err(hdev->dev,
-				"Device boot error - BTL did NOT run\n");
-			break;
-		case CPU_BOOT_STATUS_IN_WFE:
-			dev_err(hdev->dev,
-				"Device boot error - Stuck inside WFE loop\n");
-			break;
-		case CPU_BOOT_STATUS_IN_BTL:
-			dev_err(hdev->dev,
-				"Device boot error - Stuck in BTL\n");
-			break;
-		case CPU_BOOT_STATUS_IN_PREBOOT:
-			dev_err(hdev->dev,
-				"Device boot error - Stuck in Preboot\n");
-			break;
-		case CPU_BOOT_STATUS_IN_SPL:
-			dev_err(hdev->dev,
-				"Device boot error - Stuck in SPL\n");
-			break;
-		case CPU_BOOT_STATUS_IN_UBOOT:
-			dev_err(hdev->dev,
-				"Device boot error - Stuck in u-boot\n");
-			break;
-		case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
-			dev_err(hdev->dev,
-				"Device boot error - DRAM initialization failed\n");
-			break;
-		case CPU_BOOT_STATUS_UBOOT_NOT_READY:
-			dev_err(hdev->dev,
-				"Device boot error - u-boot stopped by user\n");
-			break;
-		case CPU_BOOT_STATUS_TS_INIT_FAIL:
-			dev_err(hdev->dev,
-				"Device boot error - Thermal Sensor initialization failed\n");
-			break;
-		default:
-			dev_err(hdev->dev,
-				"Device boot error - Invalid status code %d\n",
-				status);
-			break;
-		}
-
+		hl_detect_cpu_boot_status(hdev, status);
 		rc = -EIO;
 		goto out;
 	}
@@ -569,7 +573,8 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 			"Device reports FIT image is corrupted\n");
 	else
 		dev_err(hdev->dev,
-			"Device failed to load, %d\n", status);
+			"Failed to load firmware to device, %d\n",
+			status);
 
 	rc = -EIO;
 	goto out;
drivers/misc/habanalabs/common/habanalabs.h

@@ -8,8 +8,9 @@
 #ifndef HABANALABSP_H_
 #define HABANALABSP_H_
 
-#include "include/armcp_if.h"
-#include "include/qman_if.h"
+#include "include/common/armcp_if.h"
+#include "include/common/qman_if.h"
+#include <uapi/misc/habanalabs.h>
 
 #include <linux/cdev.h>
 #include <linux/iopoll.h>
@@ -40,11 +41,6 @@
 
 #define HL_SIM_MAX_TIMEOUT_US		10000000 /* 10s */
 
-#define HL_MAX_QUEUES			128
-
-/* MUST BE POWER OF 2 and larger than 1 */
-#define HL_MAX_PENDING_CS		64
-
 #define HL_IDLE_BUSY_TS_ARR_SIZE	4096
 
 /* Memory */
@@ -53,6 +49,10 @@
 /* MMU */
 #define MMU_HASH_TABLE_BITS		7 /* 1 << 7 buckets */
 
+/*
+ * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
+ * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
+ */
 #define HL_RSVD_SOBS			4
 #define HL_RSVD_MONS			2
 
@@ -61,6 +61,11 @@
 
 #define HL_MAX_SOB_VAL			(1 << 15)
 
+#define IS_POWER_OF_2(n)		(n != 0 && ((n & (n - 1)) == 0))
+#define IS_MAX_PENDING_CS_VALID(n)	(IS_POWER_OF_2(n) && (n > 1))
+
+#define HL_PCI_NUM_BARS			6
+
 /**
  * struct pgt_info - MMU hop page info.
  * @node: hash linked-list node for the pgts shadow hash of pgts.
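IS_MAX_PENDING_CS_VALID lets each ASIC sanity-check the pending-CS ring size
it selects; since the ring index is computed with a mask, the size must be a
power of two larger than one. A hypothetical use from an ASIC's
property-setup path (the value 64 is an example, not from this merge):

/* Hypothetical per-ASIC setup; 64 is an example value, not from the diff. */
static int set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	prop->max_pending_cs = 64;
	if (!IS_MAX_PENDING_CS_VALID(prop->max_pending_cs))
		return -EINVAL;	/* mask-based ring indexing would break */

	return 0;
}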
@@ -85,6 +90,16 @@ struct pgt_info {
 struct hl_device;
 struct hl_fpriv;
 
+/**
+ * enum hl_pci_match_mode - pci match mode per region
+ * @PCI_ADDRESS_MATCH_MODE: address match mode
+ * @PCI_BAR_MATCH_MODE: bar match mode
+ */
+enum hl_pci_match_mode {
+	PCI_ADDRESS_MATCH_MODE,
+	PCI_BAR_MATCH_MODE
+};
+
 /**
  * enum hl_fw_component - F/W components to read version through registers.
  * @FW_COMP_UBOOT: u-boot.
@@ -120,6 +135,32 @@ enum hl_cs_type {
 	CS_TYPE_WAIT
 };
 
+/*
+ * struct hl_inbound_pci_region - inbound region descriptor
+ * @mode: pci match mode for this region
+ * @addr: region target address
+ * @size: region size in bytes
+ * @offset_in_bar: offset within bar (address match mode)
+ * @bar: bar id
+ */
+struct hl_inbound_pci_region {
+	enum hl_pci_match_mode	mode;
+	u64			addr;
+	u64			size;
+	u64			offset_in_bar;
+	u8			bar;
+};
+
+/*
+ * struct hl_outbound_pci_region - outbound region descriptor
+ * @addr: region target address
+ * @size: region size in bytes
+ */
+struct hl_outbound_pci_region {
+	u64	addr;
+	u64	size;
+};
+
 /*
  * struct hl_hw_sob - H/W SOB info.
  * @hdev: habanalabs device structure.
@@ -141,11 +182,13 @@ struct hl_hw_sob {
  *	false otherwise.
  * @requires_kernel_cb: true if a CB handle must be provided for jobs on this
  *	queue, false otherwise (a CB address must be provided).
+ * @supports_sync_stream: True if queue supports sync stream
  */
 struct hw_queue_properties {
 	enum hl_queue_type	type;
 	u8			driver_only;
 	u8			requires_kernel_cb;
+	u8			supports_sync_stream;
 };
 
 /**
@@ -241,14 +284,19 @@ struct hl_mmu_properties {
  * @psoc_pci_pll_nf: PCI PLL NF value.
 * @psoc_pci_pll_od: PCI PLL OD value.
 * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
+ * @psoc_timestamp_frequency: frequency of the psoc timestamp clock.
 * @high_pll: high PLL frequency used by the device.
 * @cb_pool_cb_cnt: number of CBs in the CB pool.
 * @cb_pool_cb_size: size of each CB in the CB pool.
+ * @max_pending_cs: maximum of concurrent pending command submissions
+ * @max_queues: maximum amount of queues in the system
+ * @sync_stream_first_sob: first sync object available for sync stream use
+ * @sync_stream_first_mon: first monitor available for sync stream use
 * @tpc_enabled_mask: which TPCs are enabled.
 * @completion_queues_count: number of completion queues.
 */
 struct asic_fixed_properties {
-	struct hw_queue_properties	hw_queues_props[HL_MAX_QUEUES];
+	struct hw_queue_properties	*hw_queues_props;
 	struct armcp_info		armcp_info;
 	char				uboot_ver[VERSION_MAX_LEN];
 	char				preboot_ver[VERSION_MAX_LEN];
@@ -282,9 +330,14 @@ struct asic_fixed_properties {
 	u32				psoc_pci_pll_nf;
 	u32				psoc_pci_pll_od;
 	u32				psoc_pci_pll_div_factor;
+	u32				psoc_timestamp_frequency;
 	u32				high_pll;
 	u32				cb_pool_cb_cnt;
 	u32				cb_pool_cb_size;
+	u32				max_pending_cs;
+	u32				max_queues;
+	u16				sync_stream_first_sob;
+	u16				sync_stream_first_mon;
 	u8				tpc_enabled_mask;
 	u8				completion_queues_count;
 };
@@ -339,6 +392,7 @@ struct hl_cb_mgr {
 * @ctx_id: holds the ID of the owner's context.
 * @mmap: true if the CB is currently mmaped to user.
 * @is_pool: true if CB was acquired from the pool, false otherwise.
+ * @is_internal: internaly allocated
 */
 struct hl_cb {
 	struct kref		refcount;
@@ -355,6 +409,7 @@ struct hl_cb {
 	u32			ctx_id;
 	u8			mmap;
 	u8			is_pool;
+	u8			is_internal;
 };
 
 
@@ -364,38 +419,19 @@ struct hl_cb {
 
 struct hl_cs_job;
 
-/*
- * Currently, there are two limitations on the maximum length of a queue:
- *
- * 1. The memory footprint of the queue. The current allocated space for the
- *	queue is PAGE_SIZE. Because each entry in the queue is HL_BD_SIZE,
- *	the maximum length of the queue can be PAGE_SIZE / HL_BD_SIZE,
- *	which currently is 4096/16 = 256 entries.
- *
- *	To increase that, we need either to decrease the size of the
- *	BD (difficult), or allocate more than a single page (easier).
- *
- * 2. Because the size of the JOB handle field in the BD CTL / completion queue
- *	is 10-bit, we can have up to 1024 open jobs per hardware queue.
- *	Therefore, each queue can hold up to 1024 entries.
- *
- * HL_QUEUE_LENGTH is in units of struct hl_bd.
- * HL_QUEUE_LENGTH * sizeof(struct hl_bd) should be <= HL_PAGE_SIZE
- */
-
-#define HL_PAGE_SIZE	4096 /* minimum page size */
-/* Must be power of 2 (HL_PAGE_SIZE / HL_BD_SIZE) */
-#define HL_QUEUE_LENGTH			256
+/* Queue length of external and HW queues */
+#define HL_QUEUE_LENGTH			4096
 #define HL_QUEUE_SIZE_IN_BYTES		(HL_QUEUE_LENGTH * HL_BD_SIZE)
 
-/*
- * HL_CQ_LENGTH is in units of struct hl_cq_entry.
- * HL_CQ_LENGTH should be <= HL_PAGE_SIZE
- */
+#if (HL_MAX_JOBS_PER_CS > HL_QUEUE_LENGTH)
+#error "HL_QUEUE_LENGTH must be greater than HL_MAX_JOBS_PER_CS"
+#endif
+
+/* HL_CQ_LENGTH is in units of struct hl_cq_entry */
 #define HL_CQ_LENGTH			HL_QUEUE_LENGTH
 #define HL_CQ_SIZE_IN_BYTES		(HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)
 
-/* Must be power of 2 (HL_PAGE_SIZE / HL_EQ_ENTRY_SIZE) */
+/* Must be power of 2 */
 #define HL_EQ_LENGTH			64
 #define HL_EQ_SIZE_IN_BYTES		(HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
 
@@ -422,6 +458,7 @@ struct hl_cs_job;
 *		exist).
 * @curr_sob_offset: the id offset to the currently used SOB from the
 *		HL_RSVD_SOBS that are being used by this queue.
+ * @supports_sync_stream: True if queue supports sync stream
 */
 struct hl_hw_queue {
 	struct hl_hw_sob	hw_sob[HL_RSVD_SOBS];
@@ -430,7 +467,7 @@ struct hl_hw_queue {
 	u64			kernel_address;
 	dma_addr_t		bus_address;
 	u32			pi;
-	u32			ci;
+	atomic_t		ci;
 	u32			hw_queue_id;
 	u32			cq_id;
 	u32			msi_vec;
@@ -440,6 +477,7 @@ struct hl_hw_queue {
 	u16			base_mon_id;
 	u8			valid;
 	u8			curr_sob_offset;
+	u8			supports_sync_stream;
 };
 
 /**
@@ -447,6 +485,7 @@ struct hl_hw_queue {
 * @hdev: pointer to the device structure
 * @kernel_address: holds the queue's kernel virtual address
 * @bus_address: holds the queue's DMA address
+ * @cq_idx: completion queue index in array
 * @hw_queue_id: the id of the matching H/W queue
 * @ci: ci inside the queue
 * @pi: pi inside the queue
@@ -456,6 +495,7 @@ struct hl_cq {
 	struct hl_device	*hdev;
 	u64			kernel_address;
 	dma_addr_t		bus_address;
+	u32			cq_idx;
 	u32			hw_queue_id;
 	u32			ci;
 	u32			pi;
@@ -519,6 +559,15 @@ enum hl_pll_frequency {
 	PLL_LAST
 };
 
+#define PLL_REF_CLK		50
+
+enum div_select_defs {
+	DIV_SEL_REF_CLK = 0,
+	DIV_SEL_PLL_CLK = 1,
+	DIV_SEL_DIVIDED_REF = 2,
+	DIV_SEL_DIVIDED_PLL = 3,
+};
+
 /**
 * struct hl_asic_funcs - ASIC specific functions that are can be called from
 *                        common code.
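PLL_REF_CLK and div_select_defs back the changelog item about calculating the
trace frequency from the PLLs: depending on the divider select, the PSoC
timestamp clock is either the 50 MHz reference (possibly divided) or a
frequency derived from the PLL counters. A hedged sketch of that kind of
computation, assuming the conventional relation
pll = ref * (nf + 1) / ((nr + 1) * (od + 1)); the formula is an assumption
here, not copied from this merge:

/* Hedged sketch: derive the timestamp clock (in MHz) from PLL fields. */
static u32 psoc_timestamp_freq_mhz(u32 div_sel, u32 nr, u32 nf, u32 od,
				   u32 div_fctr)
{
	u32 pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));

	switch (div_sel) {
	case DIV_SEL_REF_CLK:
		return PLL_REF_CLK;		/* reference clock passthrough */
	case DIV_SEL_DIVIDED_REF:
		return PLL_REF_CLK / (div_fctr + 1);
	case DIV_SEL_PLL_CLK:
		return pll_clk;
	case DIV_SEL_DIVIDED_PLL:
	default:
		return pll_clk / (div_fctr + 1);
	}
}

The result would be stored in asic_fixed_properties.psoc_timestamp_frequency,
the field added in the hunk above, so profiling tools can convert device
timestamps into wall-clock time.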
@@ -596,14 +645,13 @@
 * @rreg: Read a register. Needed for simulator support.
 * @wreg: Write a register. Needed for simulator support.
 * @halt_coresight: stop the ETF and ETR traces.
+ * @ctx_init: context dependent initialization.
 * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
 * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
 * @read_device_fw_version: read the device's firmware versions that are
 *	contained in registers
 * @load_firmware_to_device: load the firmware to the device's memory
 * @load_boot_fit_to_device: load boot fit to device's memory
- * @ext_queue_init: Initialize the given external queue.
- * @ext_queue_reset: Reset the given external queue.
 * @get_signal_cb_size: Get signal CB size.
 * @get_wait_cb_size: Get wait CB size.
 * @gen_signal_cb: Generate a signal CB.
@@ -700,14 +748,13 @@ struct hl_asic_funcs {
 	u32 (*rreg)(struct hl_device *hdev, u32 reg);
 	void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
 	void (*halt_coresight)(struct hl_device *hdev);
+	int (*ctx_init)(struct hl_ctx *ctx);
 	int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
 	u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
 	void (*read_device_fw_version)(struct hl_device *hdev,
 					enum hl_fw_component fwc);
 	int (*load_firmware_to_device)(struct hl_device *hdev);
 	int (*load_boot_fit_to_device)(struct hl_device *hdev);
-	void (*ext_queue_init)(struct hl_device *hdev, u32 hw_queue_id);
-	void (*ext_queue_reset)(struct hl_device *hdev, u32 hw_queue_id);
 	u32 (*get_signal_cb_size)(struct hl_device *hdev);
 	u32 (*get_wait_cb_size)(struct hl_device *hdev);
 	void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id);
@@ -743,7 +790,6 @@ struct hl_va_range {
 * struct hl_ctx - user/kernel context.
 * @mem_hash: holds mapping from virtual address to virtual memory area
 *	descriptor (hl_vm_phys_pg_list or hl_userptr).
|
* descriptor (hl_vm_phys_pg_list or hl_userptr).
|
||||||
* @mmu_phys_hash: holds a mapping from physical address to pgt_info structure.
|
|
||||||
* @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure.
|
* @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure.
|
||||||
* @hpriv: pointer to the private (Kernel Driver) data of the process (fd).
|
* @hpriv: pointer to the private (Kernel Driver) data of the process (fd).
|
||||||
* @hdev: pointer to the device structure.
|
* @hdev: pointer to the device structure.
|
||||||
@ -777,18 +823,18 @@ struct hl_va_range {
|
|||||||
*/
|
*/
|
||||||
struct hl_ctx {
|
struct hl_ctx {
|
||||||
DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
|
DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
|
||||||
DECLARE_HASHTABLE(mmu_phys_hash, MMU_HASH_TABLE_BITS);
|
|
||||||
DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
|
DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
|
||||||
struct hl_fpriv *hpriv;
|
struct hl_fpriv *hpriv;
|
||||||
struct hl_device *hdev;
|
struct hl_device *hdev;
|
||||||
struct kref refcount;
|
struct kref refcount;
|
||||||
struct dma_fence *cs_pending[HL_MAX_PENDING_CS];
|
struct dma_fence **cs_pending;
|
||||||
struct hl_va_range *host_va_range;
|
struct hl_va_range *host_va_range;
|
||||||
struct hl_va_range *host_huge_va_range;
|
struct hl_va_range *host_huge_va_range;
|
||||||
struct hl_va_range *dram_va_range;
|
struct hl_va_range *dram_va_range;
|
||||||
struct mutex mem_hash_lock;
|
struct mutex mem_hash_lock;
|
||||||
struct mutex mmu_lock;
|
struct mutex mmu_lock;
|
||||||
struct list_head debugfs_list;
|
struct list_head debugfs_list;
|
||||||
|
struct hl_cs_counters cs_counters;
|
||||||
u64 cs_sequence;
|
u64 cs_sequence;
|
||||||
u64 *dram_default_hops;
|
u64 *dram_default_hops;
|
||||||
spinlock_t cs_lock;
|
spinlock_t cs_lock;
|
||||||
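With cs_pending turned from a fixed HL_MAX_PENDING_CS array into a bare pointer, the context-init path has to size it at runtime from the per-ASIC limit. A minimal sketch of that allocation, assuming max_pending_cs lives in asic_prop as the surrounding hunks suggest (hypothetical helper name, not the driver's exact code):

    static int alloc_cs_pending(struct hl_ctx *ctx)
    {
        struct hl_device *hdev = ctx->hdev;

        /* one fence slot per possible in-flight CS for this ASIC type */
        ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
                                  sizeof(struct dma_fence *), GFP_KERNEL);
        return ctx->cs_pending ? 0 : -ENOMEM;
    }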
@@ -863,7 +909,7 @@ struct hl_userptr {
  * @aborted: true if CS was aborted due to some device error.
  */
 struct hl_cs {
-	u16			jobs_in_queue_cnt[HL_MAX_QUEUES];
+	u16			*jobs_in_queue_cnt;
 	struct hl_ctx		*ctx;
 	struct list_head	job_list;
 	spinlock_t		job_lock;
@@ -1347,7 +1393,9 @@ struct hl_device_idle_busy_ts {
 /**
  * struct hl_device - habanalabs device structure.
  * @pdev: pointer to PCI device, can be NULL in case of simulator device.
- * @pcie_bar: array of available PCIe bars.
+ * @pcie_bar_phys: array of available PCIe bars physical addresses.
+ *                 (required only for PCI address match mode)
+ * @pcie_bar: array of available PCIe bars virtual addresses.
  * @rmmio: configuration area address on SRAM.
  * @cdev: related char device.
  * @cdev_ctrl: char device for control operations only (INFO IOCTL)
@@ -1358,7 +1406,8 @@ struct hl_device_idle_busy_ts {
  * @asic_name: ASIC specific nmae.
  * @asic_type: ASIC specific type.
  * @completion_queue: array of hl_cq.
- * @cq_wq: work queue of completion queues for executing work in process context
+ * @cq_wq: work queues of completion queues for executing work in process
+ *         context.
  * @eq_wq: work queue of event queue for executing work in process context.
  * @kernel_ctx: Kernel driver context structure.
  * @kernel_queues: array of hl_hw_queue.
@@ -1387,12 +1436,17 @@ struct hl_device_idle_busy_ts {
  * @hl_debugfs: device's debugfs manager.
  * @cb_pool: list of preallocated CBs.
  * @cb_pool_lock: protects the CB pool.
+ * @internal_cb_pool_virt_addr: internal command buffer pool virtual address.
+ * @internal_cb_pool_dma_addr: internal command buffer pool dma address.
+ * @internal_cb_pool: internal command buffer memory pool.
+ * @internal_cb_va_base: internal cb pool mmu virtual address base
  * @fpriv_list: list of file private data structures. Each structure is created
  *              when a user opens the device
  * @fpriv_list_lock: protects the fpriv_list
  * @compute_ctx: current compute context executing.
  * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy
  *                    and vice-versa
+ * @aggregated_cs_counters: aggregated cs counters among all contexts
  * @dram_used_mem: current DRAM memory consumption.
  * @timeout_jiffies: device CS timeout value.
  * @max_power: the max power of the device, as configured by the sysadmin. This
@@ -1435,12 +1489,14 @@ struct hl_device_idle_busy_ts {
  * @cdev_sysfs_created: were char devices and sysfs nodes created.
  * @stop_on_err: true if engines should stop on error.
  * @supports_sync_stream: is sync stream supported.
+ * @sync_stream_queue_idx: helper index for sync stream queues initialization.
  * @supports_coresight: is CoreSight supported.
  * @supports_soft_reset: is soft reset supported.
  */
 struct hl_device {
 	struct pci_dev		*pdev;
-	void __iomem		*pcie_bar[6];
+	u64			pcie_bar_phys[HL_PCI_NUM_BARS];
+	void __iomem		*pcie_bar[HL_PCI_NUM_BARS];
 	void __iomem		*rmmio;
 	struct cdev		cdev;
 	struct cdev		cdev_ctrl;
@@ -1451,7 +1507,7 @@ struct hl_device {
 	char			asic_name[16];
 	enum hl_asic_type	asic_type;
 	struct hl_cq		*completion_queue;
-	struct workqueue_struct	*cq_wq;
+	struct workqueue_struct	**cq_wq;
 	struct workqueue_struct	*eq_wq;
 	struct hl_ctx		*kernel_ctx;
 	struct hl_hw_queue	*kernel_queues;
@@ -1483,6 +1539,11 @@ struct hl_device {
 	struct list_head	cb_pool;
 	spinlock_t		cb_pool_lock;
 
+	void			*internal_cb_pool_virt_addr;
+	dma_addr_t		internal_cb_pool_dma_addr;
+	struct gen_pool		*internal_cb_pool;
+	u64			internal_cb_va_base;
+
 	struct list_head	fpriv_list;
 	struct mutex		fpriv_list_lock;
 
@@ -1490,6 +1551,8 @@ struct hl_device {
 
 	struct hl_device_idle_busy_ts	*idle_busy_ts_arr;
 
+	struct hl_cs_counters	aggregated_cs_counters;
+
 	atomic64_t		dram_used_mem;
 	u64			timeout_jiffies;
 	u64			max_power;
@@ -1522,6 +1585,7 @@ struct hl_device {
 	u8			cdev_sysfs_created;
 	u8			stop_on_err;
 	u8			supports_sync_stream;
+	u8			sync_stream_queue_idx;
 	u8			supports_coresight;
 	u8			supports_soft_reset;
 
@@ -1690,7 +1754,7 @@ int hl_hwmon_init(struct hl_device *hdev);
 void hl_hwmon_fini(struct hl_device *hdev);
 
 int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
-		u64 *handle, int ctx_id);
+		u64 *handle, int ctx_id, bool internal_cb);
 int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
 int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
 struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
@@ -1698,7 +1762,8 @@ struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
 void hl_cb_put(struct hl_cb *cb);
 void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
 void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
-struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size);
+struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
+					bool internal_cb);
 int hl_cb_pool_init(struct hl_device *hdev);
 int hl_cb_pool_fini(struct hl_device *hdev);
 
@@ -1762,9 +1827,10 @@ int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
 int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
 int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
 			u64 addr);
-int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
-			u64 dram_base_address, u64 host_phys_base_address,
-			u64 host_phys_size);
+int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
+		struct hl_inbound_pci_region *pci_region);
+int hl_pci_set_outbound_region(struct hl_device *hdev,
+		struct hl_outbound_pci_region *pci_region);
 int hl_pci_init(struct hl_device *hdev);
 void hl_pci_fini(struct hl_device *hdev);
 
@@ -238,7 +238,6 @@ static void set_driver_behavior_per_device(struct hl_device *hdev)
 	hdev->axi_drain = 0;
 	hdev->sram_scrambler_enable = 1;
 	hdev->dram_scrambler_enable = 1;
-	hdev->rl_enable = 1;
 	hdev->bmc_enable = 1;
 	hdev->hard_reset_on_fw_events = 1;
 }
@@ -276,6 +276,27 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
 		min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
 }
 
+static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_info_cs_counters cs_counters = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	memcpy(&cs_counters.cs_counters, &hdev->aggregated_cs_counters,
+			sizeof(struct hl_cs_counters));
+
+	if (hpriv->ctx)
+		memcpy(&cs_counters.ctx_cs_counters, &hpriv->ctx->cs_counters,
+			sizeof(struct hl_cs_counters));
+
+	return copy_to_user(out, &cs_counters,
+		min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 				struct device *dev)
 {
@@ -336,6 +357,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_TIME_SYNC:
 		return time_sync_info(hdev, args);
 
+	case HL_INFO_CS_COUNTERS:
+		return cs_counters_info(hpriv, args);
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -ENOTTY;
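For completeness, a userspace sketch of querying the new INFO opcode. The ioctl number, the HL_INFO_CS_COUNTERS opcode and the struct layout come from the uapi header and are assumed here rather than quoted from this diff; only queue_full_drop_cnt is directly grounded in the hunks above:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <misc/habanalabs.h>   /* uapi header; assumed install path */

    int print_drop_counters(int fd)
    {
        struct hl_info_cs_counters counters;
        struct hl_info_args args;

        memset(&counters, 0, sizeof(counters));
        memset(&args, 0, sizeof(args));
        args.op = HL_INFO_CS_COUNTERS;
        args.return_pointer = (__u64) (uintptr_t) &counters;
        args.return_size = sizeof(counters);

        if (ioctl(fd, HL_IOCTL_INFO, &args))
            return -1;

        /* drop counters aggregated across all contexts, per drop reason */
        printf("queue full drops: %llu\n", (unsigned long long)
               counters.cs_counters.queue_full_drop_cnt);
        return 0;
    }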
@@ -23,10 +23,14 @@ inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
 	ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
 	return ptr;
 }
+static inline int queue_ci_get(atomic_t *ci, u32 queue_len)
+{
+	return atomic_read(ci) & ((queue_len << 1) - 1);
+}
 
 static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
 {
-	int delta = (q->pi - q->ci);
+	int delta = (q->pi - queue_ci_get(&q->ci, queue_len));
 
 	if (delta >= 0)
 		return (queue_len - delta);
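The pi/ci counters run over twice the queue length so that a completely full queue (pi - ci == queue_len) stays distinguishable from a completely empty one (pi == ci). A standalone sketch of the same occupancy arithmetic, with a small queue length for readability (the wrap-around branch mirrors the driver's abs(delta) - queue_len):

    #include <assert.h>

    #define QUEUE_LEN 8	/* power of 2; counters wrap at 2 * QUEUE_LEN */

    static int free_slots(int pi, int ci)
    {
        int delta = pi - ci;

        if (delta >= 0)
            return QUEUE_LEN - delta;
        else
            return -delta - QUEUE_LEN;	/* abs(delta) - QUEUE_LEN */
    }

    int main(void)
    {
        assert(free_slots(0, 0) == QUEUE_LEN);	/* empty */
        assert(free_slots(QUEUE_LEN, 0) == 0);	/* full */
        assert(free_slots(1, 15) == 6);		/* pi wrapped, 2 in flight */
        return 0;
    }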
@@ -40,23 +44,16 @@ void hl_int_hw_queue_update_ci(struct hl_cs *cs)
 	struct hl_hw_queue *q;
 	int i;
 
-	hdev->asic_funcs->hw_queues_lock(hdev);
-
 	if (hdev->disabled)
-		goto out;
+		return;
 
 	q = &hdev->kernel_queues[0];
-	for (i = 0 ; i < HL_MAX_QUEUES ; i++, q++) {
-		if (q->queue_type == QUEUE_TYPE_INT) {
-			q->ci += cs->jobs_in_queue_cnt[i];
-			q->ci &= ((q->int_queue_len << 1) - 1);
-		}
+	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
+		if (q->queue_type == QUEUE_TYPE_INT)
+			atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
 	}
-
-out:
-	hdev->asic_funcs->hw_queues_unlock(hdev);
 }
 
 /*
 * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
 *                                H/W queue.
@@ -161,6 +158,13 @@ static int int_queue_sanity_checks(struct hl_device *hdev,
 {
 	int free_slots_cnt;
 
+	if (num_of_entries > q->int_queue_len) {
+		dev_err(hdev->dev,
+			"Cannot populate queue %u with %u jobs\n",
+			q->hw_queue_id, num_of_entries);
+		return -ENOMEM;
+	}
+
 	/* Check we have enough space in the queue */
 	free_slots_cnt = queue_free_slots(q, q->int_queue_len);
 
@@ -174,38 +178,26 @@ static int int_queue_sanity_checks(struct hl_device *hdev,
 }
 
 /*
- * hw_queue_sanity_checks() - Perform some sanity checks on a H/W queue.
+ * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
 * @hdev: Pointer to hl_device structure.
 * @q: Pointer to hl_hw_queue structure.
 * @num_of_entries: How many entries to check for space.
 *
- * Perform the following:
- * - Make sure we have enough space in the completion queue.
- *   This check also ensures that there is enough space in the h/w queue, as
- *   both queues are of the same size.
- * - Reserve space in the completion queue (needs to be reversed if there
- *   is a failure down the road before the actual submission of work).
+ * Notice: We do not reserve queue entries so this function mustn't be called
+ *         more than once per CS for the same queue
 *
- * Both operations are done using the "free_slots_cnt" field of the completion
- * queue. The CI counters of the queue and the completion queue are not
- * needed/used for the H/W queue type.
 */
 static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
					int num_of_entries)
 {
-	atomic_t *free_slots =
-			&hdev->completion_queue[q->cq_id].free_slots_cnt;
+	int free_slots_cnt;
 
-	/*
-	 * Check we have enough space in the completion queue.
-	 * Add -1 to counter (decrement) unless counter was already 0.
-	 * In that case, CQ is full so we can't submit a new CB.
-	 * atomic_add_unless will return 0 if counter was already 0.
-	 */
-	if (atomic_add_negative(num_of_entries * -1, free_slots)) {
-		dev_dbg(hdev->dev, "No space for %d entries on CQ %d\n",
-			num_of_entries, q->hw_queue_id);
-		atomic_add(num_of_entries, free_slots);
+	/* Check we have enough space in the queue */
+	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
+
+	if (free_slots_cnt < num_of_entries) {
+		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
+			q->hw_queue_id, num_of_entries);
 		return -EAGAIN;
 	}
 
@@ -366,7 +358,6 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
 {
 	struct hl_device *hdev = job->cs->ctx->hdev;
 	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
-	struct hl_cq *cq;
 	u64 ptr;
 	u32 offset, ctl, len;
 
@@ -376,7 +367,7 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
 	 * write address offset in the SM block (QMAN LBW message).
 	 * The write address offset is calculated as "COMP_OFFSET << 2".
 	 */
-	offset = job->cs->sequence & (HL_MAX_PENDING_CS - 1);
+	offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
 	ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
 		((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);
 
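The & (max_pending_cs - 1) only works because each per-ASIC max_pending_cs value is required to be a power of two (see the IS_MAX_PENDING_CS_VALID guard later in this series); the AND is then a cheap modulo. A small illustration:

    /* with max_pending_cs = 1024 (power of 2), the mask is 0x3FF and
     * the AND behaves exactly like "sequence % 1024" */
    u64 sequence = 5121;
    u32 offset = sequence & (1024 - 1);	/* 5121 % 1024 == 1 */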
@@ -395,17 +386,6 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
 	else
 		ptr = (u64) (uintptr_t) job->user_cb;
 
-	/*
-	 * No need to protect pi_offset because scheduling to the
-	 * H/W queues is done under the scheduler mutex
-	 *
-	 * No need to check if CQ is full because it was already
-	 * checked in hw_queue_sanity_checks
-	 */
-	cq = &hdev->completion_queue[q->cq_id];
-
-	cq->pi = hl_cq_inc_ptr(cq->pi);
-
 	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
 }
 
@@ -509,19 +489,23 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 	struct hl_device *hdev = ctx->hdev;
 	struct hl_cs_job *job, *tmp;
 	struct hl_hw_queue *q;
+	u32 max_queues;
 	int rc = 0, i, cq_cnt;
 
 	hdev->asic_funcs->hw_queues_lock(hdev);
 
 	if (hl_device_disabled_or_in_reset(hdev)) {
+		ctx->cs_counters.device_in_reset_drop_cnt++;
 		dev_err(hdev->dev,
 			"device is disabled or in reset, CS rejected!\n");
 		rc = -EPERM;
 		goto out;
 	}
 
+	max_queues = hdev->asic_prop.max_queues;
+
 	q = &hdev->kernel_queues[0];
-	for (i = 0, cq_cnt = 0 ; i < HL_MAX_QUEUES ; i++, q++) {
+	for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) {
 		if (cs->jobs_in_queue_cnt[i]) {
 			switch (q->queue_type) {
 			case QUEUE_TYPE_EXT:
@@ -543,11 +527,12 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 				break;
 			}
 
-			if (rc)
+			if (rc) {
+				ctx->cs_counters.queue_full_drop_cnt++;
 				goto unroll_cq_resv;
+			}
 
-			if (q->queue_type == QUEUE_TYPE_EXT ||
-					q->queue_type == QUEUE_TYPE_HW)
+			if (q->queue_type == QUEUE_TYPE_EXT)
 				cq_cnt++;
 		}
 	}
@@ -598,10 +583,9 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 
 unroll_cq_resv:
 	q = &hdev->kernel_queues[0];
-	for (i = 0 ; (i < HL_MAX_QUEUES) && (cq_cnt > 0) ; i++, q++) {
-		if ((q->queue_type == QUEUE_TYPE_EXT ||
-				q->queue_type == QUEUE_TYPE_HW) &&
-				cs->jobs_in_queue_cnt[i]) {
+	for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
+		if ((q->queue_type == QUEUE_TYPE_EXT) &&
+				(cs->jobs_in_queue_cnt[i])) {
 			atomic_t *free_slots =
 				&hdev->completion_queue[i].free_slots_cnt;
 			atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
@@ -625,7 +609,7 @@ void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
 {
 	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
 
-	q->ci = hl_queue_inc_ptr(q->ci);
+	atomic_inc(&q->ci);
 }
 
 static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
@@ -660,12 +644,9 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
 	}
 
 	/* Make sure read/write pointers are initialized to start of queue */
-	q->ci = 0;
+	atomic_set(&q->ci, 0);
 	q->pi = 0;
 
-	if (!is_cpu_queue)
-		hdev->asic_funcs->ext_queue_init(hdev, q->hw_queue_id);
-
 	return 0;
 
 free_queue:
@@ -697,7 +678,7 @@ static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
 
 	q->kernel_address = (u64) (uintptr_t) p;
 	q->pi = 0;
-	q->ci = 0;
+	atomic_set(&q->ci, 0);
 
 	return 0;
 }
@@ -726,12 +707,48 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
 	q->kernel_address = (u64) (uintptr_t) p;
 
 	/* Make sure read/write pointers are initialized to start of queue */
-	q->ci = 0;
+	atomic_set(&q->ci, 0);
 	q->pi = 0;
 
 	return 0;
 }
 
+static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
+{
+	struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_hw_sob *hw_sob;
+	int sob, queue_idx = hdev->sync_stream_queue_idx++;
+
+	hw_queue->base_sob_id =
+		prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS;
+	hw_queue->base_mon_id =
+		prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS;
+	hw_queue->next_sob_val = 1;
+	hw_queue->curr_sob_offset = 0;
+
+	for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
+		hw_sob = &hw_queue->hw_sob[sob];
+		hw_sob->hdev = hdev;
+		hw_sob->sob_id = hw_queue->base_sob_id + sob;
+		hw_sob->q_idx = q_idx;
+		kref_init(&hw_sob->kref);
+	}
+}
+
+static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
+{
+	struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+
+	/*
+	 * In case we got here due to a stuck CS, the refcnt might be bigger
+	 * than 1 and therefore we reset it.
+	 */
+	kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
+	hw_queue->curr_sob_offset = 0;
+	hw_queue->next_sob_val = 1;
+}
+
 /*
 * queue_init - main initialization function for H/W queue object
 *
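Each sync-stream queue claims a disjoint window of sync objects and monitors; queue_idx is a running counter, so the windows never overlap. A quick check of the arithmetic, with illustrative values for the first-SOB/first-monitor properties and the reserved counts (the real numbers are ASIC-specific):

    #include <stdio.h>

    #define HL_RSVD_SOBS 4	/* illustrative; actual values are per-ASIC */
    #define HL_RSVD_MONS 2

    int main(void)
    {
        unsigned int first_sob = 0, first_mon = 200;	/* assumed bases */

        for (unsigned int queue_idx = 0; queue_idx < 3; queue_idx++)
            printf("queue %u: SOBs [%u..%u], monitors [%u..%u]\n",
                   queue_idx,
                   first_sob + queue_idx * HL_RSVD_SOBS,
                   first_sob + (queue_idx + 1) * HL_RSVD_SOBS - 1,
                   first_mon + queue_idx * HL_RSVD_MONS,
                   first_mon + (queue_idx + 1) * HL_RSVD_MONS - 1);
        return 0;
    }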
@@ -747,8 +764,6 @@ static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
 {
 	int rc;
 
-	BUILD_BUG_ON(HL_QUEUE_SIZE_IN_BYTES > HL_PAGE_SIZE);
-
 	q->hw_queue_id = hw_queue_id;
 
 	switch (q->queue_type) {
@@ -774,6 +789,9 @@ static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
 		break;
 	}
 
+	if (q->supports_sync_stream)
+		sync_stream_queue_init(hdev, q->hw_queue_id);
+
 	if (rc)
 		return rc;
 
@@ -835,7 +853,7 @@ int hl_hw_queues_create(struct hl_device *hdev)
 	struct hl_hw_queue *q;
 	int i, rc, q_ready_cnt;
 
-	hdev->kernel_queues = kcalloc(HL_MAX_QUEUES,
+	hdev->kernel_queues = kcalloc(asic->max_queues,
 				sizeof(*hdev->kernel_queues), GFP_KERNEL);
 
 	if (!hdev->kernel_queues) {
@@ -845,9 +863,11 @@ int hl_hw_queues_create(struct hl_device *hdev)
 
 	/* Initialize the H/W queues */
 	for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
-			i < HL_MAX_QUEUES ; i++, q_ready_cnt++, q++) {
+			i < asic->max_queues ; i++, q_ready_cnt++, q++) {
 
 		q->queue_type = asic->hw_queues_props[i].type;
+		q->supports_sync_stream =
+				asic->hw_queues_props[i].supports_sync_stream;
 		rc = queue_init(hdev, q, i);
 		if (rc) {
 			dev_err(hdev->dev,
@@ -870,9 +890,10 @@ release_queues:
 void hl_hw_queues_destroy(struct hl_device *hdev)
 {
 	struct hl_hw_queue *q;
+	u32 max_queues = hdev->asic_prop.max_queues;
 	int i;
 
-	for (i = 0, q = hdev->kernel_queues ; i < HL_MAX_QUEUES ; i++, q++)
+	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++)
 		queue_fini(hdev, q);
 
 	kfree(hdev->kernel_queues);
@@ -881,15 +902,17 @@ void hl_hw_queues_destroy(struct hl_device *hdev)
 void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
 {
 	struct hl_hw_queue *q;
+	u32 max_queues = hdev->asic_prop.max_queues;
 	int i;
 
-	for (i = 0, q = hdev->kernel_queues ; i < HL_MAX_QUEUES ; i++, q++) {
+	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) {
 		if ((!q->valid) ||
 			((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
 			continue;
-		q->pi = q->ci = 0;
+		q->pi = 0;
+		atomic_set(&q->ci, 0);
 
-		if (q->queue_type == QUEUE_TYPE_EXT)
-			hdev->asic_funcs->ext_queue_reset(hdev, q->hw_queue_id);
+		if (q->supports_sync_stream)
+			sync_stream_queue_reset(hdev, q->hw_queue_id);
 	}
 }
@@ -119,15 +119,10 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
 
 	if ((shadow_index_valid) && (!hdev->disabled)) {
 		job = queue->shadow_queue[hl_pi_2_offset(shadow_index)];
-		queue_work(hdev->cq_wq, &job->finish_work);
+		queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work);
 	}
 
-	/* Update ci of the context's queue. There is no
-	 * need to protect it with spinlock because this update is
-	 * done only inside IRQ and there is a different IRQ per
-	 * queue
-	 */
-	queue->ci = hl_queue_inc_ptr(queue->ci);
+	atomic_inc(&queue->ci);
 
 	/* Clear CQ entry ready bit */
 	cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) &
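hdev->cq_wq is now an array, so device init has to create one workqueue per completion queue instead of a single shared one; that is what keeps completions on different CQs from racing inside one worker. A sketch of the allocation side, with a hypothetical helper and queue name (the driver's real init code is not part of this excerpt):

    static int alloc_cq_workqueues(struct hl_device *hdev, u32 cq_cnt)
    {
        char name[32];
        u32 i;

        hdev->cq_wq = kcalloc(cq_cnt, sizeof(*hdev->cq_wq), GFP_KERNEL);
        if (!hdev->cq_wq)
            return -ENOMEM;

        for (i = 0 ; i < cq_cnt ; i++) {
            snprintf(name, sizeof(name), "hl-free-jobs-%u", i);
            /* one single-threaded workqueue per CQ: completions on the
             * same CQ stay ordered, different CQs no longer share one */
            hdev->cq_wq[i] = create_singlethread_workqueue(name);
            if (!hdev->cq_wq[i])
                return -ENOMEM;	/* caller unwinds earlier entries */
        }
        return 0;
    }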
@@ -220,8 +215,6 @@ int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
 {
 	void *p;
 
-	BUILD_BUG_ON(HL_CQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
-
 	p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
 				&q->bus_address, GFP_KERNEL | __GFP_ZERO);
 	if (!p)
@@ -282,8 +275,6 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
 {
 	void *p;
 
-	BUILD_BUG_ON(HL_EQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
-
 	p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
 							HL_EQ_SIZE_IN_BYTES,
 							&q->bus_address);
@@ -1730,8 +1730,7 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 	 */
 	if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
 		dev_notice(hdev->dev,
-			"ctx %d is freed while it has va in use\n",
-			ctx->asid);
+			"user released device without removing its memory mappings\n");
 
 	hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
 		dev_dbg(hdev->dev,
@@ -502,7 +502,6 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx)
 		return 0;
 
 	mutex_init(&ctx->mmu_lock);
-	hash_init(ctx->mmu_phys_hash);
 	hash_init(ctx->mmu_shadow_hash);
 
 	return dram_default_mapping_init(ctx);
@@ -9,9 +9,15 @@
 #include "include/hw_ip/pci/pci_general.h"
 
 #include <linux/pci.h>
+#include <linux/bitfield.h>
 
 #define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC	(HL_PCI_ELBI_TIMEOUT_MSEC * 10)
 
+#define IATU_REGION_CTRL_REGION_EN_MASK		BIT(31)
+#define IATU_REGION_CTRL_MATCH_MODE_MASK	BIT(30)
+#define IATU_REGION_CTRL_NUM_MATCH_EN_MASK	BIT(19)
+#define IATU_REGION_CTRL_BAR_NUM_MASK		GENMASK(10, 8)
+
 /**
 * hl_pci_bars_map() - Map PCI BARs.
 * @hdev: Pointer to hl_device structure.
@@ -187,110 +193,94 @@ static void hl_pci_reset_link_through_bridge(struct hl_device *hdev)
 }
 
 /**
- * hl_pci_set_dram_bar_base() - Set DDR BAR to map specific device address.
+ * hl_pci_set_inbound_region() - Configure inbound region
 * @hdev: Pointer to hl_device structure.
- * @inbound_region: Inbound region number.
- * @bar: PCI BAR number.
- * @addr: Address in DRAM. Must be aligned to DRAM bar size.
+ * @region: Inbound region number.
+ * @pci_region: Inbound region parameters.
 *
- * Configure the iATU so that the DRAM bar will start at the specified address.
+ * Configure the iATU inbound region.
 *
 * Return: 0 on success, negative value for failure.
 */
-int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
-			u64 addr)
+int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
+		struct hl_inbound_pci_region *pci_region)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	u32 offset;
-	int rc;
+	u64 bar_phys_base, region_base, region_end_address;
+	u32 offset, ctrl_reg_val;
+	int rc = 0;
 
-	switch (inbound_region) {
-	case 0:
-		offset = 0x100;
-		break;
-	case 1:
-		offset = 0x300;
-		break;
-	case 2:
-		offset = 0x500;
-		break;
-	default:
-		dev_err(hdev->dev, "Invalid inbound region %d\n",
-			inbound_region);
-		return -EINVAL;
-	}
+	/* region offset */
+	offset = (0x200 * region) + 0x100;
 
-	if (bar != 0 && bar != 2 && bar != 4) {
-		dev_err(hdev->dev, "Invalid PCI BAR %d\n", bar);
-		return -EINVAL;
+	if (pci_region->mode == PCI_ADDRESS_MATCH_MODE) {
+		bar_phys_base = hdev->pcie_bar_phys[pci_region->bar];
+		region_base = bar_phys_base + pci_region->offset_in_bar;
+		region_end_address = region_base + pci_region->size - 1;
+
+		rc |= hl_pci_iatu_write(hdev, offset + 0x8,
+				lower_32_bits(region_base));
+		rc |= hl_pci_iatu_write(hdev, offset + 0xC,
+				upper_32_bits(region_base));
+		rc |= hl_pci_iatu_write(hdev, offset + 0x10,
+				lower_32_bits(region_end_address));
 	}
 
 	/* Point to the specified address */
-	rc = hl_pci_iatu_write(hdev, offset + 0x14, lower_32_bits(addr));
-	rc |= hl_pci_iatu_write(hdev, offset + 0x18, upper_32_bits(addr));
+	rc |= hl_pci_iatu_write(hdev, offset + 0x14,
+			lower_32_bits(pci_region->addr));
+	rc |= hl_pci_iatu_write(hdev, offset + 0x18,
+			upper_32_bits(pci_region->addr));
 	rc |= hl_pci_iatu_write(hdev, offset + 0x0, 0);
-	/* Enable + BAR match + match enable + BAR number */
-	rc |= hl_pci_iatu_write(hdev, offset + 0x4, 0xC0080000 | (bar << 8));
+
+	/* Enable + bar/address match + match enable + bar number */
+	ctrl_reg_val = FIELD_PREP(IATU_REGION_CTRL_REGION_EN_MASK, 1);
+	ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_MATCH_MODE_MASK,
+			pci_region->mode);
+	ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_NUM_MATCH_EN_MASK, 1);
+
+	if (pci_region->mode == PCI_BAR_MATCH_MODE)
+		ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_BAR_NUM_MASK,
+				pci_region->bar);
+
+	rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val);
 
 	/* Return the DBI window to the default location */
 	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
 	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
 
 	if (rc)
-		dev_err(hdev->dev, "failed to map DRAM bar to 0x%08llx\n",
-			addr);
+		dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n",
+			pci_region->bar, pci_region->addr);
 
 	return rc;
 }
 
 /**
- * hl_pci_init_iatu() - Initialize the iATU unit inside the PCI controller.
+ * hl_pci_set_outbound_region() - Configure outbound region 0
 * @hdev: Pointer to hl_device structure.
- * @sram_base_address: SRAM base address.
- * @dram_base_address: DRAM base address.
- * @host_phys_base_address: Base physical address of host memory for device
- *                          transactions.
- * @host_phys_size: Size of host memory for device transactions.
+ * @pci_region: Outbound region parameters.
 *
- * This is needed in case the firmware doesn't initialize the iATU.
+ * Configure the iATU outbound region 0.
 *
 * Return: 0 on success, negative value for failure.
 */
-int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
-			u64 dram_base_address, u64 host_phys_base_address,
-			u64 host_phys_size)
+int hl_pci_set_outbound_region(struct hl_device *hdev,
+		struct hl_outbound_pci_region *pci_region)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	u64 host_phys_end_addr;
+	u64 outbound_region_end_address;
 	int rc = 0;
 
-	/* Inbound Region 0 - Bar 0 - Point to SRAM base address */
-	rc = hl_pci_iatu_write(hdev, 0x114, lower_32_bits(sram_base_address));
-	rc |= hl_pci_iatu_write(hdev, 0x118, upper_32_bits(sram_base_address));
-	rc |= hl_pci_iatu_write(hdev, 0x100, 0);
-	/* Enable + Bar match + match enable */
-	rc |= hl_pci_iatu_write(hdev, 0x104, 0xC0080000);
-
-	/* Return the DBI window to the default location */
-	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
-	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
-
-	hdev->asic_funcs->set_dma_mask_from_fw(hdev);
-
-	/* Point to DRAM */
-	if (!hdev->asic_funcs->set_dram_bar_base)
-		return -EINVAL;
-	if (hdev->asic_funcs->set_dram_bar_base(hdev, dram_base_address) ==
-								U64_MAX)
-		return -EIO;
-
-	/* Outbound Region 0 - Point to Host */
-	host_phys_end_addr = host_phys_base_address + host_phys_size - 1;
+	/* Outbound Region 0 */
+	outbound_region_end_address =
+			pci_region->addr + pci_region->size - 1;
 	rc |= hl_pci_iatu_write(hdev, 0x008,
-				lower_32_bits(host_phys_base_address));
+				lower_32_bits(pci_region->addr));
 	rc |= hl_pci_iatu_write(hdev, 0x00C,
-				upper_32_bits(host_phys_base_address));
-	rc |= hl_pci_iatu_write(hdev, 0x010, lower_32_bits(host_phys_end_addr));
+				upper_32_bits(pci_region->addr));
+	rc |= hl_pci_iatu_write(hdev, 0x010,
+				lower_32_bits(outbound_region_end_address));
 	rc |= hl_pci_iatu_write(hdev, 0x014, 0);
 
 	if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64))
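Two sanity checks on the refactoring above, as a standalone sketch (the BIT/GENMASK/FIELD_PREP stand-ins are mimicked so it compiles outside the kernel, and PCI_BAR_MATCH_MODE is assumed to encode as 1, which is what the old constant's bit 30 implies). First, (0x200 * region) + 0x100 reproduces the old switch table; second, the FIELD_PREP composition reproduces the old magic constant 0xC0080000 | (bar << 8):

    #include <assert.h>
    #include <stdint.h>

    /* userspace stand-ins for the kernel macros used in the hunk above */
    #define BIT(n)                (1u << (n))
    #define GENMASK(h, l)         (((~0u) << (l)) & (~0u >> (31 - (h))))
    #define FIELD_PREP(mask, val) \
            (((uint32_t)(val) << __builtin_ctz(mask)) & (mask))

    int main(void)
    {
        uint32_t old_offsets[] = { 0x100, 0x300, 0x500 };
        uint32_t bar = 4, region, ctrl;

        for (region = 0; region < 3; region++)
            assert((0x200 * region) + 0x100 == old_offsets[region]);

        ctrl = FIELD_PREP(BIT(31), 1) |            /* region enable */
               FIELD_PREP(BIT(30), 1) |            /* BAR match mode */
               FIELD_PREP(BIT(19), 1) |            /* match enable */
               FIELD_PREP(GENMASK(10, 8), bar);    /* BAR number */
        assert(ctrl == (0xC0080000u | (bar << 8)));
        return 0;
    }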
@@ -298,7 +288,8 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
 	else
 		rc |= hl_pci_iatu_write(hdev, 0x018, 0);
 
-	rc |= hl_pci_iatu_write(hdev, 0x020, upper_32_bits(host_phys_end_addr));
+	rc |= hl_pci_iatu_write(hdev, 0x020,
+			upper_32_bits(outbound_region_end_address));
 	/* Increase region size */
 	rc |= hl_pci_iatu_write(hdev, 0x000, 0x00002000);
 	/* Enable */
@@ -308,10 +299,7 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
 	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
 	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
 
-	if (rc)
-		return -EIO;
-
-	return 0;
+	return rc;
 }
 
 /**
@@ -334,6 +334,9 @@ static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
 	char *data;
 	int rc;
 
+	if (hl_device_disabled_or_in_reset(hdev))
+		return -ENODEV;
+
 	if (!max_size)
 		return -EINVAL;
 
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-subdir-ccflags-y += -I$(src)
+subdir-ccflags-y += -I$(src)/common
 
 HL_GAUDI_FILES := gaudi/gaudi.o gaudi/gaudi_hwmgr.o gaudi/gaudi_security.o \
 		gaudi/gaudi_coresight.o

[diff of one file suppressed by the web viewer: too large to display]

@@ -10,7 +10,7 @@
 
 #include <uapi/misc/habanalabs.h>
 #include "habanalabs.h"
-#include "include/hl_boot_if.h"
+#include "include/common/hl_boot_if.h"
 #include "include/gaudi/gaudi_packets.h"
 #include "include/gaudi/gaudi.h"
 #include "include/gaudi/gaudi_async_events.h"
@@ -57,6 +57,12 @@
 
 #define GAUDI_DEFAULT_CARD_NAME		"HL2000"
 
+#define GAUDI_MAX_PENDING_CS		1024
+
+#if !IS_MAX_PENDING_CS_VALID(GAUDI_MAX_PENDING_CS)
+#error "GAUDI_MAX_PENDING_CS must be power of 2 and greater than 1"
+#endif
+
 #define PCI_DMA_NUMBER_OF_CHNLS		3
 #define HBM_DMA_NUMBER_OF_CHNLS		5
 #define DMA_NUMBER_OF_CHNLS		(PCI_DMA_NUMBER_OF_CHNLS + \
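IS_MAX_PENDING_CS_VALID is defined in the common header, outside this excerpt; a plausible shape for it, reconstructed purely from the #error text above (power of 2 and greater than 1), is:

    /* assumed definition, not quoted from the diff */
    #define IS_POW2(n)                 (((n) & ((n) - 1)) == 0)
    #define IS_MAX_PENDING_CS_VALID(n) (IS_POW2(n) && ((n) > 1))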
@@ -117,14 +123,14 @@
 
 /* Internal QMANs PQ sizes */
 
-#define MME_QMAN_LENGTH			64
+#define MME_QMAN_LENGTH			1024
 #define MME_QMAN_SIZE_IN_BYTES		(MME_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)
 
-#define HBM_DMA_QMAN_LENGTH		64
+#define HBM_DMA_QMAN_LENGTH		1024
 #define HBM_DMA_QMAN_SIZE_IN_BYTES	\
 				(HBM_DMA_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)
 
-#define TPC_QMAN_LENGTH			64
+#define TPC_QMAN_LENGTH			1024
 #define TPC_QMAN_SIZE_IN_BYTES		(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)
 
 #define SRAM_USER_BASE_OFFSET	GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START
@@ -228,7 +234,6 @@ struct gaudi_internal_qman_info {
 *           engine.
 * @multi_msi_mode: whether we are working in multi MSI single MSI mode.
 *                  Multi MSI is possible only with IOMMU enabled.
- * @ext_queue_idx: helper index for external queues initialization.
 * @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an
 *                    8-bit value so use u8.
 */
@@ -249,7 +254,6 @@ struct gaudi_device {
 	u32			events_stat_aggregate[GAUDI_EVENT_SIZE];
 	u32			hw_cap_initialized;
 	u8			multi_msi_mode;
-	u8			ext_queue_idx;
 	u8			mmu_cache_inv_pi;
 };
 
@@ -392,6 +392,7 @@ static int gaudi_config_stm(struct hl_device *hdev,
 {
 	struct hl_debug_params_stm *input;
 	u64 base_reg;
+	u32 frequency;
 	int rc;
 
 	if (params->reg_idx >= ARRAY_SIZE(debug_stm_regs)) {
@@ -420,7 +421,10 @@ static int gaudi_config_stm(struct hl_device *hdev,
 	WREG32(base_reg + 0xE00, lower_32_bits(input->sp_mask));
 	WREG32(base_reg + 0xEF4, input->id);
 	WREG32(base_reg + 0xDF4, 0x80);
-	WREG32(base_reg + 0xE8C, input->frequency);
+	frequency = hdev->asic_prop.psoc_timestamp_frequency;
+	if (frequency == 0)
+		frequency = input->frequency;
+	WREG32(base_reg + 0xE8C, frequency);
 	WREG32(base_reg + 0xE90, 0x7FF);
 
 	/* SW-2176 - SW WA for HW bug */
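The frequency the driver now prefers comes from the PSOC PLL registers read in goya_fetch_psoc_frequency below. The usual relation for this style of PLL, assuming the NR/NF/OD fields carry minus-one encodings and PLL_REF_CLK is the 50 MHz value defined earlier in this series, is f_out = f_ref * (NF + 1) / ((NR + 1) * (OD + 1)), optionally divided again when a divided output is selected. A sketch:

    #include <stdio.h>

    #define PLL_REF_CLK 50	/* MHz, from the common header */

    /* assumed encoding: the hardware fields store (value - 1) */
    static unsigned int pll_freq_mhz(unsigned int nr, unsigned int nf,
                                     unsigned int od, unsigned int div_fctr,
                                     int divided)
    {
        unsigned int pll_clk =
                PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));

        return divided ? pll_clk / (div_fctr + 1) : pll_clk;
    }

    int main(void)
    {
        /* e.g. NR=0, NF=31, OD=1 -> 50 * 32 / (1 * 2) = 800 MHz */
        printf("%u MHz\n", pll_freq_mhz(0, 31, 1, 0, 0));
        return 0;
    }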
@ -337,11 +337,19 @@ static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
|
|||||||
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
|
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
|
||||||
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
|
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
|
||||||
|
|
||||||
void goya_get_fixed_properties(struct hl_device *hdev)
|
int goya_get_fixed_properties(struct hl_device *hdev)
|
||||||
{
|
{
|
||||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
prop->max_queues = GOYA_QUEUE_ID_SIZE;
|
||||||
|
prop->hw_queues_props = kcalloc(prop->max_queues,
|
||||||
|
sizeof(struct hw_queue_properties),
|
||||||
|
GFP_KERNEL);
|
||||||
|
|
||||||
|
if (!prop->hw_queues_props)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
|
for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
|
||||||
prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
|
prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
|
||||||
prop->hw_queues_props[i].driver_only = 0;
|
prop->hw_queues_props[i].driver_only = 0;
|
||||||
@ -361,9 +369,6 @@ void goya_get_fixed_properties(struct hl_device *hdev)
|
|||||||
prop->hw_queues_props[i].requires_kernel_cb = 0;
|
prop->hw_queues_props[i].requires_kernel_cb = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (; i < HL_MAX_QUEUES; i++)
|
|
||||||
prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
|
|
||||||
|
|
||||||
prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
|
prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
|
||||||
|
|
||||||
prop->dram_base_address = DRAM_PHYS_BASE;
|
prop->dram_base_address = DRAM_PHYS_BASE;
|
||||||
@ -426,6 +431,10 @@ void goya_get_fixed_properties(struct hl_device *hdev)
|
|||||||
|
|
||||||
strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
|
strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
|
||||||
CARD_NAME_MAX_LEN);
|
CARD_NAME_MAX_LEN);
|
||||||
|
|
||||||
|
prop->max_pending_cs = GOYA_MAX_PENDING_CS;
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -456,6 +465,7 @@ static int goya_pci_bars_map(struct hl_device *hdev)
 static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
 {
 	struct goya_device *goya = hdev->asic_specific;
+	struct hl_inbound_pci_region pci_region;
 	u64 old_addr = addr;
 	int rc;
 
@@ -463,7 +473,10 @@ static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
 		return old_addr;
 
 	/* Inbound Region 1 - Bar 4 - Point to DDR */
-	rc = hl_pci_set_dram_bar_base(hdev, 1, 4, addr);
+	pci_region.mode = PCI_BAR_MATCH_MODE;
+	pci_region.bar = DDR_BAR_ID;
+	pci_region.addr = addr;
+	rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
 	if (rc)
 		return U64_MAX;
 
@@ -485,8 +498,35 @@ static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
  */
 static int goya_init_iatu(struct hl_device *hdev)
 {
-	return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
-			HOST_PHYS_BASE, HOST_PHYS_SIZE);
+	struct hl_inbound_pci_region inbound_region;
+	struct hl_outbound_pci_region outbound_region;
+	int rc;
+
+	/* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
+	inbound_region.mode = PCI_BAR_MATCH_MODE;
+	inbound_region.bar = SRAM_CFG_BAR_ID;
+	inbound_region.addr = SRAM_BASE_ADDR;
+	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
+	if (rc)
+		goto done;
+
+	/* Inbound Region 1 - Bar 4 - Point to DDR */
+	inbound_region.mode = PCI_BAR_MATCH_MODE;
+	inbound_region.bar = DDR_BAR_ID;
+	inbound_region.addr = DRAM_PHYS_BASE;
+	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
+	if (rc)
+		goto done;
+
+	hdev->asic_funcs->set_dma_mask_from_fw(hdev);
+
+	/* Outbound Region 0 - Point to Host */
+	outbound_region.addr = HOST_PHYS_BASE;
+	outbound_region.size = HOST_PHYS_SIZE;
+	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
+
+done:
+	return rc;
 }
 
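For orientation, the two hunks above are part of the PCIe controller refactor: instead of one monolithic hl_pci_init_iatu() call, each iATU region is now described by a small descriptor and programmed individually. Below is a sketch of what those descriptors carry, inferred purely from the fields used in this diff; the real definitions live in the common PCI code and may hold more fields (e.g. a size or an offset for an address-match mode).

/* Sketch only - field subset inferred from the hunks above. */
struct hl_inbound_pci_region {
	u32 mode;	/* PCI_BAR_MATCH_MODE: region claims a whole BAR */
	u32 bar;	/* which BAR (SRAM_CFG_BAR_ID, DDR_BAR_ID, ...) */
	u64 addr;	/* device address the BAR window points at */
};

struct hl_outbound_pci_region {
	u64 addr;	/* host physical base the device may access */
	u64 size;	/* size of the outbound window */
};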
 /*
@@ -507,7 +547,11 @@ static int goya_early_init(struct hl_device *hdev)
 	u32 val;
 	int rc;
 
-	goya_get_fixed_properties(hdev);
+	rc = goya_get_fixed_properties(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to get fixed properties\n");
+		return rc;
+	}
 
 	/* Check BAR sizes */
 	if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
@@ -517,7 +561,8 @@ static int goya_early_init(struct hl_device *hdev)
 			(unsigned long long) pci_resource_len(pdev,
 							SRAM_CFG_BAR_ID),
 			CFG_BAR_SIZE);
-		return -ENODEV;
+		rc = -ENODEV;
+		goto free_queue_props;
 	}
 
 	if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
@@ -527,14 +572,15 @@ static int goya_early_init(struct hl_device *hdev)
 			(unsigned long long) pci_resource_len(pdev,
 							MSIX_BAR_ID),
 			MSIX_BAR_SIZE);
-		return -ENODEV;
+		rc = -ENODEV;
+		goto free_queue_props;
 	}
 
 	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
 
 	rc = hl_pci_init(hdev);
 	if (rc)
-		return rc;
+		goto free_queue_props;
 
 	if (!hdev->pldm) {
 		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
@@ -544,6 +590,10 @@ static int goya_early_init(struct hl_device *hdev)
 	}
 
 	return 0;
+
+free_queue_props:
+	kfree(hdev->asic_prop.hw_queues_props);
+	return rc;
 }
 
 /*
@@ -556,6 +606,7 @@ static int goya_early_init(struct hl_device *hdev)
  */
 static int goya_early_fini(struct hl_device *hdev)
 {
+	kfree(hdev->asic_prop.hw_queues_props);
 	hl_pci_fini(hdev);
 
 	return 0;
@@ -592,11 +643,36 @@ static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
 static void goya_fetch_psoc_frequency(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u32 trace_freq = 0;
+	u32 pll_clk = 0;
+	u32 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
+	u32 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
+	u32 nr = RREG32(mmPSOC_PCI_PLL_NR);
+	u32 nf = RREG32(mmPSOC_PCI_PLL_NF);
+	u32 od = RREG32(mmPSOC_PCI_PLL_OD);
 
-	prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
-	prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
-	prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
-	prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
+	if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
+		if (div_sel == DIV_SEL_REF_CLK)
+			trace_freq = PLL_REF_CLK;
+		else
+			trace_freq = PLL_REF_CLK / (div_fctr + 1);
+	} else if (div_sel == DIV_SEL_PLL_CLK ||
+			div_sel == DIV_SEL_DIVIDED_PLL) {
+		pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
+		if (div_sel == DIV_SEL_PLL_CLK)
+			trace_freq = pll_clk;
+		else
+			trace_freq = pll_clk / (div_fctr + 1);
+	} else {
+		dev_warn(hdev->dev,
+			"Received invalid div select value: %d", div_sel);
+	}
+
+	prop->psoc_timestamp_frequency = trace_freq;
+	prop->psoc_pci_pll_nr = nr;
+	prop->psoc_pci_pll_nf = nf;
+	prop->psoc_pci_pll_od = od;
+	prop->psoc_pci_pll_div_factor = div_fctr;
 }
 
 int goya_late_init(struct hl_device *hdev)
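The PLL output above follows the usual integer-PLL relation, pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1)), optionally divided again by (div_fctr + 1). A worked example with illustrative register values (PLL_REF_CLK's actual value is not shown in this diff; 50 MHz is assumed here):

/* Illustrative only - register values are hypothetical:
 *   nf = 39, nr = 0, od = 3, div_fctr = 4, PLL_REF_CLK = 50 (MHz)
 *   pll_clk = 50 * (39 + 1) / ((0 + 1) * (3 + 1)) = 500 MHz
 *   DIV_SEL_PLL_CLK     -> trace_freq = 500 MHz
 *   DIV_SEL_DIVIDED_PLL -> trace_freq = 500 / (4 + 1) = 100 MHz
 */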
@@ -2164,29 +2240,15 @@ static void goya_disable_timestamp(struct hl_device *hdev)
 
 static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
 {
-	u32 wait_timeout_ms, cpu_timeout_ms;
+	u32 wait_timeout_ms;
 
 	dev_info(hdev->dev,
 		"Halting compute engines and disabling interrupts\n");
 
-	if (hdev->pldm) {
+	if (hdev->pldm)
 		wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
-		cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
-	} else {
+	else
 		wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
-		cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
-	}
-
-	if (hard_reset) {
-		/*
-		 * I don't know what is the state of the CPU so make sure it is
-		 * stopped in any means necessary
-		 */
-		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
-		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
-			GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
-		msleep(cpu_timeout_ms);
-	}
 
 	goya_stop_external_queues(hdev);
 	goya_stop_internal_queues(hdev);
@@ -2491,14 +2553,26 @@ disable_queues:
 static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
 {
 	struct goya_device *goya = hdev->asic_specific;
-	u32 reset_timeout_ms, status;
+	u32 reset_timeout_ms, cpu_timeout_ms, status;
 
-	if (hdev->pldm)
+	if (hdev->pldm) {
 		reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
-	else
+		cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
+	} else {
 		reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
+		cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
+	}
 
 	if (hard_reset) {
+		/* I don't know what is the state of the CPU so make sure it is
+		 * stopped in any means necessary
+		 */
+		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
+		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
+			GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
+
+		msleep(cpu_timeout_ms);
+
 		goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
 		goya_disable_clk_rlx(hdev);
 		goya_set_pll_refclk(hdev);
@@ -3697,9 +3771,8 @@ static int goya_parse_cb_mmu(struct hl_device *hdev,
 	parser->patched_cb_size = parser->user_cb_size +
 			sizeof(struct packet_msg_prot) * 2;
 
-	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
-				parser->patched_cb_size,
-				&patched_cb_handle, HL_KERNEL_ASID_ID);
+	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
+			&patched_cb_handle, HL_KERNEL_ASID_ID, false);
 
 	if (rc) {
 		dev_err(hdev->dev,
@@ -3771,9 +3844,8 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
 	if (rc)
 		goto free_userptr;
 
-	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
-				parser->patched_cb_size,
-				&patched_cb_handle, HL_KERNEL_ASID_ID);
+	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
+			&patched_cb_handle, HL_KERNEL_ASID_ID, false);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to allocate patched CB for DMA CS %d\n", rc);
@@ -4619,7 +4691,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
 	lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
 	cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
 					sizeof(struct packet_msg_prot);
-	cb = hl_cb_kernel_create(hdev, cb_size);
+	cb = hl_cb_kernel_create(hdev, cb_size, false);
 	if (!cb)
 		return -ENOMEM;
 
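All three call sites above gain a trailing boolean. Judging from the internal-CB-pool work in this series, the flag presumably selects whether the command buffer is carved from the driver's internal pool; passing false keeps the previous host-allocated behavior. The assumed shape of the updated API (only the added bool is certain from this diff; the parameter name and exact types are guesses):

/* Assumed signature after this change - sketch, not the driver's text. */
int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
		u32 cb_size, u64 *handle, int ctx_id, bool internal_cb);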
@@ -5149,21 +5221,16 @@ static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
 	return RREG32(mmHW_STATE);
 }
 
+int goya_ctx_init(struct hl_ctx *ctx)
+{
+	return 0;
+}
+
 u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
 {
 	return cq_idx;
 }
 
-static void goya_ext_queue_init(struct hl_device *hdev, u32 q_idx)
-{
-
-}
-
-static void goya_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
-{
-
-}
-
 static u32 goya_get_signal_cb_size(struct hl_device *hdev)
 {
 	return 0;
@@ -5272,13 +5339,12 @@ static const struct hl_asic_funcs goya_funcs = {
 	.rreg = hl_rreg,
 	.wreg = hl_wreg,
 	.halt_coresight = goya_halt_coresight,
+	.ctx_init = goya_ctx_init,
 	.get_clk_rate = goya_get_clk_rate,
 	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
 	.read_device_fw_version = goya_read_device_fw_version,
 	.load_firmware_to_device = goya_load_firmware_to_device,
 	.load_boot_fit_to_device = goya_load_boot_fit_to_device,
-	.ext_queue_init = goya_ext_queue_init,
-	.ext_queue_reset = goya_ext_queue_reset,
 	.get_signal_cb_size = goya_get_signal_cb_size,
 	.get_wait_cb_size = goya_get_wait_cb_size,
 	.gen_signal_cb = goya_gen_signal_cb,
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -10,7 +10,7 @@
 
 #include <uapi/misc/habanalabs.h>
 #include "habanalabs.h"
-#include "include/hl_boot_if.h"
+#include "include/common/hl_boot_if.h"
 #include "include/goya/goya_packets.h"
 #include "include/goya/goya.h"
 #include "include/goya/goya_async_events.h"
@@ -31,10 +31,6 @@
  */
 #define NUMBER_OF_INTERRUPTS	(NUMBER_OF_CMPLT_QUEUES + 1)
 
-#if (NUMBER_OF_HW_QUEUES >= HL_MAX_QUEUES)
-#error "Number of H/W queues must be smaller than HL_MAX_QUEUES"
-#endif
-
 #if (NUMBER_OF_INTERRUPTS > GOYA_MSIX_ENTRIES)
 #error "Number of MSIX interrupts must be smaller or equal to GOYA_MSIX_ENTRIES"
 #endif
@@ -57,6 +53,12 @@
 
 #define GOYA_DEFAULT_CARD_NAME		"HL1000"
 
+#define GOYA_MAX_PENDING_CS		64
+
+#if !IS_MAX_PENDING_CS_VALID(GOYA_MAX_PENDING_CS)
+#error "GOYA_MAX_PENDING_CS must be power of 2 and greater than 1"
+#endif
+
 /* DRAM Memory Map */
 
 #define CPU_FW_IMAGE_SIZE	0x10000000	/* 256MB */
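IS_MAX_PENDING_CS_VALID() itself is defined in the common code and is not part of this diff; per the #error text it plausibly reduces to the following (presumably a power of 2 so in-flight CS sequence numbers can wrap with a simple mask):

/* Sketch matching the #error text above: power of 2 and > 1.
 * is_power_of_2() comes from <linux/log2.h>.
 */
#define IS_MAX_PENDING_CS_VALID(n)	(is_power_of_2(n) && (n) > 1)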
@@ -164,7 +166,7 @@ struct goya_device {
 	u8 device_cpu_mmu_mappings_done;
 };
 
-void goya_get_fixed_properties(struct hl_device *hdev);
+int goya_get_fixed_properties(struct hl_device *hdev);
 int goya_mmu_init(struct hl_device *hdev);
 void goya_init_dma_qmans(struct hl_device *hdev);
 void goya_init_mme_qmans(struct hl_device *hdev);
--- a/drivers/misc/habanalabs/goya/goya_coresight.c
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -232,6 +232,7 @@ static int goya_config_stm(struct hl_device *hdev,
 {
 	struct hl_debug_params_stm *input;
 	u64 base_reg;
+	u32 frequency;
 	int rc;
 
 	if (params->reg_idx >= ARRAY_SIZE(debug_stm_regs)) {
@@ -264,7 +265,10 @@ static int goya_config_stm(struct hl_device *hdev,
 		WREG32(base_reg + 0xE20, 0xFFFFFFFF);
 		WREG32(base_reg + 0xEF4, input->id);
 		WREG32(base_reg + 0xDF4, 0x80);
-		WREG32(base_reg + 0xE8C, input->frequency);
+		frequency = hdev->asic_prop.psoc_timestamp_frequency;
+		if (frequency == 0)
+			frequency = input->frequency;
+		WREG32(base_reg + 0xE8C, frequency);
 		WREG32(base_reg + 0xE90, 0x7FF);
 		WREG32(base_reg + 0xE80, 0x27 | (input->id << 16));
 	} else {
--- a/drivers/misc/habanalabs/include/common/armcp_if.h
+++ b/drivers/misc/habanalabs/include/common/armcp_if.h
@@ -19,10 +19,20 @@ struct hl_eq_header {
 	__le32 ctl;
 };
 
+struct hl_eq_ecc_data {
+	__le64 ecc_address;
+	__le64 ecc_syndrom;
+	__u8 memory_wrapper_idx;
+	__u8 pad[7];
+};
+
 struct hl_eq_entry {
 	struct hl_eq_header hdr;
+	union {
+		struct hl_eq_ecc_data ecc_data;
 		__le64 data[7];
+	};
 };
 
 #define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry)
 
@@ -276,6 +286,8 @@ struct armcp_packet {
 		/* For get Armcp info/EEPROM data */
 		__le32 data_max_size;
 	};
+
+	__le32 reserved;
 };
 
 struct armcp_unmask_irq_arr_packet {
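The new union lets the firmware deliver a structured ECC report through the existing event queue without growing the entry. A minimal consumer sketch (the handler name and message are illustrative; only the types and fields come from the hunk above):

/* Sketch: decode the ECC payload carried in an event-queue entry. */
static void demo_handle_ecc_event(struct hl_device *hdev,
				struct hl_eq_entry *entry)
{
	struct hl_eq_ecc_data *ecc = &entry->ecc_data;

	dev_err(hdev->dev,
		"ECC error: address 0x%llx, syndrome 0x%llx, wrapper %u\n",
		le64_to_cpu(ecc->ecc_address),
		le64_to_cpu(ecc->ecc_syndrom),
		ecc->memory_wrapper_idx);
}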
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -44,6 +44,15 @@
  *					The NIC FW loading and initialization
  *					failed. This means NICs are not usable.
  *
+ * CPU_BOOT_ERR0_SECURITY_NOT_RDY	Chip security initialization has been
+ *					started, but is not ready yet - chip
+ *					cannot be accessed.
+ *
+ * CPU_BOOT_ERR0_SECURITY_FAIL		Security related tasks have failed.
+ *					The tasks are security init (root of
+ *					trust), boot authentication (chain of
+ *					trust), data packets authentication.
+ *
  * CPU_BOOT_ERR0_ENABLED		Error registers enabled.
  *					This is a main indication that the
  *					running FW populates the error
@@ -57,6 +66,8 @@
 #define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED		(1 << 4)
 #define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY		(1 << 5)
 #define CPU_BOOT_ERR0_NIC_FW_FAIL		(1 << 6)
+#define CPU_BOOT_ERR0_SECURITY_NOT_RDY		(1 << 7)
+#define CPU_BOOT_ERR0_SECURITY_FAIL		(1 << 8)
 #define CPU_BOOT_ERR0_ENABLED			(1 << 31)
 
 enum cpu_boot_status {
@@ -79,7 +90,10 @@ enum cpu_boot_status {
 	CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */
 	/* Last boot loader progress status, ready to receive commands */
 	CPU_BOOT_STATUS_READY_TO_BOOT = 15,
+	/* Internal Boot finished, ready for boot-fit */
 	CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16,
+	/* Internal Security has been initialized, device can be accessed */
+	CPU_BOOT_STATUS_SECURITY_READY = 17,
 };
 
 enum kmd_msg {
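A boot-status poller could act on the two new error bits along these lines (the function and message text are illustrative; the masks and the meaning of CPU_BOOT_ERR0_ENABLED come from the hunks above):

/* Sketch: err0 is the value read from the boot error register. */
static void demo_report_boot_errors(struct hl_device *hdev, u32 err0)
{
	/* ENABLED indicates the running FW populates the register at all */
	if (!(err0 & CPU_BOOT_ERR0_ENABLED))
		return;

	if (err0 & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
		dev_warn(hdev->dev, "security init started, not ready yet\n");
	if (err0 & CPU_BOOT_ERR0_SECURITY_FAIL)
		dev_err(hdev->dev, "security related tasks failed\n");
}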
--- a/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
+++ b/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
@@ -91,18 +91,16 @@
 
 #include "psoc_pci_pll_regs.h"
 #include "psoc_hbm_pll_regs.h"
+#include "psoc_cpu_pll_regs.h"
 
 #define GAUDI_ECC_MEM_SEL_OFFSET		0xF18
 #define GAUDI_ECC_ADDRESS_OFFSET		0xF1C
 #define GAUDI_ECC_SYNDROME_OFFSET		0xF20
+#define GAUDI_ECC_MEM_INFO_CLR_OFFSET		0xF28
+#define GAUDI_ECC_MEM_INFO_CLR_SERR_MASK	BIT(8)
+#define GAUDI_ECC_MEM_INFO_CLR_DERR_MASK	BIT(9)
 #define GAUDI_ECC_SERR0_OFFSET			0xF30
-#define GAUDI_ECC_SERR1_OFFSET			0xF34
-#define GAUDI_ECC_SERR2_OFFSET			0xF38
-#define GAUDI_ECC_SERR3_OFFSET			0xF3C
 #define GAUDI_ECC_DERR0_OFFSET			0xF40
-#define GAUDI_ECC_DERR1_OFFSET			0xF44
-#define GAUDI_ECC_DERR2_OFFSET			0xF48
-#define GAUDI_ECC_DERR3_OFFSET			0xF4C
 
 #define mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0	0x492000
 #define mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0	0x494000
@@ -294,6 +292,7 @@
 
 #define mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG	0xC02000
 
+#define mmPCIE_AUX_FLR_CTRL			0xC07394
 #define mmPCIE_AUX_DBI				0xC07490
 
 #endif /* ASIC_REG_GAUDI_REGS_H_ */
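The new CLR offset and masks suggest software can acknowledge a captured single- or double-error record inside a block's ECC wrapper after reading it out. A hedged sketch of that write (block_base and the helper are illustrative; only the offset and mask names come from the hunk above):

/* Sketch: acknowledge a captured single-bit (SERR) ECC record. */
static void demo_clear_ecc_serr(struct hl_device *hdev, u32 block_base)
{
	WREG32(block_base + GAUDI_ECC_MEM_INFO_CLR_OFFSET,
		GAUDI_ECC_MEM_INFO_CLR_SERR_MASK);
}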
--- /dev/null
+++ b/drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_cpu_pll_regs.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+/************************************
+ ** This is an auto-generated file **
+ **       DO NOT EDIT BELOW        **
+ ************************************/
+
+#ifndef ASIC_REG_PSOC_CPU_PLL_REGS_H_
+#define ASIC_REG_PSOC_CPU_PLL_REGS_H_
+
+/*
+ *****************************************
+ *   PSOC_CPU_PLL (Prototype: PLL)
+ *****************************************
+ */
+
+#define mmPSOC_CPU_PLL_NR			0xC70100
+
+#define mmPSOC_CPU_PLL_NF			0xC70104
+
+#define mmPSOC_CPU_PLL_OD			0xC70108
+
+#define mmPSOC_CPU_PLL_NB			0xC7010C
+
+#define mmPSOC_CPU_PLL_CFG			0xC70110
+
+#define mmPSOC_CPU_PLL_LOSE_MASK		0xC70120
+
+#define mmPSOC_CPU_PLL_LOCK_INTR		0xC70128
+
+#define mmPSOC_CPU_PLL_LOCK_BYPASS		0xC7012C
+
+#define mmPSOC_CPU_PLL_DATA_CHNG		0xC70130
+
+#define mmPSOC_CPU_PLL_RST			0xC70134
+
+#define mmPSOC_CPU_PLL_SLIP_WD_CNTR		0xC70150
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_0		0xC70200
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_1		0xC70204
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_2		0xC70208
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_3		0xC7020C
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_CMD_0		0xC70220
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_CMD_1		0xC70224
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_CMD_2		0xC70228
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_CMD_3		0xC7022C
+
+#define mmPSOC_CPU_PLL_DIV_SEL_0		0xC70280
+
+#define mmPSOC_CPU_PLL_DIV_SEL_1		0xC70284
+
+#define mmPSOC_CPU_PLL_DIV_SEL_2		0xC70288
+
+#define mmPSOC_CPU_PLL_DIV_SEL_3		0xC7028C
+
+#define mmPSOC_CPU_PLL_DIV_EN_0			0xC702A0
+
+#define mmPSOC_CPU_PLL_DIV_EN_1			0xC702A4
+
+#define mmPSOC_CPU_PLL_DIV_EN_2			0xC702A8
+
+#define mmPSOC_CPU_PLL_DIV_EN_3			0xC702AC
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_BUSY_0	0xC702C0
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_BUSY_1	0xC702C4
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_BUSY_2	0xC702C8
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_BUSY_3	0xC702CC
+
+#define mmPSOC_CPU_PLL_CLK_GATER		0xC70300
+
+#define mmPSOC_CPU_PLL_CLK_RLX_0		0xC70310
+
+#define mmPSOC_CPU_PLL_CLK_RLX_1		0xC70314
+
+#define mmPSOC_CPU_PLL_CLK_RLX_2		0xC70318
+
+#define mmPSOC_CPU_PLL_CLK_RLX_3		0xC7031C
+
+#define mmPSOC_CPU_PLL_REF_CNTR_PERIOD		0xC70400
+
+#define mmPSOC_CPU_PLL_REF_LOW_THRESHOLD	0xC70410
+
+#define mmPSOC_CPU_PLL_REF_HIGH_THRESHOLD	0xC70420
+
+#define mmPSOC_CPU_PLL_PLL_NOT_STABLE		0xC70430
+
+#define mmPSOC_CPU_PLL_FREQ_CALC_EN		0xC70440
+
+#define mmPSOC_CPU_PLL_RLX_BITMAP_CFG		0xC70500
+
+#define mmPSOC_CPU_PLL_RLX_BITMAP_0		0xC70510
+
+#define mmPSOC_CPU_PLL_RLX_BITMAP_1		0xC70514
+
+#define mmPSOC_CPU_PLL_RLX_BITMAP_2		0xC70518
+
+#define mmPSOC_CPU_PLL_RLX_BITMAP_3		0xC7051C
+
+#endif /* ASIC_REG_PSOC_CPU_PLL_REGS_H_ */
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h
@@ -455,4 +455,7 @@ enum axi_id {
 		 QM_ARB_ERR_MSG_EN_CHOISE_WDT_MASK |\
 		 QM_ARB_ERR_MSG_EN_AXI_LBW_ERR_MASK)
 
+#define PCIE_AUX_FLR_CTRL_HW_CTRL_MASK	0x1
+#define PCIE_AUX_FLR_CTRL_INT_MASK_MASK	0x2
+
 #endif /* GAUDI_MASKS_H_ */
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h
@@ -85,7 +85,7 @@ struct packet_msg_long {
 };
 
 #define GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_SHIFT	0
-#define GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK	0x0000EFFF
+#define GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK	0x00007FFF
 
 #define GAUDI_PKT_SHORT_VAL_SOB_MOD_SHIFT	31
 #define GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK	0x80000000
@@ -141,7 +141,7 @@ struct packet_msg_prot {
 #define GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK	0x00FF0000
 
 #define GAUDI_PKT_FENCE_CFG_ID_SHIFT		30
-#define GAUDI_PKT_FENCE_CFG_ID_MASK		0xC000000
+#define GAUDI_PKT_FENCE_CFG_ID_MASK		0xC0000000
 
 #define GAUDI_PKT_FENCE_CTL_PRED_SHIFT		0
 #define GAUDI_PKT_FENCE_CTL_PRED_MASK		0x0000001F
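Both hunks are pure mask corrections: the SOB sync value is a 15-bit field at shift 0, so its mask must be 0x00007FFF (the old 0x0000EFFF missed bit 12 and wrongly claimed bit 15), and the fence ID is a 2-bit field at shift 30, so its mask must be 0xC0000000 (the old 0xC000000 covered bits 27:26, which never intersect a value shifted by 30). A generic helper makes the shift/mask contract visible (the helper is illustrative, not driver code):

/* Sketch: pack a field from its shift/mask pair; correct only when the
 * mask is exactly the field's bits at that shift.
 */
static inline u32 demo_pack(u32 val, u32 shift, u32 mask)
{
	return (val << shift) & mask;
}

/* demo_pack(id, GAUDI_PKT_FENCE_CFG_ID_SHIFT, GAUDI_PKT_FENCE_CFG_ID_MASK)
 * silently dropped both ID bits with the old 0xC000000 mask; the
 * corrected 0xC0000000 keeps them.
 */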
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -263,6 +263,7 @@ enum hl_device_status {
  *                       time the driver was loaded.
  * HL_INFO_TIME_SYNC   - Retrieve the device's time alongside the host's time
  *                       for synchronization.
+ * HL_INFO_CS_COUNTERS - Retrieve command submission counters
  */
 #define HL_INFO_HW_IP_INFO	0
 #define HL_INFO_HW_EVENTS	1
@@ -274,6 +275,7 @@ enum hl_device_status {
 #define HL_INFO_CLK_RATE	8
 #define HL_INFO_RESET_COUNT	9
 #define HL_INFO_TIME_SYNC	10
+#define HL_INFO_CS_COUNTERS	11
 
 #define HL_INFO_VERSION_MAX_LEN	128
 #define HL_INFO_CARD_NAME_MAX_LEN	16
@@ -338,6 +340,25 @@ struct hl_info_time_sync {
 	__u64 host_time;
 };
 
+/**
+ * struct hl_info_cs_counters - command submission counters
+ * @out_of_mem_drop_cnt: dropped due to memory allocation issue
+ * @parsing_drop_cnt: dropped due to error in packet parsing
+ * @queue_full_drop_cnt: dropped due to queue full
+ * @device_in_reset_drop_cnt: dropped due to device in reset
+ */
+struct hl_cs_counters {
+	__u64 out_of_mem_drop_cnt;
+	__u64 parsing_drop_cnt;
+	__u64 queue_full_drop_cnt;
+	__u64 device_in_reset_drop_cnt;
+};
+
+struct hl_info_cs_counters {
+	struct hl_cs_counters cs_counters;
+	struct hl_cs_counters ctx_cs_counters;
+};
+
 struct hl_info_args {
 	/* Location of relevant struct in userspace */
 	__u64 return_pointer;
@@ -530,13 +551,13 @@ union hl_wait_cs_args {
 	struct hl_wait_cs_out out;
 };
 
-/* Opcode to alloc device memory */
+/* Opcode to allocate device memory */
 #define HL_MEM_OP_ALLOC 0
 /* Opcode to free previously allocated device memory */
 #define HL_MEM_OP_FREE 1
-/* Opcode to map host memory */
+/* Opcode to map host and device memory */
 #define HL_MEM_OP_MAP 2
-/* Opcode to unmap previously mapped host memory */
+/* Opcode to unmap previously mapped host and device memory */
 #define HL_MEM_OP_UNMAP 3
 
 /* Memory flags */
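From userspace, the new counters would be fetched through the existing info ioctl. A hedged sketch follows: HL_IOCTL_INFO and the return_pointer/return_size/op fields of struct hl_info_args are the long-standing uAPI entry point (assumed here, not shown in this diff); only HL_INFO_CS_COUNTERS and struct hl_info_cs_counters are new.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* installed uapi header; path may vary */

/* Sketch: query global + per-context CS drop counters on an open
 * habanalabs device file descriptor.
 */
static int demo_query_cs_counters(int fd, struct hl_info_cs_counters *cnt)
{
	struct hl_info_args args;

	memset(&args, 0, sizeof(args));
	args.op = HL_INFO_CS_COUNTERS;
	args.return_pointer = (__u64) (uintptr_t) cnt;
	args.return_size = sizeof(*cnt);

	return ioctl(fd, HL_IOCTL_INFO, &args);
}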