This tag contains the following changes for kernel 5.7:

- MMU code improvements that include:
  - Flushing the MMU TLB cache only once, at the end of the map/unmap
    function, instead of flushing after mapping every page.
  - Adding future ASIC support by splitting the ASIC capability properties
    for mapping host memory into regular and huge pages.
- Add a debugfs interface to write and read 64-bit values from the device's
  memory/registers. Previously the driver only provided an interface for
  32-bit values; the wider access lets the user debug much more quickly. We
  saw a speedup of around 1.5x-1.7x when reading internal memories.
- Support temperature offset via sysfs, as defined in
  https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface
- Display the historical maximum of various sensors.
- Print to the kernel log when clock throttling occurs due to a breach of
  the power or thermal envelope, and print again when clock throttling
  finishes (clock is back to optimal).
- Fix a bug when moving from manual to auto power-management mode.
- Print a message ("unsupported device") to the kernel log in case a GAUDI
  device is recognized.
- Small bug fixes and minor code improvements.

Merge tag 'misc-habanalabs-next-2020-03-24' of
git://people.freedesktop.org/~gabbayo/linux into char-misc-next
* tag 'misc-habanalabs-next-2020-03-24' of git://people.freedesktop.org/~gabbayo/linux:
  habanalabs: fix pm manual->auto in GOYA
  habanalabs: show unsupported message for GAUDI
  habanalabs: add print upon clock change
  habanalabs: update goya firmware register map
  habanalabs: Add missing annotation for goya_hw_queues_unlock()
  habanalabs: Add missing annotation for goya_hw_queues_lock()
  habanalabs: Remove unused parse_cnt variable
  habanalabs: provide historical maximum of various sensors
  habanalabs: modify the return values of hl_read/write routines
  habanalabs: support temperature offset via sysfs
  habanalabs: ratelimit error prints of IRQs
  habanalabs: add debugfs write64/read64
  habanalabs: fix DDR bar address setting
  habanalabs: removing extra ;
  habanalabs: Avoid running restore chunks if no execute chunks
  habanalabs: Modify CS jobs counter to u16
  habanalabs: split the host MMU properties
  habanalabs: use the user CB size as a default job size
  habanalabs: flush only at the end of the map/unmap
commit 9d20328d0b
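The TLB-flush batching described in the first bullet shows up in the diff below as a new flush_pte argument on hl_mmu_map()/hl_mmu_unmap(): callers now pass true only for the last page of a range. A minimal user-space model of that pattern follows; hl_mmu_map() here is a stub that just reports what it was asked to do, not the driver function:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE_4KB 4096u

/* Stub standing in for the driver's hl_mmu_map(): pretend to install one
 * PTE and flush the TLB only when flush_pte is set. */
static int hl_mmu_map(uint64_t va, uint64_t pa, uint32_t size, bool flush_pte)
{
	printf("map va=0x%llx pa=0x%llx size=%u%s\n",
	       (unsigned long long)va, (unsigned long long)pa, size,
	       flush_pte ? " + TLB flush" : "");
	return 0;
}

int main(void)
{
	uint64_t va = 0x1000000, pa = 0x8000000;
	uint32_t i, npages = 8;

	for (i = 0; i < npages; i++) {
		/* Flush once, on the final page, instead of after every page. */
		if (hl_mmu_map(va + (uint64_t)i * PAGE_SIZE_4KB,
			       pa + (uint64_t)i * PAGE_SIZE_4KB,
			       PAGE_SIZE_4KB, (i + 1) == npages))
			return 1;
	}
	return 0;
}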
@@ -43,6 +43,20 @@ Description:    Allows the root user to read or write directly through the
                 If the IOMMU is disabled, it also allows the root user to read
                 or write from the host a device VA of a host mapped memory

+What:           /sys/kernel/debug/habanalabs/hl<n>/data64
+Date:           Jan 2020
+KernelVersion:  5.6
+Contact:        oded.gabbay@gmail.com
+Description:    Allows the root user to read or write 64 bit data directly
+                through the device's PCI bar. Writing to this file generates a
+                write transaction while reading from the file generates a read
+                transaction. This custom interface is needed (instead of using
+                the generic Linux user-space PCI mapping) because the DDR bar
+                is very small compared to the DDR memory and only the driver can
+                move the bar before and after the transaction.
+
+                If the IOMMU is disabled, it also allows the root user to read
+                or write from the host a device VA of a host mapped memory
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/device
 Date:           Jan 2019
 KernelVersion:  5.1
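A hedged sketch of exercising the new node from user space; it assumes the driver's companion "addr" entry (documented in the same debugfs ABI file) selects the device address, and the hl0 index and 0x7ffc8000 address are purely illustrative:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[32];
	ssize_t n;
	int fd;

	/* Select the target device address (assumed companion node). */
	fd = open("/sys/kernel/debug/habanalabs/hl0/addr", O_WRONLY);
	if (fd < 0)
		return 1;
	write(fd, "0x7ffc8000", 10);
	close(fd);

	/* One read here is a single 64-bit transaction through the PCI bar. */
	fd = open("/sys/kernel/debug/habanalabs/hl0/data64", O_RDONLY);
	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("%s", buf);	/* prints e.g. 0x0123456789abcdef */
	}
	close(fd);
	return 0;
}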
@@ -129,6 +129,8 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
 		spin_unlock(&job->user_cb->lock);
 		hl_cb_put(job->user_cb);
 		job->user_cb = NULL;
+	} else if (!rc) {
+		job->job_cb_size = job->user_cb_size;
 	}
 
 	return rc;
@@ -507,7 +509,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 	struct hl_cb *cb;
 	bool int_queues_only = true;
 	u32 size_to_copy;
-	int rc, i, parse_cnt;
+	int rc, i;
 
 	*cs_seq = ULLONG_MAX;
 
@@ -547,7 +549,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 	hl_debugfs_add_cs(cs);
 
 	/* Validate ALL the CS chunks before submitting the CS */
-	for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) {
+	for (i = 0 ; i < num_chunks ; i++) {
 		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
 		enum hl_queue_type queue_type;
 		bool is_kernel_allocated_cb;
@@ -585,10 +587,6 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 		job->cs = cs;
 		job->user_cb = cb;
 		job->user_cb_size = chunk->cb_size;
-		if (is_kernel_allocated_cb)
-			job->job_cb_size = cb->size;
-		else
-			job->job_cb_size = chunk->cb_size;
 		job->hw_queue_id = chunk->queue_index;
 
 		cs->jobs_in_queue_cnt[job->hw_queue_id]++;
@@ -659,8 +657,8 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 	struct hl_device *hdev = hpriv->hdev;
 	union hl_cs_args *args = data;
 	struct hl_ctx *ctx = hpriv->ctx;
-	void __user *chunks;
-	u32 num_chunks;
+	void __user *chunks_execute, *chunks_restore;
+	u32 num_chunks_execute, num_chunks_restore;
 	u64 cs_seq = ULONG_MAX;
 	int rc, do_ctx_switch;
 	bool need_soft_reset = false;
@@ -673,13 +671,25 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 		goto out;
 	}
 
+	chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
+	num_chunks_execute = args->in.num_chunks_execute;
+
+	if (!num_chunks_execute) {
+		dev_err(hdev->dev,
+			"Got execute CS with 0 chunks, context %d\n",
+			ctx->asid);
+		rc = -EINVAL;
+		goto out;
+	}
+
 	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
 
 	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
 		long ret;
 
-		chunks = (void __user *)(uintptr_t)args->in.chunks_restore;
-		num_chunks = args->in.num_chunks_restore;
+		chunks_restore =
+			(void __user *) (uintptr_t) args->in.chunks_restore;
+		num_chunks_restore = args->in.num_chunks_restore;
 
 		mutex_lock(&hpriv->restore_phase_mutex);
 
@@ -707,13 +717,13 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 
 			hdev->asic_funcs->restore_phase_topology(hdev);
 
-			if (num_chunks == 0) {
+			if (!num_chunks_restore) {
 				dev_dbg(hdev->dev,
 					"Need to run restore phase but restore CS is empty\n");
 				rc = 0;
 			} else {
-				rc = _hl_cs_ioctl(hpriv, chunks, num_chunks,
-						&cs_seq);
+				rc = _hl_cs_ioctl(hpriv, chunks_restore,
+						num_chunks_restore, &cs_seq);
 			}
 
 			mutex_unlock(&hpriv->restore_phase_mutex);
@@ -726,7 +736,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 		}
 
 		/* Need to wait for restore completion before execution phase */
-		if (num_chunks > 0) {
+		if (num_chunks_restore) {
 			ret = _hl_cs_wait_ioctl(hdev, ctx,
 					jiffies_to_usecs(hdev->timeout_jiffies),
 					cs_seq);
@@ -754,18 +764,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 		}
 	}
 
-	chunks = (void __user *)(uintptr_t)args->in.chunks_execute;
-	num_chunks = args->in.num_chunks_execute;
-
-	if (num_chunks == 0) {
-		dev_err(hdev->dev,
-			"Got execute CS with 0 chunks, context %d\n",
-			ctx->asid);
-		rc = -EINVAL;
-		goto out;
-	}
-
-	rc = _hl_cs_ioctl(hpriv, chunks, num_chunks, &cs_seq);
+	rc = _hl_cs_ioctl(hpriv, chunks_execute, num_chunks_execute, &cs_seq);
 
 out:
 	if (rc != -EAGAIN) {
@@ -393,9 +393,10 @@ static int mmu_show(struct seq_file *s, void *data)
 	}
 
 	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-					prop->va_space_dram_start_address,
-					prop->va_space_dram_end_address);
+						prop->dmmu.start_addr,
+						prop->dmmu.end_addr);
 
+	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
 	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
 
 	mutex_lock(&ctx->mmu_lock);
@@ -547,12 +548,15 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
 		goto out;
 
 	if (hdev->dram_supports_virtual_memory &&
-		addr >= prop->va_space_dram_start_address &&
-		addr < prop->va_space_dram_end_address)
+		(addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
 		return true;
 
-	if (addr >= prop->va_space_host_start_address &&
-		addr < prop->va_space_host_end_address)
+	if (addr >= prop->pmmu.start_addr &&
+		addr < prop->pmmu.end_addr)
+		return true;
+
+	if (addr >= prop->pmmu_huge.start_addr &&
+		addr < prop->pmmu_huge.end_addr)
 		return true;
 out:
 	return false;
@@ -575,9 +579,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
 	}
 
 	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-					prop->va_space_dram_start_address,
-					prop->va_space_dram_end_address);
+						prop->dmmu.start_addr,
+						prop->dmmu.end_addr);
 
+	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
 	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
 
 	mutex_lock(&ctx->mmu_lock);
@@ -705,6 +710,65 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
 	return count;
 }
 
+static ssize_t hl_data_read64(struct file *f, char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+	struct hl_device *hdev = entry->hdev;
+	char tmp_buf[32];
+	u64 addr = entry->addr;
+	u64 val;
+	ssize_t rc;
+
+	if (*ppos)
+		return 0;
+
+	if (hl_is_device_va(hdev, addr)) {
+		rc = device_va_to_pa(hdev, addr, &addr);
+		if (rc)
+			return rc;
+	}
+
+	rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
+		return rc;
+	}
+
+	sprintf(tmp_buf, "0x%016llx\n", val);
+	return simple_read_from_buffer(buf, count, ppos, tmp_buf,
+			strlen(tmp_buf));
+}
+
+static ssize_t hl_data_write64(struct file *f, const char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+	struct hl_device *hdev = entry->hdev;
+	u64 addr = entry->addr;
+	u64 value;
+	ssize_t rc;
+
+	rc = kstrtoull_from_user(buf, count, 16, &value);
+	if (rc)
+		return rc;
+
+	if (hl_is_device_va(hdev, addr)) {
+		rc = device_va_to_pa(hdev, addr, &addr);
+		if (rc)
+			return rc;
+	}
+
+	rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n",
+			value, addr);
+		return rc;
+	}
+
+	return count;
+}
+
 static ssize_t hl_get_power_state(struct file *f, char __user *buf,
 		size_t count, loff_t *ppos)
 {
@@ -912,6 +976,12 @@ static const struct file_operations hl_data32b_fops = {
 	.write = hl_data_write32
 };
 
+static const struct file_operations hl_data64b_fops = {
+	.owner = THIS_MODULE,
+	.read = hl_data_read64,
+	.write = hl_data_write64
+};
+
 static const struct file_operations hl_i2c_data_fops = {
 	.owner = THIS_MODULE,
 	.read = hl_i2c_data_read,
@@ -1025,6 +1095,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 				dev_entry,
 				&hl_data32b_fops);
 
+	debugfs_create_file("data64",
+				0644,
+				dev_entry->root,
+				dev_entry,
+				&hl_data64b_fops);
+
 	debugfs_create_file("set_power_state",
 				0200,
 				dev_entry->root,
@@ -36,7 +36,7 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
 		status = HL_DEVICE_STATUS_OPERATIONAL;
 
 	return status;
-};
+}
 
 static void hpriv_release(struct kref *ref)
 {
@@ -324,7 +324,11 @@ static u32 goya_all_events[] = {
 	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
 	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
 	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
-	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
+	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
+	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
+	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
+	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
+	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
 };
 
 static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
@@ -393,19 +397,21 @@ void goya_get_fixed_properties(struct hl_device *hdev)
 	prop->dmmu.hop2_mask = HOP2_MASK;
 	prop->dmmu.hop3_mask = HOP3_MASK;
 	prop->dmmu.hop4_mask = HOP4_MASK;
-	prop->dmmu.huge_page_size = PAGE_SIZE_2MB;
-
-	/* No difference between PMMU and DMMU except of page size */
-	memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
-	prop->dmmu.page_size = PAGE_SIZE_2MB;
-	prop->pmmu.page_size = PAGE_SIZE_4KB;
+	prop->dmmu.start_addr = VA_DDR_SPACE_START;
+	prop->dmmu.end_addr = VA_DDR_SPACE_END;
+	prop->dmmu.page_size = PAGE_SIZE_2MB;
+
+	/* shifts and masks are the same in PMMU and DMMU */
+	memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
+	prop->pmmu.start_addr = VA_HOST_SPACE_START;
+	prop->pmmu.end_addr = VA_HOST_SPACE_END;
+	prop->pmmu.page_size = PAGE_SIZE_4KB;
 
-	prop->va_space_host_start_address = VA_HOST_SPACE_START;
-	prop->va_space_host_end_address = VA_HOST_SPACE_END;
-	prop->va_space_dram_start_address = VA_DDR_SPACE_START;
-	prop->va_space_dram_end_address = VA_DDR_SPACE_END;
-	prop->dram_size_for_default_page_mapping =
-					prop->va_space_dram_end_address;
+	/* PMMU and HPMMU are the same except of page size */
+	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
+	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
+
+	prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
 	prop->cfg_size = CFG_SIZE;
 	prop->max_asid = MAX_ASID;
 	prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
@@ -2573,8 +2579,7 @@ static int goya_hw_init(struct hl_device *hdev)
 	 * After CPU initialization is finished, change DDR bar mapping inside
 	 * iATU to point to the start address of the MMU page tables
 	 */
-	if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
-			(MMU_PAGE_TABLES_ADDR &
+	if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
 			~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
 		dev_err(hdev->dev,
 			"failed to map DDR bar to MMU page tables\n");
@@ -3443,12 +3448,13 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
 	/*
 	 * WA for HW-23.
 	 * We can't allow user to read from Host using QMANs other than 1.
+	 * PMMU and HPMMU addresses are equal, check only one of them.
 	 */
 	if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
 		hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
 			le32_to_cpu(user_dma_pkt->tsize),
-			hdev->asic_prop.va_space_host_start_address,
-			hdev->asic_prop.va_space_host_end_address)) {
+			hdev->asic_prop.pmmu.start_addr,
+			hdev->asic_prop.pmmu.end_addr)) {
 		dev_err(hdev->dev,
 			"Can't DMA from host on queue other then 1\n");
 		return -EFAULT;
@@ -4178,6 +4184,96 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
 	return rc;
 }
 
+static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u64 ddr_bar_addr;
+	int rc = 0;
+
+	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
+		u32 val_l = RREG32(addr - CFG_BASE);
+		u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
+
+		*val = (((u64) val_h) << 32) | val_l;
+
+	} else if ((addr >= SRAM_BASE_ADDR) &&
+			(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
+
+		*val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
+				(addr - SRAM_BASE_ADDR));
+
+	} else if ((addr >= DRAM_PHYS_BASE) &&
+		(addr <=
+		 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) {
+
+		u64 bar_base_addr = DRAM_PHYS_BASE +
+				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
+
+		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
+		if (ddr_bar_addr != U64_MAX) {
+			*val = readq(hdev->pcie_bar[DDR_BAR_ID] +
+					(addr - bar_base_addr));
+
+			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
+					ddr_bar_addr);
+		}
+		if (ddr_bar_addr == U64_MAX)
+			rc = -EIO;
+
+	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
+		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
+
+	} else {
+		rc = -EFAULT;
+	}
+
+	return rc;
+}
+
+static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u64 ddr_bar_addr;
+	int rc = 0;
+
+	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
+		WREG32(addr - CFG_BASE, lower_32_bits(val));
+		WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
+
+	} else if ((addr >= SRAM_BASE_ADDR) &&
+			(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
+
+		writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
+					(addr - SRAM_BASE_ADDR));
+
+	} else if ((addr >= DRAM_PHYS_BASE) &&
+		(addr <=
+		 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) {
+
+		u64 bar_base_addr = DRAM_PHYS_BASE +
+				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
+
+		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
+		if (ddr_bar_addr != U64_MAX) {
+			writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
+					(addr - bar_base_addr));
+
+			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
+					ddr_bar_addr);
+		}
+		if (ddr_bar_addr == U64_MAX)
+			rc = -EIO;
+
+	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
+		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
+
+	} else {
+		rc = -EFAULT;
+	}
+
+	return rc;
+}
+
 static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
 {
 	struct goya_device *goya = hdev->asic_specific;
@@ -4297,6 +4393,14 @@ static const char *_goya_get_event_desc(u16 event_type)
 		return "TPC%d_bmon_spmu";
 	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
 		return "DMA_bm_ch%d";
+	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
+		return "POWER_ENV_S";
+	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
+		return "POWER_ENV_E";
+	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
+		return "THERMAL_ENV_S";
+	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
+		return "THERMAL_ENV_E";
 	default:
 		return "N/A";
 	}
@@ -4388,22 +4492,22 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
 static void goya_print_razwi_info(struct hl_device *hdev)
 {
 	if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
-		dev_err(hdev->dev, "Illegal write to LBW\n");
+		dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
 		WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
 	}
 
 	if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
-		dev_err(hdev->dev, "Illegal read from LBW\n");
+		dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
 		WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
 	}
 
 	if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
-		dev_err(hdev->dev, "Illegal write to HBW\n");
+		dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
 		WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
 	}
 
 	if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
-		dev_err(hdev->dev, "Illegal read from HBW\n");
+		dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
 		WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
 	}
 }
@@ -4423,7 +4527,8 @@ static void goya_print_mmu_error_info(struct hl_device *hdev)
 		addr <<= 32;
 		addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);
 
-		dev_err(hdev->dev, "MMU page fault on va 0x%llx\n", addr);
+		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
+					addr);
 
 		WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
 	}
@@ -4435,7 +4540,7 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
 	char desc[20] = "";
 
 	goya_get_event_desc(event_type, desc, sizeof(desc));
-	dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
+	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
 		event_type, desc);
 
 	if (razwi) {
@@ -4526,6 +4631,33 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
 	return rc;
 }
 
+static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
+{
+	switch (event_type) {
+	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
+		dev_info_ratelimited(hdev->dev,
+			"Clock throttling due to power consumption\n");
+		break;
+	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
+		dev_info_ratelimited(hdev->dev,
+			"Power envelop is safe, back to optimal clock\n");
+		break;
+	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
+		dev_info_ratelimited(hdev->dev,
+			"Clock throttling due to overheating\n");
+		break;
+	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
+		dev_info_ratelimited(hdev->dev,
+			"Thermal envelop is safe, back to optimal clock\n");
+		break;
+
+	default:
+		dev_err(hdev->dev, "Received invalid clock change event %d\n",
+			event_type);
+		break;
+	}
+}
+
 void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
 {
 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
@@ -4609,6 +4741,14 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
 		goya_unmask_irq(hdev, event_type);
 		break;
 
+	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
+	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
+	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
+	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
+		goya_print_clk_change_info(hdev, event_type);
+		goya_unmask_irq(hdev, event_type);
+		break;
+
 	default:
 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
 			event_type);
@@ -4776,7 +4916,8 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 
 	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
 		rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
-				prop->dram_base_address + off, PAGE_SIZE_2MB);
+				prop->dram_base_address + off, PAGE_SIZE_2MB,
+				(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
 		if (rc) {
 			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
 				prop->dram_base_address + off);
@@ -4786,7 +4927,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 
 	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
 		rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
-			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);
+			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);
 
 		if (rc) {
 			dev_err(hdev->dev,
@@ -4799,7 +4940,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 			rc = hl_mmu_map(hdev->kernel_ctx,
 				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
 				hdev->cpu_accessible_dma_address + cpu_off,
-				PAGE_SIZE_4KB);
+				PAGE_SIZE_4KB, true);
 			if (rc) {
 				dev_err(hdev->dev,
 					"Map failed for CPU accessible memory\n");
@@ -4825,14 +4966,15 @@ unmap_cpu:
 	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
 		if (hl_mmu_unmap(hdev->kernel_ctx,
 				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
-				PAGE_SIZE_4KB))
+				PAGE_SIZE_4KB, true))
 			dev_warn_ratelimited(hdev->dev,
 				"failed to unmap address 0x%llx\n",
 				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
 unmap:
 	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
 		if (hl_mmu_unmap(hdev->kernel_ctx,
-				prop->dram_base_address + off, PAGE_SIZE_2MB))
+				prop->dram_base_address + off, PAGE_SIZE_2MB,
+				true))
 			dev_warn_ratelimited(hdev->dev,
 				"failed to unmap address 0x%llx\n",
 				prop->dram_base_address + off);
@@ -4857,14 +4999,15 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
 
 	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
 		if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
-				PAGE_SIZE_2MB))
+				PAGE_SIZE_2MB, true))
 			dev_warn(hdev->dev,
 				"Failed to unmap CPU accessible memory\n");
 	} else {
 		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
 			if (hl_mmu_unmap(hdev->kernel_ctx,
 					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
-					PAGE_SIZE_4KB))
+					PAGE_SIZE_4KB,
+					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
 				dev_warn_ratelimited(hdev->dev,
 					"failed to unmap address 0x%llx\n",
 					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
@@ -4872,7 +5015,8 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
 
 	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
 		if (hl_mmu_unmap(hdev->kernel_ctx,
-				prop->dram_base_address + off, PAGE_SIZE_2MB))
+				prop->dram_base_address + off, PAGE_SIZE_2MB,
+				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
 			dev_warn_ratelimited(hdev->dev,
 				"Failed to unmap address 0x%llx\n",
 				prop->dram_base_address + off);
@@ -5113,6 +5257,7 @@ static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
 }
 
 static void goya_hw_queues_lock(struct hl_device *hdev)
+	__acquires(&goya->hw_queues_lock)
 {
 	struct goya_device *goya = hdev->asic_specific;
 
@@ -5120,6 +5265,7 @@ static void goya_hw_queues_lock(struct hl_device *hdev)
 }
 
 static void goya_hw_queues_unlock(struct hl_device *hdev)
+	__releases(&goya->hw_queues_lock)
 {
 	struct goya_device *goya = hdev->asic_specific;
 
@@ -5180,6 +5326,8 @@ static const struct hl_asic_funcs goya_funcs = {
 	.restore_phase_topology = goya_restore_phase_topology,
 	.debugfs_read32 = goya_debugfs_read32,
 	.debugfs_write32 = goya_debugfs_write32,
+	.debugfs_read64 = goya_debugfs_read64,
+	.debugfs_write64 = goya_debugfs_write64,
 	.add_device_attr = goya_add_device_attr,
 	.handle_eqe = goya_handle_eqe,
 	.set_pll_profile = goya_set_pll_profile,
@@ -364,8 +364,8 @@ static int goya_etr_validate_address(struct hl_device *hdev, u64 addr,
 	u64 range_start, range_end;
 
 	if (hdev->mmu_enable) {
-		range_start = prop->va_space_dram_start_address;
-		range_end = prop->va_space_dram_end_address;
+		range_start = prop->dmmu.start_addr;
+		range_end = prop->dmmu.end_addr;
 	} else {
 		range_start = prop->dram_user_base_address;
 		range_end = prop->dram_end_address;
@@ -298,8 +298,8 @@ static ssize_t pm_mng_profile_store(struct device *dev,
 		/* Make sure we are in LOW PLL when changing modes */
 		if (hdev->pm_mng_profile == PM_MANUAL) {
 			hdev->curr_pll_profile = PLL_HIGH;
-			hl_device_set_frequency(hdev, PLL_LOW);
 			hdev->pm_mng_profile = PM_AUTO;
+			hl_device_set_frequency(hdev, PLL_LOW);
 		}
 	} else if (strncmp("manual", buf, strlen("manual")) == 0) {
 		if (hdev->pm_mng_profile == PM_AUTO) {
@@ -132,6 +132,8 @@ enum hl_device_hw_state {
 
 /**
  * struct hl_mmu_properties - ASIC specific MMU address translation properties.
+ * @start_addr: virtual start address of the memory region.
+ * @end_addr: virtual end address of the memory region.
  * @hop0_shift: shift of hop 0 mask.
  * @hop1_shift: shift of hop 1 mask.
  * @hop2_shift: shift of hop 2 mask.
@@ -143,9 +145,10 @@ enum hl_device_hw_state {
 * @hop3_mask: mask to get the PTE address in hop 3.
 * @hop4_mask: mask to get the PTE address in hop 4.
 * @page_size: default page size used to allocate memory.
- * @huge_page_size: page size used to allocate memory with huge pages.
 */
 struct hl_mmu_properties {
+	u64	start_addr;
+	u64	end_addr;
 	u64	hop0_shift;
 	u64	hop1_shift;
 	u64	hop2_shift;
@@ -157,7 +160,6 @@ struct hl_mmu_properties {
 	u64	hop3_mask;
 	u64	hop4_mask;
 	u32	page_size;
-	u32	huge_page_size;
 };
 
 /**
@@ -169,6 +171,8 @@ struct hl_mmu_properties {
 * @preboot_ver: F/W Preboot version.
 * @dmmu: DRAM MMU address translation properties.
 * @pmmu: PCI (host) MMU address translation properties.
+ * @pmmu_huge: PCI (host) MMU address translation properties for memory
+ *             allocated with huge pages.
 * @sram_base_address: SRAM physical start address.
 * @sram_end_address: SRAM physical end address.
 * @sram_user_base_address - SRAM physical start address for user access.
@@ -178,14 +182,6 @@ struct hl_mmu_properties {
 * @dram_size: DRAM total size.
 * @dram_pci_bar_size: size of PCI bar towards DRAM.
 * @max_power_default: max power of the device after reset
- * @va_space_host_start_address: base address of virtual memory range for
- *                               mapping host memory.
- * @va_space_host_end_address: end address of virtual memory range for
- *                             mapping host memory.
- * @va_space_dram_start_address: base address of virtual memory range for
- *                               mapping DRAM memory.
- * @va_space_dram_end_address: end address of virtual memory range for
- *                             mapping DRAM memory.
 * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
 *                                      fault.
 * @pcie_dbi_base_address: Base address of the PCIE_DBI block.
@@ -218,6 +214,7 @@ struct asic_fixed_properties {
 	char				preboot_ver[VERSION_MAX_LEN];
 	struct hl_mmu_properties	dmmu;
 	struct hl_mmu_properties	pmmu;
+	struct hl_mmu_properties	pmmu_huge;
 	u64				sram_base_address;
 	u64				sram_end_address;
 	u64				sram_user_base_address;
@@ -227,10 +224,6 @@ struct asic_fixed_properties {
 	u64				dram_size;
 	u64				dram_pci_bar_size;
 	u64				max_power_default;
-	u64				va_space_host_start_address;
-	u64				va_space_host_end_address;
-	u64				va_space_dram_start_address;
-	u64				va_space_dram_end_address;
 	u64				dram_size_for_default_page_mapping;
 	u64				pcie_dbi_base_address;
 	u64				pcie_aux_dbi_reg_addr;
@@ -431,10 +424,12 @@ struct hl_eq {
 * enum hl_asic_type - supported ASIC types.
 * @ASIC_INVALID: Invalid ASIC type.
 * @ASIC_GOYA: Goya device.
+ * @ASIC_GAUDI: Gaudi device.
 */
 enum hl_asic_type {
 	ASIC_INVALID,
-	ASIC_GOYA
+	ASIC_GOYA,
+	ASIC_GAUDI
 };
 
 struct hl_cs_parser;
@@ -589,6 +584,8 @@ struct hl_asic_funcs {
 	void (*restore_phase_topology)(struct hl_device *hdev);
 	int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val);
 	int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val);
+	int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val);
+	int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val);
 	void (*add_device_attr)(struct hl_device *hdev,
 				struct attribute_group *dev_attr_grp);
 	void (*handle_eqe)(struct hl_device *hdev,
@@ -658,6 +655,8 @@ struct hl_va_range {
 *            this hits 0l. It is incremented on CS and CS_WAIT.
 * @cs_pending: array of DMA fence objects representing pending CS.
 * @host_va_range: holds available virtual addresses for host mappings.
+ * @host_huge_va_range: holds available virtual addresses for host mappings
+ *                      with huge pages.
 * @dram_va_range: holds available virtual addresses for DRAM mappings.
 * @mem_hash_lock: protects the mem_hash.
 * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifing the
@@ -688,8 +687,9 @@ struct hl_ctx {
 	struct hl_device	*hdev;
 	struct kref		refcount;
 	struct dma_fence	*cs_pending[HL_MAX_PENDING_CS];
-	struct hl_va_range	host_va_range;
-	struct hl_va_range	dram_va_range;
+	struct hl_va_range	*host_va_range;
+	struct hl_va_range	*host_huge_va_range;
+	struct hl_va_range	*dram_va_range;
 	struct mutex		mem_hash_lock;
 	struct mutex		mmu_lock;
 	struct list_head	debugfs_list;
@@ -763,7 +763,7 @@ struct hl_userptr {
 * @aborted: true if CS was aborted due to some device error.
 */
 struct hl_cs {
-	u8			jobs_in_queue_cnt[HL_MAX_QUEUES];
+	u16			jobs_in_queue_cnt[HL_MAX_QUEUES];
 	struct hl_ctx		*ctx;
 	struct list_head	job_list;
 	spinlock_t		job_lock;
@@ -1291,6 +1291,8 @@ struct hl_device_idle_busy_ts {
 *                   otherwise.
 * @dram_supports_virtual_memory: is MMU enabled towards DRAM.
 * @dram_default_page_mapping: is DRAM default page mapping enabled.
+ * @pmmu_huge_range: is a different virtual addresses range used for PMMU with
+ *                   huge pages.
 * @init_done: is the initialization of the device done.
 * @mmu_enable: is MMU enabled.
 * @device_cpu_disabled: is the device CPU disabled (due to timeouts)
@@ -1372,6 +1374,7 @@ struct hl_device {
 	u8				reset_on_lockup;
 	u8				dram_supports_virtual_memory;
 	u8				dram_default_page_mapping;
+	u8				pmmu_huge_range;
 	u8				init_done;
 	u8				device_cpu_disabled;
 	u8				dma_mask;
@@ -1573,8 +1576,10 @@ int hl_mmu_init(struct hl_device *hdev);
 void hl_mmu_fini(struct hl_device *hdev);
 int hl_mmu_ctx_init(struct hl_ctx *ctx);
 void hl_mmu_ctx_fini(struct hl_ctx *ctx);
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size);
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size);
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+		u32 page_size, bool flush_pte);
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+		bool flush_pte);
 void hl_mmu_swap_out(struct hl_ctx *ctx);
 void hl_mmu_swap_in(struct hl_ctx *ctx);
 
@@ -1606,11 +1611,18 @@ int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask);
 
 long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
 void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
-long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr);
-long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr);
-long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr);
-long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr);
-long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr);
+int hl_get_temperature(struct hl_device *hdev,
+		       int sensor_index, u32 attr, long *value);
+int hl_set_temperature(struct hl_device *hdev,
+		       int sensor_index, u32 attr, long value);
+int hl_get_voltage(struct hl_device *hdev,
+		   int sensor_index, u32 attr, long *value);
+int hl_get_current(struct hl_device *hdev,
+		   int sensor_index, u32 attr, long *value);
+int hl_get_fan_speed(struct hl_device *hdev,
+		     int sensor_index, u32 attr, long *value);
+int hl_get_pwm_info(struct hl_device *hdev,
+		    int sensor_index, u32 attr, long *value);
 void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
 			long value);
 u64 hl_get_max_power(struct hl_device *hdev);
@@ -40,12 +40,13 @@ MODULE_PARM_DESC(reset_on_lockup,
 
 #define PCI_VENDOR_ID_HABANALABS	0x1da3
 
 #define PCI_IDS_GOYA			0x0001
+#define PCI_IDS_GAUDI			0x1000
 
 static const struct pci_device_id ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, ids);
 
 /*
  * get_asic_type - translate device id to asic type
@@ -63,6 +64,9 @@ static enum hl_asic_type get_asic_type(u16 device)
 	case PCI_IDS_GOYA:
 		asic_type = ASIC_GOYA;
 		break;
+	case PCI_IDS_GAUDI:
+		asic_type = ASIC_GAUDI;
+		break;
 	default:
 		asic_type = ASIC_INVALID;
 		break;
@@ -263,6 +267,11 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 			dev_err(&pdev->dev, "Unsupported ASIC\n");
 			rc = -ENODEV;
 			goto free_hdev;
+		} else if (hdev->asic_type == ASIC_GAUDI) {
+			dev_err(&pdev->dev,
+				"GAUDI is not supported by the current kernel\n");
+			rc = -ENODEV;
+			goto free_hdev;
 		}
 	} else {
 		hdev->asic_type = asic_type;
@@ -113,6 +113,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
 			u32 attr, int channel, long *val)
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
+	int rc;
 
 	if (hl_device_disabled_or_in_reset(hdev))
 		return -ENODEV;
@@ -125,36 +126,40 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
 		case hwmon_temp_crit:
 		case hwmon_temp_max_hyst:
 		case hwmon_temp_crit_hyst:
+		case hwmon_temp_offset:
+		case hwmon_temp_highest:
 			break;
 		default:
 			return -EINVAL;
 		}
 
-		*val = hl_get_temperature(hdev, channel, attr);
+		rc = hl_get_temperature(hdev, channel, attr, val);
 		break;
 	case hwmon_in:
 		switch (attr) {
 		case hwmon_in_input:
 		case hwmon_in_min:
 		case hwmon_in_max:
+		case hwmon_in_highest:
 			break;
 		default:
 			return -EINVAL;
 		}
 
-		*val = hl_get_voltage(hdev, channel, attr);
+		rc = hl_get_voltage(hdev, channel, attr, val);
 		break;
 	case hwmon_curr:
 		switch (attr) {
 		case hwmon_curr_input:
 		case hwmon_curr_min:
 		case hwmon_curr_max:
+		case hwmon_curr_highest:
 			break;
 		default:
 			return -EINVAL;
 		}
 
-		*val = hl_get_current(hdev, channel, attr);
+		rc = hl_get_current(hdev, channel, attr, val);
 		break;
 	case hwmon_fan:
 		switch (attr) {
@@ -165,7 +170,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
 		default:
 			return -EINVAL;
 		}
-		*val = hl_get_fan_speed(hdev, channel, attr);
+		rc = hl_get_fan_speed(hdev, channel, attr, val);
 		break;
 	case hwmon_pwm:
 		switch (attr) {
@@ -175,12 +180,12 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
 		default:
 			return -EINVAL;
 		}
-		*val = hl_get_pwm_info(hdev, channel, attr);
+		rc = hl_get_pwm_info(hdev, channel, attr, val);
 		break;
 	default:
 		return -EINVAL;
 	}
-	return 0;
+	return rc;
 }
 
 static int hl_write(struct device *dev, enum hwmon_sensor_types type,
@@ -192,6 +197,15 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
 		return -ENODEV;
 
 	switch (type) {
+	case hwmon_temp:
+		switch (attr) {
+		case hwmon_temp_offset:
+			break;
+		default:
+			return -EINVAL;
+		}
+		hl_set_temperature(hdev, channel, attr, val);
+		break;
 	case hwmon_pwm:
 		switch (attr) {
 		case hwmon_pwm_input:
@@ -219,7 +233,10 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
 		case hwmon_temp_max_hyst:
 		case hwmon_temp_crit:
 		case hwmon_temp_crit_hyst:
+		case hwmon_temp_highest:
 			return 0444;
+		case hwmon_temp_offset:
+			return 0644;
 		}
 		break;
 	case hwmon_in:
@@ -227,6 +244,7 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
 		case hwmon_in_input:
 		case hwmon_in_min:
 		case hwmon_in_max:
+		case hwmon_in_highest:
 			return 0444;
 		}
 		break;
@@ -235,6 +253,7 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
 		case hwmon_curr_input:
 		case hwmon_curr_min:
 		case hwmon_curr_max:
+		case hwmon_curr_highest:
 			return 0444;
 		}
 		break;
@@ -265,10 +284,10 @@ static const struct hwmon_ops hl_hwmon_ops = {
 	.write = hl_write
 };
 
-long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_temperature(struct hl_device *hdev,
+			int sensor_index, u32 attr, long *value)
 {
 	struct armcp_packet pkt;
-	long result;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
@@ -279,22 +298,47 @@ long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr)
 	pkt.type = __cpu_to_le16(attr);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-			SENSORS_PKT_TIMEOUT, &result);
+			SENSORS_PKT_TIMEOUT, value);
 
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to get temperature from sensor %d, error %d\n",
 			sensor_index, rc);
-		result = 0;
+		*value = 0;
 	}
 
-	return result;
+	return rc;
 }
 
-long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_set_temperature(struct hl_device *hdev,
+			int sensor_index, u32 attr, long value)
+{
+	struct armcp_packet pkt;
+	int rc;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET <<
+				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.sensor_index = __cpu_to_le16(sensor_index);
+	pkt.type = __cpu_to_le16(attr);
+	pkt.value = __cpu_to_le64(value);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+						SENSORS_PKT_TIMEOUT, NULL);
+
+	if (rc)
+		dev_err(hdev->dev,
+			"Failed to set temperature of sensor %d, error %d\n",
+			sensor_index, rc);
+
+	return rc;
+}
+
+int hl_get_voltage(struct hl_device *hdev,
+			int sensor_index, u32 attr, long *value)
 {
 	struct armcp_packet pkt;
-	long result;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
@@ -305,22 +349,22 @@ long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr)
 	pkt.type = __cpu_to_le16(attr);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-			SENSORS_PKT_TIMEOUT, &result);
+			SENSORS_PKT_TIMEOUT, value);
 
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to get voltage from sensor %d, error %d\n",
 			sensor_index, rc);
-		result = 0;
+		*value = 0;
 	}
 
-	return result;
+	return rc;
 }
 
-long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_current(struct hl_device *hdev,
+			int sensor_index, u32 attr, long *value)
 {
 	struct armcp_packet pkt;
-	long result;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
@@ -331,22 +375,22 @@ long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr)
 	pkt.type = __cpu_to_le16(attr);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-			SENSORS_PKT_TIMEOUT, &result);
+			SENSORS_PKT_TIMEOUT, value);
 
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to get current from sensor %d, error %d\n",
 			sensor_index, rc);
-		result = 0;
+		*value = 0;
 	}
 
-	return result;
+	return rc;
 }
 
-long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_fan_speed(struct hl_device *hdev,
+			int sensor_index, u32 attr, long *value)
 {
 	struct armcp_packet pkt;
-	long result;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
@@ -357,22 +401,22 @@ long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr)
 	pkt.type = __cpu_to_le16(attr);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-			SENSORS_PKT_TIMEOUT, &result);
+			SENSORS_PKT_TIMEOUT, value);
 
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to get fan speed from sensor %d, error %d\n",
 			sensor_index, rc);
-		result = 0;
+		*value = 0;
 	}
 
-	return result;
+	return rc;
 }
 
-long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_pwm_info(struct hl_device *hdev,
+			int sensor_index, u32 attr, long *value)
 {
 	struct armcp_packet pkt;
-	long result;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
@@ -383,16 +427,16 @@ long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr)
 	pkt.type = __cpu_to_le16(attr);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-			SENSORS_PKT_TIMEOUT, &result);
+			SENSORS_PKT_TIMEOUT, value);
 
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to get pwm info from sensor %d, error %d\n",
 			sensor_index, rc);
-		result = 0;
+		*value = 0;
 	}
 
-	return result;
+	return rc;
 }
 
 void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
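Since hl_read()/hl_write() now route hwmon_temp_offset, the offset can be driven through the standard hwmon sysfs node. A hedged user-space sketch (the hwmon2 index is illustrative; locate the right hwmonN through its "name" file; units are millidegrees Celsius per the hwmon sysfs-interface document cited in the merge message):

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* temp1_offset is the generic hwmon attribute the driver now
	 * exposes; writing 5000 requests a +5 degC offset. */
	int fd = open("/sys/class/hwmon/hwmon2/temp1_offset", O_WRONLY);

	if (fd < 0)
		return 1;
	write(fd, "5000", 4);
	close(fd);
	return 0;
}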
@@ -189,6 +189,10 @@ enum pq_init_status {
 *       ArmCP to write to the structure, to prevent data corruption in case of
 *       mismatched driver/FW versions.
 *
+ * ARMCP_PACKET_TEMPERATURE_SET -
+ *       Set the value of the offset property of a specified thermal sensor.
+ *       The packet's arguments specify the desired sensor and the field to
+ *       set.
 */

 enum armcp_packet_id {
@@ -214,6 +218,8 @@ enum armcp_packet_id {
 	ARMCP_PACKET_MAX_POWER_GET,		/* sysfs */
 	ARMCP_PACKET_MAX_POWER_SET,		/* sysfs */
 	ARMCP_PACKET_EEPROM_DATA_GET,		/* sysfs */
+	ARMCP_RESERVED,
+	ARMCP_PACKET_TEMPERATURE_SET,		/* sysfs */
 };
 
 #define ARMCP_PACKET_FENCE_VAL	0xFE8CE7A5
@@ -271,24 +277,32 @@ enum armcp_packet_rc {
 	armcp_packet_fault
 };
 
+/*
+ * armcp_temp_type should adhere to hwmon_temp_attributes
+ * defined in Linux kernel hwmon.h file
+ */
 enum armcp_temp_type {
 	armcp_temp_input,
 	armcp_temp_max = 6,
 	armcp_temp_max_hyst,
 	armcp_temp_crit,
-	armcp_temp_crit_hyst
+	armcp_temp_crit_hyst,
+	armcp_temp_offset = 19,
+	armcp_temp_highest = 22
 };
 
 enum armcp_in_attributes {
 	armcp_in_input,
 	armcp_in_min,
-	armcp_in_max
+	armcp_in_max,
+	armcp_in_highest = 7
 };
 
 enum armcp_curr_attributes {
 	armcp_curr_input,
 	armcp_curr_min,
-	armcp_curr_max
+	armcp_curr_max,
+	armcp_curr_highest = 7
 };
 
 enum armcp_fan_attributes {
@@ -188,6 +188,10 @@ enum goya_async_event_id {
 	GOYA_ASYNC_EVENT_ID_HALT_MACHINE = 485,
 	GOYA_ASYNC_EVENT_ID_INTS_REGISTER = 486,
 	GOYA_ASYNC_EVENT_ID_SOFT_RESET = 487,
+	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S = 507,
+	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E = 508,
+	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S = 509,
+	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E = 510,
 	GOYA_ASYNC_EVENT_ID_LAST_VALID_ID = 1023,
 	GOYA_ASYNC_EVENT_ID_SIZE
 };
@@ -11,24 +11,27 @@
 /*
  * PSOC scratch-pad registers
  */
-#define mmCPU_PQ_BASE_ADDR_LOW	mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
-#define mmCPU_PQ_BASE_ADDR_HIGH	mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
-#define mmCPU_EQ_BASE_ADDR_LOW	mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
-#define mmCPU_EQ_BASE_ADDR_HIGH	mmPSOC_GLOBAL_CONF_SCRATCHPAD_3
-#define mmCPU_EQ_LENGTH		mmPSOC_GLOBAL_CONF_SCRATCHPAD_4
-#define mmCPU_PQ_LENGTH		mmPSOC_GLOBAL_CONF_SCRATCHPAD_5
-#define mmCPU_EQ_CI		mmPSOC_GLOBAL_CONF_SCRATCHPAD_6
-#define mmCPU_PQ_INIT_STATUS	mmPSOC_GLOBAL_CONF_SCRATCHPAD_7
-#define mmCPU_CQ_BASE_ADDR_LOW	mmPSOC_GLOBAL_CONF_SCRATCHPAD_8
-#define mmCPU_CQ_BASE_ADDR_HIGH	mmPSOC_GLOBAL_CONF_SCRATCHPAD_9
-#define mmCPU_CQ_LENGTH		mmPSOC_GLOBAL_CONF_SCRATCHPAD_10
-#define mmUPD_STS		mmPSOC_GLOBAL_CONF_SCRATCHPAD_26
-#define mmUPD_CMD		mmPSOC_GLOBAL_CONF_SCRATCHPAD_27
-#define mmPREBOOT_VER_OFFSET	mmPSOC_GLOBAL_CONF_SCRATCHPAD_28
-#define mmUBOOT_VER_OFFSET	mmPSOC_GLOBAL_CONF_SCRATCHPAD_29
-#define mmUBOOT_OFFSET		mmPSOC_GLOBAL_CONF_SCRATCHPAD_30
-#define mmBTL_ID		mmPSOC_GLOBAL_CONF_SCRATCHPAD_31
+#define mmCPU_PQ_BASE_ADDR_LOW			mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
+#define mmCPU_PQ_BASE_ADDR_HIGH			mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
+#define mmCPU_EQ_BASE_ADDR_LOW			mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
+#define mmCPU_EQ_BASE_ADDR_HIGH			mmPSOC_GLOBAL_CONF_SCRATCHPAD_3
+#define mmCPU_EQ_LENGTH				mmPSOC_GLOBAL_CONF_SCRATCHPAD_4
+#define mmCPU_PQ_LENGTH				mmPSOC_GLOBAL_CONF_SCRATCHPAD_5
+#define mmCPU_EQ_CI				mmPSOC_GLOBAL_CONF_SCRATCHPAD_6
+#define mmCPU_PQ_INIT_STATUS			mmPSOC_GLOBAL_CONF_SCRATCHPAD_7
+#define mmCPU_CQ_BASE_ADDR_LOW			mmPSOC_GLOBAL_CONF_SCRATCHPAD_8
+#define mmCPU_CQ_BASE_ADDR_HIGH			mmPSOC_GLOBAL_CONF_SCRATCHPAD_9
+#define mmCPU_CQ_LENGTH				mmPSOC_GLOBAL_CONF_SCRATCHPAD_10
+#define mmCPU_BOOT_ERR0				mmPSOC_GLOBAL_CONF_SCRATCHPAD_24
+#define mmCPU_BOOT_ERR1				mmPSOC_GLOBAL_CONF_SCRATCHPAD_25
+#define mmUPD_STS				mmPSOC_GLOBAL_CONF_SCRATCHPAD_26
+#define mmUPD_CMD				mmPSOC_GLOBAL_CONF_SCRATCHPAD_27
+#define mmPREBOOT_VER_OFFSET			mmPSOC_GLOBAL_CONF_SCRATCHPAD_28
+#define mmUBOOT_VER_OFFSET			mmPSOC_GLOBAL_CONF_SCRATCHPAD_29
+#define mmRDWR_TEST				mmPSOC_GLOBAL_CONF_SCRATCHPAD_30
+#define mmBTL_ID				mmPSOC_GLOBAL_CONF_SCRATCHPAD_31
 
-#define mmHW_STATE		mmPSOC_GLOBAL_CONF_APP_STATUS
+#define mmHW_STATE				mmPSOC_GLOBAL_CONF_APP_STATUS
+#define mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS	mmPSOC_GLOBAL_CONF_WARM_REBOOT
 
 #endif /* GOYA_REG_MAP_H_ */
@@ -8,20 +8,35 @@
 #ifndef HL_BOOT_IF_H
 #define HL_BOOT_IF_H
 
+#define LKD_HARD_RESET_MAGIC		0xED7BD694
+
+/* CPU error bits in BOOT_ERROR registers */
+#define CPU_BOOT_ERR0_DRAM_INIT_FAIL		(1 << 0)
+#define CPU_BOOT_ERR0_FIT_CORRUPTED		(1 << 1)
+#define CPU_BOOT_ERR0_TS_INIT_FAIL		(1 << 2)
+#define CPU_BOOT_ERR0_DRAM_SKIPPED		(1 << 3)
+#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED		(1 << 4)
+#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY		(1 << 5)
+#define CPU_BOOT_ERR0_NIC_FW_FAIL		(1 << 6)
+#define CPU_BOOT_ERR0_ENABLED			(1 << 31)
+
 enum cpu_boot_status {
 	CPU_BOOT_STATUS_NA = 0,		/* Default value after reset of chip */
-	CPU_BOOT_STATUS_IN_WFE,
-	CPU_BOOT_STATUS_DRAM_RDY,
-	CPU_BOOT_STATUS_SRAM_AVAIL,
-	CPU_BOOT_STATUS_IN_BTL,		/* BTL is H/W FSM */
-	CPU_BOOT_STATUS_IN_PREBOOT,
-	CPU_BOOT_STATUS_IN_SPL,
-	CPU_BOOT_STATUS_IN_UBOOT,
-	CPU_BOOT_STATUS_DRAM_INIT_FAIL,
-	CPU_BOOT_STATUS_FIT_CORRUPTED,
-	CPU_BOOT_STATUS_UBOOT_NOT_READY,
-	CPU_BOOT_STATUS_RESERVED,
-	CPU_BOOT_STATUS_TS_INIT_FAIL,
+	CPU_BOOT_STATUS_IN_WFE = 1,
+	CPU_BOOT_STATUS_DRAM_RDY = 2,
+	CPU_BOOT_STATUS_SRAM_AVAIL = 3,
+	CPU_BOOT_STATUS_IN_BTL = 4,	/* BTL is H/W FSM */
+	CPU_BOOT_STATUS_IN_PREBOOT = 5,
+	CPU_BOOT_STATUS_IN_SPL = 6,
+	CPU_BOOT_STATUS_IN_UBOOT = 7,
+	CPU_BOOT_STATUS_DRAM_INIT_FAIL,	/* deprecated - will be removed */
+	CPU_BOOT_STATUS_FIT_CORRUPTED,	/* deprecated - will be removed */
+	CPU_BOOT_STATUS_UBOOT_NOT_READY = 10,
+	CPU_BOOT_STATUS_NIC_FW_RDY = 11,
+	CPU_BOOT_STATUS_TS_INIT_FAIL,	/* deprecated - will be removed */
+	CPU_BOOT_STATUS_DRAM_SKIPPED,	/* deprecated - will be removed */
+	CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */
+	CPU_BOOT_STATUS_READY_TO_BOOT = 15,
 };
 
 enum kmd_msg {
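The new CPU_BOOT_ERR0_* values are plain bit flags, so decoding them is a matter of masking. A small sketch with a made-up register value; reading the ENABLED bit as "the F/W reports errors through this register" is an assumption about its semantics:

#include <stdint.h>
#include <stdio.h>

#define CPU_BOOT_ERR0_DRAM_INIT_FAIL	(1u << 0)
#define CPU_BOOT_ERR0_FIT_CORRUPTED	(1u << 1)
#define CPU_BOOT_ERR0_TS_INIT_FAIL	(1u << 2)
#define CPU_BOOT_ERR0_ENABLED		(1u << 31)

int main(void)
{
	/* Sample value for illustration only. */
	uint32_t err0 = CPU_BOOT_ERR0_ENABLED | CPU_BOOT_ERR0_DRAM_INIT_FAIL;

	if (!(err0 & CPU_BOOT_ERR0_ENABLED))
		return 0;	/* assumed: register not populated by this F/W */
	if (err0 & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
		printf("DRAM initialization failed\n");
	if (err0 & CPU_BOOT_ERR0_FIT_CORRUPTED)
		printf("FIT image corrupted\n");
	if (err0 & CPU_BOOT_ERR0_TS_INIT_FAIL)
		printf("thermal sensor init failed\n");
	return 0;
}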
@ -530,7 +530,7 @@ static u64 get_va_block(struct hl_device *hdev,
|
||||
* or not, hence we continue with the biggest possible
|
||||
* granularity.
|
||||
*/
|
||||
page_size = hdev->asic_prop.pmmu.huge_page_size;
|
||||
page_size = hdev->asic_prop.pmmu_huge.page_size;
|
||||
else
|
||||
page_size = hdev->asic_prop.dmmu.page_size;
|
||||
|
||||
@ -638,13 +638,12 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
|
||||
struct hl_userptr *userptr,
|
||||
struct hl_vm_phys_pg_pack **pphys_pg_pack)
|
||||
{
|
||||
struct hl_mmu_properties *mmu_prop = &ctx->hdev->asic_prop.pmmu;
|
||||
struct hl_vm_phys_pg_pack *phys_pg_pack;
|
||||
struct scatterlist *sg;
|
||||
dma_addr_t dma_addr;
|
||||
u64 page_mask, total_npages;
|
||||
u32 npages, page_size = PAGE_SIZE,
|
||||
huge_page_size = mmu_prop->huge_page_size;
|
||||
huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
|
||||
bool first = true, is_huge_page_opt = true;
|
||||
int rc, i, j;
|
||||
u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
|
||||
@ -747,7 +746,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
|
||||
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
|
||||
paddr = phys_pg_pack->pages[i];
|
||||
|
||||
rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size);
|
||||
rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
|
||||
(i + 1) == phys_pg_pack->npages);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"map failed for handle %u, npages: %llu, mapped: %llu",
|
||||
@@ -765,7 +765,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 err:
        next_vaddr = vaddr;
        for (i = 0 ; i < mapped_pg_cnt ; i++) {
-               if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+               if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+                                       (i + 1) == mapped_pg_cnt))
                        dev_warn_ratelimited(hdev->dev,
                                "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
                                        phys_pg_pack->handle, next_vaddr,
@@ -794,7 +795,8 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
        next_vaddr = vaddr;

        for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
-               if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+               if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+                               (i + 1) == phys_pg_pack->npages))
                        dev_warn_ratelimited(hdev->dev,
                        "unmap failed for vaddr: 0x%llx\n", next_vaddr);
@@ -853,6 +855,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
        struct hl_vm_phys_pg_pack *phys_pg_pack;
        struct hl_userptr *userptr = NULL;
        struct hl_vm_hash_node *hnode;
+       struct hl_va_range *va_range;
        enum vm_type_t *vm_type;
        u64 ret_vaddr, hint_addr;
        u32 handle = 0;
@@ -924,9 +927,16 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
                goto hnode_err;
        }

-       ret_vaddr = get_va_block(hdev,
-                       is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
-                       phys_pg_pack->total_size, hint_addr, is_userptr);
+       if (is_userptr)
+               if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size)
+                       va_range = ctx->host_va_range;
+               else
+                       va_range = ctx->host_huge_va_range;
+       else
+               va_range = ctx->dram_va_range;
+
+       ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
+                                       hint_addr, is_userptr);
        if (!ret_vaddr) {
                dev_err(hdev->dev, "no available va block for handle %u\n",
                                handle);
@@ -965,10 +975,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
        return 0;

 map_err:
-       if (add_va_block(hdev,
-                       is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
-                       ret_vaddr,
-                       ret_vaddr + phys_pg_pack->total_size - 1))
+       if (add_va_block(hdev, va_range, ret_vaddr,
+                               ret_vaddr + phys_pg_pack->total_size - 1))
                dev_warn(hdev->dev,
                        "release va block failed for handle 0x%x, vaddr: 0x%llx\n",
                                handle, ret_vaddr);
@@ -1030,7 +1038,6 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)

        if (*vm_type == VM_TYPE_USERPTR) {
                is_userptr = true;
-               va_range = &ctx->host_va_range;
                userptr = hnode->ptr;
                rc = init_phys_pg_pack_from_userptr(ctx, userptr,
                                                        &phys_pg_pack);
@@ -1040,9 +1047,15 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
                                vaddr);
                        goto vm_type_err;
                }
+
+               if (phys_pg_pack->page_size ==
+                                       hdev->asic_prop.pmmu.page_size)
+                       va_range = ctx->host_va_range;
+               else
+                       va_range = ctx->host_huge_va_range;
        } else if (*vm_type == VM_TYPE_PHYS_PACK) {
                is_userptr = false;
-               va_range = &ctx->dram_va_range;
+               va_range = ctx->dram_va_range;
                phys_pg_pack = hnode->ptr;
        } else {
                dev_warn(hdev->dev,
@@ -1438,19 +1451,18 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
 }

 /*
- * hl_va_range_init - initialize virtual addresses range
- *
- * @hdev : pointer to the habanalabs device structure
- * @va_range : pointer to the range to initialize
- * @start : range start address
- * @end : range end address
+ * va_range_init - initialize virtual addresses range
+ * @hdev: pointer to the habanalabs device structure
+ * @va_range: pointer to the range to initialize
+ * @start: range start address
+ * @end: range end address
  *
  * This function does the following:
  * - Initializes the virtual addresses list of the given range with the given
  *   addresses.
  */
-static int hl_va_range_init(struct hl_device *hdev,
-                       struct hl_va_range *va_range, u64 start, u64 end)
+static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
+                               u64 start, u64 end)
 {
        int rc;
@@ -1485,47 +1497,105 @@ static int hl_va_range_init(struct hl_device *hdev,
 }

 /*
- * hl_vm_ctx_init_with_ranges - initialize virtual memory for context
+ * va_range_fini() - clear a virtual addresses range
+ * @hdev: pointer to the habanalabs structure
+ * va_range: pointer to virtual addresses range
  *
- * @ctx : pointer to the habanalabs context structure
- * @host_range_start : host virtual addresses range start
- * @host_range_end : host virtual addresses range end
- * @dram_range_start : dram virtual addresses range start
- * @dram_range_end : dram virtual addresses range end
+ * This function does the following:
+ * - Frees the virtual addresses block list and its lock
+ */
+static void va_range_fini(struct hl_device *hdev,
+               struct hl_va_range *va_range)
+{
+       mutex_lock(&va_range->lock);
+       clear_va_list_locked(hdev, &va_range->list);
+       mutex_unlock(&va_range->lock);
+
+       mutex_destroy(&va_range->lock);
+       kfree(va_range);
+}
+
+/*
+ * vm_ctx_init_with_ranges() - initialize virtual memory for context
+ * @ctx: pointer to the habanalabs context structure
+ * @host_range_start: host virtual addresses range start.
+ * @host_range_end: host virtual addresses range end.
+ * @host_huge_range_start: host virtual addresses range start for memory
+ *                          allocated with huge pages.
+ * @host_huge_range_end: host virtual addresses range end for memory allocated
+ *                        with huge pages.
+ * @dram_range_start: dram virtual addresses range start.
+ * @dram_range_end: dram virtual addresses range end.
  *
  * This function initializes the following:
  * - MMU for context
  * - Virtual address to area descriptor hashtable
  * - Virtual block list of available virtual memory
  */
-static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
-                               u64 host_range_end, u64 dram_range_start,
-                               u64 dram_range_end)
+static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
+                                       u64 host_range_start,
+                                       u64 host_range_end,
+                                       u64 host_huge_range_start,
+                                       u64 host_huge_range_end,
+                                       u64 dram_range_start,
+                                       u64 dram_range_end)
 {
        struct hl_device *hdev = ctx->hdev;
        int rc;

+       ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL);
+       if (!ctx->host_va_range)
+               return -ENOMEM;
+
+       ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range),
+                                               GFP_KERNEL);
+       if (!ctx->host_huge_va_range) {
+               rc = -ENOMEM;
+               goto host_huge_va_range_err;
+       }
+
+       ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL);
+       if (!ctx->dram_va_range) {
+               rc = -ENOMEM;
+               goto dram_va_range_err;
+       }
+
        rc = hl_mmu_ctx_init(ctx);
        if (rc) {
                dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
-               return rc;
+               goto mmu_ctx_err;
        }

        mutex_init(&ctx->mem_hash_lock);
        hash_init(ctx->mem_hash);

-       mutex_init(&ctx->host_va_range.lock);
+       mutex_init(&ctx->host_va_range->lock);

-       rc = hl_va_range_init(hdev, &ctx->host_va_range, host_range_start,
-                       host_range_end);
+       rc = va_range_init(hdev, ctx->host_va_range, host_range_start,
+                               host_range_end);
        if (rc) {
                dev_err(hdev->dev, "failed to init host vm range\n");
-               goto host_vm_err;
+               goto host_page_range_err;
        }

-       mutex_init(&ctx->dram_va_range.lock);
+       if (hdev->pmmu_huge_range) {
+               mutex_init(&ctx->host_huge_va_range->lock);

-       rc = hl_va_range_init(hdev, &ctx->dram_va_range, dram_range_start,
+               rc = va_range_init(hdev, ctx->host_huge_va_range,
+                                       host_huge_range_start,
+                                       host_huge_range_end);
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "failed to init host huge vm range\n");
+                       goto host_hpage_range_err;
+               }
+       } else {
+               ctx->host_huge_va_range = ctx->host_va_range;
+       }
+
+       mutex_init(&ctx->dram_va_range->lock);
+
+       rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start,
                        dram_range_end);
        if (rc) {
                dev_err(hdev->dev, "failed to init dram vm range\n");
@@ -1537,15 +1607,29 @@ static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
        return 0;

 dram_vm_err:
-       mutex_destroy(&ctx->dram_va_range.lock);
+       mutex_destroy(&ctx->dram_va_range->lock);

-       mutex_lock(&ctx->host_va_range.lock);
-       clear_va_list_locked(hdev, &ctx->host_va_range.list);
-       mutex_unlock(&ctx->host_va_range.lock);
-host_vm_err:
-       mutex_destroy(&ctx->host_va_range.lock);
+       if (hdev->pmmu_huge_range) {
+               mutex_lock(&ctx->host_huge_va_range->lock);
+               clear_va_list_locked(hdev, &ctx->host_huge_va_range->list);
+               mutex_unlock(&ctx->host_huge_va_range->lock);
+       }
+host_hpage_range_err:
+       if (hdev->pmmu_huge_range)
+               mutex_destroy(&ctx->host_huge_va_range->lock);
+       mutex_lock(&ctx->host_va_range->lock);
+       clear_va_list_locked(hdev, &ctx->host_va_range->list);
+       mutex_unlock(&ctx->host_va_range->lock);
+host_page_range_err:
+       mutex_destroy(&ctx->host_va_range->lock);
        mutex_destroy(&ctx->mem_hash_lock);
        hl_mmu_ctx_fini(ctx);
+mmu_ctx_err:
+       kfree(ctx->dram_va_range);
+dram_va_range_err:
+       kfree(ctx->host_huge_va_range);
+host_huge_va_range_err:
+       kfree(ctx->host_va_range);

        return rc;
 }
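The error labels above follow the usual kernel unwind idiom: each acquisition gets a label just past the point where it must start being undone, and every failure jumps to the label that releases exactly what has been acquired so far, in reverse order. A stripped-down sketch of the same shape (struct ctx and hw_init() are placeholders, not driver code):

static int init_two_then_hw(struct ctx *c)
{
        int rc;

        c->a = kzalloc(sizeof(*c->a), GFP_KERNEL);
        if (!c->a)
                return -ENOMEM;

        c->b = kzalloc(sizeof(*c->b), GFP_KERNEL);
        if (!c->b) {
                rc = -ENOMEM;
                goto free_a;
        }

        rc = hw_init(c);        /* placeholder for e.g. hl_mmu_ctx_init() */
        if (rc)
                goto free_b;

        return 0;

free_b:
        kfree(c->b);
free_a:
        kfree(c->a);
        return rc;
}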
@@ -1553,8 +1637,8 @@ host_vm_err:
 int hl_vm_ctx_init(struct hl_ctx *ctx)
 {
        struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
-       u64 host_range_start, host_range_end, dram_range_start,
-               dram_range_end;
+       u64 host_range_start, host_range_end, host_huge_range_start,
+               host_huge_range_end, dram_range_start, dram_range_end;

        atomic64_set(&ctx->dram_phys_mem, 0);
@@ -1566,38 +1650,26 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
         * address of the memory related to the given handle.
         */
        if (ctx->hdev->mmu_enable) {
-               dram_range_start = prop->va_space_dram_start_address;
-               dram_range_end = prop->va_space_dram_end_address;
-               host_range_start = prop->va_space_host_start_address;
-               host_range_end = prop->va_space_host_end_address;
+               dram_range_start = prop->dmmu.start_addr;
+               dram_range_end = prop->dmmu.end_addr;
+               host_range_start = prop->pmmu.start_addr;
+               host_range_end = prop->pmmu.end_addr;
+               host_huge_range_start = prop->pmmu_huge.start_addr;
+               host_huge_range_end = prop->pmmu_huge.end_addr;
        } else {
                dram_range_start = prop->dram_user_base_address;
                dram_range_end = prop->dram_end_address;
                host_range_start = prop->dram_user_base_address;
                host_range_end = prop->dram_end_address;
+               host_huge_range_start = prop->dram_user_base_address;
+               host_huge_range_end = prop->dram_end_address;
        }

-       return hl_vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
-                       dram_range_start, dram_range_end);
-}
-
-/*
- * hl_va_range_fini - clear a virtual addresses range
- *
- * @hdev : pointer to the habanalabs structure
- * va_range : pointer to virtual addresses range
- *
- * This function does the following:
- * - Frees the virtual addresses block list and its lock
- */
-static void hl_va_range_fini(struct hl_device *hdev,
-               struct hl_va_range *va_range)
-{
-       mutex_lock(&va_range->lock);
-       clear_va_list_locked(hdev, &va_range->list);
-       mutex_unlock(&va_range->lock);
-
-       mutex_destroy(&va_range->lock);
+       return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
+                                       host_huge_range_start,
+                                       host_huge_range_end,
+                                       dram_range_start,
+                                       dram_range_end);
 }

 /*
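The new range sources come from the split MMU properties: instead of one host descriptor carrying both a regular and a huge page size, each address space now gets its own self-contained descriptor. A sketch of the shape implied by the fields this diff uses (the real definition in habanalabs.h carries additional hop shifts and masks):

struct hl_mmu_properties {
        u64     start_addr;     /* virtual range start */
        u64     end_addr;       /* virtual range end */
        u32     page_size;      /* default page size of this address space */
};

struct asic_fixed_properties {
        struct hl_mmu_properties        dmmu;           /* DRAM MMU */
        struct hl_mmu_properties        pmmu;           /* host MMU, regular pages */
        struct hl_mmu_properties        pmmu_huge;      /* host MMU, huge pages */
        /* ... */
};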
@@ -1664,8 +1736,10 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
        }
        spin_unlock(&vm->idr_lock);

-       hl_va_range_fini(hdev, &ctx->dram_va_range);
-       hl_va_range_fini(hdev, &ctx->host_va_range);
+       va_range_fini(hdev, ctx->dram_va_range);
+       if (hdev->pmmu_huge_range)
+               va_range_fini(hdev, ctx->host_huge_va_range);
+       va_range_fini(hdev, ctx->host_va_range);

        mutex_destroy(&ctx->mem_hash_lock);
        hl_mmu_ctx_fini(ctx);
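Note why teardown is guarded by hdev->pmmu_huge_range: when an ASIC has no dedicated huge-page host range, vm_ctx_init_with_ranges() aliases host_huge_va_range to host_va_range, so an unconditional second va_range_fini() would free the same object twice. The invariant, stated as an illustrative check rather than actual driver code:

if (!hdev->pmmu_huge_range)
        WARN_ON(ctx->host_huge_va_range != ctx->host_va_range);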
@@ -254,6 +254,15 @@ static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
        return phys_hop_addr + pte_offset;
 }

+static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
+{
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+       return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+                                       prop->dmmu.start_addr,
+                                       prop->dmmu.end_addr);
+}
+
 static int dram_default_mapping_init(struct hl_ctx *ctx)
 {
        struct hl_device *hdev = ctx->hdev;
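is_dram_va() leans on hl_mem_area_inside_range(), which answers whether an area of the given size lies wholly inside a range. A plausible paraphrase of that helper (renamed to mark it as a sketch; the driver's own inline version may differ in boundary and overflow handling):

static inline bool mem_area_inside_range(u64 address, u32 size,
                                         u64 range_start, u64 range_end)
{
        u64 end = address + size;

        /* inside the range, and no u64 wrap-around */
        return (address >= range_start) && (end <= range_end) &&
                (end > address);
}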
@@ -548,6 +557,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
                curr_pte;
        bool is_huge, clear_hop3 = true;

+       /* shifts and masks are the same in PMMU and HPMMU, use one of them */
        mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

        hop0_addr = get_hop0_addr(ctx);
@@ -637,29 +647,27 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
                        clear_hop3 = true;

                if (!clear_hop3)
-                       goto flush;
+                       goto mapped;

                clear_pte(ctx, hop3_pte_addr);

                if (put_pte(ctx, hop3_addr))
-                       goto flush;
+                       goto mapped;

                clear_pte(ctx, hop2_pte_addr);

                if (put_pte(ctx, hop2_addr))
-                       goto flush;
+                       goto mapped;

                clear_pte(ctx, hop1_pte_addr);

                if (put_pte(ctx, hop1_addr))
-                       goto flush;
+                       goto mapped;

                clear_pte(ctx, hop0_pte_addr);
        }

-flush:
-       flush(ctx);
-
+mapped:
        return 0;

 not_mapped:
@@ -675,6 +683,7 @@ not_mapped:
  * @ctx: pointer to the context structure
  * @virt_addr: virt addr to unmap
  * @page_size: size of the page to unmap
+ * @flush_pte: whether to do a PCI flush
  *
  * This function does the following:
  * - Check that the virt addr is mapped
@@ -685,40 +694,43 @@ not_mapped:
  * changes the MMU hash, it must be protected by a lock.
  * However, because it maps only a single page, the lock should be implemented
  * in a higher level in order to protect the entire mapping of the memory area
+ *
+ * For optimization reasons PCI flush may be requested once after unmapping of
+ * a large area.
  */
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+               bool flush_pte)
 {
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct hl_mmu_properties *mmu_prop;
        u64 real_virt_addr;
        u32 real_page_size, npages;
-       int i, rc;
+       int i, rc = 0;
        bool is_dram_addr;

        if (!hdev->mmu_enable)
                return 0;

-       is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-                               prop->va_space_dram_start_address,
-                               prop->va_space_dram_end_address);
+       is_dram_addr = is_dram_va(hdev, virt_addr);

-       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+       if (is_dram_addr)
+               mmu_prop = &prop->dmmu;
+       else if ((page_size % prop->pmmu_huge.page_size) == 0)
+               mmu_prop = &prop->pmmu_huge;
+       else
+               mmu_prop = &prop->pmmu;

        /*
         * The H/W handles mapping of specific page sizes. Hence if the page
         * size is bigger, we break it to sub-pages and unmap them separately.
         */
-       if ((page_size % mmu_prop->huge_page_size) == 0) {
-               real_page_size = mmu_prop->huge_page_size;
-       } else if ((page_size % mmu_prop->page_size) == 0) {
+       if ((page_size % mmu_prop->page_size) == 0) {
                real_page_size = mmu_prop->page_size;
        } else {
                dev_err(hdev->dev,
-                       "page size of %u is not %uKB nor %uMB aligned, can't unmap\n",
-                       page_size,
-                       mmu_prop->page_size >> 10,
-                       mmu_prop->huge_page_size >> 20);
+                       "page size of %u is not %uKB aligned, can't unmap\n",
+                       page_size, mmu_prop->page_size >> 10);

                return -EFAULT;
        }
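To make the sub-page splitting concrete, assume Goya-like sizes of 4 KB for pmmu.page_size and 2 MB for pmmu_huge.page_size. Unmapping a 2 MB host region selects pmmu_huge, so real_page_size is 2 MB and a single _hl_mmu_unmap() call suffices; a 64 KB region is not huge-page aligned, selects pmmu, and splits as below, still with at most one flush at the end:

/* Illustrative arithmetic behind the loop that follows. */
static u32 npages_for(u32 page_size, u32 real_page_size)
{
        return page_size / real_page_size;      /* 64 KB / 4 KB == 16 calls */
}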
@@ -729,12 +741,15 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
        for (i = 0 ; i < npages ; i++) {
                rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
                if (rc)
-                       return rc;
+                       break;

                real_virt_addr += real_page_size;
        }

-       return 0;
+       if (flush_pte)
+               flush(ctx);
+
+       return rc;
 }

 static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
@@ -753,8 +768,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
                hop4_new = false, is_huge;
        int rc = -ENOMEM;

-       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
-
        /*
         * This mapping function can map a page or a huge page. For huge page
         * there are only 3 hops rather than 4. Currently the DRAM allocation
@@ -762,11 +775,15 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
         * one of the two page sizes. Since this is common code for all the
         * three cases, we need this huge page check.
         */
-       is_huge = page_size == mmu_prop->huge_page_size;
-
-       if (is_dram_addr && !is_huge) {
-               dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
-               return -EFAULT;
+       if (is_dram_addr) {
+               mmu_prop = &prop->dmmu;
+               is_huge = true;
+       } else if (page_size == prop->pmmu_huge.page_size) {
+               mmu_prop = &prop->pmmu_huge;
+               is_huge = true;
+       } else {
+               mmu_prop = &prop->pmmu;
+               is_huge = false;
        }

        hop0_addr = get_hop0_addr(ctx);
@@ -885,8 +902,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
                get_pte(ctx, hop3_addr);
        }

-       flush(ctx);
-
        return 0;

 err:
@@ -909,6 +924,7 @@ err:
  * @virt_addr: virt addr to map from
  * @phys_addr: phys addr to map to
  * @page_size: physical page size
+ * @flush_pte: whether to do a PCI flush
  *
  * This function does the following:
  * - Check that the virt addr is not mapped
@@ -919,8 +935,12 @@ err:
  * changes the MMU hash, it must be protected by a lock.
  * However, because it maps only a single page, the lock should be implemented
  * in a higher level in order to protect the entire mapping of the memory area
+ *
+ * For optimization reasons PCI flush may be requested once after mapping of a
+ * large area.
  */
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
+               bool flush_pte)
 {
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -933,26 +953,25 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
        if (!hdev->mmu_enable)
                return 0;

-       is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-                               prop->va_space_dram_start_address,
-                               prop->va_space_dram_end_address);
+       is_dram_addr = is_dram_va(hdev, virt_addr);

-       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+       if (is_dram_addr)
+               mmu_prop = &prop->dmmu;
+       else if ((page_size % prop->pmmu_huge.page_size) == 0)
+               mmu_prop = &prop->pmmu_huge;
+       else
+               mmu_prop = &prop->pmmu;

        /*
         * The H/W handles mapping of specific page sizes. Hence if the page
         * size is bigger, we break it to sub-pages and map them separately.
         */
-       if ((page_size % mmu_prop->huge_page_size) == 0) {
-               real_page_size = mmu_prop->huge_page_size;
-       } else if ((page_size % mmu_prop->page_size) == 0) {
+       if ((page_size % mmu_prop->page_size) == 0) {
                real_page_size = mmu_prop->page_size;
        } else {
                dev_err(hdev->dev,
-                       "page size of %u is not %dKB nor %dMB aligned, can't unmap\n",
-                       page_size,
-                       mmu_prop->page_size >> 10,
-                       mmu_prop->huge_page_size >> 20);
+                       "page size of %u is not %uKB aligned, can't map\n",
+                       page_size, mmu_prop->page_size >> 10);

                return -EFAULT;
        }
@@ -976,6 +995,9 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
                mapped_cnt++;
        }

+       if (flush_pte)
+               flush(ctx);
+
        return 0;

 err:
@@ -988,6 +1010,8 @@ err:
                real_virt_addr += real_page_size;
        }

+       flush(ctx);
+
        return rc;
 }
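Taken together, the flush_pte parameter lets a caller that maps or unmaps a large area pay for a single PCI flush instead of one per page. A minimal caller sketch mirroring map_phys_pg_pack() above (the helper name and signature here are illustrative):

/* Map n physically-discontiguous pages to one contiguous VA block,
 * requesting the flush only on the last page. */
static int map_n_pages(struct hl_ctx *ctx, u64 va, const u64 *pa, u32 n,
                        u32 page_size)
{
        u32 i;
        int rc;

        for (i = 0 ; i < n ; i++) {
                rc = hl_mmu_map(ctx, va + (u64)i * page_size, pa[i],
                                page_size, (i + 1) == n);
                if (rc)
                        return rc;      /* caller unwinds via hl_mmu_unmap() */
        }

        return 0;
}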