mirror of
https://github.com/torvalds/linux.git
synced 2024-12-26 21:02:19 +00:00
habanalabs/gaudi: add page fault notify event
Each time a page fault happens, besides capturing its data, also notify the user about it. Signed-off-by: Dani Liberman <dliberman@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
cd21701cde
commit
aff6354afd
@ -2490,3 +2490,12 @@ void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is
|
||||
hdev->captured_err_info.pgf_info.pgf.engine_id = eng_id;
|
||||
hl_capture_user_mappings(hdev, is_pmmu);
|
||||
}
|
||||
|
||||
/**
 * hl_handle_page_fault - capture a page fault and flag it for user notification
 * @hdev: habanalabs device structure, forwarded to hl_capture_page_fault().
 * @addr: address associated with the fault, forwarded unchanged to
 *        hl_capture_page_fault().
 * @eng_id: engine id, forwarded unchanged to hl_capture_page_fault().
 * @is_pmmu: forwarded unchanged to hl_capture_page_fault()
 *           (presumably selects PMMU vs. other MMU - confirm against callee).
 * @event_mask: optional output bitmask; may be NULL. When non-NULL,
 *              HL_NOTIFIER_EVENT_PAGE_FAULT is OR-ed into it and any bits
 *              already set are left untouched.
 *
 * Thin wrapper: always performs the capture, then records the notifier event
 * bit only if the caller supplied a mask to record it in.
 */
void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu,
|
||||
u64 *event_mask)
|
||||
{
|
||||
hl_capture_page_fault(hdev, addr, eng_id, is_pmmu);
|
||||
|
||||
if (event_mask) /* callers that do not want the notification pass NULL */
|
||||
*event_mask |= HL_NOTIFIER_EVENT_PAGE_FAULT;
|
||||
}
|
||||
|
@ -3815,6 +3815,8 @@ void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_
|
||||
void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines,
|
||||
u8 flags, u64 *event_mask);
|
||||
void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu);
|
||||
void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu,
|
||||
u64 *event_mask);
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
|
||||
|
@ -6740,7 +6740,7 @@ static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_i
|
||||
}
|
||||
}
|
||||
|
||||
static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr)
|
||||
static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
|
||||
{
|
||||
struct gaudi_device *gaudi = hdev->asic_specific;
|
||||
u32 val;
|
||||
@ -6755,7 +6755,7 @@ static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr
|
||||
*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
|
||||
|
||||
dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
|
||||
hl_capture_page_fault(hdev, *addr, 0, true);
|
||||
hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
|
||||
|
||||
WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
|
||||
}
|
||||
@ -7323,7 +7323,7 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
|
||||
if (razwi) {
|
||||
gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
|
||||
&is_write);
|
||||
gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr);
|
||||
gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
|
||||
|
||||
if (is_read)
|
||||
razwi_flags |= HL_RAZWI_READ;
|
||||
|
@ -722,6 +722,7 @@ enum hl_server_type {
|
||||
* HL_NOTIFIER_EVENT_USER_ENGINE_ERR - Indicates device engine in error state
|
||||
* HL_NOTIFIER_EVENT_GENERAL_HW_ERR - Indicates device HW error
|
||||
* HL_NOTIFIER_EVENT_RAZWI - Indicates razwi happened
|
||||
* HL_NOTIFIER_EVENT_PAGE_FAULT - Indicates page fault happened
|
||||
*/
|
||||
#define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0)
|
||||
#define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1)
|
||||
@ -731,6 +732,7 @@ enum hl_server_type {
|
||||
#define HL_NOTIFIER_EVENT_USER_ENGINE_ERR (1ULL << 5)
|
||||
#define HL_NOTIFIER_EVENT_GENERAL_HW_ERR (1ULL << 6)
|
||||
#define HL_NOTIFIER_EVENT_RAZWI (1ULL << 7)
|
||||
#define HL_NOTIFIER_EVENT_PAGE_FAULT (1ULL << 8)
|
||||
|
||||
/* Opcode for management ioctl
|
||||
*
|
||||
|
Loading…
Reference in New Issue
Block a user