mirror of
https://github.com/torvalds/linux.git
synced 2024-11-17 09:31:50 +00:00
56809a28d4
... not PGD

vmalloc() sets up the kernel page table (starting from @swapper_pg_dir). But when the vmalloc area is accessed in the context of a user task, say when opening a terminal in n_tty_open(), the user page tables need to be synced from the kernel page tables so that the TLB entry is created in "user context".

The old code was doing this incorrectly: it was updating the user pgd entry (the first level itself) to point to the kernel pud table (2nd level), effectively yanking away the entire user space translation and replacing it with the kernel one.

The correct way to do this is to ONLY update a user space pgd/pud/pmd entry if it is not populated already. This ensures that only the missing leaf pmd entry gets updated to point to the relevant kernel pte table.

From a code change point of view, we are changing the pattern:

	p4d = p4d_offset(pgd, address);
	p4d_k = p4d_offset(pgd_k, address);
	if (!p4d_present(*p4d_k))
		goto bad_area;
	set_p4d(p4d, *p4d_k);

with

	p4d = p4d_offset(pgd, address);
	p4d_k = p4d_offset(pgd_k, address);
	if (p4d_none(*p4d_k))
		goto bad_area;
	if (!p4d_present(*p4d))
		set_p4d(p4d, *p4d_k);

Signed-off-by: Vineet Gupta <vgupta@kernel.org>
195 lines
4.3 KiB
C
195 lines
4.3 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/* Page Fault Handling for ARC (TLB Miss / ProtV)
|
|
*
|
|
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
|
*/
|
|
|
|
#include <linux/signal.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/sched/signal.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/kdebug.h>
|
|
#include <linux/perf_event.h>
|
|
#include <linux/mm_types.h>
|
|
#include <asm/mmu.h>
|
|
|
|
/*
|
|
* kernel virtual address is required to implement vmalloc/pkmap/fixmap
|
|
* Refer to asm/processor.h for System Memory Map
|
|
*
|
|
* It simply copies the PMD entry (pointer to 2nd level page table or hugepage)
|
|
* from swapper pgdir to task pgdir. The 2nd level table/page is thus shared
|
|
*/
|
|
noinline static int handle_kernel_vaddr_fault(unsigned long address)
|
|
{
|
|
/*
|
|
* Synchronize this task's top level page-table
|
|
* with the 'reference' page table.
|
|
*/
|
|
pgd_t *pgd, *pgd_k;
|
|
p4d_t *p4d, *p4d_k;
|
|
pud_t *pud, *pud_k;
|
|
pmd_t *pmd, *pmd_k;
|
|
|
|
pgd = pgd_offset(current->active_mm, address);
|
|
pgd_k = pgd_offset_k(address);
|
|
|
|
if (pgd_none (*pgd_k))
|
|
goto bad_area;
|
|
if (!pgd_present(*pgd))
|
|
set_pgd(pgd, *pgd_k);
|
|
|
|
p4d = p4d_offset(pgd, address);
|
|
p4d_k = p4d_offset(pgd_k, address);
|
|
if (p4d_none(*p4d_k))
|
|
goto bad_area;
|
|
if (!p4d_present(*p4d))
|
|
set_p4d(p4d, *p4d_k);
|
|
|
|
pud = pud_offset(p4d, address);
|
|
pud_k = pud_offset(p4d_k, address);
|
|
if (pud_none(*pud_k))
|
|
goto bad_area;
|
|
if (!pud_present(*pud))
|
|
set_pud(pud, *pud_k);
|
|
|
|
pmd = pmd_offset(pud, address);
|
|
pmd_k = pmd_offset(pud_k, address);
|
|
if (pmd_none(*pmd_k))
|
|
goto bad_area;
|
|
if (!pmd_present(*pmd))
|
|
set_pmd(pmd, *pmd_k);
|
|
|
|
/* XXX: create the TLB entry here */
|
|
return 0;
|
|
|
|
bad_area:
|
|
return 1;
|
|
}
|
|
|
|
void do_page_fault(unsigned long address, struct pt_regs *regs)
|
|
{
|
|
struct vm_area_struct *vma = NULL;
|
|
struct task_struct *tsk = current;
|
|
struct mm_struct *mm = tsk->mm;
|
|
int sig, si_code = SEGV_MAPERR;
|
|
unsigned int write = 0, exec = 0, mask;
|
|
vm_fault_t fault = VM_FAULT_SIGSEGV; /* handle_mm_fault() output */
|
|
unsigned int flags; /* handle_mm_fault() input */
|
|
|
|
/*
|
|
* NOTE! We MUST NOT take any locks for this case. We may
|
|
* be in an interrupt or a critical region, and should
|
|
* only copy the information from the master page table,
|
|
* nothing more.
|
|
*/
|
|
if (address >= VMALLOC_START && !user_mode(regs)) {
|
|
if (unlikely(handle_kernel_vaddr_fault(address)))
|
|
goto no_context;
|
|
else
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* If we're in an interrupt or have no user
|
|
* context, we must not take the fault..
|
|
*/
|
|
if (faulthandler_disabled() || !mm)
|
|
goto no_context;
|
|
|
|
if (regs->ecr_cause & ECR_C_PROTV_STORE) /* ST/EX */
|
|
write = 1;
|
|
else if ((regs->ecr_vec == ECR_V_PROTV) &&
|
|
(regs->ecr_cause == ECR_C_PROTV_INST_FETCH))
|
|
exec = 1;
|
|
|
|
flags = FAULT_FLAG_DEFAULT;
|
|
if (user_mode(regs))
|
|
flags |= FAULT_FLAG_USER;
|
|
if (write)
|
|
flags |= FAULT_FLAG_WRITE;
|
|
|
|
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
|
|
retry:
|
|
mmap_read_lock(mm);
|
|
|
|
vma = find_vma(mm, address);
|
|
if (!vma)
|
|
goto bad_area;
|
|
if (unlikely(address < vma->vm_start)) {
|
|
if (!(vma->vm_flags & VM_GROWSDOWN) || expand_stack(vma, address))
|
|
goto bad_area;
|
|
}
|
|
|
|
/*
|
|
* vm_area is good, now check permissions for this memory access
|
|
*/
|
|
mask = VM_READ;
|
|
if (write)
|
|
mask = VM_WRITE;
|
|
if (exec)
|
|
mask = VM_EXEC;
|
|
|
|
if (!(vma->vm_flags & mask)) {
|
|
si_code = SEGV_ACCERR;
|
|
goto bad_area;
|
|
}
|
|
|
|
fault = handle_mm_fault(vma, address, flags, regs);
|
|
|
|
/* Quick path to respond to signals */
|
|
if (fault_signal_pending(fault, regs)) {
|
|
if (!user_mode(regs))
|
|
goto no_context;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Fault retry nuances, mmap_lock already relinquished by core mm
|
|
*/
|
|
if (unlikely((fault & VM_FAULT_RETRY) &&
|
|
(flags & FAULT_FLAG_ALLOW_RETRY))) {
|
|
flags |= FAULT_FLAG_TRIED;
|
|
goto retry;
|
|
}
|
|
|
|
bad_area:
|
|
mmap_read_unlock(mm);
|
|
|
|
/*
|
|
* Major/minor page fault accounting
|
|
* (in case of retry we only land here once)
|
|
*/
|
|
if (likely(!(fault & VM_FAULT_ERROR)))
|
|
/* Normal return path: fault Handled Gracefully */
|
|
return;
|
|
|
|
if (!user_mode(regs))
|
|
goto no_context;
|
|
|
|
if (fault & VM_FAULT_OOM) {
|
|
pagefault_out_of_memory();
|
|
return;
|
|
}
|
|
|
|
if (fault & VM_FAULT_SIGBUS) {
|
|
sig = SIGBUS;
|
|
si_code = BUS_ADRERR;
|
|
}
|
|
else {
|
|
sig = SIGSEGV;
|
|
}
|
|
|
|
tsk->thread.fault_address = address;
|
|
force_sig_fault(sig, si_code, (void __user *)address);
|
|
return;
|
|
|
|
no_context:
|
|
if (fixup_exception(regs))
|
|
return;
|
|
|
|
die("Oops", regs, address);
|
|
}
|