mirror of
https://github.com/torvalds/linux.git
synced 2024-11-13 23:51:39 +00:00
Merge patch series "riscv: Fix set_memory_XX() and set_direct_map_XX()"
Alexandre Ghiti <alexghiti@rivosinc.com> says: These two patches fix the set_memory_XX() and set_direct_map_XX() APIs, which in turn fixes STRICT_KERNEL_RWX and memfd_secret(). Those were broken because the permission changes were not applied to the linear mapping: the linear mapping is mapped using hugepages, and walk_page_range_novma() does not split such mappings. To fix that, patch 1 disables PGD mappings in the linear mapping, as it is hard to propagate changes at this level in *all* the page tables. This has the downside of disabling PMD mappings for sv32 and PUD (1GB) mappings for sv39 in the linear mapping (for specific kernels, we could add a Kconfig to enable ARCH_HAS_SET_DIRECT_MAP and STRICT_KERNEL_RWX if needed; I'm pretty sure we'll discuss that). Patch 2 implements the split of the huge linear mappings so that walk_page_range_novma() can properly apply the permissions. The whole split is protected with mmap_sem in write mode, but I'm wondering if that's enough; any opinion on that is appreciated. * b4-shazam-merge: riscv: Fix set_memory_XX() and set_direct_map_XX() by splitting huge linear mappings riscv: Don't use PGD entries for the linear mapping Link: https://lore.kernel.org/r/20231108075930.7157-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
This commit is contained in:
commit
05942f780a
@ -666,16 +666,16 @@ void __init create_pgd_mapping(pgd_t *pgdp,
|
||||
static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va,
|
||||
phys_addr_t size)
|
||||
{
|
||||
if (!(pa & (PGDIR_SIZE - 1)) && !(va & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE)
|
||||
return PGDIR_SIZE;
|
||||
|
||||
if (!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE)
|
||||
if (pgtable_l5_enabled &&
|
||||
!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE)
|
||||
return P4D_SIZE;
|
||||
|
||||
if (!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE)
|
||||
if (pgtable_l4_enabled &&
|
||||
!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE)
|
||||
return PUD_SIZE;
|
||||
|
||||
if (!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE)
|
||||
if (IS_ENABLED(CONFIG_64BIT) &&
|
||||
!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE)
|
||||
return PMD_SIZE;
|
||||
|
||||
return PAGE_SIZE;
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
#include <linux/pagewalk.h>
|
||||
#include <linux/pgtable.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/bitops.h>
|
||||
#include <asm/set_memory.h>
|
||||
@ -25,19 +26,6 @@ static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk)
|
||||
return new_val;
|
||||
}
|
||||
|
||||
/*
 * Page-walk callback for a PGD entry.
 *
 * Reads the entry once; if it is a leaf mapping, rewrites it with the value
 * produced by set_pageattr_masks() for this walk. Non-leaf entries are left
 * untouched. Always returns 0. The addr and next arguments are not used.
 *
 * NOTE(review): this text is a rendered diff without +/- markers; the
 * surrounding hunk header suggests this function is among the lines the
 * commit removes - confirm against the real tree.
 */
static int pageattr_pgd_entry(pgd_t *pgd, unsigned long addr,
|
||||
unsigned long next, struct mm_walk *walk)
|
||||
{
|
||||
/* Snapshot the entry so the leaf test and the update use the same value. */
pgd_t val = READ_ONCE(*pgd);
|
||||
|
||||
if (pgd_leaf(val)) {
|
||||
val = __pgd(set_pageattr_masks(pgd_val(val), walk));
|
||||
set_pgd(pgd, val);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr,
|
||||
unsigned long next, struct mm_walk *walk)
|
||||
{
|
||||
@ -96,7 +84,6 @@ static int pageattr_pte_hole(unsigned long addr, unsigned long next,
|
||||
}
|
||||
|
||||
static const struct mm_walk_ops pageattr_ops = {
|
||||
.pgd_entry = pageattr_pgd_entry,
|
||||
.p4d_entry = pageattr_p4d_entry,
|
||||
.pud_entry = pageattr_pud_entry,
|
||||
.pmd_entry = pageattr_pmd_entry,
|
||||
@ -105,12 +92,181 @@ static const struct mm_walk_ops pageattr_ops = {
|
||||
.walk_lock = PGWALK_RDLOCK,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
/*
 * Split leaf PMD mappings that are only partially covered by [vaddr, end).
 *
 * Iterates over the PMD entries under @pudp, starting at the entry for
 * @vaddr. A leaf PMD that the range does not fully cover is replaced by a
 * freshly allocated PTE table whose PTRS_PER_PTE entries map consecutive
 * pfns starting at the leaf's pfn, with the leaf's non-pfn bits as
 * protection.
 *
 * Returns 0 on success, or -ENOMEM if a PTE table page cannot be allocated
 * (entries already split before the failure stay split).
 */
static int __split_linear_mapping_pmd(pud_t *pudp,
|
||||
unsigned long vaddr, unsigned long end)
|
||||
{
|
||||
pmd_t *pmdp;
|
||||
unsigned long next;
|
||||
|
||||
pmdp = pmd_offset(pudp, vaddr);
|
||||
|
||||
do {
|
||||
next = pmd_addr_end(vaddr, end);
|
||||
|
||||
/*
 * The range spans this whole, PMD-aligned entry: nothing to split.
 * In a do/while, continue jumps to the loop condition, so pmdp and
 * vaddr are still advanced.
 */
if (next - vaddr >= PMD_SIZE &&
|
||||
vaddr <= (vaddr & PMD_MASK) && end >= next)
|
||||
continue;
|
||||
|
||||
/* Only leaf (huge) entries need work; other entries are left as is. */
if (pmd_leaf(*pmdp)) {
|
||||
struct page *pte_page;
|
||||
unsigned long pfn = _pmd_pfn(*pmdp);
|
||||
/* Everything in the entry except the pfn field. */
pgprot_t prot = __pgprot(pmd_val(*pmdp) & ~_PAGE_PFN_MASK);
|
||||
pte_t *ptep_new;
|
||||
int i;
|
||||
|
||||
pte_page = alloc_page(GFP_KERNEL);
|
||||
if (!pte_page)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Fill the new table: entry i maps pfn + i with the leaf's bits. */
ptep_new = (pte_t *)page_address(pte_page);
|
||||
for (i = 0; i < PTRS_PER_PTE; ++i, ++ptep_new)
|
||||
set_pte(ptep_new, pfn_pte(pfn + i, prot));
|
||||
|
||||
/*
 * Write barrier between filling the PTE table and publishing it
 * through the PMD store below.
 */
smp_wmb();
|
||||
|
||||
/* Point the PMD at the new PTE table instead of the huge mapping. */
set_pmd(pmdp, pfn_pmd(page_to_pfn(pte_page), PAGE_TABLE));
|
||||
}
|
||||
} while (pmdp++, vaddr = next, vaddr != end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Split leaf PUD mappings that are only partially covered by [vaddr, end),
 * then descend to the PMD level.
 *
 * Iterates over the PUD entries under @p4dp, starting at the entry for
 * @vaddr. A leaf PUD that the range does not fully cover is replaced by a
 * freshly allocated PMD table filled with PTRS_PER_PMD leaf PMDs carrying
 * the same non-pfn bits; __split_linear_mapping_pmd() is then called on the
 * sub-range [vaddr, next).
 *
 * Returns 0 on success, -ENOMEM if a table page cannot be allocated, or the
 * non-zero value returned by the PMD-level helper.
 */
static int __split_linear_mapping_pud(p4d_t *p4dp,
|
||||
unsigned long vaddr, unsigned long end)
|
||||
{
|
||||
pud_t *pudp;
|
||||
unsigned long next;
|
||||
int ret;
|
||||
|
||||
pudp = pud_offset(p4dp, vaddr);
|
||||
|
||||
do {
|
||||
next = pud_addr_end(vaddr, end);
|
||||
|
||||
/*
 * The range spans this whole, PUD-aligned entry: nothing to split,
 * and the PMD level below is not visited either. continue jumps to
 * the loop condition, which advances pudp and vaddr.
 */
if (next - vaddr >= PUD_SIZE &&
|
||||
vaddr <= (vaddr & PUD_MASK) && end >= next)
|
||||
continue;
|
||||
|
||||
if (pud_leaf(*pudp)) {
|
||||
struct page *pmd_page;
|
||||
unsigned long pfn = _pud_pfn(*pudp);
|
||||
/* Everything in the entry except the pfn field. */
pgprot_t prot = __pgprot(pud_val(*pudp) & ~_PAGE_PFN_MASK);
|
||||
pmd_t *pmdp_new;
|
||||
int i;
|
||||
|
||||
pmd_page = alloc_page(GFP_KERNEL);
|
||||
if (!pmd_page)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Entry i is a leaf PMD for the i-th PMD_SIZE chunk of the old PUD. */
pmdp_new = (pmd_t *)page_address(pmd_page);
|
||||
for (i = 0; i < PTRS_PER_PMD; ++i, ++pmdp_new)
|
||||
set_pmd(pmdp_new,
|
||||
pfn_pmd(pfn + ((i * PMD_SIZE) >> PAGE_SHIFT), prot));
|
||||
|
||||
/*
 * Write barrier between filling the PMD table and publishing it
 * through the PUD store below.
 */
smp_wmb();
|
||||
|
||||
set_pud(pudp, pfn_pud(page_to_pfn(pmd_page), PAGE_TABLE));
|
||||
}
|
||||
|
||||
/* Continue splitting one level down for this PUD's sub-range. */
ret = __split_linear_mapping_pmd(pudp, vaddr, next);
|
||||
if (ret)
|
||||
return ret;
|
||||
} while (pudp++, vaddr = next, vaddr != end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Split leaf P4D mappings that are only partially covered by [vaddr, end),
 * then descend to the PUD level.
 *
 * Iterates over the P4D entries under @pgdp, starting at the entry for
 * @vaddr. A leaf P4D that the range does not fully cover is replaced by a
 * freshly allocated PUD table filled with PTRS_PER_PUD leaf PUDs carrying
 * the same non-pfn bits; __split_linear_mapping_pud() is then called on the
 * sub-range [vaddr, next).
 *
 * Returns 0 on success, -ENOMEM if a table page cannot be allocated, or the
 * non-zero value returned by the PUD-level helper.
 */
static int __split_linear_mapping_p4d(pgd_t *pgdp,
|
||||
unsigned long vaddr, unsigned long end)
|
||||
{
|
||||
p4d_t *p4dp;
|
||||
unsigned long next;
|
||||
int ret;
|
||||
|
||||
p4dp = p4d_offset(pgdp, vaddr);
|
||||
|
||||
do {
|
||||
next = p4d_addr_end(vaddr, end);
|
||||
|
||||
/*
|
||||
* If [vaddr; end] contains [vaddr & P4D_MASK; next], we don't
|
||||
* need to split, we'll change the protections on the whole P4D.
|
||||
*/
|
||||
if (next - vaddr >= P4D_SIZE &&
|
||||
vaddr <= (vaddr & P4D_MASK) && end >= next)
|
||||
continue;
|
||||
|
||||
if (p4d_leaf(*p4dp)) {
|
||||
struct page *pud_page;
|
||||
unsigned long pfn = _p4d_pfn(*p4dp);
|
||||
/* Everything in the entry except the pfn field. */
pgprot_t prot = __pgprot(p4d_val(*p4dp) & ~_PAGE_PFN_MASK);
|
||||
pud_t *pudp_new;
|
||||
int i;
|
||||
|
||||
pud_page = alloc_page(GFP_KERNEL);
|
||||
if (!pud_page)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* Fill the pud level with leaf puds that have the same
|
||||
* protections as the leaf p4d.
|
||||
*/
|
||||
pudp_new = (pud_t *)page_address(pud_page);
|
||||
for (i = 0; i < PTRS_PER_PUD; ++i, ++pudp_new)
|
||||
set_pud(pudp_new,
|
||||
pfn_pud(pfn + ((i * PUD_SIZE) >> PAGE_SHIFT), prot));
|
||||
|
||||
/*
|
||||
* Make sure the pud filling is not reordered with the
|
||||
* p4d store which could result in seeing a partially
|
||||
* filled pud level.
|
||||
*/
|
||||
smp_wmb();
|
||||
|
||||
set_p4d(p4dp, pfn_p4d(page_to_pfn(pud_page), PAGE_TABLE));
|
||||
}
|
||||
|
||||
/* Continue splitting one level down for this P4D's sub-range. */
ret = __split_linear_mapping_pud(p4dp, vaddr, next);
|
||||
if (ret)
|
||||
return ret;
|
||||
} while (p4dp++, vaddr = next, vaddr != end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Top level of the split walk: visit each PGD entry covering [vaddr, end),
 * starting at @pgdp, and hand its sub-range to __split_linear_mapping_p4d().
 *
 * No splitting is done at this level itself. Returns 0 on success, or the
 * first non-zero value returned by the P4D-level helper.
 */
static int __split_linear_mapping_pgd(pgd_t *pgdp,
|
||||
unsigned long vaddr,
|
||||
unsigned long end)
|
||||
{
|
||||
unsigned long next;
|
||||
int ret;
|
||||
|
||||
do {
|
||||
next = pgd_addr_end(vaddr, end);
|
||||
/* We never use PGD mappings for the linear mapping */
|
||||
ret = __split_linear_mapping_p4d(pgdp, vaddr, next);
|
||||
if (ret)
|
||||
return ret;
|
||||
} while (pgdp++, vaddr = next, vaddr != end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Entry point of the split walk for the range [start, end).
 *
 * Looks up the kernel PGD entry for @start with pgd_offset_k() and runs
 * __split_linear_mapping_pgd() from there, returning its result unchanged.
 */
static int split_linear_mapping(unsigned long start, unsigned long end)
|
||||
{
|
||||
return __split_linear_mapping_pgd(pgd_offset_k(start), start, end);
|
||||
}
|
||||
#endif /* CONFIG_64BIT */
|
||||
|
||||
static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
|
||||
pgprot_t clear_mask)
|
||||
{
|
||||
int ret;
|
||||
unsigned long start = addr;
|
||||
unsigned long end = start + PAGE_SIZE * numpages;
|
||||
unsigned long __maybe_unused lm_start;
|
||||
unsigned long __maybe_unused lm_end;
|
||||
struct pageattr_masks masks = {
|
||||
.set_mask = set_mask,
|
||||
.clear_mask = clear_mask
|
||||
@ -120,11 +276,67 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
|
||||
return 0;
|
||||
|
||||
mmap_write_lock(&init_mm);
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
/*
|
||||
* We are about to change the permissions of a kernel mapping, we must
|
||||
* apply the same changes to its linear mapping alias, which may imply
|
||||
* splitting a huge mapping.
|
||||
*/
|
||||
|
||||
if (is_vmalloc_or_module_addr((void *)start)) {
|
||||
struct vm_struct *area = NULL;
|
||||
int i, page_start;
|
||||
|
||||
area = find_vm_area((void *)start);
|
||||
page_start = (start - (unsigned long)area->addr) >> PAGE_SHIFT;
|
||||
|
||||
for (i = page_start; i < page_start + numpages; ++i) {
|
||||
lm_start = (unsigned long)page_address(area->pages[i]);
|
||||
lm_end = lm_start + PAGE_SIZE;
|
||||
|
||||
ret = split_linear_mapping(lm_start, lm_end);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ret = walk_page_range_novma(&init_mm, lm_start, lm_end,
|
||||
&pageattr_ops, NULL, &masks);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
}
|
||||
} else if (is_kernel_mapping(start) || is_linear_mapping(start)) {
|
||||
lm_start = (unsigned long)lm_alias(start);
|
||||
lm_end = (unsigned long)lm_alias(end);
|
||||
|
||||
ret = split_linear_mapping(lm_start, lm_end);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ret = walk_page_range_novma(&init_mm, lm_start, lm_end,
|
||||
&pageattr_ops, NULL, &masks);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
|
||||
&masks);
|
||||
|
||||
unlock:
|
||||
mmap_write_unlock(&init_mm);
|
||||
|
||||
/*
|
||||
* We can't use flush_tlb_kernel_range() here as we may have split a
|
||||
* hugepage that is larger than that, so let's flush everything.
|
||||
*/
|
||||
flush_tlb_all();
|
||||
#else
|
||||
ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
|
||||
&masks);
|
||||
|
||||
mmap_write_unlock(&init_mm);
|
||||
|
||||
flush_tlb_kernel_range(start, end);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -159,36 +371,14 @@ int set_memory_nx(unsigned long addr, int numpages)
|
||||
|
||||
int set_direct_map_invalid_noflush(struct page *page)
|
||||
{
|
||||
int ret;
|
||||
unsigned long start = (unsigned long)page_address(page);
|
||||
unsigned long end = start + PAGE_SIZE;
|
||||
struct pageattr_masks masks = {
|
||||
.set_mask = __pgprot(0),
|
||||
.clear_mask = __pgprot(_PAGE_PRESENT)
|
||||
};
|
||||
|
||||
mmap_read_lock(&init_mm);
|
||||
ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
|
||||
mmap_read_unlock(&init_mm);
|
||||
|
||||
return ret;
|
||||
return __set_memory((unsigned long)page_address(page), 1,
|
||||
__pgprot(0), __pgprot(_PAGE_PRESENT));
|
||||
}
|
||||
|
||||
int set_direct_map_default_noflush(struct page *page)
|
||||
{
|
||||
int ret;
|
||||
unsigned long start = (unsigned long)page_address(page);
|
||||
unsigned long end = start + PAGE_SIZE;
|
||||
struct pageattr_masks masks = {
|
||||
.set_mask = PAGE_KERNEL,
|
||||
.clear_mask = __pgprot(0)
|
||||
};
|
||||
|
||||
mmap_read_lock(&init_mm);
|
||||
ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
|
||||
mmap_read_unlock(&init_mm);
|
||||
|
||||
return ret;
|
||||
return __set_memory((unsigned long)page_address(page), 1,
|
||||
PAGE_KERNEL, __pgprot(0));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_PAGEALLOC
|
||||
|
Loading…
Reference in New Issue
Block a user