[PATCH] Enable mprotect on huge pages
2.6.16-rc3 uses hugetlb on-demand paging, but it doesn't support hugetlb mprotect. From: David Gibson <david@gibson.dropbear.id.au> Remove a test from the mprotect() path which checks that the mprotect()ed range on a hugepage VMA is hugepage aligned (yes, really, the sense of is_aligned_hugepage_range() is the opposite of what you'd guess :-/). In fact, we don't need this test. If the given addresses match the beginning/end of a hugepage VMA they must already be suitably aligned. If they don't, then mprotect_fixup() will attempt to split the VMA. The very first test in split_vma() will check for a badly aligned address on a hugepage VMA and return -EINVAL if necessary. From: "Chen, Kenneth W" <kenneth.w.chen@intel.com> On i386 and x86-64, pte flag _PAGE_PSE collides with _PAGE_PROTNONE. The identity of a hugetlb pte is lost when changing page protection via mprotect. A page fault that occurs later will trigger a bug check in huge_pte_alloc(). The fix is to always make the new pte a hugetlb pte and also to clean up legacy code where _PAGE_PRESENT is forced on in the pre-faulting days. Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com> Cc: David Gibson <david@gibson.dropbear.id.au> Cc: "David S. Miller" <davem@davemloft.net> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: William Lee Irwin III <wli@holomorphy.com> Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com> Cc: Andi Kleen <ak@muc.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
aed75ff3ca
commit
8f860591ff
@ -219,13 +219,12 @@ extern unsigned long pg0[];
|
||||
* The following only work if pte_present() is true.
|
||||
* Undefined behaviour if not..
|
||||
*/
|
||||
#define __LARGE_PTE (_PAGE_PSE | _PAGE_PRESENT)
|
||||
static inline int pte_user(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
|
||||
static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
|
||||
static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; }
|
||||
static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; }
|
||||
static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; }
|
||||
static inline int pte_huge(pte_t pte) { return ((pte).pte_low & __LARGE_PTE) == __LARGE_PTE; }
|
||||
static inline int pte_huge(pte_t pte) { return (pte).pte_low & _PAGE_PSE; }
|
||||
|
||||
/*
|
||||
* The following only works if pte_present() is not true.
|
||||
@ -242,7 +241,7 @@ static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return
|
||||
static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; }
|
||||
static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
|
||||
static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
|
||||
static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= __LARGE_PTE; return pte; }
|
||||
static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; }
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
# include <asm/pgtable-3level.h>
|
||||
|
@ -314,7 +314,7 @@ ia64_phys_addr_valid (unsigned long addr)
|
||||
#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A))
|
||||
#define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D))
|
||||
#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D))
|
||||
#define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_P))
|
||||
#define pte_mkhuge(pte) (__pte(pte_val(pte)))
|
||||
|
||||
/*
|
||||
* Macro to a page protection value as "uncacheable". Note that "protection" is really a
|
||||
|
@ -273,7 +273,7 @@ static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
|
||||
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
|
||||
static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
|
||||
static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
|
||||
static inline int pte_huge(pte_t pte) { return (pte_val(pte) & __LARGE_PTE) == __LARGE_PTE; }
|
||||
static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_PSE; }
|
||||
|
||||
static inline pte_t pte_rdprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; }
|
||||
static inline pte_t pte_exprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; }
|
||||
@ -285,7 +285,7 @@ static inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _
|
||||
static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; }
|
||||
static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; }
|
||||
static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; }
|
||||
static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | __LARGE_PTE)); return pte; }
|
||||
static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_PSE)); return pte; }
|
||||
|
||||
struct vm_area_struct;
|
||||
|
||||
|
@ -41,6 +41,8 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
|
||||
pmd_t *pmd, int write);
|
||||
int is_aligned_hugepage_range(unsigned long addr, unsigned long len);
|
||||
int pmd_huge(pmd_t pmd);
|
||||
void hugetlb_change_protection(struct vm_area_struct *vma,
|
||||
unsigned long address, unsigned long end, pgprot_t newprot);
|
||||
|
||||
#ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE
|
||||
#define is_hugepage_only_range(mm, addr, len) 0
|
||||
@ -101,6 +103,8 @@ static inline unsigned long hugetlb_total_pages(void)
|
||||
#define free_huge_page(p) ({ (void)(p); BUG(); })
|
||||
#define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; })
|
||||
|
||||
#define hugetlb_change_protection(vma, address, end, newprot)
|
||||
|
||||
#ifndef HPAGE_MASK
|
||||
#define HPAGE_MASK PAGE_MASK /* Keep the compiler happy */
|
||||
#define HPAGE_SIZE PAGE_SIZE
|
||||
|
29
mm/hugetlb.c
29
mm/hugetlb.c
@ -565,3 +565,32 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
 * hugetlb_change_protection - apply a new protection to the huge-page PTEs of a range.
 * @vma:     VMA covering the range; supplies the mm and is passed to the flush calls.
 * @address: start of the range; must be below @end (BUG_ON otherwise).
 * @end:     end of the range (exclusive: the loop runs while address < end).
 * @newprot: protection handed to pte_modify() for every populated entry.
 *
 * Steps through the range in HPAGE_SIZE increments while holding
 * mm->page_table_lock. Slots for which huge_pte_offset() returns NULL, and
 * entries that are pte_none(), are left untouched.
 */
void hugetlb_change_protection(struct vm_area_struct *vma,
|
||||
unsigned long address, unsigned long end, pgprot_t newprot)
|
||||
{
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
/* Saved because the loop below advances address; the final TLB flush needs the original start. */
unsigned long start = address;
|
||||
pte_t *ptep;
|
||||
pte_t pte;
|
||||
|
||||
BUG_ON(address >= end);
|
||||
/* Cache flush over the whole range happens before any entry is modified. */
flush_cache_range(vma, address, end);
|
||||
|
||||
spin_lock(&mm->page_table_lock);
|
||||
for (; address < end; address += HPAGE_SIZE) {
|
||||
ptep = huge_pte_offset(mm, address);
|
||||
/* No page-table slot for this address: nothing to update. */
if (!ptep)
|
||||
continue;
|
||||
if (!pte_none(*ptep)) {
|
||||
/* Clear the entry, rebuild it with the new protection, then write it back. */
pte = huge_ptep_get_and_clear(mm, address, ptep);
|
||||
/*
 * pte_mkhuge() is reapplied after pte_modify(): per this patch's
 * description, on i386/x86-64 _PAGE_PSE collides with _PAGE_PROTNONE,
 * so the huge-pte marking would otherwise be lost.
 */
pte = pte_mkhuge(pte_modify(pte, newprot));
|
||||
set_huge_pte_at(mm, address, ptep, pte);
|
||||
lazy_mmu_prot_update(pte);
|
||||
}
|
||||
}
|
||||
spin_unlock(&mm->page_table_lock);
|
||||
|
||||
/* TLB flush for the full original range, issued after the lock is dropped. */
flush_tlb_range(vma, start, end);
|
||||
}
|
||||
|
||||
|
@ -124,7 +124,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
|
||||
* a MAP_NORESERVE private mapping to writable will now reserve.
|
||||
*/
|
||||
if (newflags & VM_WRITE) {
|
||||
if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) {
|
||||
if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) {
|
||||
charged = nrpages;
|
||||
if (security_vm_enough_memory(charged))
|
||||
return -ENOMEM;
|
||||
@ -166,7 +166,10 @@ success:
|
||||
*/
|
||||
vma->vm_flags = newflags;
|
||||
vma->vm_page_prot = newprot;
|
||||
change_protection(vma, start, end, newprot);
|
||||
if (is_vm_hugetlb_page(vma))
|
||||
hugetlb_change_protection(vma, start, end, newprot);
|
||||
else
|
||||
change_protection(vma, start, end, newprot);
|
||||
vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
|
||||
vm_stat_account(mm, newflags, vma->vm_file, nrpages);
|
||||
return 0;
|
||||
@ -240,11 +243,6 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot)
|
||||
|
||||
/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
|
||||
|
||||
if (is_vm_hugetlb_page(vma)) {
|
||||
error = -EACCES;
|
||||
goto out;
|
||||
}
|
||||
|
||||
newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
|
||||
|
||||
/* newflags >> 4 shift VM_MAY% in place of VM_% */
|
||||
|
Loading…
Reference in New Issue
Block a user