mirror of
https://github.com/torvalds/linux.git
synced 2024-11-14 08:02:07 +00:00
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
 "8 patches.

  Subsystems affected by this patch series: mm (hugetlb, pagemap, and
  userfaultfd), memfd, selftests, and kconfig"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  - configs/debug: set CONFIG_DEBUG_INFO=y properly
  - proc: fix documentation and description of pagemap
  - kselftest/vm: fix tests build with old libc
  - memfd: fix F_SEAL_WRITE after shmem huge page allocated
  - mm: fix use-after-free when anon vma name is used after vma is freed
  - mm: prevent vm_area_struct::anon_name refcount saturation
  - mm: refactor vm_area_struct::anon_vma_name usage code
  - selftests/vm: cleanup hugetlb file after mremap test
This commit is contained in:
commit
0014404f9c
@ -23,7 +23,7 @@ There are four components to pagemap:
|
||||
* Bit 56 page exclusively mapped (since 4.2)
|
||||
* Bit 57 pte is uffd-wp write-protected (since 5.13) (see
|
||||
:ref:`Documentation/admin-guide/mm/userfaultfd.rst <userfaultfd>`)
|
||||
* Bits 57-60 zero
|
||||
* Bits 58-60 zero
|
||||
* Bit 61 page is file-page or shared-anon (since 3.5)
|
||||
* Bit 62 page swapped
|
||||
* Bit 63 page present
|
||||
|
@ -309,7 +309,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
|
||||
|
||||
name = arch_vma_name(vma);
|
||||
if (!name) {
|
||||
const char *anon_name;
|
||||
struct anon_vma_name *anon_name;
|
||||
|
||||
if (!mm) {
|
||||
name = "[vdso]";
|
||||
@ -327,10 +327,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
|
||||
goto done;
|
||||
}
|
||||
|
||||
anon_name = vma_anon_name(vma);
|
||||
anon_name = anon_vma_name(vma);
|
||||
if (anon_name) {
|
||||
seq_pad(m, ' ');
|
||||
seq_printf(m, "[anon:%s]", anon_name);
|
||||
seq_printf(m, "[anon:%s]", anon_name->name);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1597,7 +1597,8 @@ static const struct mm_walk_ops pagemap_ops = {
|
||||
* Bits 5-54 swap offset if swapped
|
||||
* Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst)
|
||||
* Bit 56 page exclusively mapped
|
||||
* Bits 57-60 zero
|
||||
* Bit 57 pte is uffd-wp write-protected
|
||||
* Bits 58-60 zero
|
||||
* Bit 61 page is file-page or shared-anon
|
||||
* Bit 62 page swapped
|
||||
* Bit 63 page present
|
||||
|
@ -878,7 +878,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
|
||||
new_flags, vma->anon_vma,
|
||||
vma->vm_file, vma->vm_pgoff,
|
||||
vma_policy(vma),
|
||||
NULL_VM_UFFD_CTX, vma_anon_name(vma));
|
||||
NULL_VM_UFFD_CTX, anon_vma_name(vma));
|
||||
if (prev)
|
||||
vma = prev;
|
||||
else
|
||||
@ -1438,7 +1438,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
|
||||
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
|
||||
vma_policy(vma),
|
||||
((struct vm_userfaultfd_ctx){ ctx }),
|
||||
vma_anon_name(vma));
|
||||
anon_vma_name(vma));
|
||||
if (prev) {
|
||||
vma = prev;
|
||||
goto next;
|
||||
@ -1615,7 +1615,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
|
||||
prev = vma_merge(mm, prev, start, vma_end, new_flags,
|
||||
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
|
||||
vma_policy(vma),
|
||||
NULL_VM_UFFD_CTX, vma_anon_name(vma));
|
||||
NULL_VM_UFFD_CTX, anon_vma_name(vma));
|
||||
if (prev) {
|
||||
vma = prev;
|
||||
goto next;
|
||||
|
@ -2626,7 +2626,7 @@ static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start,
|
||||
extern struct vm_area_struct *vma_merge(struct mm_struct *,
|
||||
struct vm_area_struct *prev, unsigned long addr, unsigned long end,
|
||||
unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
|
||||
struct mempolicy *, struct vm_userfaultfd_ctx, const char *);
|
||||
struct mempolicy *, struct vm_userfaultfd_ctx, struct anon_vma_name *);
|
||||
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
|
||||
extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
|
||||
unsigned long addr, int new_below);
|
||||
@ -3372,11 +3372,12 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
|
||||
|
||||
#ifdef CONFIG_ANON_VMA_NAME
|
||||
int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
|
||||
unsigned long len_in, const char *name);
|
||||
unsigned long len_in,
|
||||
struct anon_vma_name *anon_name);
|
||||
#else
|
||||
static inline int
|
||||
madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
|
||||
unsigned long len_in, const char *name) {
|
||||
unsigned long len_in, struct anon_vma_name *anon_name) {
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
@ -140,50 +140,91 @@ static __always_inline void del_page_from_lru_list(struct page *page,
|
||||
|
||||
#ifdef CONFIG_ANON_VMA_NAME
|
||||
/*
|
||||
* mmap_lock should be read-locked when calling vma_anon_name() and while using
|
||||
* the returned pointer.
|
||||
* mmap_lock should be read-locked when calling anon_vma_name(). Caller should
|
||||
* either keep holding the lock while using the returned pointer or it should
|
||||
* raise anon_vma_name refcount before releasing the lock.
|
||||
*/
|
||||
extern const char *vma_anon_name(struct vm_area_struct *vma);
|
||||
|
||||
/*
|
||||
* mmap_lock should be read-locked for orig_vma->vm_mm.
|
||||
* mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be
|
||||
* isolated.
|
||||
*/
|
||||
extern void dup_vma_anon_name(struct vm_area_struct *orig_vma,
|
||||
struct vm_area_struct *new_vma);
|
||||
|
||||
/*
|
||||
* mmap_lock should be write-locked or vma should have been isolated under
|
||||
* write-locked mmap_lock protection.
|
||||
*/
|
||||
extern void free_vma_anon_name(struct vm_area_struct *vma);
|
||||
extern struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma);
|
||||
extern struct anon_vma_name *anon_vma_name_alloc(const char *name);
|
||||
extern void anon_vma_name_free(struct kref *kref);
|
||||
|
||||
/* mmap_lock should be read-locked */
|
||||
static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
|
||||
const char *name)
|
||||
static inline void anon_vma_name_get(struct anon_vma_name *anon_name)
|
||||
{
|
||||
const char *vma_name = vma_anon_name(vma);
|
||||
if (anon_name)
|
||||
kref_get(&anon_name->kref);
|
||||
}
|
||||
|
||||
/* either both NULL, or pointers to same string */
|
||||
if (vma_name == name)
|
||||
static inline void anon_vma_name_put(struct anon_vma_name *anon_name)
|
||||
{
|
||||
if (anon_name)
|
||||
kref_put(&anon_name->kref, anon_vma_name_free);
|
||||
}
|
||||
|
||||
static inline
|
||||
struct anon_vma_name *anon_vma_name_reuse(struct anon_vma_name *anon_name)
|
||||
{
|
||||
/* Prevent anon_name refcount saturation early on */
|
||||
if (kref_read(&anon_name->kref) < REFCOUNT_MAX) {
|
||||
anon_vma_name_get(anon_name);
|
||||
return anon_name;
|
||||
|
||||
}
|
||||
return anon_vma_name_alloc(anon_name->name);
|
||||
}
|
||||
|
||||
static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
|
||||
struct vm_area_struct *new_vma)
|
||||
{
|
||||
struct anon_vma_name *anon_name = anon_vma_name(orig_vma);
|
||||
|
||||
if (anon_name)
|
||||
new_vma->anon_name = anon_vma_name_reuse(anon_name);
|
||||
}
|
||||
|
||||
static inline void free_anon_vma_name(struct vm_area_struct *vma)
|
||||
{
|
||||
/*
|
||||
* Not using anon_vma_name because it generates a warning if mmap_lock
|
||||
* is not held, which might be the case here.
|
||||
*/
|
||||
if (!vma->vm_file)
|
||||
anon_vma_name_put(vma->anon_name);
|
||||
}
|
||||
|
||||
static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
|
||||
struct anon_vma_name *anon_name2)
|
||||
{
|
||||
if (anon_name1 == anon_name2)
|
||||
return true;
|
||||
|
||||
return name && vma_name && !strcmp(name, vma_name);
|
||||
return anon_name1 && anon_name2 &&
|
||||
!strcmp(anon_name1->name, anon_name2->name);
|
||||
}
|
||||
|
||||
#else /* CONFIG_ANON_VMA_NAME */
|
||||
static inline const char *vma_anon_name(struct vm_area_struct *vma)
|
||||
static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma,
|
||||
|
||||
static inline struct anon_vma_name *anon_vma_name_alloc(const char *name)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void anon_vma_name_get(struct anon_vma_name *anon_name) {}
|
||||
static inline void anon_vma_name_put(struct anon_vma_name *anon_name) {}
|
||||
static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
|
||||
struct vm_area_struct *new_vma) {}
|
||||
static inline void free_vma_anon_name(struct vm_area_struct *vma) {}
|
||||
static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
|
||||
const char *name)
|
||||
static inline void free_anon_vma_name(struct vm_area_struct *vma) {}
|
||||
|
||||
static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
|
||||
struct anon_vma_name *anon_name2)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_ANON_VMA_NAME */
|
||||
|
||||
static inline void init_tlb_flush_pending(struct mm_struct *mm)
|
||||
|
@ -416,7 +416,10 @@ struct vm_area_struct {
|
||||
struct rb_node rb;
|
||||
unsigned long rb_subtree_last;
|
||||
} shared;
|
||||
/* Serialized by mmap_sem. */
|
||||
/*
|
||||
* Serialized by mmap_sem. Never use directly because it is
|
||||
* valid only when vm_file is NULL. Use anon_vma_name instead.
|
||||
*/
|
||||
struct anon_vma_name *anon_name;
|
||||
};
|
||||
|
||||
|
@ -16,7 +16,7 @@ CONFIG_SYMBOLIC_ERRNAME=y
|
||||
#
|
||||
# Compile-time checks and compiler options
|
||||
#
|
||||
CONFIG_DEBUG_INFO=y
|
||||
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
|
||||
CONFIG_DEBUG_SECTION_MISMATCH=y
|
||||
CONFIG_FRAME_WARN=2048
|
||||
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
|
||||
|
@ -366,14 +366,14 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
|
||||
*new = data_race(*orig);
|
||||
INIT_LIST_HEAD(&new->anon_vma_chain);
|
||||
new->vm_next = new->vm_prev = NULL;
|
||||
dup_vma_anon_name(orig, new);
|
||||
dup_anon_vma_name(orig, new);
|
||||
}
|
||||
return new;
|
||||
}
|
||||
|
||||
void vm_area_free(struct vm_area_struct *vma)
|
||||
{
|
||||
free_vma_anon_name(vma);
|
||||
free_anon_vma_name(vma);
|
||||
kmem_cache_free(vm_area_cachep, vma);
|
||||
}
|
||||
|
||||
|
19
kernel/sys.c
19
kernel/sys.c
@ -7,6 +7,7 @@
|
||||
|
||||
#include <linux/export.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mm_inline.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/reboot.h>
|
||||
@ -2286,15 +2287,16 @@ static int prctl_set_vma(unsigned long opt, unsigned long addr,
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
const char __user *uname;
|
||||
char *name, *pch;
|
||||
struct anon_vma_name *anon_name = NULL;
|
||||
int error;
|
||||
|
||||
switch (opt) {
|
||||
case PR_SET_VMA_ANON_NAME:
|
||||
uname = (const char __user *)arg;
|
||||
if (uname) {
|
||||
name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN);
|
||||
char *name, *pch;
|
||||
|
||||
name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN);
|
||||
if (IS_ERR(name))
|
||||
return PTR_ERR(name);
|
||||
|
||||
@ -2304,15 +2306,18 @@ static int prctl_set_vma(unsigned long opt, unsigned long addr,
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Reset the name */
|
||||
name = NULL;
|
||||
/* anon_vma has its own copy */
|
||||
anon_name = anon_vma_name_alloc(name);
|
||||
kfree(name);
|
||||
if (!anon_name)
|
||||
return -ENOMEM;
|
||||
|
||||
}
|
||||
|
||||
mmap_write_lock(mm);
|
||||
error = madvise_set_anon_name(mm, addr, size, name);
|
||||
error = madvise_set_anon_name(mm, addr, size, anon_name);
|
||||
mmap_write_unlock(mm);
|
||||
kfree(name);
|
||||
anon_vma_name_put(anon_name);
|
||||
break;
|
||||
default:
|
||||
error = -EINVAL;
|
||||
|
90
mm/madvise.c
90
mm/madvise.c
@ -65,7 +65,7 @@ static int madvise_need_mmap_write(int behavior)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ANON_VMA_NAME
|
||||
static struct anon_vma_name *anon_vma_name_alloc(const char *name)
|
||||
struct anon_vma_name *anon_vma_name_alloc(const char *name)
|
||||
{
|
||||
struct anon_vma_name *anon_name;
|
||||
size_t count;
|
||||
@ -81,78 +81,48 @@ static struct anon_vma_name *anon_vma_name_alloc(const char *name)
|
||||
return anon_name;
|
||||
}
|
||||
|
||||
static void vma_anon_name_free(struct kref *kref)
|
||||
void anon_vma_name_free(struct kref *kref)
|
||||
{
|
||||
struct anon_vma_name *anon_name =
|
||||
container_of(kref, struct anon_vma_name, kref);
|
||||
kfree(anon_name);
|
||||
}
|
||||
|
||||
static inline bool has_vma_anon_name(struct vm_area_struct *vma)
|
||||
struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
|
||||
{
|
||||
return !vma->vm_file && vma->anon_name;
|
||||
}
|
||||
|
||||
const char *vma_anon_name(struct vm_area_struct *vma)
|
||||
{
|
||||
if (!has_vma_anon_name(vma))
|
||||
return NULL;
|
||||
|
||||
mmap_assert_locked(vma->vm_mm);
|
||||
|
||||
return vma->anon_name->name;
|
||||
}
|
||||
if (vma->vm_file)
|
||||
return NULL;
|
||||
|
||||
void dup_vma_anon_name(struct vm_area_struct *orig_vma,
|
||||
struct vm_area_struct *new_vma)
|
||||
{
|
||||
if (!has_vma_anon_name(orig_vma))
|
||||
return;
|
||||
|
||||
kref_get(&orig_vma->anon_name->kref);
|
||||
new_vma->anon_name = orig_vma->anon_name;
|
||||
}
|
||||
|
||||
void free_vma_anon_name(struct vm_area_struct *vma)
|
||||
{
|
||||
struct anon_vma_name *anon_name;
|
||||
|
||||
if (!has_vma_anon_name(vma))
|
||||
return;
|
||||
|
||||
anon_name = vma->anon_name;
|
||||
vma->anon_name = NULL;
|
||||
kref_put(&anon_name->kref, vma_anon_name_free);
|
||||
return vma->anon_name;
|
||||
}
|
||||
|
||||
/* mmap_lock should be write-locked */
|
||||
static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
|
||||
static int replace_anon_vma_name(struct vm_area_struct *vma,
|
||||
struct anon_vma_name *anon_name)
|
||||
{
|
||||
const char *anon_name;
|
||||
struct anon_vma_name *orig_name = anon_vma_name(vma);
|
||||
|
||||
if (!name) {
|
||||
free_vma_anon_name(vma);
|
||||
if (!anon_name) {
|
||||
vma->anon_name = NULL;
|
||||
anon_vma_name_put(orig_name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
anon_name = vma_anon_name(vma);
|
||||
if (anon_name) {
|
||||
/* Same name, nothing to do here */
|
||||
if (!strcmp(name, anon_name))
|
||||
if (anon_vma_name_eq(orig_name, anon_name))
|
||||
return 0;
|
||||
|
||||
free_vma_anon_name(vma);
|
||||
}
|
||||
vma->anon_name = anon_vma_name_alloc(name);
|
||||
if (!vma->anon_name)
|
||||
return -ENOMEM;
|
||||
vma->anon_name = anon_vma_name_reuse(anon_name);
|
||||
anon_vma_name_put(orig_name);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#else /* CONFIG_ANON_VMA_NAME */
|
||||
static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
|
||||
static int replace_anon_vma_name(struct vm_area_struct *vma,
|
||||
struct anon_vma_name *anon_name)
|
||||
{
|
||||
if (name)
|
||||
if (anon_name)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
@ -161,17 +131,19 @@ static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
|
||||
/*
|
||||
* Update the vm_flags on region of a vma, splitting it or merging it as
|
||||
* necessary. Must be called with mmap_sem held for writing;
|
||||
* Caller should ensure anon_name stability by raising its refcount even when
|
||||
* anon_name belongs to a valid vma because this function might free that vma.
|
||||
*/
|
||||
static int madvise_update_vma(struct vm_area_struct *vma,
|
||||
struct vm_area_struct **prev, unsigned long start,
|
||||
unsigned long end, unsigned long new_flags,
|
||||
const char *name)
|
||||
struct anon_vma_name *anon_name)
|
||||
{
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
int error;
|
||||
pgoff_t pgoff;
|
||||
|
||||
if (new_flags == vma->vm_flags && is_same_vma_anon_name(vma, name)) {
|
||||
if (new_flags == vma->vm_flags && anon_vma_name_eq(anon_vma_name(vma), anon_name)) {
|
||||
*prev = vma;
|
||||
return 0;
|
||||
}
|
||||
@ -179,7 +151,7 @@ static int madvise_update_vma(struct vm_area_struct *vma,
|
||||
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
|
||||
*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
|
||||
vma->vm_file, pgoff, vma_policy(vma),
|
||||
vma->vm_userfaultfd_ctx, name);
|
||||
vma->vm_userfaultfd_ctx, anon_name);
|
||||
if (*prev) {
|
||||
vma = *prev;
|
||||
goto success;
|
||||
@ -209,7 +181,7 @@ success:
|
||||
*/
|
||||
vma->vm_flags = new_flags;
|
||||
if (!vma->vm_file) {
|
||||
error = replace_vma_anon_name(vma, name);
|
||||
error = replace_anon_vma_name(vma, anon_name);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
@ -975,6 +947,7 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
|
||||
unsigned long behavior)
|
||||
{
|
||||
int error;
|
||||
struct anon_vma_name *anon_name;
|
||||
unsigned long new_flags = vma->vm_flags;
|
||||
|
||||
switch (behavior) {
|
||||
@ -1040,8 +1013,11 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
|
||||
break;
|
||||
}
|
||||
|
||||
anon_name = anon_vma_name(vma);
|
||||
anon_vma_name_get(anon_name);
|
||||
error = madvise_update_vma(vma, prev, start, end, new_flags,
|
||||
vma_anon_name(vma));
|
||||
anon_name);
|
||||
anon_vma_name_put(anon_name);
|
||||
|
||||
out:
|
||||
/*
|
||||
@ -1225,7 +1201,7 @@ int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
|
||||
static int madvise_vma_anon_name(struct vm_area_struct *vma,
|
||||
struct vm_area_struct **prev,
|
||||
unsigned long start, unsigned long end,
|
||||
unsigned long name)
|
||||
unsigned long anon_name)
|
||||
{
|
||||
int error;
|
||||
|
||||
@ -1234,7 +1210,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma,
|
||||
return -EBADF;
|
||||
|
||||
error = madvise_update_vma(vma, prev, start, end, vma->vm_flags,
|
||||
(const char *)name);
|
||||
(struct anon_vma_name *)anon_name);
|
||||
|
||||
/*
|
||||
* madvise() returns EAGAIN if kernel resources, such as
|
||||
@ -1246,7 +1222,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma,
|
||||
}
|
||||
|
||||
int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
|
||||
unsigned long len_in, const char *name)
|
||||
unsigned long len_in, struct anon_vma_name *anon_name)
|
||||
{
|
||||
unsigned long end;
|
||||
unsigned long len;
|
||||
@ -1266,7 +1242,7 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
|
||||
if (end == start)
|
||||
return 0;
|
||||
|
||||
return madvise_walk_vmas(mm, start, end, (unsigned long)name,
|
||||
return madvise_walk_vmas(mm, start, end, (unsigned long)anon_name,
|
||||
madvise_vma_anon_name);
|
||||
}
|
||||
#endif /* CONFIG_ANON_VMA_NAME */
|
||||
|
42
mm/memfd.c
42
mm/memfd.c
@ -31,20 +31,28 @@
|
||||
static void memfd_tag_pins(struct xa_state *xas)
|
||||
{
|
||||
struct page *page;
|
||||
unsigned int tagged = 0;
|
||||
int latency = 0;
|
||||
int cache_count;
|
||||
|
||||
lru_add_drain();
|
||||
|
||||
xas_lock_irq(xas);
|
||||
xas_for_each(xas, page, ULONG_MAX) {
|
||||
if (xa_is_value(page))
|
||||
continue;
|
||||
page = find_subpage(page, xas->xa_index);
|
||||
if (page_count(page) - page_mapcount(page) > 1)
|
||||
xas_set_mark(xas, MEMFD_TAG_PINNED);
|
||||
cache_count = 1;
|
||||
if (!xa_is_value(page) &&
|
||||
PageTransHuge(page) && !PageHuge(page))
|
||||
cache_count = HPAGE_PMD_NR;
|
||||
|
||||
if (++tagged % XA_CHECK_SCHED)
|
||||
if (!xa_is_value(page) &&
|
||||
page_count(page) - total_mapcount(page) != cache_count)
|
||||
xas_set_mark(xas, MEMFD_TAG_PINNED);
|
||||
if (cache_count != 1)
|
||||
xas_set(xas, page->index + cache_count);
|
||||
|
||||
latency += cache_count;
|
||||
if (latency < XA_CHECK_SCHED)
|
||||
continue;
|
||||
latency = 0;
|
||||
|
||||
xas_pause(xas);
|
||||
xas_unlock_irq(xas);
|
||||
@ -73,7 +81,8 @@ static int memfd_wait_for_pins(struct address_space *mapping)
|
||||
|
||||
error = 0;
|
||||
for (scan = 0; scan <= LAST_SCAN; scan++) {
|
||||
unsigned int tagged = 0;
|
||||
int latency = 0;
|
||||
int cache_count;
|
||||
|
||||
if (!xas_marked(&xas, MEMFD_TAG_PINNED))
|
||||
break;
|
||||
@ -87,10 +96,14 @@ static int memfd_wait_for_pins(struct address_space *mapping)
|
||||
xas_lock_irq(&xas);
|
||||
xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
|
||||
bool clear = true;
|
||||
if (xa_is_value(page))
|
||||
continue;
|
||||
page = find_subpage(page, xas.xa_index);
|
||||
if (page_count(page) - page_mapcount(page) != 1) {
|
||||
|
||||
cache_count = 1;
|
||||
if (!xa_is_value(page) &&
|
||||
PageTransHuge(page) && !PageHuge(page))
|
||||
cache_count = HPAGE_PMD_NR;
|
||||
|
||||
if (!xa_is_value(page) && cache_count !=
|
||||
page_count(page) - total_mapcount(page)) {
|
||||
/*
|
||||
* On the last scan, we clean up all those tags
|
||||
* we inserted; but make a note that we still
|
||||
@ -103,8 +116,11 @@ static int memfd_wait_for_pins(struct address_space *mapping)
|
||||
}
|
||||
if (clear)
|
||||
xas_clear_mark(&xas, MEMFD_TAG_PINNED);
|
||||
if (++tagged % XA_CHECK_SCHED)
|
||||
|
||||
latency += cache_count;
|
||||
if (latency < XA_CHECK_SCHED)
|
||||
continue;
|
||||
latency = 0;
|
||||
|
||||
xas_pause(&xas);
|
||||
xas_unlock_irq(&xas);
|
||||
|
@ -814,7 +814,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
|
||||
prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
|
||||
vma->anon_vma, vma->vm_file, pgoff,
|
||||
new_pol, vma->vm_userfaultfd_ctx,
|
||||
vma_anon_name(vma));
|
||||
anon_vma_name(vma));
|
||||
if (prev) {
|
||||
vma = prev;
|
||||
next = vma->vm_next;
|
||||
|
@ -512,7 +512,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
|
||||
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
|
||||
*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
|
||||
vma->vm_file, pgoff, vma_policy(vma),
|
||||
vma->vm_userfaultfd_ctx, vma_anon_name(vma));
|
||||
vma->vm_userfaultfd_ctx, anon_vma_name(vma));
|
||||
if (*prev) {
|
||||
vma = *prev;
|
||||
goto success;
|
||||
|
12
mm/mmap.c
12
mm/mmap.c
@ -1031,7 +1031,7 @@ again:
|
||||
static inline int is_mergeable_vma(struct vm_area_struct *vma,
|
||||
struct file *file, unsigned long vm_flags,
|
||||
struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
|
||||
const char *anon_name)
|
||||
struct anon_vma_name *anon_name)
|
||||
{
|
||||
/*
|
||||
* VM_SOFTDIRTY should not prevent from VMA merging, if we
|
||||
@ -1049,7 +1049,7 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
|
||||
return 0;
|
||||
if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
|
||||
return 0;
|
||||
if (!is_same_vma_anon_name(vma, anon_name))
|
||||
if (!anon_vma_name_eq(anon_vma_name(vma), anon_name))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
@ -1084,7 +1084,7 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
|
||||
struct anon_vma *anon_vma, struct file *file,
|
||||
pgoff_t vm_pgoff,
|
||||
struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
|
||||
const char *anon_name)
|
||||
struct anon_vma_name *anon_name)
|
||||
{
|
||||
if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
|
||||
is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
|
||||
@ -1106,7 +1106,7 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
|
||||
struct anon_vma *anon_vma, struct file *file,
|
||||
pgoff_t vm_pgoff,
|
||||
struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
|
||||
const char *anon_name)
|
||||
struct anon_vma_name *anon_name)
|
||||
{
|
||||
if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
|
||||
is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
|
||||
@ -1167,7 +1167,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
|
||||
struct anon_vma *anon_vma, struct file *file,
|
||||
pgoff_t pgoff, struct mempolicy *policy,
|
||||
struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
|
||||
const char *anon_name)
|
||||
struct anon_vma_name *anon_name)
|
||||
{
|
||||
pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
|
||||
struct vm_area_struct *area, *next;
|
||||
@ -3256,7 +3256,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
|
||||
return NULL; /* should never get here */
|
||||
new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
|
||||
vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
|
||||
vma->vm_userfaultfd_ctx, vma_anon_name(vma));
|
||||
vma->vm_userfaultfd_ctx, anon_vma_name(vma));
|
||||
if (new_vma) {
|
||||
/*
|
||||
* Source vma may have been merged into new_vma
|
||||
|
@ -464,7 +464,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
|
||||
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
|
||||
*pprev = vma_merge(mm, *pprev, start, end, newflags,
|
||||
vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
|
||||
vma->vm_userfaultfd_ctx, vma_anon_name(vma));
|
||||
vma->vm_userfaultfd_ctx, anon_vma_name(vma));
|
||||
if (*pprev) {
|
||||
vma = *pprev;
|
||||
VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY);
|
||||
|
@ -3,9 +3,10 @@
|
||||
* hugepage-mremap:
|
||||
*
|
||||
* Example of remapping huge page memory in a user application using the
|
||||
* mremap system call. Code assumes a hugetlbfs filesystem is mounted
|
||||
* at './huge'. The amount of memory used by this test is decided by a command
|
||||
* line argument in MBs. If missing, the default amount is 10MB.
|
||||
* mremap system call. The path to a file in a hugetlbfs filesystem must
|
||||
* be passed as the last argument to this test. The amount of memory used
|
||||
* by this test in MBs can optionally be passed as an argument. If no memory
|
||||
* amount is passed, the default amount is 10MB.
|
||||
*
|
||||
* To make sure the test triggers pmd sharing and goes through the 'unshare'
|
||||
* path in the mremap code use 1GB (1024) or more.
|
||||
@ -25,7 +26,6 @@
|
||||
#define DEFAULT_LENGTH_MB 10UL
|
||||
#define MB_TO_BYTES(x) (x * 1024 * 1024)
|
||||
|
||||
#define FILE_NAME "huge/hugepagefile"
|
||||
#define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC)
|
||||
#define FLAGS (MAP_SHARED | MAP_ANONYMOUS)
|
||||
|
||||
@ -107,17 +107,26 @@ static void register_region_with_uffd(char *addr, size_t len)
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
size_t length;
|
||||
|
||||
if (argc != 2 && argc != 3) {
|
||||
printf("Usage: %s [length_in_MB] <hugetlb_file>\n", argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* Read memory length as the first arg if valid, otherwise fallback to
|
||||
* the default length. Any additional args are ignored.
|
||||
* the default length.
|
||||
*/
|
||||
size_t length = argc > 1 ? (size_t)atoi(argv[1]) : 0UL;
|
||||
if (argc == 3)
|
||||
length = argc > 2 ? (size_t)atoi(argv[1]) : 0UL;
|
||||
|
||||
length = length > 0 ? length : DEFAULT_LENGTH_MB;
|
||||
length = MB_TO_BYTES(length);
|
||||
|
||||
int ret = 0;
|
||||
|
||||
int fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
|
||||
/* last arg is the hugetlb file name */
|
||||
int fd = open(argv[argc-1], O_CREAT | O_RDWR, 0755);
|
||||
|
||||
if (fd < 0) {
|
||||
perror("Open failed");
|
||||
@ -169,5 +178,8 @@ int main(int argc, char *argv[])
|
||||
|
||||
munmap(addr, length);
|
||||
|
||||
close(fd);
|
||||
unlink(argv[argc-1]);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -111,13 +111,14 @@ fi
|
||||
echo "-----------------------"
|
||||
echo "running hugepage-mremap"
|
||||
echo "-----------------------"
|
||||
./hugepage-mremap 256
|
||||
./hugepage-mremap $mnt/huge_mremap
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "[FAIL]"
|
||||
exitcode=1
|
||||
else
|
||||
echo "[PASS]"
|
||||
fi
|
||||
rm -f $mnt/huge_mremap
|
||||
|
||||
echo "NOTE: The above hugetlb tests provide minimal coverage. Use"
|
||||
echo " https://github.com/libhugetlbfs/libhugetlbfs.git for"
|
||||
|
@ -46,6 +46,7 @@
|
||||
#include <signal.h>
|
||||
#include <poll.h>
|
||||
#include <string.h>
|
||||
#include <linux/mman.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
Loading…
Reference in New Issue
Block a user