mirror of
https://github.com/torvalds/linux.git
synced 2024-11-11 06:31:49 +00:00
userfaultfd: move userfaultfd_ctx struct to header file
Patch series "per-vma locks in userfaultfd", v7. Performing userfaultfd operations (like copy/move etc.) in critical section of mmap_lock (read-mode) causes significant contention on the lock when operations requiring the lock in write-mode are taking place concurrently. We can use per-vma locks instead to significantly reduce the contention issue. Android runtime's Garbage Collector uses userfaultfd for concurrent compaction. mmap-lock contention during compaction potentially causes jittery experience for the user. During one such reproducible scenario, we observed the following improvements with this patch-set: - Wall clock time of compaction phase came down from ~3s to <500ms - Uninterruptible sleep time (across all threads in the process) was ~10ms (none in mmap_lock) during compaction, instead of >20s This patch (of 4): Move the struct to userfaultfd_k.h to be accessible from mm/userfaultfd.c. There are no other changes in the struct. This is required to prepare for using per-vma locks in userfaultfd operations. Link: https://lkml.kernel.org/r/20240215182756.3448972-1-lokeshgidra@google.com Link: https://lkml.kernel.org/r/20240215182756.3448972-2-lokeshgidra@google.com Signed-off-by: Lokesh Gidra <lokeshgidra@google.com> Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org> Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Axel Rasmussen <axelrasmussen@google.com> Cc: Brian Geffon <bgeffon@google.com> Cc: David Hildenbrand <david@redhat.com> Cc: Jann Horn <jannh@google.com> Cc: Kalesh Singh <kaleshsingh@google.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Nicolas Geoffray <ngeoffray@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Ryan Roberts <ryan.roberts@arm.com> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Tim Murray <timmurray@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
952237b5a9
commit
f91e6b41dd
@ -50,45 +50,6 @@ static struct ctl_table vm_userfaultfd_table[] = {
|
||||
|
||||
static struct kmem_cache *userfaultfd_ctx_cachep __ro_after_init;
|
||||
|
||||
/*
|
||||
* Start with fault_pending_wqh and fault_wqh so they're more likely
|
||||
* to be in the same cacheline.
|
||||
*
|
||||
* Locking order:
|
||||
* fd_wqh.lock
|
||||
* fault_pending_wqh.lock
|
||||
* fault_wqh.lock
|
||||
* event_wqh.lock
|
||||
*
|
||||
* To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
|
||||
* since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
|
||||
* also taken in IRQ context.
|
||||
*/
|
||||
struct userfaultfd_ctx {
|
||||
/* waitqueue head for the pending (i.e. not read) userfaults */
|
||||
wait_queue_head_t fault_pending_wqh;
|
||||
/* waitqueue head for the userfaults */
|
||||
wait_queue_head_t fault_wqh;
|
||||
/* waitqueue head for the pseudo fd to wakeup poll/read */
|
||||
wait_queue_head_t fd_wqh;
|
||||
/* waitqueue head for events */
|
||||
wait_queue_head_t event_wqh;
|
||||
/* a refile sequence protected by fault_pending_wqh lock */
|
||||
seqcount_spinlock_t refile_seq;
|
||||
/* pseudo fd refcounting */
|
||||
refcount_t refcount;
|
||||
/* userfaultfd syscall flags */
|
||||
unsigned int flags;
|
||||
/* features requested from the userspace */
|
||||
unsigned int features;
|
||||
/* released */
|
||||
bool released;
|
||||
/* memory mappings are changing because of non-cooperative event */
|
||||
atomic_t mmap_changing;
|
||||
/* mm with one or more vmas attached to this userfaultfd_ctx */
|
||||
struct mm_struct *mm;
|
||||
};
|
||||
|
||||
struct userfaultfd_fork_ctx {
|
||||
struct userfaultfd_ctx *orig;
|
||||
struct userfaultfd_ctx *new;
|
||||
|
@ -36,6 +36,45 @@
|
||||
#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
|
||||
#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
|
||||
|
||||
/*
|
||||
* Start with fault_pending_wqh and fault_wqh so they're more likely
|
||||
* to be in the same cacheline.
|
||||
*
|
||||
* Locking order:
|
||||
* fd_wqh.lock
|
||||
* fault_pending_wqh.lock
|
||||
* fault_wqh.lock
|
||||
* event_wqh.lock
|
||||
*
|
||||
* To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
|
||||
* since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
|
||||
* also taken in IRQ context.
|
||||
*/
|
||||
struct userfaultfd_ctx {
|
||||
/* waitqueue head for the pending (i.e. not read) userfaults */
|
||||
wait_queue_head_t fault_pending_wqh;
|
||||
/* waitqueue head for the userfaults */
|
||||
wait_queue_head_t fault_wqh;
|
||||
/* waitqueue head for the pseudo fd to wakeup poll/read */
|
||||
wait_queue_head_t fd_wqh;
|
||||
/* waitqueue head for events */
|
||||
wait_queue_head_t event_wqh;
|
||||
/* a refile sequence protected by fault_pending_wqh lock */
|
||||
seqcount_spinlock_t refile_seq;
|
||||
/* pseudo fd refcounting */
|
||||
refcount_t refcount;
|
||||
/* userfaultfd syscall flags */
|
||||
unsigned int flags;
|
||||
/* features requested from the userspace */
|
||||
unsigned int features;
|
||||
/* released */
|
||||
bool released;
|
||||
/* memory mappings are changing because of non-cooperative event */
|
||||
atomic_t mmap_changing;
|
||||
/* mm with one or more vmas attached to this userfaultfd_ctx */
|
||||
struct mm_struct *mm;
|
||||
};
|
||||
|
||||
extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
|
||||
|
||||
/* A combined operation mode + behavior flags. */
|
||||
|
Loading…
Reference in New Issue
Block a user