mirror of
https://github.com/torvalds/linux.git
synced 2024-12-27 13:22:23 +00:00
60490e7966
This problem can be reproduced with CONFIG_PERF_USE_VMALLOC enabled on
both x86_64 and aarch64 arch when using sysdig -B(using ebpf)[1].
sysdig -B works fine after rebuilding the kernel with
CONFIG_PERF_USE_VMALLOC disabled.
I tracked it down to the if condition event->rb->nr_pages != nr_pages
in perf_mmap is true when CONFIG_PERF_USE_VMALLOC is enabled where
event->rb->nr_pages = 1 and nr_pages = 2048 resulting perf_mmap to
return -EINVAL. This is because when CONFIG_PERF_USE_VMALLOC is
enabled, rb->nr_pages is always equal to 1.
Arch with CONFIG_PERF_USE_VMALLOC enabled by default:
arc/arm/csky/mips/sh/sparc/xtensa
Arch with CONFIG_PERF_USE_VMALLOC disabled by default:
x86_64/aarch64/...
Fix this problem by using data_page_nr()
[1] https://github.com/draios/sysdig
Fixes: 906010b213
("perf_event: Provide vmalloc() based mmap() backing")
Signed-off-by: Zhipeng Xie <xiezhipeng1@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220209145417.6495-1-xiezhipeng1@huawei.com
247 lines
5.8 KiB
C
247 lines
5.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _KERNEL_EVENTS_INTERNAL_H
|
|
#define _KERNEL_EVENTS_INTERNAL_H
|
|
|
|
#include <linux/hardirq.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/refcount.h>
|
|
|
|
/* Buffer handling */
|
|
|
|
#define RING_BUFFER_WRITABLE 0x01
|
|
|
|
struct perf_buffer {
|
|
refcount_t refcount;
|
|
struct rcu_head rcu_head;
|
|
#ifdef CONFIG_PERF_USE_VMALLOC
|
|
struct work_struct work;
|
|
int page_order; /* allocation order */
|
|
#endif
|
|
int nr_pages; /* nr of data pages */
|
|
int overwrite; /* can overwrite itself */
|
|
int paused; /* can write into ring buffer */
|
|
|
|
atomic_t poll; /* POLL_ for wakeups */
|
|
|
|
local_t head; /* write position */
|
|
unsigned int nest; /* nested writers */
|
|
local_t events; /* event limit */
|
|
local_t wakeup; /* wakeup stamp */
|
|
local_t lost; /* nr records lost */
|
|
|
|
long watermark; /* wakeup watermark */
|
|
long aux_watermark;
|
|
/* poll crap */
|
|
spinlock_t event_lock;
|
|
struct list_head event_list;
|
|
|
|
atomic_t mmap_count;
|
|
unsigned long mmap_locked;
|
|
struct user_struct *mmap_user;
|
|
|
|
/* AUX area */
|
|
long aux_head;
|
|
unsigned int aux_nest;
|
|
long aux_wakeup; /* last aux_watermark boundary crossed by aux_head */
|
|
unsigned long aux_pgoff;
|
|
int aux_nr_pages;
|
|
int aux_overwrite;
|
|
atomic_t aux_mmap_count;
|
|
unsigned long aux_mmap_locked;
|
|
void (*free_aux)(void *);
|
|
refcount_t aux_refcount;
|
|
int aux_in_sampling;
|
|
void **aux_pages;
|
|
void *aux_priv;
|
|
|
|
struct perf_event_mmap_page *user_page;
|
|
void *data_pages[];
|
|
};
|
|
|
|
extern void rb_free(struct perf_buffer *rb);
|
|
|
|
static inline void rb_free_rcu(struct rcu_head *rcu_head)
|
|
{
|
|
struct perf_buffer *rb;
|
|
|
|
rb = container_of(rcu_head, struct perf_buffer, rcu_head);
|
|
rb_free(rb);
|
|
}
|
|
|
|
static inline void rb_toggle_paused(struct perf_buffer *rb, bool pause)
|
|
{
|
|
if (!pause && rb->nr_pages)
|
|
rb->paused = 0;
|
|
else
|
|
rb->paused = 1;
|
|
}
|
|
|
|
extern struct perf_buffer *
|
|
rb_alloc(int nr_pages, long watermark, int cpu, int flags);
|
|
extern void perf_event_wakeup(struct perf_event *event);
|
|
extern int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
|
|
pgoff_t pgoff, int nr_pages, long watermark, int flags);
|
|
extern void rb_free_aux(struct perf_buffer *rb);
|
|
extern struct perf_buffer *ring_buffer_get(struct perf_event *event);
|
|
extern void ring_buffer_put(struct perf_buffer *rb);
|
|
|
|
static inline bool rb_has_aux(struct perf_buffer *rb)
|
|
{
|
|
return !!rb->aux_nr_pages;
|
|
}
|
|
|
|
void perf_event_aux_event(struct perf_event *event, unsigned long head,
|
|
unsigned long size, u64 flags);
|
|
|
|
extern struct page *
|
|
perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff);
|
|
|
|
#ifdef CONFIG_PERF_USE_VMALLOC
|
|
/*
|
|
* Back perf_mmap() with vmalloc memory.
|
|
*
|
|
* Required for architectures that have d-cache aliasing issues.
|
|
*/
|
|
|
|
static inline int page_order(struct perf_buffer *rb)
|
|
{
|
|
return rb->page_order;
|
|
}
|
|
|
|
#else
|
|
|
|
static inline int page_order(struct perf_buffer *rb)
|
|
{
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
static inline int data_page_nr(struct perf_buffer *rb)
|
|
{
|
|
return rb->nr_pages << page_order(rb);
|
|
}
|
|
|
|
static inline unsigned long perf_data_size(struct perf_buffer *rb)
|
|
{
|
|
return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
|
|
}
|
|
|
|
static inline unsigned long perf_aux_size(struct perf_buffer *rb)
|
|
{
|
|
return rb->aux_nr_pages << PAGE_SHIFT;
|
|
}
|
|
|
|
#define __DEFINE_OUTPUT_COPY_BODY(advance_buf, memcpy_func, ...) \
|
|
{ \
|
|
unsigned long size, written; \
|
|
\
|
|
do { \
|
|
size = min(handle->size, len); \
|
|
written = memcpy_func(__VA_ARGS__); \
|
|
written = size - written; \
|
|
\
|
|
len -= written; \
|
|
handle->addr += written; \
|
|
if (advance_buf) \
|
|
buf += written; \
|
|
handle->size -= written; \
|
|
if (!handle->size) { \
|
|
struct perf_buffer *rb = handle->rb; \
|
|
\
|
|
handle->page++; \
|
|
handle->page &= rb->nr_pages - 1; \
|
|
handle->addr = rb->data_pages[handle->page]; \
|
|
handle->size = PAGE_SIZE << page_order(rb); \
|
|
} \
|
|
} while (len && written == size); \
|
|
\
|
|
return len; \
|
|
}
|
|
|
|
#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \
|
|
static inline unsigned long \
|
|
func_name(struct perf_output_handle *handle, \
|
|
const void *buf, unsigned long len) \
|
|
__DEFINE_OUTPUT_COPY_BODY(true, memcpy_func, handle->addr, buf, size)
|
|
|
|
static inline unsigned long
|
|
__output_custom(struct perf_output_handle *handle, perf_copy_f copy_func,
|
|
const void *buf, unsigned long len)
|
|
{
|
|
unsigned long orig_len = len;
|
|
__DEFINE_OUTPUT_COPY_BODY(false, copy_func, handle->addr, buf,
|
|
orig_len - len, size)
|
|
}
|
|
|
|
static inline unsigned long
|
|
memcpy_common(void *dst, const void *src, unsigned long n)
|
|
{
|
|
memcpy(dst, src, n);
|
|
return 0;
|
|
}
|
|
|
|
DEFINE_OUTPUT_COPY(__output_copy, memcpy_common)
|
|
|
|
static inline unsigned long
|
|
memcpy_skip(void *dst, const void *src, unsigned long n)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
DEFINE_OUTPUT_COPY(__output_skip, memcpy_skip)
|
|
|
|
#ifndef arch_perf_out_copy_user
|
|
#define arch_perf_out_copy_user arch_perf_out_copy_user
|
|
|
|
static inline unsigned long
|
|
arch_perf_out_copy_user(void *dst, const void *src, unsigned long n)
|
|
{
|
|
unsigned long ret;
|
|
|
|
pagefault_disable();
|
|
ret = __copy_from_user_inatomic(dst, src, n);
|
|
pagefault_enable();
|
|
|
|
return ret;
|
|
}
|
|
#endif
|
|
|
|
DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
|
|
|
|
static inline int get_recursion_context(int *recursion)
|
|
{
|
|
unsigned char rctx = interrupt_context_level();
|
|
|
|
if (recursion[rctx])
|
|
return -1;
|
|
|
|
recursion[rctx]++;
|
|
barrier();
|
|
|
|
return rctx;
|
|
}
|
|
|
|
static inline void put_recursion_context(int *recursion, int rctx)
|
|
{
|
|
barrier();
|
|
recursion[rctx]--;
|
|
}
|
|
|
|
#ifdef CONFIG_HAVE_PERF_USER_STACK_DUMP
|
|
static inline bool arch_perf_have_user_stack_dump(void)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
#define perf_user_stack_pointer(regs) user_stack_pointer(regs)
|
|
#else
|
|
static inline bool arch_perf_have_user_stack_dump(void)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
#define perf_user_stack_pointer(regs) 0
|
|
#endif /* CONFIG_HAVE_PERF_USER_STACK_DUMP */
|
|
|
|
#endif /* _KERNEL_EVENTS_INTERNAL_H */
|