Merge tag 'trace-v4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace

Pull tracing update from Steven Rostedt:
 "Mostly this is just clean ups and micro optimizations.

  The changes with more meat are:

   - Allowing the trace event filters to filter on CPU number and
     process ids

   - Two new markers for trace output latency were added (10 and 100
     msec latencies)

   - Have tracing_thresh filter function profiling time

  I also worked on modifying the ring buffer code for some future work,
  and moved the adding of the timestamp around.  One of my changes
  caused a regression, and since other changes were built on top of it
  and already tested, I had to operate a revert of that change.  Instead
  of rebasing, this change set has the code that caused a regression as
  well as the code to revert that change without touching the other
  changes that were made on top of it"

* tag 'trace-v4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace:
  ring-buffer: Revert "ring-buffer: Get timestamp after event is allocated"
  tracing: Don't make assumptions about length of string on task rename
  tracing: Allow triggers to filter for CPU ids and process names
  ftrace: Format MCOUNT_ADDR address as type unsigned long
  tracing: Introduce two additional marks for delay
  ftrace: Fix function_graph duration spacing with 7-digits
  ftrace: add tracing_thresh to function profile
  tracing: Clean up stack tracing and fix fentry updates
  ring-buffer: Reorganize function locations
  ring-buffer: Make sure event has enough room for extend and padding
  ring-buffer: Get timestamp after event is allocated
  ring-buffer: Move the adding of the extended timestamp out of line
  ring-buffer: Add event descriptor to simplify passing data
  ftrace: correct the counter increment for trace_buffer data
  tracing: Fix for non-continuous cpu ids
  tracing: Prefer kcalloc over kzalloc with multiply
Linus Torvalds 2015-09-08 14:04:14 -07:00
commit 59a47fff02
16 changed files with 569 additions and 444 deletions


@ -691,6 +691,8 @@ The above is mostly meaningful for kernel developers.
  The marks are determined by the difference between this
  current trace and the next trace.
    '$' - greater than 1 second
+   '@' - greater than 100 millisecond
+   '*' - greater than 10 millisecond
    '#' - greater than 1000 microsecond
    '!' - greater than 100 microsecond
    '+' - greater than 10 microsecond
@ -1944,26 +1946,49 @@ want, depending on your needs.
  ie:

-  0)               |    up_write() {
-  0)   0.646 us    |      _spin_lock_irqsave();
-  0)   0.684 us    |      _spin_unlock_irqrestore();
-  0)   3.123 us    |    }
-  0)   0.548 us    |      fput();
-  0) + 58.628 us   |  }
+  3) # 1837.709 us |  } /* __switch_to */
+  3)               |  finish_task_switch() {
+  3)   0.313 us    |    _raw_spin_unlock_irq();
+  3)   3.177 us    |  }
+  3) # 1889.063 us |  } /* __schedule */
+  3) ! 140.417 us  |  } /* __schedule */
+  3) # 2034.948 us |  } /* schedule */
+  3) * 33998.59 us |  } /* schedule_preempt_disabled */

  [...]

-  0)               |      putname() {
-  0)               |        kmem_cache_free() {
-  0)   0.518 us    |          __phys_addr();
-  0)   1.757 us    |        }
-  0)   2.861 us    |      }
-  0) ! 115.305 us  |    }
-  0) ! 116.402 us  |  }
+  1)   0.260 us    |  msecs_to_jiffies();
+  1)   0.313 us    |  __rcu_read_unlock();
+  1) + 61.770 us   |  }
+  1) + 64.479 us   |  }
+  1)   0.313 us    |  rcu_bh_qs();
+  1)   0.313 us    |  __local_bh_enable();
+  1) ! 217.240 us  |  }
+  1)   0.365 us    |  idle_cpu();
+  1)               |  rcu_irq_exit() {
+  1)   0.417 us    |    rcu_eqs_enter_common.isra.47();
+  1)   3.125 us    |  }
+  1) ! 227.812 us  |  }
+  1) ! 457.395 us  |  }
+  1) @ 119760.2 us |  }
+
+  [...]
+
+  2)               |  handle_IPI() {
+  1)   6.979 us    |  }
+  2)   0.417 us    |    scheduler_ipi();
+  1)   9.791 us    |  }
+  1) + 12.917 us   |  }
+  2)   3.490 us    |  }
+  1) + 15.729 us   |  }
+  1) + 18.542 us   |  }
+  2) $ 3594274 us  |  }

  + means that the function exceeded 10 usecs.
  ! means that the function exceeded 100 usecs.
  # means that the function exceeded 1000 usecs.
+ * means that the function exceeded 10 msecs.
+ @ means that the function exceeded 100 msecs.
  $ means that the function exceeded 1 sec.


@ -6,7 +6,7 @@
 #ifndef __ASSEMBLY__
 extern void mcount_wrapper(void);
-#define MCOUNT_ADDR		((long)(mcount_wrapper))
+#define MCOUNT_ADDR		((unsigned long)(mcount_wrapper))

 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {


@ -3,7 +3,7 @@
 #ifdef CONFIG_FUNCTION_TRACER
-#define MCOUNT_ADDR		((long)(_mcount))
+#define MCOUNT_ADDR		((unsigned long)(_mcount))
 #define MCOUNT_INSN_SIZE	8 /* sizeof mcount call */

 #ifndef __ASSEMBLY__

@ -2,7 +2,7 @@
 #define _ASM_POWERPC_FTRACE
 #ifdef CONFIG_FUNCTION_TRACER
-#define MCOUNT_ADDR		((long)(_mcount))
+#define MCOUNT_ADDR		((unsigned long)(_mcount))
 #define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */

 #ifdef __ASSEMBLY__


@ -9,7 +9,7 @@
 #ifndef __ASSEMBLY__
 extern void mcount(void);
-#define MCOUNT_ADDR		((long)(mcount))
+#define MCOUNT_ADDR		((unsigned long)(mcount))
 #ifdef CONFIG_DYNAMIC_FTRACE
 #define CALL_ADDR		((long)(ftrace_call))


@ -2,7 +2,7 @@
 #define _ASM_SPARC64_FTRACE
 #ifdef CONFIG_MCOUNT
-#define MCOUNT_ADDR		((long)(_mcount))
+#define MCOUNT_ADDR		((unsigned long)(_mcount))
 #define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */

 #ifndef __ASSEMBLY__


@ -3,9 +3,9 @@
 #ifdef CONFIG_FUNCTION_TRACER
 #ifdef CC_USING_FENTRY
-# define MCOUNT_ADDR		((long)(__fentry__))
+# define MCOUNT_ADDR		((unsigned long)(__fentry__))
 #else
-# define MCOUNT_ADDR		((long)(mcount))
+# define MCOUNT_ADDR		((unsigned long)(mcount))
 #endif
 #define MCOUNT_INSN_SIZE	5 /* sizeof mcount call */
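All of the MCOUNT_ADDR hunks above make the same change: the address of the arch's mcount/__fentry__ stub is now cast to unsigned long, the type ftrace uses for instruction addresses elsewhere. A minimal user-space sketch (not kernel code; the address constant is invented) of how a signed cast of a high address can misbehave in comparisons:

#include <stdio.h>

/* Stand-in for a function address that lands in the upper half of the
 * address space, as kernel text does on many 64-bit configurations.
 * The value is made up for illustration. */
#define FAKE_TEXT_ADDR 0xffffffff81000000UL

int main(void)
{
	long s = (long)FAKE_TEXT_ADDR;            /* sign bit set: negative */
	unsigned long u = (unsigned long)FAKE_TEXT_ADDR;

	/* A signed comparison against a low "boundary" address goes the
	 * wrong way once the sign bit is set. */
	printf("signed   < 0x1000: %d\n", s < 0x1000);   /* 1 on a typical LP64 system */
	printf("unsigned < 0x1000: %d\n", u < 0x1000UL); /* 0 */
	return 0;
}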


@ -46,7 +46,7 @@ TRACE_EVENT(task_rename,
 	TP_fast_assign(
 		__entry->pid = task->pid;
 		memcpy(entry->oldcomm, task->comm, TASK_COMM_LEN);
-		memcpy(entry->newcomm, comm, TASK_COMM_LEN);
+		strlcpy(entry->newcomm, comm, TASK_COMM_LEN);
 		__entry->oom_score_adj = task->signal->oom_score_adj;
 	),
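The memcpy() here always read TASK_COMM_LEN bytes from the new-name string, even when that string is shorter, so whatever follows the terminating NUL in the source buffer was read and copied into the event; strlcpy() stops at the NUL. A small user-space illustration, with my_strlcpy() standing in for the kernel helper:

#include <stdio.h>
#include <string.h>

#define TASK_COMM_LEN 16

/* Simplified stand-in for the kernel's strlcpy(): copy at most size-1
 * bytes of src and always NUL-terminate dst. */
static size_t my_strlcpy(char *dst, const char *src, size_t size)
{
	size_t len = strlen(src);

	if (size) {
		size_t n = len < size - 1 ? len : size - 1;

		memcpy(dst, src, n);
		dst[n] = '\0';
	}
	return len;
}

int main(void)
{
	/* A short name followed by unrelated bytes, as it might sit in a
	 * caller-supplied buffer. */
	char src[32] = "sh\0leftover-stack-data";
	char dst_memcpy[TASK_COMM_LEN];
	char dst_strlcpy[TASK_COMM_LEN];

	/* Copies a fixed 16 bytes: everything after the NUL comes along. */
	memcpy(dst_memcpy, src, TASK_COMM_LEN);

	/* Copies only the string itself and terminates it. */
	my_strlcpy(dst_strlcpy, src, TASK_COMM_LEN);

	printf("byte after NUL via memcpy: '%c', strlcpy copy: \"%s\"\n",
	       dst_memcpy[3], dst_strlcpy);
	return 0;
}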


@ -630,13 +630,18 @@ static int function_stat_show(struct seq_file *m, void *v)
 		goto out;
 	}

+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	avg = rec->time;
+	do_div(avg, rec->counter);
+	if (tracing_thresh && (avg < tracing_thresh))
+		goto out;
+#endif
+
 	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
 	seq_printf(m, "  %-30.30s  %10lu", str, rec->counter);

 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	seq_puts(m, "    ");
-	avg = rec->time;
-	do_div(avg, rec->counter);

 	/* Sample standard deviation (s^2) */
 	if (rec->counter <= 1)
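The hunk moves the average computation up so that entries whose average time is below tracing_thresh are skipped before anything is printed. do_div() is used because rec->time is a 64-bit accumulator: the helper divides it in place and returns the remainder, which also works on 32-bit architectures. A rough user-space sketch of the same filtering, with plain 64-bit division standing in for do_div() and illustrative numbers:

#include <stdint.h>
#include <stdio.h>

struct func_profile {
	const char *name;
	uint64_t time;          /* total time accumulated for this function */
	unsigned long counter;  /* number of calls */
};

/* Print one record, skipping it when its average is below the threshold,
 * mirroring the check added to function_stat_show() above. */
static void show_profile(const struct func_profile *rec, uint64_t thresh)
{
	uint64_t avg;

	if (!rec->counter)
		return;

	avg = rec->time / rec->counter;   /* do_div(avg, rec->counter) in the kernel */
	if (thresh && avg < thresh)
		return;                   /* filtered out by tracing_thresh */

	printf("%-30s %10lu calls, avg %llu\n",
	       rec->name, rec->counter, (unsigned long long)avg);
}

int main(void)
{
	struct func_profile recs[] = {
		{ "fast_helper", 120, 60 },     /* avg 2   -> filtered at thresh 10 */
		{ "slow_path",   9000, 30 },    /* avg 300 -> shown */
	};

	for (unsigned i = 0; i < sizeof(recs) / sizeof(recs[0]); i++)
		show_profile(&recs[i], 10);
	return 0;
}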


@ -399,6 +399,17 @@ struct rb_irq_work {
 	bool				wakeup_full;
 };

+/*
+ * Structure to hold event state and handle nested events.
+ */
+struct rb_event_info {
+	u64			ts;
+	u64			delta;
+	unsigned long		length;
+	struct buffer_page	*tail_page;
+	int			add_timestamp;
+};
+
 /*
  * Used for which event context the event is in.
  * NMI = 0
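struct rb_event_info gathers the values that were previously threaded through the reserve path as separate parameters (timestamp, delta, length, tail page, add_timestamp flag); the later hunks show rb_reset_tail(), rb_move_tail() and __rb_reserve_next() shrinking to take a single info pointer. A generic, made-up sketch of that refactoring pattern:

#include <stdint.h>
#include <stdio.h>

/* Illustrative descriptor modeled on struct rb_event_info above; the
 * function names here are invented. */
struct event_info {
	uint64_t	ts;
	uint64_t	delta;
	unsigned long	length;
	int		add_timestamp;
};

/* Before: every internal helper carries the whole parameter list. */
static void reserve_old_style(uint64_t ts, uint64_t delta,
			      unsigned long length, int add_timestamp)
{
	printf("old: ts=%llu len=%lu add_ts=%d\n",
	       (unsigned long long)ts, length, add_timestamp);
	(void)delta;
}

/* After: one descriptor travels through the call chain, so adding a
 * field later does not touch every prototype and call site. */
static void reserve_new_style(struct event_info *info)
{
	printf("new: ts=%llu len=%lu add_ts=%d\n",
	       (unsigned long long)info->ts, info->length, info->add_timestamp);
}

int main(void)
{
	struct event_info info = { .ts = 1000, .delta = 0,
				   .length = 64, .add_timestamp = 0 };

	reserve_old_style(info.ts, info.delta, info.length, info.add_timestamp);
	reserve_new_style(&info);
	return 0;
}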
@ -1876,73 +1887,6 @@ rb_event_index(struct ring_buffer_event *event)
 	return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
 }
static inline int
rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
unsigned long addr = (unsigned long)event;
unsigned long index;
index = rb_event_index(event);
addr &= PAGE_MASK;
return cpu_buffer->commit_page->page == (void *)addr &&
rb_commit_index(cpu_buffer) == index;
}
static void
rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned long max_count;
/*
* We only race with interrupts and NMIs on this CPU.
* If we own the commit event, then we can commit
* all others that interrupted us, since the interruptions
* are in stack format (they finish before they come
* back to us). This allows us to do a simple loop to
* assign the commit to the tail.
*/
again:
max_count = cpu_buffer->nr_pages * 100;
while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
if (RB_WARN_ON(cpu_buffer, !(--max_count)))
return;
if (RB_WARN_ON(cpu_buffer,
rb_is_reader_page(cpu_buffer->tail_page)))
return;
local_set(&cpu_buffer->commit_page->page->commit,
rb_page_write(cpu_buffer->commit_page));
rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
cpu_buffer->write_stamp =
cpu_buffer->commit_page->page->time_stamp;
/* add barrier to keep gcc from optimizing too much */
barrier();
}
while (rb_commit_index(cpu_buffer) !=
rb_page_write(cpu_buffer->commit_page)) {
local_set(&cpu_buffer->commit_page->page->commit,
rb_page_write(cpu_buffer->commit_page));
RB_WARN_ON(cpu_buffer,
local_read(&cpu_buffer->commit_page->page->commit) &
~RB_WRITE_MASK);
barrier();
}
/* again, keep gcc from optimizing */
barrier();
/*
* If an interrupt came in just after the first while loop
* and pushed the tail page forward, we will be left with
* a dangling commit that will never go forward.
*/
if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
goto again;
}
 static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
@ -1968,64 +1912,6 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
 	iter->head = 0;
 }
/* Slow path, do not inline */
static noinline struct ring_buffer_event *
rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
{
event->type_len = RINGBUF_TYPE_TIME_EXTEND;
/* Not the first event on the page? */
if (rb_event_index(event)) {
event->time_delta = delta & TS_MASK;
event->array[0] = delta >> TS_SHIFT;
} else {
/* nope, just zero it */
event->time_delta = 0;
event->array[0] = 0;
}
return skip_time_extend(event);
}
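rb_add_time_stamp() is only moved by this patch (removed here, re-added further down). It splits a delta that does not fit in the event header between the time_delta field and array[0], and rb_update_write_stamp() reassembles it with the same shift. A stand-alone sketch of the split and rejoin, assuming the TS_SHIFT/TS_MASK values used by ring_buffer.c (27 bits of delta in the header):

#include <stdint.h>
#include <stdio.h>

/* Same constants ring_buffer.c uses for time-extend events (assumption:
 * 27 low bits live in the header, the rest spills into array[0]). */
#define TS_SHIFT	27
#define TS_MASK		((1ULL << TS_SHIFT) - 1)

struct fake_event {
	uint32_t time_delta;   /* low bits; normally a 27-bit bitfield */
	uint32_t array0;       /* high bits stored in array[0] */
};

int main(void)
{
	uint64_t delta = 3600000000000ULL;   /* ~1 hour in ns, too big for 27 bits */
	struct fake_event ev;

	/* Split, as rb_add_time_stamp() does. */
	ev.time_delta = delta & TS_MASK;
	ev.array0 = delta >> TS_SHIFT;

	/* Rejoin, as rb_update_write_stamp() does for TIME_EXTEND events. */
	uint64_t rejoined = ((uint64_t)ev.array0 << TS_SHIFT) + ev.time_delta;

	printf("delta=%llu rejoined=%llu\n",
	       (unsigned long long)delta, (unsigned long long)rejoined);
	return 0;
}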
/**
* rb_update_event - update event type and data
* @event: the event to update
* @type: the type of event
* @length: the size of the event field in the ring buffer
*
* Update the type and data fields of the event. The length
* is the actual size that is written to the ring buffer,
* and with this, we can determine what to place into the
* data field.
*/
static void
rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event, unsigned length,
int add_timestamp, u64 delta)
{
/* Only a commit updates the timestamp */
if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
delta = 0;
/*
* If we need to add a timestamp, then we
* add it to the start of the resevered space.
*/
if (unlikely(add_timestamp)) {
event = rb_add_time_stamp(event, delta);
length -= RB_LEN_TIME_EXTEND;
delta = 0;
}
event->time_delta = delta;
length -= RB_EVNT_HDR_SIZE;
if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
event->type_len = 0;
event->array[0] = length;
} else
event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
}
 /*
  * rb_handle_head_page - writer hit the head page
  *
@ -2184,29 +2070,13 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
 	return 0;
 }
static unsigned rb_calculate_event_length(unsigned length)
{
struct ring_buffer_event event; /* Used only for sizeof array */
/* zero length can cause confusions */
if (!length)
length++;
if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
length += sizeof(event.array[0]);
length += RB_EVNT_HDR_SIZE;
length = ALIGN(length, RB_ARCH_ALIGNMENT);
return length;
}
 static inline void
 rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
-	      struct buffer_page *tail_page,
-	      unsigned long tail, unsigned long length)
+	      unsigned long tail, struct rb_event_info *info)
 {
+	struct buffer_page *tail_page = info->tail_page;
 	struct ring_buffer_event *event;
+	unsigned long length = info->length;

 	/*
 	 * Only the event that crossed the page boundary
@ -2276,13 +2146,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	 */
 static noinline struct ring_buffer_event *
 rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
-	     unsigned long length, unsigned long tail,
-	     struct buffer_page *tail_page, u64 ts)
+	     unsigned long tail, struct rb_event_info *info)
 {
+	struct buffer_page *tail_page = info->tail_page;
 	struct buffer_page *commit_page = cpu_buffer->commit_page;
 	struct ring_buffer *buffer = cpu_buffer->buffer;
 	struct buffer_page *next_page;
 	int ret;
+	u64 ts;

 	next_page = tail_page;
@ -2368,75 +2239,121 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
  out_again:
-	rb_reset_tail(cpu_buffer, tail_page, tail, length);
+	rb_reset_tail(cpu_buffer, tail, info);

 	/* fail and let the caller try again */
 	return ERR_PTR(-EAGAIN);

  out_reset:
 	/* reset write */
-	rb_reset_tail(cpu_buffer, tail_page, tail, length);
+	rb_reset_tail(cpu_buffer, tail, info);

 	return NULL;
 }
static struct ring_buffer_event * /* Slow path, do not inline */
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, static noinline struct ring_buffer_event *
unsigned long length, u64 ts, rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
u64 delta, int add_timestamp)
{ {
struct buffer_page *tail_page; event->type_len = RINGBUF_TYPE_TIME_EXTEND;
struct ring_buffer_event *event;
unsigned long tail, write;
/* /* Not the first event on the page? */
* If the time delta since the last event is too big to if (rb_event_index(event)) {
* hold in the time field of the event, then we append a event->time_delta = delta & TS_MASK;
* TIME EXTEND event ahead of the data event. event->array[0] = delta >> TS_SHIFT;
*/ } else {
if (unlikely(add_timestamp)) /* nope, just zero it */
length += RB_LEN_TIME_EXTEND; event->time_delta = 0;
event->array[0] = 0;
}
tail_page = cpu_buffer->tail_page; return skip_time_extend(event);
write = local_add_return(length, &tail_page->write); }
/* set write to only the index of the write */ static inline int rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
write &= RB_WRITE_MASK; struct ring_buffer_event *event);
tail = write - length;
/* /**
* If this is the first commit on the page, then it has the same * rb_update_event - update event type and data
* timestamp as the page itself. * @event: the event to update
*/ * @type: the type of event
if (!tail) * @length: the size of the event field in the ring buffer
*
* Update the type and data fields of the event. The length
* is the actual size that is written to the ring buffer,
* and with this, we can determine what to place into the
* data field.
*/
static void
rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event,
struct rb_event_info *info)
{
unsigned length = info->length;
u64 delta = info->delta;
/* Only a commit updates the timestamp */
if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
delta = 0; delta = 0;
/* See if we shot pass the end of this buffer page */ /*
if (unlikely(write > BUF_PAGE_SIZE)) * If we need to add a timestamp, then we
return rb_move_tail(cpu_buffer, length, tail, * add it to the start of the resevered space.
tail_page, ts); */
if (unlikely(info->add_timestamp)) {
event = rb_add_time_stamp(event, delta);
length -= RB_LEN_TIME_EXTEND;
delta = 0;
}
/* We reserved something on the buffer */ event->time_delta = delta;
length -= RB_EVNT_HDR_SIZE;
if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
event->type_len = 0;
event->array[0] = length;
} else
event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
}
event = __rb_page_index(tail_page, tail); static unsigned rb_calculate_event_length(unsigned length)
kmemcheck_annotate_bitfield(event, bitfield); {
rb_update_event(cpu_buffer, event, length, add_timestamp, delta); struct ring_buffer_event event; /* Used only for sizeof array */
local_inc(&tail_page->entries); /* zero length can cause confusions */
if (!length)
length++;
if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
length += sizeof(event.array[0]);
length += RB_EVNT_HDR_SIZE;
length = ALIGN(length, RB_ARCH_ALIGNMENT);
/* /*
* If this is the first commit on the page, then update * In case the time delta is larger than the 27 bits for it
* its timestamp. * in the header, we need to add a timestamp. If another
* event comes in when trying to discard this one to increase
* the length, then the timestamp will be added in the allocated
* space of this event. If length is bigger than the size needed
* for the TIME_EXTEND, then padding has to be used. The events
* length must be either RB_LEN_TIME_EXTEND, or greater than or equal
* to RB_LEN_TIME_EXTEND + 8, as 8 is the minimum size for padding.
* As length is a multiple of 4, we only need to worry if it
* is 12 (RB_LEN_TIME_EXTEND + 4).
*/ */
if (!tail) if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)
tail_page->page->time_stamp = ts; length += RB_ALIGNMENT;
/* account for these added bytes */ return length;
local_add(length, &cpu_buffer->entries_bytes);
return event;
} }
#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
static inline bool sched_clock_stable(void)
{
return true;
}
#endif
 static inline int
 rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
 		  struct ring_buffer_event *event)
@ -2483,6 +2400,59 @@ static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
 	local_inc(&cpu_buffer->commits);
 }
static void
rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned long max_count;
/*
* We only race with interrupts and NMIs on this CPU.
* If we own the commit event, then we can commit
* all others that interrupted us, since the interruptions
* are in stack format (they finish before they come
* back to us). This allows us to do a simple loop to
* assign the commit to the tail.
*/
again:
max_count = cpu_buffer->nr_pages * 100;
while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
if (RB_WARN_ON(cpu_buffer, !(--max_count)))
return;
if (RB_WARN_ON(cpu_buffer,
rb_is_reader_page(cpu_buffer->tail_page)))
return;
local_set(&cpu_buffer->commit_page->page->commit,
rb_page_write(cpu_buffer->commit_page));
rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
cpu_buffer->write_stamp =
cpu_buffer->commit_page->page->time_stamp;
/* add barrier to keep gcc from optimizing too much */
barrier();
}
while (rb_commit_index(cpu_buffer) !=
rb_page_write(cpu_buffer->commit_page)) {
local_set(&cpu_buffer->commit_page->page->commit,
rb_page_write(cpu_buffer->commit_page));
RB_WARN_ON(cpu_buffer,
local_read(&cpu_buffer->commit_page->page->commit) &
~RB_WRITE_MASK);
barrier();
}
/* again, keep gcc from optimizing */
barrier();
/*
* If an interrupt came in just after the first while loop
* and pushed the tail page forward, we will be left with
* a dangling commit that will never go forward.
*/
if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
goto again;
}
 static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	unsigned long commits;
@ -2515,91 +2485,94 @@ static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
 	}
 }
static struct ring_buffer_event * static inline void rb_event_discard(struct ring_buffer_event *event)
rb_reserve_next_event(struct ring_buffer *buffer,
struct ring_buffer_per_cpu *cpu_buffer,
unsigned long length)
{ {
struct ring_buffer_event *event; if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
u64 ts, delta; event = skip_time_extend(event);
int nr_loops = 0;
int add_timestamp;
u64 diff;
rb_start_commit(cpu_buffer); /* array[0] holds the actual length for the discarded event */
event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
event->type_len = RINGBUF_TYPE_PADDING;
/* time delta must be non zero */
if (!event->time_delta)
event->time_delta = 1;
}
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP static inline int
/* rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
* Due to the ability to swap a cpu buffer from a buffer struct ring_buffer_event *event)
* it is possible it was swapped before we committed. {
* (committing stops a swap). We check for it here and unsigned long addr = (unsigned long)event;
* if it happened, we have to fail the write. unsigned long index;
*/
barrier();
if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
local_dec(&cpu_buffer->committing);
local_dec(&cpu_buffer->commits);
return NULL;
}
#endif
length = rb_calculate_event_length(length); index = rb_event_index(event);
again: addr &= PAGE_MASK;
add_timestamp = 0;
delta = 0; return cpu_buffer->commit_page->page == (void *)addr &&
rb_commit_index(cpu_buffer) == index;
}
static void
rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
u64 delta;
/* /*
* We allow for interrupts to reenter here and do a trace. * The event first in the commit queue updates the
* If one does, it will cause this original code to loop * time stamp.
* back here. Even with heavy interrupts happening, this
* should only happen a few times in a row. If this happens
* 1000 times in a row, there must be either an interrupt
* storm or we have something buggy.
* Bail!
*/ */
if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) if (rb_event_is_commit(cpu_buffer, event)) {
goto out_fail; /*
* A commit event that is first on a page
ts = rb_time_stamp(cpu_buffer->buffer); * updates the write timestamp with the page stamp
diff = ts - cpu_buffer->write_stamp; */
if (!rb_event_index(event))
/* make sure this diff is calculated here */ cpu_buffer->write_stamp =
barrier(); cpu_buffer->commit_page->page->time_stamp;
else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
/* Did the write stamp get updated already? */ delta = event->array[0];
if (likely(ts >= cpu_buffer->write_stamp)) { delta <<= TS_SHIFT;
delta = diff; delta += event->time_delta;
if (unlikely(test_time_stamp(delta))) { cpu_buffer->write_stamp += delta;
int local_clock_stable = 1; } else
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK cpu_buffer->write_stamp += event->time_delta;
local_clock_stable = sched_clock_stable();
#endif
WARN_ONCE(delta > (1ULL << 59),
KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
(unsigned long long)delta,
(unsigned long long)ts,
(unsigned long long)cpu_buffer->write_stamp,
local_clock_stable ? "" :
"If you just came from a suspend/resume,\n"
"please switch to the trace global clock:\n"
" echo global > /sys/kernel/debug/tracing/trace_clock\n");
add_timestamp = 1;
}
} }
}
event = __rb_reserve_next(cpu_buffer, length, ts, static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
delta, add_timestamp); struct ring_buffer_event *event)
if (unlikely(PTR_ERR(event) == -EAGAIN)) {
goto again; local_inc(&cpu_buffer->entries);
rb_update_write_stamp(cpu_buffer, event);
if (!event)
goto out_fail;
return event;
out_fail:
rb_end_commit(cpu_buffer); rb_end_commit(cpu_buffer);
return NULL; }
static __always_inline void
rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
{
bool pagebusy;
if (buffer->irq_work.waiters_pending) {
buffer->irq_work.waiters_pending = false;
/* irq_work_queue() supplies it's own memory barriers */
irq_work_queue(&buffer->irq_work.work);
}
if (cpu_buffer->irq_work.waiters_pending) {
cpu_buffer->irq_work.waiters_pending = false;
/* irq_work_queue() supplies it's own memory barriers */
irq_work_queue(&cpu_buffer->irq_work.work);
}
pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
cpu_buffer->irq_work.wakeup_full = true;
cpu_buffer->irq_work.full_waiters_pending = false;
/* irq_work_queue() supplies it's own memory barriers */
irq_work_queue(&cpu_buffer->irq_work.work);
}
} }
 /*
@ -2671,6 +2644,178 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->current_context &= cpu_buffer->current_context - 1;
 }
/**
* ring_buffer_unlock_commit - commit a reserved
* @buffer: The buffer to commit to
* @event: The event pointer to commit.
*
* This commits the data to the ring buffer, and releases any locks held.
*
* Must be paired with ring_buffer_lock_reserve.
*/
int ring_buffer_unlock_commit(struct ring_buffer *buffer,
struct ring_buffer_event *event)
{
struct ring_buffer_per_cpu *cpu_buffer;
int cpu = raw_smp_processor_id();
cpu_buffer = buffer->buffers[cpu];
rb_commit(cpu_buffer, event);
rb_wakeups(buffer, cpu_buffer);
trace_recursive_unlock(cpu_buffer);
preempt_enable_notrace();
return 0;
}
EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
static noinline void
rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
struct rb_event_info *info)
{
WARN_ONCE(info->delta > (1ULL << 59),
KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
(unsigned long long)info->delta,
(unsigned long long)info->ts,
(unsigned long long)cpu_buffer->write_stamp,
sched_clock_stable() ? "" :
"If you just came from a suspend/resume,\n"
"please switch to the trace global clock:\n"
" echo global > /sys/kernel/debug/tracing/trace_clock\n");
info->add_timestamp = 1;
}
static struct ring_buffer_event *
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
struct rb_event_info *info)
{
struct ring_buffer_event *event;
struct buffer_page *tail_page;
unsigned long tail, write;
/*
* If the time delta since the last event is too big to
* hold in the time field of the event, then we append a
* TIME EXTEND event ahead of the data event.
*/
if (unlikely(info->add_timestamp))
info->length += RB_LEN_TIME_EXTEND;
tail_page = info->tail_page = cpu_buffer->tail_page;
write = local_add_return(info->length, &tail_page->write);
/* set write to only the index of the write */
write &= RB_WRITE_MASK;
tail = write - info->length;
/*
* If this is the first commit on the page, then it has the same
* timestamp as the page itself.
*/
if (!tail)
info->delta = 0;
/* See if we shot pass the end of this buffer page */
if (unlikely(write > BUF_PAGE_SIZE))
return rb_move_tail(cpu_buffer, tail, info);
/* We reserved something on the buffer */
event = __rb_page_index(tail_page, tail);
kmemcheck_annotate_bitfield(event, bitfield);
rb_update_event(cpu_buffer, event, info);
local_inc(&tail_page->entries);
/*
* If this is the first commit on the page, then update
* its timestamp.
*/
if (!tail)
tail_page->page->time_stamp = info->ts;
/* account for these added bytes */
local_add(info->length, &cpu_buffer->entries_bytes);
return event;
}
static struct ring_buffer_event *
rb_reserve_next_event(struct ring_buffer *buffer,
struct ring_buffer_per_cpu *cpu_buffer,
unsigned long length)
{
struct ring_buffer_event *event;
struct rb_event_info info;
int nr_loops = 0;
u64 diff;
rb_start_commit(cpu_buffer);
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
/*
* Due to the ability to swap a cpu buffer from a buffer
* it is possible it was swapped before we committed.
* (committing stops a swap). We check for it here and
* if it happened, we have to fail the write.
*/
barrier();
if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
local_dec(&cpu_buffer->committing);
local_dec(&cpu_buffer->commits);
return NULL;
}
#endif
info.length = rb_calculate_event_length(length);
again:
info.add_timestamp = 0;
info.delta = 0;
/*
* We allow for interrupts to reenter here and do a trace.
* If one does, it will cause this original code to loop
* back here. Even with heavy interrupts happening, this
* should only happen a few times in a row. If this happens
* 1000 times in a row, there must be either an interrupt
* storm or we have something buggy.
* Bail!
*/
if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
goto out_fail;
info.ts = rb_time_stamp(cpu_buffer->buffer);
diff = info.ts - cpu_buffer->write_stamp;
/* make sure this diff is calculated here */
barrier();
/* Did the write stamp get updated already? */
if (likely(info.ts >= cpu_buffer->write_stamp)) {
info.delta = diff;
if (unlikely(test_time_stamp(info.delta)))
rb_handle_timestamp(cpu_buffer, &info);
}
event = __rb_reserve_next(cpu_buffer, &info);
if (unlikely(PTR_ERR(event) == -EAGAIN))
goto again;
if (!event)
goto out_fail;
return event;
out_fail:
rb_end_commit(cpu_buffer);
return NULL;
}
 /**
  * ring_buffer_lock_reserve - reserve a part of the buffer
  * @buffer: the ring buffer to reserve from
@ -2729,111 +2874,6 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
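The reorganization leaves the producer API unchanged: callers still reserve space, fill the returned event and commit it with the pair shown in these hunks. A hedged sketch of typical in-kernel usage; struct my_payload and the error handling are illustrative, not taken from this patch:

#include <linux/errno.h>
#include <linux/ring_buffer.h>
#include <linux/types.h>

struct my_payload {
	u32 id;
	u64 value;
};

static int write_sample(struct ring_buffer *buffer, u32 id, u64 value)
{
	struct ring_buffer_event *event;
	struct my_payload *p;

	/* Reserve space; NULL means the write cannot be accepted right now. */
	event = ring_buffer_lock_reserve(buffer, sizeof(*p));
	if (!event)
		return -EBUSY;

	/* Fill the reserved area... */
	p = ring_buffer_event_data(event);
	p->id = id;
	p->value = value;

	/* ...and make it visible to readers. */
	return ring_buffer_unlock_commit(buffer, event);
}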
static void
rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
u64 delta;
/*
* The event first in the commit queue updates the
* time stamp.
*/
if (rb_event_is_commit(cpu_buffer, event)) {
/*
* A commit event that is first on a page
* updates the write timestamp with the page stamp
*/
if (!rb_event_index(event))
cpu_buffer->write_stamp =
cpu_buffer->commit_page->page->time_stamp;
else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
delta = event->array[0];
delta <<= TS_SHIFT;
delta += event->time_delta;
cpu_buffer->write_stamp += delta;
} else
cpu_buffer->write_stamp += event->time_delta;
}
}
static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
local_inc(&cpu_buffer->entries);
rb_update_write_stamp(cpu_buffer, event);
rb_end_commit(cpu_buffer);
}
static __always_inline void
rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
{
bool pagebusy;
if (buffer->irq_work.waiters_pending) {
buffer->irq_work.waiters_pending = false;
/* irq_work_queue() supplies it's own memory barriers */
irq_work_queue(&buffer->irq_work.work);
}
if (cpu_buffer->irq_work.waiters_pending) {
cpu_buffer->irq_work.waiters_pending = false;
/* irq_work_queue() supplies it's own memory barriers */
irq_work_queue(&cpu_buffer->irq_work.work);
}
pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
cpu_buffer->irq_work.wakeup_full = true;
cpu_buffer->irq_work.full_waiters_pending = false;
/* irq_work_queue() supplies it's own memory barriers */
irq_work_queue(&cpu_buffer->irq_work.work);
}
}
/**
* ring_buffer_unlock_commit - commit a reserved
* @buffer: The buffer to commit to
* @event: The event pointer to commit.
*
* This commits the data to the ring buffer, and releases any locks held.
*
* Must be paired with ring_buffer_lock_reserve.
*/
int ring_buffer_unlock_commit(struct ring_buffer *buffer,
struct ring_buffer_event *event)
{
struct ring_buffer_per_cpu *cpu_buffer;
int cpu = raw_smp_processor_id();
cpu_buffer = buffer->buffers[cpu];
rb_commit(cpu_buffer, event);
rb_wakeups(buffer, cpu_buffer);
trace_recursive_unlock(cpu_buffer);
preempt_enable_notrace();
return 0;
}
EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
static inline void rb_event_discard(struct ring_buffer_event *event)
{
if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
event = skip_time_extend(event);
/* array[0] holds the actual length for the discarded event */
event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
event->type_len = RINGBUF_TYPE_PADDING;
/* time delta must be non zero */
if (!event->time_delta)
event->time_delta = 1;
}
 /*
  * Decrement the entries to the page that an event is on.
  * The event does not even need to exist, only the pointer


@ -3035,7 +3035,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
 	if (!iter)
 		return ERR_PTR(-ENOMEM);

-	iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
+	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
 				    GFP_KERNEL);
 	if (!iter->buffer_iter)
 		goto release;
@ -6990,7 +6990,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
 	trace_init_global_iter(&iter);

 	for_each_tracing_cpu(cpu) {
-		atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
+		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
 	}

 	old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
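Two separate fixes meet in this file: buffer_iter is indexed by CPU id, so it is now sized by nr_cpu_ids (which covers sparse, non-continuous ids) instead of num_possible_cpus(), and kcalloc() replaces the open-coded kzalloc(n * size) so the multiplication is overflow-checked inside the allocator. A user-space sketch of the overflow-checked array allocation idea, with calloc() playing the role of kcalloc():

#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* User-space stand-in for what kcalloc() gives a kernel caller for free:
 * an overflow-checked multiply plus zeroing.  calloc() already does both. */
static void *checked_alloc_array(size_t n, size_t size)
{
	if (size && n > SIZE_MAX / size)
		return NULL;            /* n * size would overflow */
	return calloc(n, size);
}

int main(void)
{
	/* Sized by the largest index that will ever be used (nr_cpu_ids in
	 * the kernel hunk), not by how many entries happen to be populated. */
	size_t nr_ids = 8;
	int **iters = checked_alloc_array(nr_ids, sizeof(*iters));

	if (!iters)
		return 1;
	printf("allocated %zu slots\n", nr_ids);
	free(iters);
	return 0;
}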


@ -30,6 +30,7 @@
 DEFINE_MUTEX(event_mutex);

 LIST_HEAD(ftrace_events);
+static LIST_HEAD(ftrace_generic_fields);
 static LIST_HEAD(ftrace_common_fields);

 #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
@ -94,6 +95,10 @@ trace_find_event_field(struct trace_event_call *call, char *name)
 	struct ftrace_event_field *field;
 	struct list_head *head;

+	field = __find_event_field(&ftrace_generic_fields, name);
+	if (field)
+		return field;
+
 	field = __find_event_field(&ftrace_common_fields, name);
 	if (field)
 		return field;
@ -144,6 +149,13 @@ int trace_define_field(struct trace_event_call *call, const char *type,
 }
 EXPORT_SYMBOL_GPL(trace_define_field);

+#define __generic_field(type, item, filter_type)			\
+	ret = __trace_define_field(&ftrace_generic_fields, #type,	\
+				   #item, 0, 0, is_signed_type(type),	\
+				   filter_type);			\
+	if (ret)							\
+		return ret;
+
 #define __common_field(type, item)					\
 	ret = __trace_define_field(&ftrace_common_fields, #type,	\
 				   "common_" #item,			\
@ -153,6 +165,16 @@ EXPORT_SYMBOL_GPL(trace_define_field);
 	if (ret)							\
 		return ret;

+static int trace_define_generic_fields(void)
+{
+	int ret;
+
+	__generic_field(int, cpu, FILTER_OTHER);
+	__generic_field(char *, comm, FILTER_PTR_STRING);
+
+	return ret;
+}
+
 static int trace_define_common_fields(void)
 {
 	int ret;
@ -2671,6 +2693,9 @@ static __init int event_trace_init(void)
 	if (!entry)
 		pr_warn("Could not create tracefs 'available_events' entry\n");

+	if (trace_define_generic_fields())
+		pr_warn("tracing: Failed to allocated generic fields");
+
 	if (trace_define_common_fields())
 		pr_warn("tracing: Failed to allocate common fields");


@ -252,6 +252,50 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event)
 	return match;
 }
/* Filter predicate for CPUs. */
static int filter_pred_cpu(struct filter_pred *pred, void *event)
{
int cpu, cmp;
int match = 0;
cpu = raw_smp_processor_id();
cmp = pred->val;
switch (pred->op) {
case OP_EQ:
match = cpu == cmp;
break;
case OP_LT:
match = cpu < cmp;
break;
case OP_LE:
match = cpu <= cmp;
break;
case OP_GT:
match = cpu > cmp;
break;
case OP_GE:
match = cpu >= cmp;
break;
default:
break;
}
return !!match == !pred->not;
}
/* Filter predicate for COMM. */
static int filter_pred_comm(struct filter_pred *pred, void *event)
{
int cmp, match;
cmp = pred->regex.match(current->comm, &pred->regex,
pred->regex.field_len);
match = cmp ^ pred->not;
return match;
}
 static int filter_pred_none(struct filter_pred *pred, void *event)
 {
 	return 0;
@ -1002,7 +1046,10 @@ static int init_pred(struct filter_parse_state *ps,
 	if (is_string_field(field)) {
 		filter_build_regex(pred);

-		if (field->filter_type == FILTER_STATIC_STRING) {
+		if (!strcmp(field->name, "comm")) {
+			fn = filter_pred_comm;
+			pred->regex.field_len = TASK_COMM_LEN;
+		} else if (field->filter_type == FILTER_STATIC_STRING) {
 			fn = filter_pred_string;
 			pred->regex.field_len = field->size;
 		} else if (field->filter_type == FILTER_DYN_STRING)
@ -1025,7 +1072,10 @@ static int init_pred(struct filter_parse_state *ps,
 	}

 	pred->val = val;

-	fn = select_comparison_fn(pred->op, field->size,
-				  field->is_signed);
+	if (!strcmp(field->name, "cpu"))
+		fn = filter_pred_cpu;
+	else
+		fn = select_comparison_fn(pred->op, field->size,
+					  field->is_signed);
 	if (!fn) {
 		parse_error(ps, FILT_ERR_INVALID_OP, 0);
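Both new predicates fold the optional negation into their result: filter_pred_cpu() returns `!!match == !pred->not`, while filter_pred_comm() returns `match ^ pred->not`. A tiny check that the two formulations agree for 0/1 inputs (neg stands in for pred->not):

#include <stdio.h>

int main(void)
{
	/* Exhaust the four match/neg combinations and compare the two
	 * idioms used by the predicates above. */
	for (int match = 0; match <= 1; match++)
		for (int neg = 0; neg <= 1; neg++)
			printf("match=%d neg=%d  cpu-style=%d  comm-style=%d\n",
			       match, neg,
			       !!match == !neg,
			       match ^ neg);
	return 0;
}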


@ -715,13 +715,13 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
 		snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
 		trace_seq_printf(s, ".%s", nsecs_str);
-		len += strlen(nsecs_str);
+		len += strlen(nsecs_str) + 1;
 	}

 	trace_seq_puts(s, " us ");

 	/* Print remaining spaces to fit the row's width */
-	for (i = len; i < 7; i++)
+	for (i = len; i < 8; i++)
 		trace_seq_putc(s, ' ');
 }
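The duration column previously assumed at most six digits of microseconds: the '.' was not counted in the running length and the row was padded to 7 characters, so a 7-digit value broke the alignment. A rough user-space model of the routine (the code around the changed lines is paraphrased from the kernel function, not quoted) showing the misalignment and the fix:

#include <stdio.h>
#include <string.h>

static void print_duration(unsigned long long ns, int row, int count_dot)
{
	unsigned long long usecs = ns / 1000;
	unsigned long nsecs_rem = (unsigned long)(ns % 1000);
	char usecs_str[21], nsecs_str[5];
	int len, i;

	sprintf(usecs_str, "%llu", usecs);
	printf("%s", usecs_str);
	len = strlen(usecs_str);

	/* Print nanoseconds only while the field still fits. */
	if (len < 7) {
		size_t slen = sizeof(nsecs_str) < (size_t)(8 - len) ?
			      sizeof(nsecs_str) : (size_t)(8 - len);

		snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
		printf(".%s", nsecs_str);
		len += strlen(nsecs_str) + (count_dot ? 1 : 0);
	}

	printf(" us ");
	for (i = len; i < row; i++)
		putchar(' ');
	printf("|  }\n");
}

int main(void)
{
	puts("old accounting (width 7, '.' not counted):");
	print_duration(3177, 7, 0);          /* short duration            */
	print_duration(3594274000ULL, 7, 0); /* 7-digit usecs: one short  */

	puts("new accounting (width 8, '.' counted):");
	print_duration(3177, 8, 1);
	print_duration(3594274000ULL, 8, 1); /* columns line up again     */
	return 0;
}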


@ -496,6 +496,8 @@ static const struct trace_mark {
 	char			sym;
 } mark[] = {
 	MARK(1000000000ULL	, '$'), /* 1 sec */
+	MARK(100000000ULL	, '@'), /* 100 msec */
+	MARK(10000000ULL	, '*'), /* 10 msec */
 	MARK(1000000ULL		, '#'), /* 1000 usecs */
 	MARK(100000ULL		, '!'), /* 100 usecs */
 	MARK(10000ULL		, '+'), /* 10 usecs */
@ -508,7 +510,7 @@ char trace_find_mark(unsigned long long d)
 	int size = ARRAY_SIZE(mark);

 	for (i = 0; i < size; i++) {
-		if (d >= mark[i].val)
+		if (d > mark[i].val)
 			break;
 	}
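trace_find_mark() scans the mark[] table from the largest threshold down and returns the symbol of the first entry the delay exceeds; the hunk adds the 10 ms and 100 ms rows and switches the comparison from >= to >. A stand-alone version of the same lookup (the blank-symbol fallback follows the surrounding kernel function):

#include <stdio.h>

struct trace_mark {
	unsigned long long	val;
	char			sym;
};

/* Ordered from the largest threshold down, as in trace_output.c. */
static const struct trace_mark mark[] = {
	{ 1000000000ULL, '$' },	/* 1 sec */
	{ 100000000ULL,  '@' },	/* 100 msec */
	{ 10000000ULL,   '*' },	/* 10 msec */
	{ 1000000ULL,    '#' },	/* 1000 usecs */
	{ 100000ULL,     '!' },	/* 100 usecs */
	{ 10000ULL,      '+' },	/* 10 usecs */
};

static char find_mark(unsigned long long d)	/* d is in nanoseconds */
{
	int size = sizeof(mark) / sizeof(mark[0]);
	int i;

	for (i = 0; i < size; i++) {
		if (d > mark[i].val)	/* '>' after this patch, was '>=' */
			break;
	}
	return (i == size) ? ' ' : mark[i].sym;
}

int main(void)
{
	unsigned long long samples[] = { 5000, 33998590, 119760200, 3594274000ULL };

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("%12llu ns -> '%c'\n", samples[i], find_mark(samples[i]));
	return 0;
}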


@ -18,12 +18,6 @@
 #define STACK_TRACE_ENTRIES 500

-#ifdef CC_USING_FENTRY
-# define fentry		1
-#else
-# define fentry		0
-#endif
-
 static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
 	 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
 static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
@ -35,7 +29,7 @@ static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
  */
 static struct stack_trace max_stack_trace = {
 	.max_entries		= STACK_TRACE_ENTRIES - 1,
-	.entries		= &stack_dump_trace[1],
+	.entries		= &stack_dump_trace[0],
 };

 static unsigned long max_stack_size;
@ -55,7 +49,7 @@ static inline void print_max_stack(void)
pr_emerg(" Depth Size Location (%d entries)\n" pr_emerg(" Depth Size Location (%d entries)\n"
" ----- ---- --------\n", " ----- ---- --------\n",
max_stack_trace.nr_entries - 1); max_stack_trace.nr_entries);
for (i = 0; i < max_stack_trace.nr_entries; i++) { for (i = 0; i < max_stack_trace.nr_entries; i++) {
if (stack_dump_trace[i] == ULONG_MAX) if (stack_dump_trace[i] == ULONG_MAX)
@ -77,7 +71,7 @@ check_stack(unsigned long ip, unsigned long *stack)
 	unsigned long this_size, flags; unsigned long *p, *top, *start;
 	static int tracer_frame;
 	int frame_size = ACCESS_ONCE(tracer_frame);
-	int i;
+	int i, x;

 	this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
 	this_size = THREAD_SIZE - this_size;
@ -105,26 +99,20 @@ check_stack(unsigned long ip, unsigned long *stack)
 	max_stack_size = this_size;

 	max_stack_trace.nr_entries = 0;
-	if (using_ftrace_ops_list_func())
-		max_stack_trace.skip = 4;
-	else
-		max_stack_trace.skip = 3;
+	max_stack_trace.skip = 3;

 	save_stack_trace(&max_stack_trace);

-	/*
-	 * Add the passed in ip from the function tracer.
-	 * Searching for this on the stack will skip over
-	 * most of the overhead from the stack tracer itself.
-	 */
-	stack_dump_trace[0] = ip;
-	max_stack_trace.nr_entries++;
+	/* Skip over the overhead of the stack tracer itself */
+	for (i = 0; i < max_stack_trace.nr_entries; i++) {
+		if (stack_dump_trace[i] == ip)
+			break;
+	}

 	/*
 	 * Now find where in the stack these are.
 	 */
-	i = 0;
+	x = 0;
 	start = stack;
 	top = (unsigned long *)
 		(((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
@ -139,12 +127,15 @@ check_stack(unsigned long ip, unsigned long *stack)
 	while (i < max_stack_trace.nr_entries) {
 		int found = 0;

-		stack_dump_index[i] = this_size;
+		stack_dump_index[x] = this_size;
 		p = start;

 		for (; p < top && i < max_stack_trace.nr_entries; p++) {
+			if (stack_dump_trace[i] == ULONG_MAX)
+				break;
 			if (*p == stack_dump_trace[i]) {
-				this_size = stack_dump_index[i++] =
+				stack_dump_trace[x] = stack_dump_trace[i++];
+				this_size = stack_dump_index[x++] =
 					(top - p) * sizeof(unsigned long);
 				found = 1;
 				/* Start the search from here */
@ -156,7 +147,7 @@ check_stack(unsigned long ip, unsigned long *stack)
 				 * out what that is, then figure it out
 				 * now.
 				 */
-				if (unlikely(!tracer_frame) && i == 1) {
+				if (unlikely(!tracer_frame)) {
 					tracer_frame = (p - stack) *
 						sizeof(unsigned long);
 					max_stack_size -= tracer_frame;
@ -168,6 +159,10 @@ check_stack(unsigned long ip, unsigned long *stack)
 		i++;
 	}

+	max_stack_trace.nr_entries = x;
+	for (; x < i; x++)
+		stack_dump_trace[x] = ULONG_MAX;
+
 	if (task_stack_end_corrupted(current)) {
 		print_max_stack();
 		BUG();
@ -192,24 +187,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
 	if (per_cpu(trace_active, cpu)++ != 0)
 		goto out;

-	/*
-	 * When fentry is used, the traced function does not get
-	 * its stack frame set up, and we lose the parent.
-	 * The ip is pretty useless because the function tracer
-	 * was called before that function set up its stack frame.
-	 * In this case, we use the parent ip.
-	 *
-	 * By adding the return address of either the parent ip
-	 * or the current ip we can disregard most of the stack usage
-	 * caused by the stack tracer itself.
-	 *
-	 * The function tracer always reports the address of where the
-	 * mcount call was, but the stack will hold the return address.
-	 */
-	if (fentry)
-		ip = parent_ip;
-	else
-		ip += MCOUNT_INSN_SIZE;
+	ip += MCOUNT_INSN_SIZE;

 	check_stack(ip, &stack);
@ -284,7 +262,7 @@ __next(struct seq_file *m, loff_t *pos)
 {
 	long n = *pos - 1;

-	if (n >= max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
+	if (n > max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
 		return NULL;

 	m->private = (void *)n;
@ -354,7 +332,7 @@ static int t_show(struct seq_file *m, void *v)
seq_printf(m, " Depth Size Location" seq_printf(m, " Depth Size Location"
" (%d entries)\n" " (%d entries)\n"
" ----- ---- --------\n", " ----- ---- --------\n",
max_stack_trace.nr_entries - 1); max_stack_trace.nr_entries);
if (!stack_tracer_enabled && !max_stack_size) if (!stack_tracer_enabled && !max_stack_size)
print_disabled(m); print_disabled(m);
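Rather than seeding stack_dump_trace[0] with the traced ip, the reworked check_stack() searches the saved entries for ip and then compacts everything from that point to the front with the separate x index, terminating the unused tail with ULONG_MAX. A condensed stand-alone model of that bookkeeping (the real function also correlates each entry with words on the stack to compute sizes):

#include <limits.h>
#include <stdio.h>

#define NR_ENTRIES 8

/* Saved trace with some stack-tracer internals ahead of the ip that the
 * caller actually cares about; the addresses are invented. */
static unsigned long trace[NR_ENTRIES + 1] = {
	0x1111,		/* stack-tracer overhead to be skipped */
	0x2222,
	0xabcd,		/* <- the ip passed in by the caller    */
	0x3333,
	0x4444,
	ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX,
};

int main(void)
{
	unsigned long ip = 0xabcd;
	int nr_entries = 5;
	int i, x = 0;

	/* Skip over the overhead of the stack tracer itself. */
	for (i = 0; i < nr_entries; i++)
		if (trace[i] == ip)
			break;

	/* Compact the interesting entries to the front. */
	while (i < nr_entries && trace[i] != ULONG_MAX)
		trace[x++] = trace[i++];

	nr_entries = x;
	for (; x < i; x++)
		trace[x] = ULONG_MAX;

	for (i = 0; i < nr_entries; i++)
		printf("entry %d: %#lx\n", i, trace[i]);
	return 0;
}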