From 880ca15adf2392770a68047e7a98e076ff4d21da Mon Sep 17 00:00:00 2001
From: Paul Mackerras
Date: Mon, 1 Jun 2009 17:49:14 +1000
Subject: [PATCH] perf_counter: Allow software counters to count while task is not running

This changes perf_swcounter_match() so that per-task software counters
can count events that occur while their associated task is not running.
This will allow us to use the generic software counter code for
counting task migrations, which can occur while the task is not
scheduled in.

To do this, we have to distinguish between the situations where the
counter is inactive because its task has been scheduled out, and those
where the counter is inactive because it is part of a group that was
not able to go on the PMU. In the former case we want the counter to
count, but not in the latter case. If the context is active, we have
the latter case. If the context is inactive then we need to know
whether the counter was counting when the context was last active,
which we can determine by comparing its ->tstamp_stopped timestamp
with the context's timestamp.

This also folds three checks in perf_swcounter_match, checking
perf_event_raw(), perf_event_type() and perf_event_id() individually,
into a single 64-bit comparison on counter->hw_event.config, as an
optimization.

Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <18979.34810.259718.955621@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 52 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index da8dfef4b472..ff8b4636f845 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2867,20 +2867,56 @@ static void perf_swcounter_overflow(struct perf_counter *counter, } +static int perf_swcounter_is_counting(struct perf_counter *counter) +{ + struct perf_counter_context *ctx; + unsigned long flags; + int count; + + if (counter->state == PERF_COUNTER_STATE_ACTIVE) + return 1; + + if (counter->state != PERF_COUNTER_STATE_INACTIVE) + return 0; + + /* + * If the counter is inactive, it could be just because + * its task is scheduled out, or because it's in a group + * which could not go on the PMU. We want to count in + * the first case but not the second. If the context is + * currently active then an inactive software counter must + * be the second case. If it's not currently active then + * we need to know whether the counter was active when the + * context was last active, which we can determine by + * comparing counter->tstamp_stopped with ctx->time. + * + * We are within an RCU read-side critical section, + * which protects the existence of *ctx. 
+ */ + ctx = counter->ctx; + spin_lock_irqsave(&ctx->lock, flags); + count = 1; + /* Re-check state now we have the lock */ + if (counter->state < PERF_COUNTER_STATE_INACTIVE || + counter->ctx->is_active || + counter->tstamp_stopped < ctx->time) + count = 0; + spin_unlock_irqrestore(&ctx->lock, flags); + return count; +} + static int perf_swcounter_match(struct perf_counter *counter, enum perf_event_types type, u32 event, struct pt_regs *regs) { - if (counter->state != PERF_COUNTER_STATE_ACTIVE) + u64 event_config; + + event_config = ((u64) type << PERF_COUNTER_TYPE_SHIFT) | event; + + if (!perf_swcounter_is_counting(counter)) return 0; - if (perf_event_raw(&counter->hw_event)) - return 0; - - if (perf_event_type(&counter->hw_event) != type) - return 0; - - if (perf_event_id(&counter->hw_event) != event) + if (counter->hw_event.config != event_config) return 0; if (counter->hw_event.exclude_user && user_mode(regs))