Merge branch 'x86/cache' into perf/core, to resolve conflicts

To avoid conflicts with upcoming perf/core patches, merge in the RDT perf work.

Signed-off-by: Ingo Molnar <mingo@kernel.org>

This commit is contained in: commit a4c9f26533
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -520,18 +520,24 @@ the pseudo-locked region:
 2) Cache hit and miss measurements using model specific precision counters if
    available. Depending on the levels of cache on the system the pseudo_lock_l2
    and pseudo_lock_l3 tracepoints are available.
-   WARNING: triggering this measurement uses from two (for just L2
-   measurements) to four (for L2 and L3 measurements) precision counters on
-   the system, if any other measurements are in progress the counters and
-   their corresponding event registers will be clobbered.
 
 When a pseudo-locked region is created a new debugfs directory is created for
 it in debugfs as /sys/kernel/debug/resctrl/<newdir>. A single
 write-only file, pseudo_lock_measure, is present in this directory. The
-measurement on the pseudo-locked region depends on the number, 1 or 2,
-written to this debugfs file. Since the measurements are recorded with the
-tracing infrastructure the relevant tracepoints need to be enabled before the
-measurement is triggered.
+measurement of the pseudo-locked region depends on the number written to this
+debugfs file:
+1 - writing "1" to the pseudo_lock_measure file will trigger the latency
+    measurement captured in the pseudo_lock_mem_latency tracepoint. See
+    example below.
+2 - writing "2" to the pseudo_lock_measure file will trigger the L2 cache
+    residency (cache hits and misses) measurement captured in the
+    pseudo_lock_l2 tracepoint. See example below.
+3 - writing "3" to the pseudo_lock_measure file will trigger the L3 cache
+    residency (cache hits and misses) measurement captured in the
+    pseudo_lock_l3 tracepoint.
+
+All measurements are recorded with the tracing infrastructure. This requires
+the relevant tracepoints to be enabled before the measurement is triggered.
 
 Example of latency debugging interface:
 In this example a pseudo-locked region named "newlock" was created. Here is
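For reference, the interface described in the documentation hunk above is driven entirely through debugfs and tracefs. A minimal sketch of an L2 residency measurement, assuming tracefs is mounted under /sys/kernel/debug/tracing and a pseudo-locked region named "newlock" already exists (the exact mount point may differ on other setups):

  # echo 1 > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_l2/enable
  # echo 2 > /sys/kernel/debug/resctrl/newlock/pseudo_lock_measure
  # echo 0 > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_l2/enable
  # cat /sys/kernel/debug/tracing/trace

The pseudo_lock_l2 tracepoint then reports the hit and miss counts gathered while the locked buffer was read; writing "1" or "3" selects the latency and L3 measurements in the same way.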
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1033,6 +1033,27 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 	}
 }
 
+/**
+ * x86_perf_rdpmc_index - Return PMC counter used for event
+ * @event: the perf_event to which the PMC counter was assigned
+ *
+ * The counter assigned to this performance event may change if interrupts
+ * are enabled. This counter should thus never be used while interrupts are
+ * enabled. Before this function is used to obtain the assigned counter the
+ * event should be checked for validity using, for example,
+ * perf_event_read_local(), within the same interrupt disabled section in
+ * which this counter is planned to be used.
+ *
+ * Return: The index of the performance monitoring counter assigned to
+ * @perf_event.
+ */
+int x86_perf_rdpmc_index(struct perf_event *event)
+{
+	lockdep_assert_irqs_disabled();
+
+	return event->hw.event_base_rdpmc;
+}
+
 static inline int match_prev_assignment(struct hw_perf_event *hwc,
 					struct cpu_hw_events *cpuc,
 					int i)
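The kernel-doc above spells out the calling contract: the returned index is only stable inside an interrupt-disabled section in which the event has just been validated by a local read. A hedged, illustrative sketch of that pattern follows; the helper name read_pmc_for_event is invented for this example, while the in-tree user added by this merge is measure_residency_fn in intel_rdt_pseudo_lock.c:

	/* Sketch only: assumes <linux/perf_event.h> and <asm/msr.h> are included. */
	static u64 read_pmc_for_event(struct perf_event *event)
	{
		u64 tmp, val = 0;
		int pmcnum;

		local_irq_disable();
		/* A successful local read confirms the event is usable on this CPU. */
		if (perf_event_read_local(event, &tmp, NULL, NULL)) {
			local_irq_enable();
			return 0;
		}
		/* Interrupts are still off, so the counter index cannot change. */
		pmcnum = x86_perf_rdpmc_index(event);
		rdpmcl(pmcnum, val);
		local_irq_enable();
		return val;
	}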
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -278,6 +278,7 @@ struct perf_guest_switch_msr {
 extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
 extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
 extern void perf_check_microcode(void);
+extern int x86_perf_rdpmc_index(struct perf_event *event);
 #else
 static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
 {
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
@@ -17,6 +17,7 @@
 #include <linux/debugfs.h>
 #include <linux/kthread.h>
 #include <linux/mman.h>
+#include <linux/perf_event.h>
 #include <linux/pm_qos.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
@@ -26,6 +27,7 @@
 #include <asm/intel_rdt_sched.h>
 #include <asm/perf_event.h>
 
+#include "../../events/perf_event.h" /* For X86_CONFIG() */
 #include "intel_rdt.h"
 
 #define CREATE_TRACE_POINTS
@@ -106,16 +108,6 @@ static u64 get_prefetch_disable_bits(void)
 	return 0;
 }
 
-/*
- * Helper to write 64bit value to MSR without tracing. Used when
- * use of the cache should be restricted and use of registers used
- * for local variables avoided.
- */
-static inline void pseudo_wrmsrl_notrace(unsigned int msr, u64 val)
-{
-	__wrmsr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
-}
-
 /**
  * pseudo_lock_minor_get - Obtain available minor number
  * @minor: Pointer to where new minor number will be stored
@@ -886,31 +878,14 @@ static int measure_cycles_lat_fn(void *_plr)
 	struct pseudo_lock_region *plr = _plr;
 	unsigned long i;
 	u64 start, end;
-#ifdef CONFIG_KASAN
-	/*
-	 * The registers used for local register variables are also used
-	 * when KASAN is active. When KASAN is active we use a regular
-	 * variable to ensure we always use a valid pointer to access memory.
-	 * The cost is that accessing this pointer, which could be in
-	 * cache, will be included in the measurement of memory read latency.
-	 */
 	void *mem_r;
-#else
-#ifdef CONFIG_X86_64
-	register void *mem_r asm("rbx");
-#else
-	register void *mem_r asm("ebx");
-#endif /* CONFIG_X86_64 */
-#endif /* CONFIG_KASAN */
 
 	local_irq_disable();
 	/*
-	 * The wrmsr call may be reordered with the assignment below it.
-	 * Call wrmsr as directly as possible to avoid tracing clobbering
-	 * local register variable used for memory pointer.
+	 * Disable hardware prefetchers.
 	 */
-	__wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
-	mem_r = plr->kmem;
+	wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+	mem_r = READ_ONCE(plr->kmem);
 	/*
 	 * Dummy execute of the time measurement to load the needed
 	 * instructions into the L1 instruction cache.
@@ -932,157 +907,240 @@ static int measure_cycles_lat_fn(void *_plr)
 	return 0;
 }
 
-static int measure_cycles_perf_fn(void *_plr)
+/*
+ * Create a perf_event_attr for the hit and miss perf events that will
+ * be used during the performance measurement. A perf_event maintains
+ * a pointer to its perf_event_attr so a unique attribute structure is
+ * created for each perf_event.
+ *
+ * The actual configuration of the event is set right before use in order
+ * to use the X86_CONFIG macro.
+ */
+static struct perf_event_attr perf_miss_attr = {
+	.type		= PERF_TYPE_RAW,
+	.size		= sizeof(struct perf_event_attr),
+	.pinned		= 1,
+	.disabled	= 0,
+	.exclude_user	= 1,
+};
+
+static struct perf_event_attr perf_hit_attr = {
+	.type		= PERF_TYPE_RAW,
+	.size		= sizeof(struct perf_event_attr),
+	.pinned		= 1,
+	.disabled	= 0,
+	.exclude_user	= 1,
+};
+
+struct residency_counts {
+	u64 miss_before, hits_before;
+	u64 miss_after, hits_after;
+};
+
+static int measure_residency_fn(struct perf_event_attr *miss_attr,
+				struct perf_event_attr *hit_attr,
+				struct pseudo_lock_region *plr,
+				struct residency_counts *counts)
 {
-	unsigned long long l3_hits = 0, l3_miss = 0;
-	u64 l3_hit_bits = 0, l3_miss_bits = 0;
-	struct pseudo_lock_region *plr = _plr;
-	unsigned long long l2_hits, l2_miss;
-	u64 l2_hit_bits, l2_miss_bits;
-	unsigned long i;
-#ifdef CONFIG_KASAN
-	/*
-	 * The registers used for local register variables are also used
-	 * when KASAN is active. When KASAN is active we use regular variables
-	 * at the cost of including cache access latency to these variables
-	 * in the measurements.
-	 */
+	u64 hits_before = 0, hits_after = 0, miss_before = 0, miss_after = 0;
+	struct perf_event *miss_event, *hit_event;
+	int hit_pmcnum, miss_pmcnum;
 	unsigned int line_size;
 	unsigned int size;
+	unsigned long i;
 	void *mem_r;
-#else
-	register unsigned int line_size asm("esi");
-	register unsigned int size asm("edi");
-#ifdef CONFIG_X86_64
-	register void *mem_r asm("rbx");
-#else
-	register void *mem_r asm("ebx");
-#endif /* CONFIG_X86_64 */
-#endif /* CONFIG_KASAN */
+	u64 tmp;
 
-	/*
-	 * Non-architectural event for the Goldmont Microarchitecture
-	 * from Intel x86 Architecture Software Developer Manual (SDM):
-	 * MEM_LOAD_UOPS_RETIRED D1H (event number)
-	 * Umask values:
-	 *     L1_HIT   01H
-	 *     L2_HIT   02H
-	 *     L1_MISS  08H
-	 *     L2_MISS  10H
-	 *
-	 * On Broadwell Microarchitecture the MEM_LOAD_UOPS_RETIRED event
-	 * has two "no fix" errata associated with it: BDM35 and BDM100. On
-	 * this platform we use the following events instead:
-	 * L2_RQSTS 24H (Documented in https://download.01.org/perfmon/BDW/)
-	 *       REFERENCES FFH
-	 *       MISS       3FH
-	 * LONGEST_LAT_CACHE 2EH (Documented in SDM)
-	 *       REFERENCE 4FH
-	 *       MISS      41H
-	 */
-
-	/*
-	 * Start by setting flags for IA32_PERFEVTSELx:
-	 *     OS  (Operating system mode)  0x2
-	 *     INT (APIC interrupt enable)  0x10
-	 *     EN  (Enable counter)         0x40
-	 *
-	 * Then add the Umask value and event number to select performance
-	 * event.
-	 */
-
-	switch (boot_cpu_data.x86_model) {
-	case INTEL_FAM6_ATOM_GOLDMONT:
-	case INTEL_FAM6_ATOM_GEMINI_LAKE:
-		l2_hit_bits = (0x52ULL << 16) | (0x2 << 8) | 0xd1;
-		l2_miss_bits = (0x52ULL << 16) | (0x10 << 8) | 0xd1;
-		break;
-	case INTEL_FAM6_BROADWELL_X:
-		/* On BDW the l2_hit_bits count references, not hits */
-		l2_hit_bits = (0x52ULL << 16) | (0xff << 8) | 0x24;
-		l2_miss_bits = (0x52ULL << 16) | (0x3f << 8) | 0x24;
-		/* On BDW the l3_hit_bits count references, not hits */
-		l3_hit_bits = (0x52ULL << 16) | (0x4f << 8) | 0x2e;
-		l3_miss_bits = (0x52ULL << 16) | (0x41 << 8) | 0x2e;
-		break;
-	default:
+	miss_event = perf_event_create_kernel_counter(miss_attr, plr->cpu,
+						      NULL, NULL, NULL);
+	if (IS_ERR(miss_event))
 		goto out;
-	}
+
+	hit_event = perf_event_create_kernel_counter(hit_attr, plr->cpu,
+						     NULL, NULL, NULL);
+	if (IS_ERR(hit_event))
+		goto out_miss;
 
 	local_irq_disable();
 	/*
-	 * Call wrmsr direcly to avoid the local register variables from
-	 * being overwritten due to reordering of their assignment with
-	 * the wrmsr calls.
+	 * Check any possible error state of events used by performing
+	 * one local read.
 	 */
-	__wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
-	/* Disable events and reset counters */
-	pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0, 0x0);
-	pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x0);
-	pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0, 0x0);
-	pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 1, 0x0);
-	if (l3_hit_bits > 0) {
-		pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x0);
-		pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3, 0x0);
-		pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 2, 0x0);
-		pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 3, 0x0);
+	if (perf_event_read_local(miss_event, &tmp, NULL, NULL)) {
+		local_irq_enable();
+		goto out_hit;
 	}
-	/* Set and enable the L2 counters */
-	pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0, l2_hit_bits);
-	pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1, l2_miss_bits);
-	if (l3_hit_bits > 0) {
-		pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2,
-				      l3_hit_bits);
-		pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3,
-				      l3_miss_bits);
+	if (perf_event_read_local(hit_event, &tmp, NULL, NULL)) {
+		local_irq_enable();
+		goto out_hit;
 	}
-	mem_r = plr->kmem;
-	size = plr->size;
-	line_size = plr->line_size;
+
+	/*
+	 * Disable hardware prefetchers.
+	 */
+	wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+
+	/* Initialize rest of local variables */
+	/*
+	 * Performance event has been validated right before this with
+	 * interrupts disabled - it is thus safe to read the counter index.
+	 */
+	miss_pmcnum = x86_perf_rdpmc_index(miss_event);
+	hit_pmcnum = x86_perf_rdpmc_index(hit_event);
+	line_size = READ_ONCE(plr->line_size);
+	mem_r = READ_ONCE(plr->kmem);
+	size = READ_ONCE(plr->size);
+
+	/*
+	 * Read counter variables twice - first to load the instructions
+	 * used in L1 cache, second to capture accurate value that does not
+	 * include cache misses incurred because of instruction loads.
+	 */
+	rdpmcl(hit_pmcnum, hits_before);
+	rdpmcl(miss_pmcnum, miss_before);
+	/*
+	 * From SDM: Performing back-to-back fast reads are not guaranteed
+	 * to be monotonic.
+	 * Use LFENCE to ensure all previous instructions are retired
+	 * before proceeding.
+	 */
+	rmb();
+	rdpmcl(hit_pmcnum, hits_before);
+	rdpmcl(miss_pmcnum, miss_before);
+	/*
+	 * Use LFENCE to ensure all previous instructions are retired
+	 * before proceeding.
+	 */
+	rmb();
 	for (i = 0; i < size; i += line_size) {
+		/*
+		 * Add a barrier to prevent speculative execution of this
+		 * loop reading beyond the end of the buffer.
+		 */
+		rmb();
 		asm volatile("mov (%0,%1,1), %%eax\n\t"
 			     :
 			     : "r" (mem_r), "r" (i)
 			     : "%eax", "memory");
 	}
 	/*
-	 * Call wrmsr directly (no tracing) to not influence
-	 * the cache access counters as they are disabled.
+	 * Use LFENCE to ensure all previous instructions are retired
+	 * before proceeding.
 	 */
-	pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0,
-			      l2_hit_bits & ~(0x40ULL << 16));
-	pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1,
-			      l2_miss_bits & ~(0x40ULL << 16));
-	if (l3_hit_bits > 0) {
-		pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2,
-				      l3_hit_bits & ~(0x40ULL << 16));
-		pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3,
-				      l3_miss_bits & ~(0x40ULL << 16));
-	}
-	l2_hits = native_read_pmc(0);
-	l2_miss = native_read_pmc(1);
-	if (l3_hit_bits > 0) {
-		l3_hits = native_read_pmc(2);
-		l3_miss = native_read_pmc(3);
-	}
+	rmb();
+	rdpmcl(hit_pmcnum, hits_after);
+	rdpmcl(miss_pmcnum, miss_after);
+	/*
+	 * Use LFENCE to ensure all previous instructions are retired
+	 * before proceeding.
+	 */
+	rmb();
+	/* Re-enable hardware prefetchers */
 	wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
 	local_irq_enable();
+out_hit:
+	perf_event_release_kernel(hit_event);
+out_miss:
+	perf_event_release_kernel(miss_event);
+out:
 	/*
-	 * On BDW we count references and misses, need to adjust. Sometimes
-	 * the "hits" counter is a bit more than the references, for
-	 * example, x references but x + 1 hits. To not report invalid
-	 * hit values in this case we treat that as misses eaqual to
-	 * references.
+	 * All counts will be zero on failure.
 	 */
-	if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X)
-		l2_hits -= (l2_miss > l2_hits ? l2_hits : l2_miss);
-	trace_pseudo_lock_l2(l2_hits, l2_miss);
-	if (l3_hit_bits > 0) {
-		if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X)
-			l3_hits -= (l3_miss > l3_hits ? l3_hits : l3_miss);
-		trace_pseudo_lock_l3(l3_hits, l3_miss);
+	counts->miss_before = miss_before;
+	counts->hits_before = hits_before;
+	counts->miss_after = miss_after;
+	counts->hits_after = hits_after;
+	return 0;
+}
+
+static int measure_l2_residency(void *_plr)
+{
+	struct pseudo_lock_region *plr = _plr;
+	struct residency_counts counts = {0};
+
+	/*
+	 * Non-architectural event for the Goldmont Microarchitecture
+	 * from Intel x86 Architecture Software Developer Manual (SDM):
+	 * MEM_LOAD_UOPS_RETIRED D1H (event number)
+	 * Umask values:
+	 *     L2_HIT   02H
+	 *     L2_MISS  10H
+	 */
+	switch (boot_cpu_data.x86_model) {
+	case INTEL_FAM6_ATOM_GOLDMONT:
+	case INTEL_FAM6_ATOM_GEMINI_LAKE:
+		perf_miss_attr.config = X86_CONFIG(.event = 0xd1,
+						   .umask = 0x10);
+		perf_hit_attr.config = X86_CONFIG(.event = 0xd1,
+						  .umask = 0x2);
+		break;
+	default:
+		goto out;
 	}
 
+	measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
+	/*
+	 * If a failure prevented the measurements from succeeding
+	 * tracepoints will still be written and all counts will be zero.
+	 */
+	trace_pseudo_lock_l2(counts.hits_after - counts.hits_before,
+			     counts.miss_after - counts.miss_before);
+out:
+	plr->thread_done = 1;
+	wake_up_interruptible(&plr->lock_thread_wq);
+	return 0;
+}
+
+static int measure_l3_residency(void *_plr)
+{
+	struct pseudo_lock_region *plr = _plr;
+	struct residency_counts counts = {0};
+
+	/*
+	 * On Broadwell Microarchitecture the MEM_LOAD_UOPS_RETIRED event
+	 * has two "no fix" errata associated with it: BDM35 and BDM100. On
+	 * this platform the following events are used instead:
+	 * LONGEST_LAT_CACHE 2EH (Documented in SDM)
+	 *       REFERENCE 4FH
+	 *       MISS      41H
+	 */
+
+	switch (boot_cpu_data.x86_model) {
+	case INTEL_FAM6_BROADWELL_X:
+		/* On BDW the hit event counts references, not hits */
+		perf_hit_attr.config = X86_CONFIG(.event = 0x2e,
+						  .umask = 0x4f);
+		perf_miss_attr.config = X86_CONFIG(.event = 0x2e,
+						   .umask = 0x41);
+		break;
+	default:
+		goto out;
+	}
+
+	measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
+	/*
+	 * If a failure prevented the measurements from succeeding
+	 * tracepoints will still be written and all counts will be zero.
+	 */
+
+	counts.miss_after -= counts.miss_before;
+	if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X) {
+		/*
+		 * On BDW references and misses are counted, need to adjust.
+		 * Sometimes the "hits" counter is a bit more than the
+		 * references, for example, x references but x + 1 hits.
+		 * To not report invalid hit values in this case we treat
+		 * that as misses equal to references.
+		 */
+		/* First compute the number of cache references measured */
+		counts.hits_after -= counts.hits_before;
+		/* Next convert references to cache hits */
+		counts.hits_after -= min(counts.miss_after, counts.hits_after);
+	} else {
+		counts.hits_after -= counts.hits_before;
+	}
+
+	trace_pseudo_lock_l3(counts.hits_after, counts.miss_after);
 out:
 	plr->thread_done = 1;
 	wake_up_interruptible(&plr->lock_thread_wq);
@@ -1121,13 +1179,20 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
 		goto out;
 	}
 
+	plr->cpu = cpu;
+
 	if (sel == 1)
 		thread = kthread_create_on_node(measure_cycles_lat_fn, plr,
 						cpu_to_node(cpu),
 						"pseudo_lock_measure/%u",
 						cpu);
 	else if (sel == 2)
-		thread = kthread_create_on_node(measure_cycles_perf_fn, plr,
+		thread = kthread_create_on_node(measure_l2_residency, plr,
+						cpu_to_node(cpu),
+						"pseudo_lock_measure/%u",
+						cpu);
+	else if (sel == 3)
+		thread = kthread_create_on_node(measure_l3_residency, plr,
 						cpu_to_node(cpu),
 						"pseudo_lock_measure/%u",
 						cpu);