linux/arch/x86/kernel/cpu/aperfmperf.c
Rafael J. Wysocki b29c6ef7bb x86 / CPU: Avoid unnecessary IPIs in arch_freq_get_on_cpu()
Even though aperfmperf_snapshot_khz() caches the samples.khz value to
return if called again in a sufficiently short time, its caller,
arch_freq_get_on_cpu(), still uses smp_call_function_single() to run it
which may allow user space to trigger an IPI storm by reading from the
scaling_cur_freq cpufreq sysfs file in a tight loop.

To avoid that, move the decision on whether or not to return the cached
samples.khz value to arch_freq_get_on_cpu().

This change was part of commit 941f5f0f6e ("x86: CPU: Fix up "cpu MHz"
in /proc/cpuinfo"), but it was not the reason for the revert and it
remains applicable.

Fixes: 4815d3c56d (cpufreq: x86: Make scaling_cur_freq behave more as expected)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: WANG Chao <chao.wang@ucloud.cn>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-11-13 19:42:39 -08:00

98 lines
2.2 KiB
C

/*
* x86 APERF/MPERF KHz calculation for
* /sys/.../cpufreq/scaling_cur_freq
*
* Copyright (C) 2017 Intel Corp.
* Author: Len Brown <len.brown@intel.com>
*
* This file is licensed under GPLv2.
*/
#include <linux/delay.h>
#include <linux/ktime.h>
#include <linux/math64.h>
#include <linux/percpu.h>
#include <linux/smp.h>
struct aperfmperf_sample {
unsigned int khz;
ktime_t time;
u64 aperf;
u64 mperf;
};
static DEFINE_PER_CPU(struct aperfmperf_sample, samples);
#define APERFMPERF_CACHE_THRESHOLD_MS 10
#define APERFMPERF_REFRESH_DELAY_MS 20
#define APERFMPERF_STALE_THRESHOLD_MS 1000
/*
* aperfmperf_snapshot_khz()
* On the current CPU, snapshot APERF, MPERF, and jiffies
* unless we already did it within 10ms
* calculate kHz, save snapshot
*/
static void aperfmperf_snapshot_khz(void *dummy)
{
u64 aperf, aperf_delta;
u64 mperf, mperf_delta;
struct aperfmperf_sample *s = this_cpu_ptr(&samples);
ktime_t now = ktime_get();
s64 time_delta = ktime_ms_delta(now, s->time);
unsigned long flags;
local_irq_save(flags);
rdmsrl(MSR_IA32_APERF, aperf);
rdmsrl(MSR_IA32_MPERF, mperf);
local_irq_restore(flags);
aperf_delta = aperf - s->aperf;
mperf_delta = mperf - s->mperf;
/*
* There is no architectural guarantee that MPERF
* increments faster than we can read it.
*/
if (mperf_delta == 0)
return;
s->time = now;
s->aperf = aperf;
s->mperf = mperf;
/* If the previous iteration was too long ago, discard it. */
if (time_delta > APERFMPERF_STALE_THRESHOLD_MS)
s->khz = 0;
else
s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
}
unsigned int arch_freq_get_on_cpu(int cpu)
{
s64 time_delta;
unsigned int khz;
if (!cpu_khz)
return 0;
if (!static_cpu_has(X86_FEATURE_APERFMPERF))
return 0;
/* Don't bother re-computing within the cache threshold time. */
time_delta = ktime_ms_delta(ktime_get(), per_cpu(samples.time, cpu));
khz = per_cpu(samples.khz, cpu);
if (khz && time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
return khz;
smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
khz = per_cpu(samples.khz, cpu);
if (khz)
return khz;
msleep(APERFMPERF_REFRESH_DELAY_MS);
smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
return per_cpu(samples.khz, cpu);
}