mirror of
https://github.com/torvalds/linux.git
synced 2024-11-12 15:11:50 +00:00
x86-64: Move vread_tsc and vread_hpet into the vDSO
The vsyscall page now consists entirely of trap instructions.

Cc: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Andy Lutomirski <luto@mit.edu>
Link: http://lkml.kernel.org/r/637648f303f2ef93af93bae25186e9a1bea093f5.1310639973.git.luto@mit.edu
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
This commit is contained in:
parent
433bd805e5
commit
98d0ac38ca
@ -7,8 +7,12 @@
|
|||||||
|
|
||||||
#define __ARCH_HAS_CLOCKSOURCE_DATA
|
#define __ARCH_HAS_CLOCKSOURCE_DATA
|
||||||
|
|
||||||
|
#define VCLOCK_NONE 0 /* No vDSO clock available. */
|
||||||
|
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
|
||||||
|
#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
|
||||||
|
|
||||||
struct arch_clocksource_data {
|
struct arch_clocksource_data {
|
||||||
cycle_t (*vread)(void);
|
int vclock_mode;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* CONFIG_X86_64 */
|
#endif /* CONFIG_X86_64 */
|
||||||
|
@ -51,10 +51,6 @@ extern int unsynchronized_tsc(void);
|
|||||||
extern int check_tsc_unstable(void);
|
extern int check_tsc_unstable(void);
|
||||||
extern unsigned long native_calibrate_tsc(void);
|
extern unsigned long native_calibrate_tsc(void);
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
extern cycles_t vread_tsc(void);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Boot-time check whether the TSCs are synchronized across
|
* Boot-time check whether the TSCs are synchronized across
|
||||||
* all CPUs/cores:
|
* all CPUs/cores:
|
||||||
|
@ -13,7 +13,7 @@ struct vsyscall_gtod_data {
|
|||||||
|
|
||||||
struct timezone sys_tz;
|
struct timezone sys_tz;
|
||||||
struct { /* extract of a clocksource struct */
|
struct { /* extract of a clocksource struct */
|
||||||
cycle_t (*vread)(void);
|
int vclock_mode;
|
||||||
cycle_t cycle_last;
|
cycle_t cycle_last;
|
||||||
cycle_t mask;
|
cycle_t mask;
|
||||||
u32 mult;
|
u32 mult;
|
||||||
|
@ -16,10 +16,6 @@ enum vsyscall_num {
|
|||||||
#ifdef __KERNEL__
|
#ifdef __KERNEL__
|
||||||
#include <linux/seqlock.h>
|
#include <linux/seqlock.h>
|
||||||
|
|
||||||
/* Definitions for CONFIG_GENERIC_TIME */
|
|
||||||
#define __vsyscall_fn \
|
|
||||||
__attribute__ ((unused, __section__(".vsyscall_fn"))) notrace
|
|
||||||
|
|
||||||
#define VGETCPU_RDTSCP 1
|
#define VGETCPU_RDTSCP 1
|
||||||
#define VGETCPU_LSL 2
|
#define VGETCPU_LSL 2
|
||||||
|
|
||||||
|
@ -24,17 +24,12 @@ endif
|
|||||||
nostackp := $(call cc-option, -fno-stack-protector)
|
nostackp := $(call cc-option, -fno-stack-protector)
|
||||||
CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
|
CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
|
||||||
CFLAGS_hpet.o := $(nostackp)
|
CFLAGS_hpet.o := $(nostackp)
|
||||||
CFLAGS_vread_tsc_64.o := $(nostackp)
|
|
||||||
CFLAGS_paravirt.o := $(nostackp)
|
CFLAGS_paravirt.o := $(nostackp)
|
||||||
GCOV_PROFILE_vsyscall_64.o := n
|
GCOV_PROFILE_vsyscall_64.o := n
|
||||||
GCOV_PROFILE_hpet.o := n
|
GCOV_PROFILE_hpet.o := n
|
||||||
GCOV_PROFILE_tsc.o := n
|
GCOV_PROFILE_tsc.o := n
|
||||||
GCOV_PROFILE_vread_tsc_64.o := n
|
|
||||||
GCOV_PROFILE_paravirt.o := n
|
GCOV_PROFILE_paravirt.o := n
|
||||||
|
|
||||||
# vread_tsc_64 is hot and should be fully optimized:
|
|
||||||
CFLAGS_REMOVE_vread_tsc_64.o = -pg -fno-optimize-sibling-calls
|
|
||||||
|
|
||||||
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
|
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
|
||||||
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
|
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
|
||||||
obj-y += time.o ioport.o ldt.o dumpstack.o
|
obj-y += time.o ioport.o ldt.o dumpstack.o
|
||||||
@ -43,7 +38,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o
|
|||||||
obj-y += probe_roms.o
|
obj-y += probe_roms.o
|
||||||
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
|
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
|
||||||
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
|
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
|
||||||
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o
|
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
|
||||||
obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
|
obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
|
||||||
obj-y += bootflag.o e820.o
|
obj-y += bootflag.o e820.o
|
||||||
obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
|
obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
|
||||||
|
@ -14,7 +14,6 @@
|
|||||||
#include <asm/pgtable.h>
|
#include <asm/pgtable.h>
|
||||||
#include <asm/mce.h>
|
#include <asm/mce.h>
|
||||||
#include <asm/nmi.h>
|
#include <asm/nmi.h>
|
||||||
#include <asm/vsyscall.h>
|
|
||||||
#include <asm/cacheflush.h>
|
#include <asm/cacheflush.h>
|
||||||
#include <asm/tlbflush.h>
|
#include <asm/tlbflush.h>
|
||||||
#include <asm/io.h>
|
#include <asm/io.h>
|
||||||
@ -250,7 +249,6 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
|
|||||||
|
|
||||||
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
|
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
|
||||||
extern s32 __smp_locks[], __smp_locks_end[];
|
extern s32 __smp_locks[], __smp_locks_end[];
|
||||||
extern char __vsyscall_0;
|
|
||||||
void *text_poke_early(void *addr, const void *opcode, size_t len);
|
void *text_poke_early(void *addr, const void *opcode, size_t len);
|
||||||
|
|
||||||
/* Replace instructions with better alternatives for this CPU type.
|
/* Replace instructions with better alternatives for this CPU type.
|
||||||
@ -294,12 +292,6 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
|
|||||||
add_nops(insnbuf + a->replacementlen,
|
add_nops(insnbuf + a->replacementlen,
|
||||||
a->instrlen - a->replacementlen);
|
a->instrlen - a->replacementlen);
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
/* vsyscall code is not mapped yet. resolve it manually. */
|
|
||||||
if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
|
|
||||||
instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
text_poke_early(instr, insnbuf, a->instrlen);
|
text_poke_early(instr, insnbuf, a->instrlen);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -738,13 +738,6 @@ static cycle_t read_hpet(struct clocksource *cs)
|
|||||||
return (cycle_t)hpet_readl(HPET_COUNTER);
|
return (cycle_t)hpet_readl(HPET_COUNTER);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
static cycle_t __vsyscall_fn vread_hpet(void)
|
|
||||||
{
|
|
||||||
return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static struct clocksource clocksource_hpet = {
|
static struct clocksource clocksource_hpet = {
|
||||||
.name = "hpet",
|
.name = "hpet",
|
||||||
.rating = 250,
|
.rating = 250,
|
||||||
@ -753,7 +746,7 @@ static struct clocksource clocksource_hpet = {
|
|||||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||||
.resume = hpet_resume_counter,
|
.resume = hpet_resume_counter,
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
.archdata = { .vread = vread_hpet },
|
.archdata = { .vclock_mode = VCLOCK_HPET },
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -777,7 +777,7 @@ static struct clocksource clocksource_tsc = {
|
|||||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
|
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
|
||||||
CLOCK_SOURCE_MUST_VERIFY,
|
CLOCK_SOURCE_MUST_VERIFY,
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
.archdata = { .vread = vread_tsc },
|
.archdata = { .vclock_mode = VCLOCK_TSC },
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -169,9 +169,6 @@ SECTIONS
|
|||||||
.vsyscall : AT(VLOAD(.vsyscall)) {
|
.vsyscall : AT(VLOAD(.vsyscall)) {
|
||||||
*(.vsyscall_0)
|
*(.vsyscall_0)
|
||||||
|
|
||||||
. = ALIGN(L1_CACHE_BYTES);
|
|
||||||
*(.vsyscall_fn)
|
|
||||||
|
|
||||||
. = 1024;
|
. = 1024;
|
||||||
*(.vsyscall_1)
|
*(.vsyscall_1)
|
||||||
|
|
||||||
|
@ -1,36 +0,0 @@
|
|||||||
/* This code runs in userspace. */
|
|
||||||
|
|
||||||
#define DISABLE_BRANCH_PROFILING
|
|
||||||
#include <asm/vgtod.h>
|
|
||||||
|
|
||||||
notrace cycle_t __vsyscall_fn vread_tsc(void)
|
|
||||||
{
|
|
||||||
cycle_t ret;
|
|
||||||
u64 last;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Empirically, a fence (of type that depends on the CPU)
|
|
||||||
* before rdtsc is enough to ensure that rdtsc is ordered
|
|
||||||
* with respect to loads. The various CPU manuals are unclear
|
|
||||||
* as to whether rdtsc can be reordered with later loads,
|
|
||||||
* but no one has ever seen it happen.
|
|
||||||
*/
|
|
||||||
rdtsc_barrier();
|
|
||||||
ret = (cycle_t)vget_cycles();
|
|
||||||
|
|
||||||
last = VVAR(vsyscall_gtod_data).clock.cycle_last;
|
|
||||||
|
|
||||||
if (likely(ret >= last))
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* GCC likes to generate cmov here, but this branch is extremely
|
|
||||||
* predictable (it's just a function of time and the likely is
|
|
||||||
* very likely) and there's a data dependence, so force GCC
|
|
||||||
* to generate a branch instead. I don't barrier() because
|
|
||||||
* we don't actually need a barrier, and if this function
|
|
||||||
* ever gets inlined it will generate worse code.
|
|
||||||
*/
|
|
||||||
asm volatile ("");
|
|
||||||
return last;
|
|
||||||
}
|
|
@ -74,7 +74,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
|
|||||||
write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
|
write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
|
||||||
|
|
||||||
/* copy vsyscall data */
|
/* copy vsyscall data */
|
||||||
vsyscall_gtod_data.clock.vread = clock->archdata.vread;
|
vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode;
|
||||||
vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
|
vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
|
||||||
vsyscall_gtod_data.clock.mask = clock->mask;
|
vsyscall_gtod_data.clock.mask = clock->mask;
|
||||||
vsyscall_gtod_data.clock.mult = mult;
|
vsyscall_gtod_data.clock.mult = mult;
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include <linux/time.h>
|
#include <linux/time.h>
|
||||||
#include <linux/string.h>
|
#include <linux/string.h>
|
||||||
#include <asm/vsyscall.h>
|
#include <asm/vsyscall.h>
|
||||||
|
#include <asm/fixmap.h>
|
||||||
#include <asm/vgtod.h>
|
#include <asm/vgtod.h>
|
||||||
#include <asm/timex.h>
|
#include <asm/timex.h>
|
||||||
#include <asm/hpet.h>
|
#include <asm/hpet.h>
|
||||||
@ -25,6 +26,43 @@
|
|||||||
|
|
||||||
#define gtod (&VVAR(vsyscall_gtod_data))
|
#define gtod (&VVAR(vsyscall_gtod_data))
|
||||||
|
|
||||||
|
notrace static cycle_t vread_tsc(void)
|
||||||
|
{
|
||||||
|
cycle_t ret;
|
||||||
|
u64 last;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Empirically, a fence (of type that depends on the CPU)
|
||||||
|
* before rdtsc is enough to ensure that rdtsc is ordered
|
||||||
|
* with respect to loads. The various CPU manuals are unclear
|
||||||
|
* as to whether rdtsc can be reordered with later loads,
|
||||||
|
* but no one has ever seen it happen.
|
||||||
|
*/
|
||||||
|
rdtsc_barrier();
|
||||||
|
ret = (cycle_t)vget_cycles();
|
||||||
|
|
||||||
|
last = VVAR(vsyscall_gtod_data).clock.cycle_last;
|
||||||
|
|
||||||
|
if (likely(ret >= last))
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* GCC likes to generate cmov here, but this branch is extremely
|
||||||
|
* predictable (it's just a function of time and the likely is
|
||||||
|
* very likely) and there's a data dependence, so force GCC
|
||||||
|
* to generate a branch instead. I don't barrier() because
|
||||||
|
* we don't actually need a barrier, and if this function
|
||||||
|
* ever gets inlined it will generate worse code.
|
||||||
|
*/
|
||||||
|
asm volatile ("");
|
||||||
|
return last;
|
||||||
|
}
|
||||||
|
|
||||||
|
static notrace cycle_t vread_hpet(void)
|
||||||
|
{
|
||||||
|
return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
|
||||||
|
}
|
||||||
|
|
||||||
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
|
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
|
||||||
{
|
{
|
||||||
long ret;
|
long ret;
|
||||||
@ -36,9 +74,12 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
|
|||||||
notrace static inline long vgetns(void)
|
notrace static inline long vgetns(void)
|
||||||
{
|
{
|
||||||
long v;
|
long v;
|
||||||
cycles_t (*vread)(void);
|
cycles_t cycles;
|
||||||
vread = gtod->clock.vread;
|
if (gtod->clock.vclock_mode == VCLOCK_TSC)
|
||||||
v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask;
|
cycles = vread_tsc();
|
||||||
|
else
|
||||||
|
cycles = vread_hpet();
|
||||||
|
v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
|
||||||
return (v * gtod->clock.mult) >> gtod->clock.shift;
|
return (v * gtod->clock.mult) >> gtod->clock.shift;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -118,11 +159,11 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
|
|||||||
{
|
{
|
||||||
switch (clock) {
|
switch (clock) {
|
||||||
case CLOCK_REALTIME:
|
case CLOCK_REALTIME:
|
||||||
if (likely(gtod->clock.vread))
|
if (likely(gtod->clock.vclock_mode != VCLOCK_NONE))
|
||||||
return do_realtime(ts);
|
return do_realtime(ts);
|
||||||
break;
|
break;
|
||||||
case CLOCK_MONOTONIC:
|
case CLOCK_MONOTONIC:
|
||||||
if (likely(gtod->clock.vread))
|
if (likely(gtod->clock.vclock_mode != VCLOCK_NONE))
|
||||||
return do_monotonic(ts);
|
return do_monotonic(ts);
|
||||||
break;
|
break;
|
||||||
case CLOCK_REALTIME_COARSE:
|
case CLOCK_REALTIME_COARSE:
|
||||||
@ -139,7 +180,7 @@ int clock_gettime(clockid_t, struct timespec *)
|
|||||||
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
|
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
|
||||||
{
|
{
|
||||||
long ret;
|
long ret;
|
||||||
if (likely(gtod->clock.vread)) {
|
if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) {
|
||||||
if (likely(tv != NULL)) {
|
if (likely(tv != NULL)) {
|
||||||
BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
|
BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
|
||||||
offsetof(struct timespec, tv_nsec) ||
|
offsetof(struct timespec, tv_nsec) ||
|
||||||
|
Loading…
Reference in New Issue
Block a user