linux/arch/x86/kernel/time.c
Linus Torvalds 093d9603b6 x86: stop playing stack games in profile_pc()
The 'profile_pc()' function is used for timer-based profiling, which
isn't really all that relevant any more to begin with, but it also ends
up making assumptions based on the stack layout that aren't necessarily
valid.

Basically, the code tries to account the time spent in spinlocks to the
caller rather than the spinlock, and while I support that as a concept,
it's not worth the code complexity or the KASAN warnings when no serious
profiling is done using timers anyway these days.

And the code really does depend on stack layout that is only true in the
simplest of cases.  We've lost the comment at some point (I think when
the 32-bit and 64-bit code was unified), but it used to say:

	Assume the lock function has either no stack frame or a copy
	of eflags from PUSHF.

which explains why it just blindly loads a word or two straight off the
stack pointer and then takes a minimal look at the values to just check
if they might be eflags or the return pc:

	Eflags always has bits 22 and up cleared unlike kernel addresses

but that basic stack layout assumption assumes that there isn't any lock
debugging etc going on that would complicate the code and cause a stack
frame.

It causes KASAN unhappiness reported for years by syzkaller [1] and
others [2].

With no real practical reason for this any more, just remove the code.

Just for historical interest, here's some background commits relating to
this code from 2006:

  0cb91a2293 ("i386: Account spinlocks to the caller during profiling for !FP kernels")
  31679f38d8 ("Simplify profile_pc on x86-64")

and a code unification from 2009:

  ef4512882d ("x86: time_32/64.c unify profile_pc")

but the basics of this thing actually goes back to before the git tree.

Link: https://syzkaller.appspot.com/bug?extid=84fe685c02cd112a2ac3 [1]
Link: https://lore.kernel.org/all/CAK55_s7Xyq=nh97=K=G1sxueOFrJDAvPOJAL4TPTCAYvmxO9_A@mail.gmail.com/ [2]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2024-06-28 14:27:22 -07:00

112 lines
2.5 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 1991,1992,1995 Linus Torvalds
* Copyright (c) 1994 Alan Modra
* Copyright (c) 1995 Markus Kuhn
* Copyright (c) 1996 Ingo Molnar
* Copyright (c) 1998 Andrea Arcangeli
* Copyright (c) 2002,2006 Vojtech Pavlik
* Copyright (c) 2003 Andi Kleen
*
*/
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/i8253.h>
#include <linux/time.h>
#include <linux/export.h>
#include <asm/vsyscall.h>
#include <asm/x86_init.h>
#include <asm/i8259.h>
#include <asm/timer.h>
#include <asm/hpet.h>
#include <asm/time.h>
unsigned long profile_pc(struct pt_regs *regs)
{
return instruction_pointer(regs);
}
EXPORT_SYMBOL(profile_pc);
/*
* Default timer interrupt handler for PIT/HPET
*/
static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
global_clock_event->event_handler(global_clock_event);
return IRQ_HANDLED;
}
static void __init setup_default_timer_irq(void)
{
unsigned long flags = IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER;
/*
* Unconditionally register the legacy timer interrupt; even
* without legacy PIC/PIT we need this for the HPET0 in legacy
* replacement mode.
*/
if (request_irq(0, timer_interrupt, flags, "timer", NULL))
pr_info("Failed to register legacy timer interrupt\n");
}
/* Default timer init function */
void __init hpet_time_init(void)
{
if (!hpet_enable()) {
if (!pit_timer_init())
return;
}
setup_default_timer_irq();
}
static __init void x86_late_time_init(void)
{
/*
* Before PIT/HPET init, select the interrupt mode. This is required
* to make the decision whether PIT should be initialized correct.
*/
x86_init.irqs.intr_mode_select();
/* Setup the legacy timers */
x86_init.timers.timer_init();
/*
* After PIT/HPET timers init, set up the final interrupt mode for
* delivering IRQs.
*/
x86_init.irqs.intr_mode_init();
tsc_init();
if (static_cpu_has(X86_FEATURE_WAITPKG))
use_tpause_delay();
}
/*
* Initialize TSC and delay the periodic timer init to
* late x86_late_time_init() so ioremap works.
*/
void __init time_init(void)
{
late_time_init = x86_late_time_init;
}
/*
* Sanity check the vdso related archdata content.
*/
void clocksource_arch_init(struct clocksource *cs)
{
if (cs->vdso_clock_mode == VDSO_CLOCKMODE_NONE)
return;
if (cs->mask != CLOCKSOURCE_MASK(64)) {
pr_warn("clocksource %s registered with invalid mask %016llx for VDSO. Disabling VDSO support.\n",
cs->name, cs->mask);
cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE;
}
}