linux/arch/i386/kernel/time.c
Zachary Amsden 7b35520243 [PATCH] i386: Profile pc badness
Profile_pc was broken when using paravirtualization because the
assumption the kernel was running at CPL 0 was violated, causing
bad logic to read a random value off the stack.

The only way to be in kernel lock functions is to be in kernel
code, so validate that assumption explicitly by checking the CS
value.  We don't want to be fooled by BIOS / APM segments and
try to read those stacks, so only match KERNEL_CS.

I moved some stuff in segment.h to make it prettier.

Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andi Kleen <ak@suse.de>
2007-02-13 13:26:21 +01:00

392 lines
9.8 KiB
C

/*
* linux/arch/i386/kernel/time.c
*
* Copyright (C) 1991, 1992, 1995 Linus Torvalds
*
* This file contains the PC-specific time handling details:
* reading the RTC at bootup, etc..
* 1994-07-02 Alan Modra
* fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
* 1995-03-26 Markus Kuhn
* fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
* precision CMOS clock update
* 1996-05-03 Ingo Molnar
* fixed time warps in do_[slow|fast]_gettimeoffset()
* 1997-09-10 Updated NTP code according to technical memorandum Jan '96
* "A Kernel Model for Precision Timekeeping" by Dave Mills
* 1998-09-05 (Various)
* More robust do_fast_gettimeoffset() algorithm implemented
* (works with APM, Cyrix 6x86MX and Centaur C6),
* monotonic gettimeofday() with fast_get_timeoffset(),
* drift-proof precision TSC calibration on boot
* (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
* Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
* ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
* 1998-12-16 Andrea Arcangeli
* Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
* because was not accounting lost_ticks.
* 1998-12-24 Copyright (C) 1998 Andrea Arcangeli
* Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
* serialize accesses to xtime/lost_ticks).
*/
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/time.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/module.h>
#include <linux/sysdev.h>
#include <linux/bcd.h>
#include <linux/efi.h>
#include <linux/mca.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/irq.h>
#include <asm/msr.h>
#include <asm/delay.h>
#include <asm/mpspec.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/timer.h>
#include <asm/time.h>
#include "mach_time.h"
#include <linux/timex.h>
#include <asm/hpet.h>
#include <asm/arch_hooks.h>
#include "io_ports.h"
#include <asm/i8259.h>
int pit_latch_buggy; /* extern */
#include "do_timer.h"
unsigned int cpu_khz; /* Detected as we calibrate the TSC */
EXPORT_SYMBOL(cpu_khz);
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
/*
* This is a special lock that is owned by the CPU and holds the index
* register we are working with. It is required for NMI access to the
* CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details.
*/
volatile unsigned long cmos_lock = 0;
EXPORT_SYMBOL(cmos_lock);
/* Routines for accessing the CMOS RAM/RTC. */
unsigned char rtc_cmos_read(unsigned char addr)
{
unsigned char val;
lock_cmos_prefix(addr);
outb_p(addr, RTC_PORT(0));
val = inb_p(RTC_PORT(1));
lock_cmos_suffix(addr);
return val;
}
EXPORT_SYMBOL(rtc_cmos_read);
void rtc_cmos_write(unsigned char val, unsigned char addr)
{
lock_cmos_prefix(addr);
outb_p(addr, RTC_PORT(0));
outb_p(val, RTC_PORT(1));
lock_cmos_suffix(addr);
}
EXPORT_SYMBOL(rtc_cmos_write);
static int set_rtc_mmss(unsigned long nowtime)
{
int retval;
unsigned long flags;
/* gets recalled with irq locally disabled */
/* XXX - does irqsave resolve this? -johnstul */
spin_lock_irqsave(&rtc_lock, flags);
retval = set_wallclock(nowtime);
spin_unlock_irqrestore(&rtc_lock, flags);
return retval;
}
int timer_ack;
unsigned long profile_pc(struct pt_regs *regs)
{
unsigned long pc = instruction_pointer(regs);
#ifdef CONFIG_SMP
if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs) &&
in_lock_functions(pc)) {
#ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->ebp + 4);
#else
unsigned long *sp = (unsigned long *)&regs->esp;
/* Return address is either directly at stack pointer
or above a saved eflags. Eflags has bits 22-31 zero,
kernel addresses don't. */
if (sp[0] >> 22)
return sp[0];
if (sp[1] >> 22)
return sp[1];
#endif
}
#endif
return pc;
}
EXPORT_SYMBOL(profile_pc);
/*
* This is the same as the above, except we _also_ save the current
* Time Stamp Counter value at the time of the timer interrupt, so that
* we later on can estimate the time of day more exactly.
*/
irqreturn_t timer_interrupt(int irq, void *dev_id)
{
/*
* Here we are in the timer irq handler. We just have irqs locally
* disabled but we don't know if the timer_bh is running on the other
* CPU. We need to avoid to SMP race with it. NOTE: we don' t need
* the irq version of write_lock because as just said we have irq
* locally disabled. -arca
*/
write_seqlock(&xtime_lock);
#ifdef CONFIG_X86_IO_APIC
if (timer_ack) {
/*
* Subtle, when I/O APICs are used we have to ack timer IRQ
* manually to reset the IRR bit for do_slow_gettimeoffset().
* This will also deassert NMI lines for the watchdog if run
* on an 82489DX-based system.
*/
spin_lock(&i8259A_lock);
outb(0x0c, PIC_MASTER_OCW3);
/* Ack the IRQ; AEOI will end it automatically. */
inb(PIC_MASTER_POLL);
spin_unlock(&i8259A_lock);
}
#endif
do_timer_interrupt_hook();
if (MCA_bus) {
/* The PS/2 uses level-triggered interrupts. You can't
turn them off, nor would you want to (any attempt to
enable edge-triggered interrupts usually gets intercepted by a
special hardware circuit). Hence we have to acknowledge
the timer interrupt. Through some incredibly stupid
design idea, the reset for IRQ 0 is done by setting the
high bit of the PPI port B (0x61). Note that some PS/2s,
notably the 55SX, work fine if this is removed. */
u8 irq_v = inb_p( 0x61 ); /* read the current state */
outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */
}
write_sequnlock(&xtime_lock);
#ifdef CONFIG_X86_LOCAL_APIC
if (using_apic_timer)
smp_send_timer_broadcast_ipi();
#endif
return IRQ_HANDLED;
}
/* not static: needed by APM */
unsigned long get_cmos_time(void)
{
unsigned long retval;
unsigned long flags;
spin_lock_irqsave(&rtc_lock, flags);
retval = get_wallclock();
spin_unlock_irqrestore(&rtc_lock, flags);
return retval;
}
EXPORT_SYMBOL(get_cmos_time);
static void sync_cmos_clock(unsigned long dummy);
static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
int no_sync_cmos_clock;
static void sync_cmos_clock(unsigned long dummy)
{
struct timeval now, next;
int fail = 1;
/*
* If we have an externally synchronized Linux clock, then update
* CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
* called as close as possible to 500 ms before the new second starts.
* This code is run on a timer. If the clock is set, that timer
* may not expire at the correct time. Thus, we adjust...
*/
if (!ntp_synced())
/*
* Not synced, exit, do not restart a timer (if one is
* running, let it run out).
*/
return;
do_gettimeofday(&now);
if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
fail = set_rtc_mmss(now.tv_sec);
next.tv_usec = USEC_AFTER - now.tv_usec;
if (next.tv_usec <= 0)
next.tv_usec += USEC_PER_SEC;
if (!fail)
next.tv_sec = 659;
else
next.tv_sec = 0;
if (next.tv_usec >= USEC_PER_SEC) {
next.tv_sec++;
next.tv_usec -= USEC_PER_SEC;
}
mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
}
void notify_arch_cmos_timer(void)
{
if (!no_sync_cmos_clock)
mod_timer(&sync_cmos_timer, jiffies + 1);
}
static long clock_cmos_diff;
static unsigned long sleep_start;
static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
/*
* Estimate time zone so that set_time can update the clock
*/
unsigned long ctime = get_cmos_time();
clock_cmos_diff = -ctime;
clock_cmos_diff += get_seconds();
sleep_start = ctime;
return 0;
}
static int timer_resume(struct sys_device *dev)
{
unsigned long flags;
unsigned long sec;
unsigned long ctime = get_cmos_time();
long sleep_length = (ctime - sleep_start) * HZ;
struct timespec ts;
if (sleep_length < 0) {
printk(KERN_WARNING "CMOS clock skew detected in timer resume!\n");
/* The time after the resume must not be earlier than the time
* before the suspend or some nasty things will happen
*/
sleep_length = 0;
ctime = sleep_start;
}
#ifdef CONFIG_HPET_TIMER
if (is_hpet_enabled())
hpet_reenable();
#endif
setup_pit_timer();
sec = ctime + clock_cmos_diff;
ts.tv_sec = sec;
ts.tv_nsec = 0;
do_settimeofday(&ts);
write_seqlock_irqsave(&xtime_lock, flags);
jiffies_64 += sleep_length;
write_sequnlock_irqrestore(&xtime_lock, flags);
touch_softlockup_watchdog();
return 0;
}
static struct sysdev_class timer_sysclass = {
.resume = timer_resume,
.suspend = timer_suspend,
set_kset_name("timer"),
};
/* XXX this driverfs stuff should probably go elsewhere later -john */
static struct sys_device device_timer = {
.id = 0,
.cls = &timer_sysclass,
};
static int time_init_device(void)
{
int error = sysdev_class_register(&timer_sysclass);
if (!error)
error = sysdev_register(&device_timer);
return error;
}
device_initcall(time_init_device);
#ifdef CONFIG_HPET_TIMER
extern void (*late_time_init)(void);
/* Duplicate of time_init() below, with hpet_enable part added */
static void __init hpet_time_init(void)
{
struct timespec ts;
ts.tv_sec = get_cmos_time();
ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
do_settimeofday(&ts);
if ((hpet_enable() >= 0) && hpet_use_timer) {
printk("Using HPET for base-timer\n");
}
do_time_init();
}
#endif
void __init time_init(void)
{
struct timespec ts;
#ifdef CONFIG_HPET_TIMER
if (is_hpet_capable()) {
/*
* HPET initialization needs to do memory-mapped io. So, let
* us do a late initialization after mem_init().
*/
late_time_init = hpet_time_init;
return;
}
#endif
ts.tv_sec = get_cmos_time();
ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
do_settimeofday(&ts);
do_time_init();
}