Re: [PATCH] New x86_64 time code for 2.5.70

Vojtech Pavlik (vojtech@ucw.cz)
Thu, 12 Jun 2003 21:48:05 +0200


On Thu, Jun 12, 2003 at 10:47:10AM -0700, Bryan O'Sullivan wrote:
> On Wed, 2003-06-11 at 17:33, john stultz wrote:
>
> > Let me know if you have any issues with this patch.
>
> Thanks, John. Your updated patch has survived some beating on my test
> systems. I've also applied Vojtech's fix to hpet_tick.
>
> The latest grand unified x86_64 time patch against 2.5.70 is attached.

This one looks very good indeed. Andi?

>
> <b
>
> arch/x86_64/Kconfig | 12 +
> arch/x86_64/kernel/acpi/boot.c | 6
> arch/x86_64/kernel/apic.c | 84 +++++-----
> arch/x86_64/kernel/smpboot.c | 11 +
> arch/x86_64/kernel/time.c | 306 +++++++++++++++++++++++++++++++-------- arch/x86_64/kernel/vsyscall.c | 28 ++-
> arch/x86_64/vmlinux.lds.S | 6
> include/asm-x86_64/fixmap.h | 2
> include/asm-x86_64/mc146818rtc.h | 5
> include/asm-x86_64/timex.h | 30 +++
> include/asm-x86_64/vsyscall.h | 18 +-
> 11 files changed, 384 insertions(+), 124 deletions(-)
>

> # This is a BitKeeper generated patch for the following project:
> # Project Name: Linux kernel tree
> # This patch format is intended for GNU patch command version 2.5 or higher.
> # This patch includes the following deltas:
> # ChangeSet 1.1436 -> 1.1440
> # arch/x86_64/vmlinux.lds.S 1.16 -> 1.17
> # arch/x86_64/kernel/apic.c 1.20 -> 1.21
> # include/asm-x86_64/fixmap.h 1.3 -> 1.4
> # arch/x86_64/kernel/acpi/boot.c 1.1 -> 1.2
> # include/asm-x86_64/mc146818rtc.h 1.1 -> 1.2
> # include/asm-x86_64/timex.h 1.6 -> 1.7
> # arch/x86_64/kernel/time.c 1.15 -> 1.18
> # arch/x86_64/kernel/vsyscall.c 1.10 -> 1.11
> # arch/x86_64/Kconfig 1.22 -> 1.23
> # include/asm-x86_64/vsyscall.h 1.5 -> 1.6
> # arch/x86_64/kernel/smpboot.c 1.19 -> 1.20
> #
> # The following is the BitKeeper ChangeSet Log
> # --------------------------------------------
> # 03/06/11 bos@serpentine.com 1.1437
> # Forward port 2.4 time code. Optionally uses HPET instead of PIT/RTC for
> # gettimeofday calculations. Far more stable than the current 2.5 code.
> #
> # Current caveat: this code doesn't track lost interrupts properly, so we
> # can very slowly lose a jiffy here or there.
> # --------------------------------------------
> # 03/06/11 bos@serpentine.com 1.1438
> # Fix residual bogons.
> # 03/06/12 bos@serpentine.com 1.1440
> # Further fixes to 2.5 time code.
> # --------------------------------------------
> #
> diff -Nru a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
> --- a/arch/x86_64/Kconfig Thu Jun 12 10:44:01 2003
> +++ b/arch/x86_64/Kconfig Thu Jun 12 10:44:01 2003
> @@ -52,6 +52,18 @@
> klogd/syslogd or the X server. You should normally N here, unless
> you want to debug such a crash.
>
> +config HPET_TIMER
> + bool
> + default y
> + help
> + Use the IA-PC HPET (High Precision Event Timer) to manage
> + time in preference to the PIT and RTC, if a HPET is
> + present. The HPET provides a stable time base on SMP
> + systems, unlike the RTC, but it is more expensive to access,
> + as it is off-chip. You can find the HPET spec at
> + <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>.
> +
> + If unsure, say Y.
>
> config GENERIC_ISA_DMA
> bool
> diff -Nru a/arch/x86_64/kernel/acpi/boot.c b/arch/x86_64/kernel/acpi/boot.c
> --- a/arch/x86_64/kernel/acpi/boot.c Thu Jun 12 10:44:01 2003
> +++ b/arch/x86_64/kernel/acpi/boot.c Thu Jun 12 10:44:01 2003
> @@ -244,9 +244,11 @@
> return -1;
> }
>
> - hpet.address = hpet_tbl->addr.addrl | ((long) hpet_tbl->addr.addrh << 32);
> + vxtime.hpet_address = hpet_tbl->addr.addrl |
> + ((long) hpet_tbl->addr.addrh << 32);
>
> - printk(KERN_INFO "acpi: HPET id: %#x base: %#lx\n", hpet_tbl->id, hpet.address);
> + printk(KERN_INFO "acpi: HPET id: %#x base: %#lx\n",
> + hpet_tbl->id, vxtime.hpet_address);
>
> return 0;
> }
> diff -Nru a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
> --- a/arch/x86_64/kernel/apic.c Thu Jun 12 10:44:01 2003
> +++ b/arch/x86_64/kernel/apic.c Thu Jun 12 10:44:01 2003
> @@ -46,7 +46,7 @@
> void enable_NMI_through_LVT0 (void * dummy)
> {
> unsigned int v, ver;
> -
> +
> ver = apic_read(APIC_LVR);
> ver = GET_APIC_VERSION(ver);
> v = APIC_DM_NMI; /* unmask and set to NMI */
> @@ -297,7 +297,7 @@
> * Double-check whether this APIC is really registered.
> * This is meaningless in clustered apic mode, so we skip it.
> */
> - if (!clustered_apic_mode &&
> + if (!clustered_apic_mode &&
> !test_bit(GET_APIC_ID(apic_read(APIC_ID)), &phys_cpu_present_map))
> BUG();
>
> @@ -309,7 +309,7 @@
>
> if (!clustered_apic_mode) {
> /*
> - * In clustered apic mode, the firmware does this for us
> + * In clustered apic mode, the firmware does this for us
> * Put the APIC into flat delivery mode.
> * Must be "all ones" explicitly for 82489DX.
> */
> @@ -422,15 +422,15 @@
> value = apic_read(APIC_ESR);
> Dprintk("ESR value after enabling vector: %08x\n", value);
> } else {
> - if (esr_disable)
> - /*
> - * Something untraceble is creating bad interrupts on
> + if (esr_disable)
> + /*
> + * Something untraceble is creating bad interrupts on
> * secondary quads ... for the moment, just leave the
> * ESR disabled - we can't do anything useful with the
> * errors anyway - mbligh
> */
> printk("Leaving ESR disabled.\n");
> - else
> + else
> printk("No ESR for 82489DX.\n");
> }
>
> @@ -580,7 +580,7 @@
> * Detect and enable local APICs on non-SMP boards.
> * Original code written by Keir Fraser.
> * On AMD64 we trust the BIOS - if it says no APIC it is likely
> - * not correctly set up (usually the APIC timer won't work etc.)
> + * not correctly set up (usually the APIC timer won't work etc.)
> */
>
> static int __init detect_init_APIC (void)
> @@ -683,19 +683,25 @@
>
> local_irq_save(flags);
>
> - /* For some reasons this doesn't work on Simics, so fake it for now */
> - if (!strstr(boot_cpu_data.x86_model_id, "Screwdriver")) {
> + /* For some reasons this doesn't work on Simics, so fake it for now */
> + if (!strstr(boot_cpu_data.x86_model_id, "Screwdriver")) {
> __setup_APIC_LVTT(clocks);
> return;
> - }
> + }
>
> /* wait for irq slice */
> - {
> + if (vxtime.hpet_address) {
> + int trigger = hpet_readl(HPET_T0_CMP);
> + while (hpet_readl(HPET_COUNTER) >= trigger)
> + /* do nothing */ ;
> + while (hpet_readl(HPET_COUNTER) < trigger)
> + /* do nothing */ ;
> + } else {
> int c1, c2;
> outb_p(0x00, 0x43);
> c2 = inb_p(0x40);
> c2 |= inb_p(0x40) << 8;
> - do {
> + do {
> c1 = c2;
> outb_p(0x00, 0x43);
> c2 = inb_p(0x40);
> @@ -754,10 +760,10 @@
>
> void __init setup_boot_APIC_clock (void)
> {
> - if (disable_apic_timer) {
> - printk(KERN_INFO "Disabling APIC timer\n");
> - return;
> - }
> + if (disable_apic_timer) {
> + printk(KERN_INFO "Disabling APIC timer\n");
> + return;
> + }
>
> printk(KERN_INFO "Using local APIC timer interrupts.\n");
> using_apic_timer = 1;
> @@ -816,7 +822,7 @@
> if ( (!multiplier) || (calibration_result/multiplier < 500))
> return -EINVAL;
>
> - /*
> + /*
> * Set the new multiplier for each CPU. CPUs don't start using the
> * new values until the next timer interrupt in which they do process
> * accounting. At that time they also adjust their APIC timers
> @@ -856,7 +862,7 @@
> * Interrupts are already masked off at this point.
> */
> per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
> - if (per_cpu(prof_counter, cpu) !=
> + if (per_cpu(prof_counter, cpu) !=
> per_cpu(prof_old_multiplier, cpu)) {
> __setup_APIC_LVTT(calibration_result/
> per_cpu(prof_counter, cpu));
> @@ -928,19 +934,19 @@
> ack_APIC_irq();
>
> #if 0
> - static unsigned long last_warning;
> - static unsigned long skipped;
> + static unsigned long last_warning;
> + static unsigned long skipped;
>
> /* see sw-dev-man vol 3, chapter 7.4.13.5 */
> - if (time_before(last_warning+30*HZ,jiffies)) {
> + if (time_before(last_warning+30*HZ,jiffies)) {
> printk(KERN_INFO "spurious APIC interrupt on CPU#%d, %ld skipped.\n",
> smp_processor_id(), skipped);
> - last_warning = jiffies;
> + last_warning = jiffies;
> skipped = 0;
> - } else {
> - skipped++;
> - }
> -#endif
> + } else {
> + skipped++;
> + }
> +#endif
> irq_exit();
> }
>
> @@ -975,7 +981,7 @@
> irq_exit();
> }
>
> -int disable_apic;
> +int disable_apic;
>
> /*
> * This initializes the IO-APIC and APIC hardware if this is
> @@ -983,11 +989,11 @@
> */
> int __init APIC_init_uniprocessor (void)
> {
> - if (disable_apic) {
> + if (disable_apic) {
> printk(KERN_INFO "Apic disabled\n");
> - return -1;
> + return -1;
> }
> - if (!cpu_has_apic) {
> + if (!cpu_has_apic) {
> disable_apic = 1;
> printk(KERN_INFO "Apic disabled by BIOS\n");
> return -1;
> @@ -1015,18 +1021,18 @@
> return 0;
> }
>
> -static __init int setup_disableapic(char *str)
> -{
> +static __init int setup_disableapic(char *str)
> +{
> disable_apic = 1;
> return 0;
> -}
> +}
>
> -static __init int setup_noapictimer(char *str)
> -{
> +static __init int setup_noapictimer(char *str)
> +{
> disable_apic_timer = 1;
> return 0;
> -}
> +}
>
> -__setup("disableapic", setup_disableapic);
> -__setup("noapictimer", setup_noapictimer);
> +__setup("disableapic", setup_disableapic);
> +__setup("noapictimer", setup_noapictimer);
>
> diff -Nru a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
> --- a/arch/x86_64/kernel/smpboot.c Thu Jun 12 10:44:01 2003
> +++ b/arch/x86_64/kernel/smpboot.c Thu Jun 12 10:44:01 2003
> @@ -67,6 +67,8 @@
> /* Set when the idlers are all forked */
> int smp_threads_ready;
>
> +extern void time_init_smp(void);
> +
> /*
> * Trampoline 80x86 program as an array.
> */
> @@ -760,7 +762,7 @@
> if (APIC_init_uniprocessor())
> printk(KERN_NOTICE "Local APIC not detected."
> " Using dummy APIC emulation.\n");
> - return;
> + goto smp_done;
> }
>
> /*
> @@ -784,7 +786,7 @@
> cpu_online_map = phys_cpu_present_map = 1;
> phys_cpu_present_map = 1;
> disable_apic = 1;
> - return;
> + goto smp_done;
> }
>
> verify_local_APIC();
> @@ -799,7 +801,7 @@
> cpu_online_map = phys_cpu_present_map = 1;
> phys_cpu_present_map = 1;
> disable_apic = 1;
> - return;
> + goto smp_done;
> }
>
> connect_bsp_APIC();
> @@ -883,6 +885,9 @@
> */
> if (cpu_has_tsc && cpucount)
> synchronize_tsc_bp();
> +
> + smp_done:
> + time_init_smp();
> }
>
> /* These are wrappers to interface to the new boot process. Someone
> diff -Nru a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
> --- a/arch/x86_64/kernel/time.c Thu Jun 12 10:44:01 2003
> +++ b/arch/x86_64/kernel/time.c Thu Jun 12 10:44:01 2003
> @@ -24,6 +24,7 @@
> #include <linux/module.h>
> #include <linux/device.h>
> #include <linux/bcd.h>
> +#include <asm/pgtable.h>
> #include <asm/vsyscall.h>
> #include <asm/timex.h>
> #ifdef CONFIG_X86_LOCAL_APIC
> @@ -35,41 +36,61 @@
> extern int using_apic_timer;
>
> spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
> +spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED;
>
> extern int using_apic_timer;
> extern void smp_local_timer_interrupt(struct pt_regs * regs);
>
> +#undef HPET_HACK_ENABLE_DANGEROUS
>
> -unsigned int cpu_khz; /* TSC clocks / usec, not used here */
> -unsigned long hpet_period; /* fsecs / HPET clock */
> -unsigned long hpet_tick; /* HPET clocks / interrupt */
> -int hpet_report_lost_ticks; /* command line option */
>
> -struct hpet_data __hpet __section_hpet; /* address, quotient, trigger, hz */
> +unsigned int cpu_khz; /* TSC clocks / usec, not used here */
> +unsigned long hpet_period; /* fsecs / HPET clock */
> +unsigned long hpet_tick; /* HPET clocks / interrupt */
> +unsigned long vxtime_hz = 1193182;
> +int report_lost_ticks; /* command line option */
> +
> +struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
>
> volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
> unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
> struct timespec __xtime __section_xtime;
> struct timezone __sys_tz __section_sys_tz;
>
> +static inline void rdtscll_sync(unsigned long *tsc)
> +{
> +#ifdef CONFIG_SMP
> + sync_core();
> +#endif
> + rdtscll(*tsc);
> +}
> +
> /*
> * do_gettimeoffset() returns microseconds since last timer interrupt was
> * triggered by hardware. A memory read of HPET is slower than a register read
> * of TSC, but much more reliable. It's also synchronized to the timer
> * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a
> - * timer interrupt has happened already, but hpet.trigger wasn't updated yet.
> + * timer interrupt has happened already, but vxtime.trigger wasn't updated yet.
> * This is not a problem, because jiffies hasn't updated either. They are bound
> * together by xtime_lock.
> - */
> + */
>
> -inline unsigned int do_gettimeoffset(void)
> +static inline unsigned int do_gettimeoffset_tsc(void)
> {
> unsigned long t;
> - sync_core();
> - rdtscll(t);
> - return (t - hpet.last_tsc) * (1000000L / HZ) / hpet.ticks + hpet.offset;
> + unsigned long x;
> + rdtscll_sync(&t);
> + x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32;
> + return x;
> +}
> +
> +static inline unsigned int do_gettimeoffset_hpet(void)
> +{
> + return ((hpet_readl(HPET_COUNTER) - vxtime.last) * vxtime.quot) >> 32;
> }
>
> +unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
> +
> /*
> * This version of gettimeofday() has microsecond resolution and better than
> * microsecond precision, as we're using at least a 10 MHz (usually 14.31818
> @@ -87,7 +108,8 @@
> sec = xtime.tv_sec;
> usec = xtime.tv_nsec / 1000;
>
> - t = (jiffies - wall_jiffies) * (1000000L / HZ) + do_gettimeoffset();
> + t = (jiffies - wall_jiffies) * (1000000L / HZ) +
> + do_gettimeoffset();
> usec += t;
>
> } while (read_seqretry(&xtime_lock, seq));
> @@ -169,17 +191,17 @@
> real_seconds = nowtime % 60;
> real_minutes = nowtime / 60;
> if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1)
> - real_minutes += 30; /* correct for half hour time zone */
> + real_minutes += 30; /* correct for half hour time zone */
> real_minutes %= 60;
>
> if (abs(real_minutes - cmos_minutes) < 30) {
> - BIN_TO_BCD(real_seconds);
> - BIN_TO_BCD(real_minutes);
> + BIN_TO_BCD(real_seconds);
> + BIN_TO_BCD(real_minutes);
> CMOS_WRITE(real_seconds, RTC_SECONDS);
> CMOS_WRITE(real_minutes, RTC_MINUTES);
> } else
> - printk(KERN_WARNING "time.c: can't update CMOS clock from %d to %d\n",
> - cmos_minutes, real_minutes);
> + printk(KERN_WARNING "time.c: can't update CMOS clock "
> + "from %d to %d\n", cmos_minutes, real_minutes);
>
> /*
> * The following flags have to be released exactly in this order, otherwise the
> @@ -198,27 +220,65 @@
> static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
> {
> static unsigned long rtc_update = 0;
> + unsigned long tsc, lost = 0;
> + int delay, offset = 0;
>
> /*
> * Here we are in the timer irq handler. We have irqs locally disabled (so we
> * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running
> * on the other CPU, so we need a lock. We also need to lock the vsyscall
> * variables, because both do_timer() and us change them -arca+vojtech
> - */
> + */
>
> write_seqlock(&xtime_lock);
>
> - {
> - unsigned long t;
> + if (vxtime.hpet_address) {
> + offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
> + delay = hpet_readl(HPET_COUNTER) - offset;
> + } else {
> + spin_lock(&i8253_lock);
> + outb_p(0x00, 0x43);
> + delay = inb_p(0x40);
> + delay |= inb(0x40) << 8;
> + spin_unlock(&i8253_lock);
> + delay = LATCH - 1 - delay;
> + }
> +
> + rdtscll_sync(&tsc);
>
> - sync_core();
> - rdtscll(t);
> - hpet.offset = (t - hpet.last_tsc) * (1000000L / HZ) / hpet.ticks + hpet.offset - 1000000L / HZ;
> - if (hpet.offset >= 1000000L / HZ)
> - hpet.offset = 0;
> - hpet.ticks = min_t(long, max_t(long, (t - hpet.last_tsc) * (1000000L / HZ) / (1000000L / HZ - hpet.offset),
> - cpu_khz * 1000/HZ * 15 / 16), cpu_khz * 1000/HZ * 16 / 15);
> - hpet.last_tsc = t;
> + if (vxtime.mode == VXTIME_HPET) {
> + if (offset - vxtime.last > hpet_tick) {
> + lost = (offset - vxtime.last) / hpet_tick - 1;
> + }
> +
> + vxtime.last = offset;
> + } else {
> + offset = (((tsc - vxtime.last_tsc) *
> + vxtime.tsc_quot) >> 32) - tick_usec;
> +
> + if (offset < 0)
> + offset = 0;
> +
> + if (offset > (USEC_PER_SEC / HZ)) {
> + lost = offset / (USEC_PER_SEC / HZ);
> + offset %= (USEC_PER_SEC / HZ);
> + }
> +
> + vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
> +
> + if ((((tsc - vxtime.last_tsc) *
> + vxtime.tsc_quot) >> 32) < offset)
> + vxtime.last_tsc = tsc -
> + (((long) offset << 32) / vxtime.tsc_quot) - 1;
> + }
> +
> + if (lost) {
> + if (report_lost_ticks)
> + printk(KERN_WARNING "time.c: Lost %ld timer "
> + "tick(s)! (rip %016lx)\n",
> + (offset - vxtime.last) / hpet_tick - 1,
> + regs->rip);
> + jiffies += lost;
> }
>
> /*
> @@ -244,16 +304,16 @@
> * If we have an externally synchronized Linux clock, then update CMOS clock
> * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
> * closest to exactly 500 ms before the next second. If the update fails, we
> - * don'tcare, as it'll be updated on the next turn, and the problem (time way
> + * don't care, as it'll be updated on the next turn, and the problem (time way
> * off) isn't likely to go away much sooner anyway.
> */
>
> if ((~time_status & STA_UNSYNC) && xtime.tv_sec > rtc_update &&
> - abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
> + abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
> set_rtc_mmss(xtime.tv_sec);
> rtc_update = xtime.tv_sec + 660;
> }
> -
> +
> write_sequnlock(&xtime_lock);
>
> return IRQ_HANDLED;
> @@ -263,6 +323,7 @@
> {
> unsigned int timeout, year, mon, day, hour, min, sec;
> unsigned char last, this;
> + unsigned long flags;
>
> /*
> * The Linux interpretation of the CMOS clock register contents: When the
> @@ -272,7 +333,7 @@
> * standard 8.3 MHz ISA bus.
> */
>
> - spin_lock(&rtc_lock);
> + spin_lock_irqsave(&rtc_lock, flags);
>
> timeout = 1000000;
> last = this = 0;
> @@ -286,28 +347,28 @@
> /*
> * Here we are safe to assume the registers won't change for a whole second, so
> * we just go ahead and read them.
> - */
> + */
>
> - sec = CMOS_READ(RTC_SECONDS);
> - min = CMOS_READ(RTC_MINUTES);
> - hour = CMOS_READ(RTC_HOURS);
> - day = CMOS_READ(RTC_DAY_OF_MONTH);
> - mon = CMOS_READ(RTC_MONTH);
> - year = CMOS_READ(RTC_YEAR);
> + sec = CMOS_READ(RTC_SECONDS);
> + min = CMOS_READ(RTC_MINUTES);
> + hour = CMOS_READ(RTC_HOURS);
> + day = CMOS_READ(RTC_DAY_OF_MONTH);
> + mon = CMOS_READ(RTC_MONTH);
> + year = CMOS_READ(RTC_YEAR);
>
> - spin_unlock(&rtc_lock);
> + spin_unlock_irqrestore(&rtc_lock, flags);
>
> /*
> * We know that x86-64 always uses BCD format, no need to check the config
> * register.
> */
>
> - BCD_TO_BIN(sec);
> - BCD_TO_BIN(min);
> - BCD_TO_BIN(hour);
> - BCD_TO_BIN(day);
> - BCD_TO_BIN(mon);
> - BCD_TO_BIN(year);
> + BCD_TO_BIN(sec);
> + BCD_TO_BIN(min);
> + BCD_TO_BIN(hour);
> + BCD_TO_BIN(day);
> + BCD_TO_BIN(mon);
> + BCD_TO_BIN(year);
>
> /*
> * This will work up to Dec 31, 2069.
> @@ -326,6 +387,32 @@
>
> #define TICK_COUNT 100000000
>
> +static unsigned int __init hpet_calibrate_tsc(void)
> +{
> + int tsc_start, hpet_start;
> + int tsc_now, hpet_now;
> + unsigned long flags;
> +
> + local_irq_save(flags);
> + local_irq_disable();
> +
> + hpet_start = hpet_readl(HPET_COUNTER);
> + rdtscl(tsc_start);
> +
> + do {
> + local_irq_disable();
> + hpet_now = hpet_readl(HPET_COUNTER);
> + sync_core();
> + rdtscl(tsc_now);
> + local_irq_restore(flags);
> + } while ((tsc_now - tsc_start) < TICK_COUNT &&
> + (hpet_now - hpet_start) < TICK_COUNT);
> +
> + return (tsc_now - tsc_start) * 1000000000L
> + / ((hpet_now - hpet_start) * hpet_period / 1000);
> +}
> +
> +
> /*
> * pit_calibrate_tsc() uses the speaker output (channel 2) of
> * the PIT. This is better than using the timer interrupt output,
> @@ -339,10 +426,9 @@
> unsigned long start, end;
> unsigned long flags;
>
> - outb((inb(0x61) & ~0x02) | 0x01, 0x61);
> + spin_lock_irqsave(&i8253_lock, flags);
>
> - local_irq_save(flags);
> - local_irq_disable();
> + outb((inb(0x61) & ~0x02) | 0x01, 0x61);
>
> outb(0xb0, 0x43);
> outb((1193182 / (1000 / 50)) & 0xff, 0x42);
> @@ -353,42 +439,146 @@
> sync_core();
> rdtscll(end);
>
> + spin_unlock_irqrestore(&i8253_lock, flags);
>
> - local_irq_restore(flags);
> -
> return (end - start) / 50;
> }
>
> +static int hpet_init(void)
> +{
> + unsigned int cfg, id;
> +
> + if (!vxtime.hpet_address)
> + return -1;
> + set_fixmap_nocache(FIX_HPET_BASE, vxtime.hpet_address);
> +
> +/*
> + * Read the period, compute tick and quotient.
> + */
> +
> + id = hpet_readl(HPET_ID);
> +
> + if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER) ||
> + !(id & HPET_ID_LEGSUP))
> + return -1;
> +
> + hpet_period = hpet_readl(HPET_PERIOD);
> + if (hpet_period < 100000 || hpet_period > 100000000)
> + return -1;
> +
> + hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) /
> + hpet_period;
> +
> +/*
> + * Stop the timers and reset the main counter.
> + */
> +
> + cfg = hpet_readl(HPET_CFG);
> + cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
> + hpet_writel(cfg, HPET_CFG);
> + hpet_writel(0, HPET_COUNTER);
> + hpet_writel(0, HPET_COUNTER + 4);
> +
> +/*
> + * Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
> + * and period also hpet_tick.
> + */
> +
> + hpet_writel(HPET_T0_ENABLE | HPET_T0_PERIODIC | HPET_T0_SETVAL |
> + HPET_T0_32BIT, HPET_T0_CFG);
> + hpet_writel(hpet_tick, HPET_T0_CMP);
> + hpet_writel(hpet_tick, HPET_T0_CMP);
> +
> +/*
> + * Go!
> + */
> +
> + cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY;
> + hpet_writel(cfg, HPET_CFG);
> +
> + return 0;
> +}
> +
> void __init pit_init(void)
> {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&i8253_lock, flags);
> outb_p(0x34, 0x43); /* binary, mode 2, LSB/MSB, ch 0 */
> outb_p(LATCH & 0xff, 0x40); /* LSB */
> outb_p(LATCH >> 8, 0x40); /* MSB */
> + spin_unlock_irqrestore(&i8253_lock, flags);
> }
>
> int __init time_setup(char *str)
> {
> - hpet_report_lost_ticks = 1;
> + report_lost_ticks = 1;
> return 1;
> }
>
> -static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL};
> +static struct irqaction irq0 = {
> + timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL
> +};
>
> extern void __init config_acpi_tables(void);
>
> void __init time_init(void)
> {
> + char *timename;
> +
> +#ifdef HPET_HACK_ENABLE_DANGEROUS
> + if (!vxtime.hpet_address) {
> + printk(KERN_WARNING "time.c: WARNING: Enabling HPET base "
> + "manually!\n");
> + outl(0x800038a0, 0xcf8);
> + outl(0xff000001, 0xcfc);
> + outl(0x800038a0, 0xcf8);
> + hpet_address = inl(0xcfc) & 0xfffffffe;
> + printk(KERN_WARNING "time.c: WARNING: Enabled HPET "
> + "at %#lx.\n", hpet_address);
> + }
> +#endif
> +
> xtime.tv_sec = get_cmos_time();
> xtime.tv_nsec = 0;
>
> - pit_init();
> - printk(KERN_INFO "time.c: Using 1.1931816 MHz PIT timer.\n");
> - cpu_khz = pit_calibrate_tsc();
> + if (!hpet_init()) {
> + vxtime_hz = (1000000000000000L + hpet_period / 2) /
> + hpet_period;
> + cpu_khz = hpet_calibrate_tsc();
> + timename = "HPET";
> + } else {
> + pit_init();
> + cpu_khz = pit_calibrate_tsc();
> + timename = "PIT";
> + }
> +
> + printk(KERN_INFO "time.c: Using %ld.%06ld MHz %s timer.\n",
> + vxtime_hz / 1000000, vxtime_hz % 1000000, timename);
> printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
> cpu_khz / 1000, cpu_khz % 1000);
> - hpet.ticks = cpu_khz * (1000 / HZ);
> - rdtscll(hpet.last_tsc);
> + vxtime.mode = VXTIME_TSC;
> + vxtime.quot = (1000000L << 32) / vxtime_hz;
> + vxtime.tsc_quot = (1000L << 32) / cpu_khz;
> + vxtime.hz = vxtime_hz;
> + rdtscll_sync(&vxtime.last_tsc);
> setup_irq(0, &irq0);
> +}
> +
> +void __init time_init_smp(void)
> +{
> + char *timetype;
> +
> + if (vxtime.hpet_address) {
> + timetype = "HPET";
> + vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
> + vxtime.mode = VXTIME_HPET;
> + do_gettimeoffset = do_gettimeoffset_hpet;
> + } else {
> + timetype = "PIT/TSC";
> + vxtime.mode = VXTIME_TSC;
> + }
> + printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype);
> }
>
> __setup("report_lost_ticks", time_setup);
> diff -Nru a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
> --- a/arch/x86_64/kernel/vsyscall.c Thu Jun 12 10:44:01 2003
> +++ b/arch/x86_64/kernel/vsyscall.c Thu Jun 12 10:44:01 2003
> @@ -33,7 +33,7 @@
> *
> * Add HPET support (port from 2.4). Still needed?
> * Nop out vsyscall syscall to avoid anchor for buffer overflows when sysctl off.
> - *
> + *
> * These are not urgent things that we need to address only before shipping the first
> * production binary kernels.
> */
> @@ -77,14 +77,22 @@
>
> do {
> sequence = read_seqbegin(&__xtime_lock);
> -
> - sync_core();
> - rdtscll(t);
> +
> sec = __xtime.tv_sec;
> usec = (__xtime.tv_nsec / 1000) +
> - (__jiffies - __wall_jiffies) * (1000000 / HZ) +
> - (t - __hpet.last_tsc) * (1000000 / HZ) / __hpet.ticks + __hpet.offset;
> + (__jiffies - __wall_jiffies) * (1000000 / HZ);
>
> + if (__vxtime.mode == VXTIME_TSC) {
> + sync_core();
> + rdtscll(t);
> + usec += ((t - __vxtime.last_tsc) *
> + __vxtime.tsc_quot) >> 32;
> + } else {
> +#if 0
> + usec += ((readl(fix_to_virt(VSYSCALL_HPET) + 0xf0) -
> + __vxtime.last) * __vxtime.quot) >> 32;
> +#endif
> + }
> } while (read_seqretry(&__xtime_lock, sequence));
>
> tv->tv_sec = sec + usec / 1000000;
> @@ -100,7 +108,7 @@
> static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
> {
> int ret;
> - asm volatile("syscall"
> + asm volatile("syscall"
> : "=a" (ret)
> : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber );
> return ret;
> @@ -109,7 +117,7 @@
> static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
> {
> if (unlikely(!__sysctl_vsyscall))
> - return gettimeofday(tv,tz);
> + return gettimeofday(tv,tz);
> if (tv)
> do_vgettimeofday(tv);
> if (tz)
> @@ -119,13 +127,13 @@
>
> static time_t __vsyscall(1) vtime(time_t * t)
> {
> - struct timeval tv;
> + struct timeval tv;
> if (unlikely(!__sysctl_vsyscall))
> gettimeofday(&tv, NULL);
> else
> do_vgettimeofday(&tv);
> if (t)
> - *t = tv.tv_sec;
> + *t = tv.tv_sec;
> return tv.tv_sec;
> }
>
> diff -Nru a/arch/x86_64/vmlinux.lds.S b/arch/x86_64/vmlinux.lds.S
> --- a/arch/x86_64/vmlinux.lds.S Thu Jun 12 10:44:01 2003
> +++ b/arch/x86_64/vmlinux.lds.S Thu Jun 12 10:44:01 2003
> @@ -50,10 +50,10 @@
> .xtime_lock : AT ((LOADADDR(.vsyscall_0) + SIZEOF(.vsyscall_0) + 63) & ~(63)) { *(.xtime_lock) }
> xtime_lock = LOADADDR(.xtime_lock);
> . = ALIGN(16);
> - .hpet : AT ((LOADADDR(.xtime_lock) + SIZEOF(.xtime_lock) + 15) & ~(15)) { *(.hpet) }
> - hpet = LOADADDR(.hpet);
> + .vxtime : AT ((LOADADDR(.xtime_lock) + SIZEOF(.xtime_lock) + 15) & ~(15)) { *(.vxtime) }
> + vxtime = LOADADDR(.vxtime);
> . = ALIGN(16);
> - .wall_jiffies : AT ((LOADADDR(.hpet) + SIZEOF(.hpet) + 15) & ~(15)) { *(.wall_jiffies) }
> + .wall_jiffies : AT ((LOADADDR(.vxtime) + SIZEOF(.vxtime) + 15) & ~(15)) { *(.wall_jiffies) }
> wall_jiffies = LOADADDR(.wall_jiffies);
> . = ALIGN(16);
> .sys_tz : AT ((LOADADDR(.wall_jiffies) + SIZEOF(.wall_jiffies) + 15) & ~(15)) { *(.sys_tz) }
> diff -Nru a/include/asm-x86_64/fixmap.h b/include/asm-x86_64/fixmap.h
> --- a/include/asm-x86_64/fixmap.h Thu Jun 12 10:44:01 2003
> +++ b/include/asm-x86_64/fixmap.h Thu Jun 12 10:44:01 2003
> @@ -35,6 +35,8 @@
> enum fixed_addresses {
> VSYSCALL_LAST_PAGE,
> VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
> + VSYSCALL_HPET,
> + FIX_HPET_BASE,
> #ifdef CONFIG_X86_LOCAL_APIC
> FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
> #endif
> diff -Nru a/include/asm-x86_64/mc146818rtc.h b/include/asm-x86_64/mc146818rtc.h
> --- a/include/asm-x86_64/mc146818rtc.h Thu Jun 12 10:44:01 2003
> +++ b/include/asm-x86_64/mc146818rtc.h Thu Jun 12 10:44:01 2003
> @@ -24,6 +24,11 @@
> outb_p((val),RTC_PORT(1)); \
> })
>
> +#ifndef CONFIG_HPET_TIMER
> #define RTC_IRQ 8
> +#else
> +/* Temporary workaround due to IRQ routing problem. */
> +#define RTC_IRQ 0
> +#endif
>
> #endif /* _ASM_MC146818RTC_H */
> diff -Nru a/include/asm-x86_64/timex.h b/include/asm-x86_64/timex.h
> --- a/include/asm-x86_64/timex.h Thu Jun 12 10:44:01 2003
> +++ b/include/asm-x86_64/timex.h Thu Jun 12 10:44:01 2003
> @@ -30,6 +30,34 @@
>
> extern unsigned int cpu_khz;
>
> -extern struct hpet_data hpet;
> +/*
> + * Documentation on HPET can be found at:
> + * http://www.intel.com/ial/home/sp/pcmmspec.htm
> + * ftp://download.intel.com/ial/home/sp/mmts098.pdf
> + */
> +
> +#define HPET_ID 0x000
> +#define HPET_PERIOD 0x004
> +#define HPET_CFG 0x010
> +#define HPET_STATUS 0x020
> +#define HPET_COUNTER 0x0f0
> +#define HPET_T0_CFG 0x100
> +#define HPET_T0_CMP 0x108
> +#define HPET_T0_ROUTE 0x110
> +
> +#define HPET_ID_VENDOR 0xffff0000
> +#define HPET_ID_LEGSUP 0x00008000
> +#define HPET_ID_NUMBER 0x00000f00
> +#define HPET_ID_REV 0x000000ff
> +
> +#define HPET_CFG_ENABLE 0x001
> +#define HPET_CFG_LEGACY 0x002
> +
> +#define HPET_T0_ENABLE 0x004
> +#define HPET_T0_PERIODIC 0x008
> +#define HPET_T0_SETVAL 0x040
> +#define HPET_T0_32BIT 0x100
> +
> +extern struct vxtime_data vxtime;
>
> #endif
> diff -Nru a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h
> --- a/include/asm-x86_64/vsyscall.h Thu Jun 12 10:44:01 2003
> +++ b/include/asm-x86_64/vsyscall.h Thu Jun 12 10:44:01 2003
> @@ -15,7 +15,7 @@
>
> #ifdef __KERNEL__
>
> -#define __section_hpet __attribute__ ((unused, __section__ (".hpet"), aligned(16)))
> +#define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16)))
> #define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16)))
> #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))
> #define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16)))
> @@ -23,22 +23,24 @@
> #define __section_xtime __attribute__ ((unused, __section__ (".xtime"), aligned(16)))
> #define __section_xtime_lock __attribute__ ((unused, __section__ (".xtime_lock"), aligned(L1_CACHE_BYTES)))
>
> +#define VXTIME_TSC 1
> +#define VXTIME_HPET 2
>
> -struct hpet_data {
> - long address; /* base address */
> +struct vxtime_data {
> + long hpet_address; /* HPET base address */
> unsigned long hz; /* HPET clocks / sec */
> - int trigger; /* value at last interrupt */
> int last;
> - int offset;
> unsigned long last_tsc;
> - long ticks;
> + long quot;
> + long tsc_quot;
> + int mode;
> };
>
> #define hpet_readl(a) readl(fix_to_virt(FIX_HPET_BASE) + a)
> #define hpet_writel(d,a) writel(d, fix_to_virt(FIX_HPET_BASE) + a)
>
> /* vsyscall space (readonly) */
> -extern struct hpet_data __hpet;
> +extern struct vxtime_data __vxtime;
> extern struct timespec __xtime;
> extern volatile unsigned long __jiffies;
> extern unsigned long __wall_jiffies;
> @@ -46,7 +48,7 @@
> extern seqlock_t __xtime_lock;
>
> /* kernel space (writeable) */
> -extern struct hpet_data hpet;
> +extern struct vxtime_data vxtime;
> extern unsigned long wall_jiffies;
> extern struct timezone sys_tz;
> extern int sysctl_vsyscall;

-- 
Vojtech Pavlik
SuSE Labs, SuSE CR
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/