Robert Love wrote:
> On Fri, 2002-10-04 at 17:25, Manfred Spraul wrote:
>
>
>>Which cpus have slow local_irq_disable() implementations? At least
>>for my Duron, this doesn't seem to be the case [~ 4 cpu cycles
>>for cli]
>
>
> I believe there are pipeline effects to disabling interrupts, e.g. it
> has to be flushed?
>
At least my Duron [700 MHz] obviously doesn't flush the pipeline. If the
Pentium 4 flushes its pipeline, it could mean 20+ cycles - test app is
attached.
-- Manfred--------------040703060806010501050609 Content-Type: text/plain; name="cli.cpp" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="cli.cpp"
/* * cli.cpp: RDTSC based performance tester. * * Copyright (C) 1999, 2001, 2002 by Manfred Spraul. * All rights reserved except the rights granted by the GPL. * * Redistribution of this file is permitted under the terms of the GNU * General Public License (GPL) version 2 or later. * $Header: /pub/home/manfred/cvs-tree/timetest/cli.cpp,v 1.4 2002/10/04 21:22:09 manfred Exp $ */
#include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <getopt.h>
// Build-time switch: when CACHE_FLUSH is defined, drain_cache() is compiled
// with its real memory-touching body; when it is undefined (the setting
// chosen here) drain_cache() is an empty stub.
#undef CACHE_FLUSH
// Intel recommends that a serializing instruction
// should be called before and after rdtsc.
// CPUID is a serializing instruction.
// ".align 128": Pentium 4 L2 cache line size
//
// read_rdtsc_before(time): take the "start" time stamp.
//   time - an 8-byte lvalue (the callers use unsigned long long). The low
//          32 bits of the counter (EAX) are stored at offset 0 and the high
//          32 bits (EDX) at offset 4.
// The sequence is cpuid / rdtsc / store / cpuid, with EAX zeroed before each
// cpuid. The address of `time` is passed in ESI/RSI ("S" constraint);
// EAX, EBX, ECX, EDX and memory are declared clobbered.
#define read_rdtsc_before(time) \
	__asm__ __volatile__( \
		".align 128\n\t" \
		"xor %%eax,%%eax\n\t" \
		"cpuid\n\t" \
		"rdtsc\n\t" \
		"mov %%eax,(%0)\n\t" \
		"mov %%edx,4(%0)\n\t" \
		"xor %%eax,%%eax\n\t" \
		"cpuid\n\t" \
		: /* no output */ \
		: "S"(&(time)) \
		: "eax", "ebx", "ecx", "edx", "memory")
// read_rdtsc_after(time): take the "stop" time stamp.
//   time - an 8-byte lvalue (the callers use unsigned long long). The low
//          32 bits of the counter (EAX) are stored at offset 0 and the high
//          32 bits (EDX) at offset 4.
// Same cpuid / rdtsc / store / cpuid sequence as read_rdtsc_before, but
// without the alignment directive and with a trailing "sti". The sti comes
// after the counter has been read and stored, so it is not part of the
// measured interval; it turns interrupts back on after a test body that may
// have executed cli. Because of the sti, this macro can only run once the
// process has been granted I/O privilege (main() calls iopl(3) first).
#define read_rdtsc_after(time) \
	__asm__ __volatile__( \
		"xor %%eax,%%eax\n\t" \
		"cpuid\n\t" \
		"rdtsc\n\t" \
		"mov %%eax,(%0)\n\t" \
		"mov %%edx,4(%0)\n\t" \
		"xor %%eax,%%eax\n\t" \
		"cpuid\n\t" \
		"sti\n\t" \
		: /* no output */ \
		: "S"(&(time)) \
		: "eax", "ebx", "ecx", "edx", "memory")
// BUILD_TESTFNC(name, text, instructions) generates one benchmark:
//
//   name##_dummy()  - a non-static function whose body is a ".align 4096"
//                     directive followed by a single xor.
//                     NOTE(review): presumably there to push the code that
//                     follows onto a fresh 4 KiB boundary - confirm.
//   name##_best     - smallest tick count measured so far, starting at
//                     1024*1024*1024.
//   name()          - runs `instructions` once between read_rdtsc_before()
//                     and read_rdtsc_after(). When the elapsed tick count
//                     beats name##_best, it prints `text` and the elapsed
//                     ticks minus zerotest_best, then records the new best.
//
// The expansion refers to zerotest_best, so BUILD_TESTFNC(zerotest, ...) has
// to be instantiated before any other test. The difference is printed as a
// signed value: for zerotest itself it is taken against the previous best
// and is therefore negative.
#define BUILD_TESTFNC(name, text, instructions) \
void name##_dummy(void) \
{ \
	__asm__ __volatile__( \
		".align 4096\n\t" \
		"xor %%eax, %%eax\n\t" \
		: : : "eax"); \
} \
static unsigned long name##_best = 1024*1024*1024; \
\
static void name(void) \
{ \
	unsigned long long time; \
	unsigned long long time2; \
\
	read_rdtsc_before(time); \
	instructions; \
	read_rdtsc_after(time2); \
	if (time2 - time < name##_best) { \
		printf(text ":\t%10lld ticks; \n", \
		       (long long)(time2 - time - zerotest_best)); \
		name##_best = time2 - time; \
	} \
}
// Padding work placed around the measured statement by DO_T(): squares one
// function-local static and stores the product in another. Takes no
// arguments and returns nothing.
void filler(void)
{
	static int operand = 3;
	static int product;

	product = operand * operand;
}
// DO_3(x): expand statement x three times back to back, wrapped in
// do { } while (0) so the macro behaves as a single statement.
#define DO_3(x) \
	do { \
		x; x; x; \
	} while (0)
// DO_10(x): expand statement x ten times back to back, wrapped in
// do { } while (0) so the macro behaves as a single statement.
#define DO_10(x) \
	do { \
		x; x; x; x; x; \
		x; x; x; x; x; \
	} while (0)
// DO_50(x): expand statement x fifty times, built from five DO_10(x)
// expansions inside a single do { } while (0).
#define DO_50(x) \
	do { \
		DO_10(x); \
		DO_10(x); \
		DO_10(x); \
		DO_10(x); \
		DO_10(x); \
	} while (0)
// DO_T(y): the measured sequence for one test - three filler() calls,
// the statement under test y, then three more filler() calls.
#define DO_T(y) \
	do { \
		DO_3(filler()); \
		y; \
		DO_3(filler()); \
	} while (0)
// drain_cache(): optional cache-draining helper, selected by CACHE_FLUSH.
//
// With CACHE_FLUSH defined: `other` holds 3*DRAIN_SZ ints, aligned to 4096
// bytes. The function writes zero to the middle DRAIN_SZ ints, then reads
// them back, stopping at the first non-zero value.
// NOTE(review): the read-back loop has no side effects, so an optimizing
// compiler may remove it - confirm whether that matters for the benchmark.
//
// With CACHE_FLUSH undefined: an empty stub, so callers need no #ifdef.
#ifdef CACHE_FLUSH
#define DRAIN_SZ (4*1024*1024)
int other[3*DRAIN_SZ] __attribute ((aligned (4096)));
static inline void drain_cache(void)
{
	int i;

	for (i = 0; i < DRAIN_SZ; i++)
		other[DRAIN_SZ + i] = 0;
	for (i = 0; i < DRAIN_SZ; i++)
		if (other[DRAIN_SZ + i] != 0)
			break;
}
#else
static inline void drain_cache(void)
{
}
#endif
// DO_TEST(x): execute statement x 500000 times in a row.
// The loop counter has a deliberately unusual name so that an `i` (or any
// other ordinary identifier) appearing inside x still refers to the caller's
// variable instead of being captured by the macro's own counter.
#define DO_TEST(x) \
	do { \
		int do_test_pass_; \
		for (do_test_pass_ = 0; do_test_pass_ < 500000; do_test_pass_++) \
			x; \
	} while (0)
//////////////////////////////////////////////////////////////////////////////
// Baseline payload for the zero test: one "nop" instruction. The "memory"
// clobber tells the compiler not to move memory accesses across it.
static inline void nothing()
{
	__asm__ __volatile__(
		"nop"
		: /* no output */
		: /* no input */
		: "memory");
}
// Baseline measurement: instantiates zerotest(), zerotest_dummy() and
// zerotest_best. It times the DO_T() filler sequence around nothing(), i.e.
// around a single nop. BUILD_TESTFNC subtracts zerotest_best from every
// result it prints, so this instantiation must precede all other tests.
BUILD_TESTFNC(zerotest,"zerotest", DO_T(nothing()));
//////////////////////////////////////////////////////////////////////////////
// Payload under test: one "cli" instruction (disable interrupts). The
// "memory" clobber tells the compiler not to move memory accesses across it.
// Interrupts are switched back on by the sti in read_rdtsc_after().
static inline void test0()
{
	__asm__ __volatile__(
		"cli"
		: /* no output */
		: /* no input */
		: "memory");
}
// The actual benchmark: instantiates test_0(), test_0_dummy() and
// test_0_best. It times the same DO_T() filler sequence as the zero test,
// but with test0() (a single cli) in the middle, and reports under "cli".
BUILD_TESTFNC(test_0, "cli", DO_T(test0()))
//////////////////////////////////////////////////////////////////////////////
// Hand-written prototype for the iopl() library call that main() uses to
// raise the process's I/O privilege level. Declared with C linkage when
// compiled as C++, and as an ordinary prototype when compiled as C.
#ifdef __cplusplus
extern "C"
#endif
int iopl(int level);
// Entry point: prints a banner, raises the I/O privilege level to 3, then
// alternates 500000 runs of the baseline test with 500000 runs of the cli
// test forever. Each test prints a line whenever it reaches a new minimum,
// so the program is meant to be stopped by hand (e.g. Ctrl-C).
// Returns 1 if the privilege level cannot be raised.
int main()
{
	printf("CLI bench\n");

	// cli and sti fault in user mode unless the I/O privilege level has
	// been raised, so stop with a diagnostic if iopl(3) is refused rather
	// than crashing on the first measurement.
	if (iopl(3) != 0) {
		perror("iopl");
		return 1;
	}

	for (;;) {
		DO_TEST(zerotest());
		DO_TEST(test_0());
	}
	return 0;
}
--------------040703060806010501050609--
- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/