Looking at arch/i386/lib/mmx.c, I see that the MMX fast_copy_page()
issues extra prefetches past the end of the source page: the copy loop
prefetches 320 bytes ahead of the current position, so its last five
64-byte iterations prefetch beyond the 4096-byte page.
This is harmless, but it wastes CPU cycles and pollutes the cache.
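To illustrate the idea, here is a rough C sketch of the split-loop
structure the patch introduces. This is not the actual mmx.c code,
which does the copy with inline MMX assembly; fast_copy_page_sketch()
is a made-up name, and the GCC builtins only stand in for the prefetch
and movq instructions:

/* Rough sketch only -- not the kernel routine.  __builtin_prefetch()
 * and __builtin_memcpy() stand in for the inline assembly in mmx.c. */
static void fast_copy_page_sketch(char *to, const char *from)
{
	int i;

	/* Main loop: stop 320 bytes (the prefetch distance) short of the
	 * end of the 4096-byte page, so every prefetch stays inside it. */
	for (i = 0; i < (4096 - 320) / 64; i++) {
		__builtin_prefetch(from + 320);	/* stands in for "prefetch 320(%0)" */
		__builtin_memcpy(to, from, 64);	/* stands in for the movq block */
		from += 64;
		to += 64;
	}

	/* Tail: the last 320/64 = 5 blocks are copied without prefetching,
	 * so nothing past the end of the source page is ever touched. */
	for (; i < 4096 / 64; i++) {
		__builtin_memcpy(to, from, 64);
		from += 64;
		to += 64;
	}
}

The point of the split is that the prefetch distance (320 bytes) is
subtracted from the bound of the prefetching loop, and a plain tail
loop copies the remaining five blocks without prefetch.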
Please find a patch below, diffed against 2.4.9.
If you want to test it, be sure to disable the K7 optimization,
or else this code will be #ifdef'ed out :-)
--
Best regards, VDA
mailto:VDA@port.imtp.ilyichevsk.odessa.ua
http://port.imtp.ilyichevsk.odessa.ua/vda/

--- mmx.c.orig	Tue May 22 15:23:16 2001
+++ mmx.c	Tue Sep 18 16:51:50 2001
@@ -293,7 +293,7 @@
 		".previous"
 		: : "r" (from) );
 
-	for(i=0; i<4096/64; i++)
+	for(i=0; i<(4096-320)/64; i++)
 	{
 		__asm__ __volatile__ (
 		"1: prefetch 320(%0)\n"
@@ -321,6 +321,29 @@
 		"	.align 4\n"
 		"	.long 1b, 3b\n"
 		".previous"
+		: : "r" (from), "r" (to) : "memory");
+		from+=64;
+		to+=64;
+	}
+	for(i=(4096-320)/64; i<4096/64; i++)
+	{
+		__asm__ __volatile__ (
+		"2: movq (%0), %%mm0\n"
+		"   movq 8(%0), %%mm1\n"
+		"   movq 16(%0), %%mm2\n"
+		"   movq 24(%0), %%mm3\n"
+		"   movq %%mm0, (%1)\n"
+		"   movq %%mm1, 8(%1)\n"
+		"   movq %%mm2, 16(%1)\n"
+		"   movq %%mm3, 24(%1)\n"
+		"   movq 32(%0), %%mm0\n"
+		"   movq 40(%0), %%mm1\n"
+		"   movq 48(%0), %%mm2\n"
+		"   movq 56(%0), %%mm3\n"
+		"   movq %%mm0, 32(%1)\n"
+		"   movq %%mm1, 40(%1)\n"
+		"   movq %%mm2, 48(%1)\n"
+		"   movq %%mm3, 56(%1)\n"
 		: : "r" (from), "r" (to) : "memory");
 		from+=64;
 		to+=64;