Most probably yes: fxrstor needs to read RAM. pxor also takes some
icache and bytecode RAM, but it sounds like it will be faster.
Maybe we could also interleave the pxor with the xorps, since they use
different parts of the CPU, Honza?
diff -urN ref/arch/x86_64/kernel/i387.c xmm/arch/x86_64/kernel/i387.c
--- ref/arch/x86_64/kernel/i387.c Fri Apr 19 19:37:30 2002
+++ xmm/arch/x86_64/kernel/i387.c Fri Apr 19 19:39:02 2002
@@ -34,6 +34,31 @@
struct task_struct *me = current;
__asm__("fninit");
load_mxcsr(0x1f80);
+ asm volatile("pxor %mm0, %mm0\n\t"
+ "pxor %mm1, %mm1\n\t"
+ "pxor %mm2, %mm2\n\t"
+ "pxor %mm3, %mm3\n\t"
+ "pxor %mm4, %mm4\n\t"
+ "pxor %mm5, %mm5\n\t"
+ "pxor %mm6, %mm6\n\t"
+ "pxor %mm7, %mm7\n\t"
+ "emms\n\t"
+ "xorps %xmm0, %xmm0\n\t"
+ "xorps %xmm1, %xmm1\n\t"
+ "xorps %xmm2, %xmm2\n\t"
+ "xorps %xmm3, %xmm3\n\t"
+ "xorps %xmm4, %xmm4\n\t"
+ "xorps %xmm5, %xmm5\n\t"
+ "xorps %xmm6, %xmm6\n\t"
+ "xorps %xmm7, %xmm7\n\t"
+ "xorps %xmm8, %xmm8\n\t"
+ "xorps %xmm9, %xmm9\n\t"
+ "xorps %xmm10, %xmm10\n\t"
+ "xorps %xmm11, %xmm11\n\t"
+ "xorps %xmm12, %xmm12\n\t"
+ "xorps %xmm13, %xmm13\n\t"
+ "xorps %xmm14, %xmm14\n\t"
+ "xorps %xmm15, %xmm15\n");
me->used_math = 1;
}
Andrea
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/