Yes, I implemented a similar patch last night too. It also fixes a few
other fast-path bugs in __switch_to:
- Fix false sharing in the GDT and replace an imul with a shift:
  really pad the GDT to cache line boundaries now (see the first sketch
  below this list).
- Don't use LOCK prefixes in bit operations when accessing the thread_info
  flags of the switched threads. LOCK is very slow on the P4 and it isn't
  necessary here. Really we should have __set_bit/__test_bit variants
  without the memory barrier and atomic semantics on each arch and use
  those in thread_info.h, but for now do it this way (see the second
  sketch below this list).
  [This is a port from x86-64.]
- Inline FPU switch - it is only a few lines.
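
To illustrate the GDT point, here is a minimal user-space sketch (not part
of the patch; struct desc, gdt_row_old()/gdt_row_new() and FAKE_NR_CPUS are
made up for the example). With 28 entries a per-CPU GDT row is 224 bytes,
so indexing the per-CPU array needs a multiply and rows can straddle cache
lines; padded to 32 entries each row is 256 bytes, so the index becomes a
shift and every row starts on a cache line boundary.

/* Illustrative sketch only, not kernel code. */
#include <stdint.h>

#define FAKE_NR_CPUS	8
#define GDT_ENTRIES_OLD	28	/* 28 * 8 = 224 bytes per CPU */
#define GDT_ENTRIES_NEW	32	/* 32 * 8 = 256 bytes per CPU */

struct desc { uint32_t a, b; };	/* stand-in for an 8-byte GDT descriptor */

static struct desc gdt_old[FAKE_NR_CPUS][GDT_ENTRIES_OLD];
static struct desc gdt_new[FAKE_NR_CPUS][GDT_ENTRIES_NEW]
	__attribute__((aligned(256)));

/* cpu * 224: the compiler has to multiply (imul or an lea/shift combo) */
struct desc *gdt_row_old(int cpu) { return gdt_old[cpu]; }

/* cpu * 256: a single shift, and no two CPUs share a cache line */
struct desc *gdt_row_new(int cpu) { return gdt_new[cpu]; }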
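
And for the LOCK point, another minimal sketch (again illustrative; the
helper names are made up): clear_tsk_thread_flag() goes through clear_bit(),
which uses a LOCKed btr, while the patch does a plain read-modify-write on
tsk->thread_info->flags. The plain version is only safe because nothing
else touches the switched-out task's flags at that point in __switch_to().

/* Illustrative sketch, not kernel code; the helper names are made up. */
#define TIF_USEDFPU	16			/* as in thread_info.h */
#define _TIF_USEDFPU	(1 << TIF_USEDFPU)

/* roughly what clear_tsk_thread_flag(tsk, TIF_USEDFPU) boils down to */
static inline void clear_usedfpu_locked(volatile unsigned long *flags)
{
	/* lock btr: bus-locked, acts as a full barrier, expensive on the P4 */
	asm volatile("lock; btrl %1,%0"
		     : "+m" (*flags)
		     : "Ir" (TIF_USEDFPU)
		     : "memory", "cc");
}

/* what the patch does instead in unlazy_fpu()/clear_fpu() */
static inline void clear_usedfpu_plain(unsigned long *flags)
{
	/* plain andl: no LOCK prefix, no implicit barrier */
	*flags &= ~_TIF_USEDFPU;
}

That is also why per-arch __set_bit/__test_bit variants without the LOCK
and barrier semantics would be the cleaner long-term fix for thread_info.h.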
But I must say I don't know the vm86() semantics well enough to know if
this is good enough, especially regarding when we clear the TIF_VM86 flag.
Could someone more familiar with it review it?
BTW, at first look vm86.c doesn't look very preempt-safe to me.
Comments?
-Andi
diff -burpN -X ../KDIFX linux-2.5.60/arch/i386/kernel/cpu/common.c linux-2.5.60-work/arch/i386/kernel/cpu/common.c
--- linux-2.5.60/arch/i386/kernel/cpu/common.c 2003-02-10 19:37:57.000000000 +0100
+++ linux-2.5.60-work/arch/i386/kernel/cpu/common.c 2003-02-12 01:42:01.000000000 +0100
@@ -484,7 +484,7 @@ void __init cpu_init (void)
BUG();
enter_lazy_tlb(&init_mm, current, cpu);
- load_esp0(t, thread->esp0);
+ load_esp0(current, t, thread->esp0);
set_tss_desc(cpu,t);
cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
load_TR_desc();
diff -burpN -X ../KDIFX linux-2.5.60/arch/i386/kernel/i387.c linux-2.5.60-work/arch/i386/kernel/i387.c
--- linux-2.5.60/arch/i386/kernel/i387.c 2003-02-10 19:39:17.000000000 +0100
+++ linux-2.5.60-work/arch/i386/kernel/i387.c 2003-02-11 23:51:58.000000000 +0100
@@ -52,24 +52,6 @@ void init_fpu(struct task_struct *tsk)
* FPU lazy state save handling.
*/
-static inline void __save_init_fpu( struct task_struct *tsk )
-{
- if ( cpu_has_fxsr ) {
- asm volatile( "fxsave %0 ; fnclex"
- : "=m" (tsk->thread.i387.fxsave) );
- } else {
- asm volatile( "fnsave %0 ; fwait"
- : "=m" (tsk->thread.i387.fsave) );
- }
- clear_tsk_thread_flag(tsk, TIF_USEDFPU);
-}
-
-void save_init_fpu( struct task_struct *tsk )
-{
- __save_init_fpu(tsk);
- stts();
-}
-
void kernel_fpu_begin(void)
{
preempt_disable();
diff -burpN -X ../KDIFX linux-2.5.60/arch/i386/kernel/process.c linux-2.5.60-work/arch/i386/kernel/process.c
--- linux-2.5.60/arch/i386/kernel/process.c 2003-02-10 19:37:54.000000000 +0100
+++ linux-2.5.60-work/arch/i386/kernel/process.c 2003-02-12 01:40:02.000000000 +0100
@@ -437,7 +437,7 @@ void __switch_to(struct task_struct *pre
/*
* Reload esp0, LDT and the page table pointer:
*/
- load_esp0(tss, next->esp0);
+ load_esp0(prev_p, tss, next->esp0);
/*
* Load the per-thread Thread-Local Storage descriptor.
diff -burpN -X ../KDIFX linux-2.5.60/arch/i386/kernel/vm86.c linux-2.5.60-work/arch/i386/kernel/vm86.c
--- linux-2.5.60/arch/i386/kernel/vm86.c 2003-02-10 19:37:58.000000000 +0100
+++ linux-2.5.60-work/arch/i386/kernel/vm86.c 2003-02-12 01:46:51.000000000 +0100
@@ -114,7 +117,7 @@ struct pt_regs * save_v86_state(struct k
}
tss = init_tss + smp_processor_id();
current->thread.esp0 = current->thread.saved_esp0;
- load_esp0(tss, current->thread.esp0);
+ load_esp0(current, tss, current->thread.esp0);
current->thread.saved_esp0 = 0;
loadsegment(fs, current->thread.saved_fs);
loadsegment(gs, current->thread.saved_gs);
@@ -309,6 +313,10 @@ static inline void return_to_32bit(struc
{
struct pt_regs * regs32;
+ /* FIXME should disable preemption here but how can we reenable it? */
+
+ enable_sysenter();
+
regs32 = save_v86_state(regs16);
regs32->eax = retval;
__asm__ __volatile__("movl %0,%%esp\n\t"
diff -burpN -X ../KDIFX linux-2.5.60/arch/x86_64/kernel/process.c linux-2.5.60-work/arch/x86_64/kernel/process.c
--- linux-2.5.60/arch/x86_64/kernel/process.c 2003-02-10 19:37:56.000000000 +0100
+++ linux-2.5.60-work/arch/x86_64/kernel/process.c 2003-02-12 01:51:00.000000000 +0100
@@ -41,6 +41,7 @@
#include <linux/init.h>
#include <linux/ctype.h>
#include <linux/slab.h>
+#include <linux/thread_info.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
diff -burpN -X ../KDIFX linux-2.5.60/include/asm-i386/i387.h linux-2.5.60-work/include/asm-i386/i387.h
--- linux-2.5.60/include/asm-i386/i387.h 2003-02-10 19:38:49.000000000 +0100
+++ linux-2.5.60-work/include/asm-i386/i387.h 2003-02-12 01:21:13.000000000 +0100
@@ -21,23 +21,41 @@ extern void init_fpu(struct task_struct
/*
* FPU lazy state save handling...
*/
-extern void save_init_fpu( struct task_struct *tsk );
extern void restore_fpu( struct task_struct *tsk );
extern void kernel_fpu_begin(void);
#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0)
+static inline void __save_init_fpu( struct task_struct *tsk )
+{
+ if ( cpu_has_fxsr ) {
+ asm volatile( "fxsave %0 ; fnclex"
+ : "=m" (tsk->thread.i387.fxsave) );
+ } else {
+ asm volatile( "fnsave %0 ; fwait"
+ : "=m" (tsk->thread.i387.fsave) );
+ }
+ tsk->thread_info->flags &= ~_TIF_USEDFPU;
+}
+
+static inline void save_init_fpu( struct task_struct *tsk )
+{
+ __save_init_fpu(tsk);
+ stts();
+}
+
+
#define unlazy_fpu( tsk ) do { \
- if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) \
+ if ((tsk)->thread_info->flags & _TIF_USEDFPU) \
save_init_fpu( tsk ); \
} while (0)
#define clear_fpu( tsk ) \
do { \
- if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) { \
+ if ((tsk)->thread_info->flags & _TIF_USEDFPU) { \
asm volatile("fwait"); \
- clear_tsk_thread_flag(tsk, TIF_USEDFPU); \
+ (tsk)->thread_info->flags &= ~_TIF_USEDFPU; \
stts(); \
} \
} while (0)
diff -burpN -X ../KDIFX linux-2.5.60/include/asm-i386/processor.h linux-2.5.60-work/include/asm-i386/processor.h
--- linux-2.5.60/include/asm-i386/processor.h 2003-02-10 19:37:57.000000000 +0100
+++ linux-2.5.60-work/include/asm-i386/processor.h 2003-02-12 01:52:28.000000000 +0100
@@ -408,20 +408,30 @@ struct thread_struct {
.io_bitmap = { [ 0 ... IO_BITMAP_SIZE ] = ~0 }, \
}
-static inline void load_esp0(struct tss_struct *tss, unsigned long esp0)
-{
- tss->esp0 = esp0;
- if (cpu_has_sep) {
- wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
- wrmsr(MSR_IA32_SYSENTER_ESP, esp0, 0);
- }
-}
-
-static inline void disable_sysenter(void)
-{
- if (cpu_has_sep)
- wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);
-}
+#define load_esp0(prev, tss, _esp0) do { \
+ (tss)->esp0 = _esp0; \
+ if (cpu_has_sep) { \
+ if (unlikely((prev)->thread_info->flags & _TIF_VM86)) \
+ wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); \
+ wrmsr(MSR_IA32_SYSENTER_ESP, (_esp0), 0); \
+ } \
+} while(0)
+
+/* The caller of the next two functions should have disabled preemption. */
+
+#define disable_sysenter() do { \
+ if (cpu_has_sep) { \
+ set_thread_flag(TIF_VM86); \
+ wrmsr(MSR_IA32_SYSENTER_CS, 0, 0); \
+ } \
+} while(0)
+
+#define enable_sysenter() do { \
+ if (cpu_has_sep) { \
+ clear_thread_flag(TIF_VM86); \
+ wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); \
+ } \
+} while(0)
#define start_thread(regs, new_eip, new_esp) do { \
__asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \
diff -burpN -X ../KDIFX linux-2.5.60/include/asm-i386/segment.h linux-2.5.60-work/include/asm-i386/segment.h
--- linux-2.5.60/include/asm-i386/segment.h 2003-02-10 19:38:06.000000000 +0100
+++ linux-2.5.60-work/include/asm-i386/segment.h 2003-02-11 23:56:37.000000000 +0100
@@ -67,7 +67,7 @@
/*
* The GDT has 25 entries but we pad it to cacheline boundary:
*/
-#define GDT_ENTRIES 28
+#define GDT_ENTRIES 32
#define GDT_SIZE (GDT_ENTRIES * 8)
diff -burpN -X ../KDIFX linux-2.5.60/include/asm-i386/thread_info.h linux-2.5.60-work/include/asm-i386/thread_info.h
--- linux-2.5.60/include/asm-i386/thread_info.h 2003-02-10 19:37:59.000000000 +0100
+++ linux-2.5.60-work/include/asm-i386/thread_info.h 2003-02-12 01:51:26.000000000 +0100
@@ -111,15 +111,18 @@ static inline struct thread_info *curren
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */
#define TIF_IRET 5 /* return with iret */
+#define TIF_VM86 6 /* may use vm86 */
#define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */
#define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */
+
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
#define _TIF_IRET (1<<TIF_IRET)
+#define _TIF_VM86 (1<<TIF_VM86)
#define _TIF_USEDFPU (1<<TIF_USEDFPU)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
-