I've been thinking about doing away with balance_irq on P4 boxes by using the
local APIC's Task Priority Register (TPR). It might even help P3 and other
i386 CPUs by routing interrupts preferentially to idle CPUs. And it would do
so in real time, rather than relying on a snapshot of the past stored in the
I/O APIC's destination masks.
What do you think about this? (Crudely ripped out of my 2.5 summit patch; may
not apply correctly.)
The code in do_IRQ could instead be "if (clustered_apic_mode) apic_adj_tpr(TPR_IRQ)"
or some such, if that kind of APIC foolery is not considered necessary for
the non-P4 crowd. (On those CPUs, the automatic priority boost that serial
APICs apply while an interrupt has not yet been EOIed should do the job for us.)
diff -ruN 2.5.24/arch/i386/kernel/irq.c d24/arch/i386/kernel/irq.c
--- 2.5.24/arch/i386/kernel/irq.c Thu Jun 20 15:53:44 2002
+++ d24/arch/i386/kernel/irq.c Wed Jul 10 13:34:14 2002
@@ -582,6 +582,7 @@
unsigned int status;
kstat.irqs[cpu][irq]++;
+ apic_adj_tpr(TPR_IRQ);
spin_lock(&desc->lock);
desc->handler->ack(irq);
/*
@@ -642,6 +643,7 @@
if (softirq_pending(cpu))
do_softirq();
+ apic_adj_tpr(-TPR_IRQ);
return 1;
}
diff -ruN 2.5.24/arch/i386/kernel/process.c d24/arch/i386/kernel/process.c
--- 2.5.24/arch/i386/kernel/process.c Thu Jun 20 15:53:40 2002
+++ d24/arch/i386/kernel/process.c Wed Jul 10 14:12:57 2002
@@ -145,7 +145,9 @@
irq_stat[smp_processor_id()].idle_timestamp = jiffies;
while (!need_resched())
idle();
+ apic_set_tpr(TPR_TASK);
schedule();
+ apic_set_tpr(TPR_IDLE);
}
}
diff -ruN 2.5.24/include/asm-i386/apic.h d24/include/asm-i386/apic.h
--- 2.5.24/include/asm-i386/apic.h Thu Jun 20 15:53:57 2002
+++ d24/include/asm-i386/apic.h Wed Jul 10 13:34:14 2002
@@ -64,6 +64,22 @@
apic_write_around(APIC_EOI, 0);
}
+static inline void apic_set_tpr(unsigned long val) /* load val into the APIC_TPRI_MASK field of APIC_TASKPRI */
+{
+ unsigned long value;
+
+ value = apic_read(APIC_TASKPRI); /* fetch the current register contents */
+ apic_write_around(APIC_TASKPRI, (value & ~APIC_TPRI_MASK) + val); /* clear the masked bits, then add val; presumably val must fit within APIC_TPRI_MASK */
+}
+
+static inline void apic_adj_tpr(long adj) /* add the signed offset adj to APIC_TASKPRI */
+{
+ unsigned long value;
+
+ value = apic_read(APIC_TASKPRI); /* fetch the current register contents */
+ apic_write_around(APIC_TASKPRI, value + adj); /* no masking here: the do_IRQ hunks pair +TPR_IRQ with -TPR_IRQ so the net change is zero */
+}
+
extern int get_maxlvt(void);
extern void clear_local_APIC(void);
extern void connect_bsp_APIC (void);
@@ -95,6 +118,15 @@
#define NMI_LOCAL_APIC 2
#define NMI_INVALID 3
+#else /* CONFIG_X86_LOCAL_APIC */
+#define apic_set_tpr(val)
+#define apic_adj_tpr(adj)
#endif /* CONFIG_X86_LOCAL_APIC */
+/* Priority values for apic_adj_tpr() and apic_set_tpr() */
+/* xAPICs only do priority comparisons on the upper nibble. */
+#define TPR_IDLE (0x00ul)
+#define TPR_TASK (0x10ul)
+#define TPR_IRQ (0x20ul) /* Or maybe 0x10 ?? */
+
#endif /* __ASM_APIC_H */
--
James Cleverdon
IBM xSeries Linux Solutions
{jamesclv(Unix, preferred), cleverdj(Notes)} at us dot ibm dot com
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/