Maybe.
The below patch has been in -mm for some time. It was supposed to kill the
IRQ if 750 of the previous 1000 IRQs weren't handled.
I disabled the killing code because it was triggering on someone's
works-just-fine setup.
There will be pain involved in getting all this to work right. Do you
really think there's much value in it?
Attempt to do something intelligent with IRQ handlers which don't return
IRQ_HANDLED.
- If they return neither IRQ_HANDLED nor IRQ_NONE, complain.
- If they return IRQ_NONE more than 750 times in 1000 interrupts, complain
and disable the IRQ.
arch/i386/kernel/irq.c | 96 +++++++++++++++++++++++++++++++++++--------------
include/linux/irq.h | 2 +
2 files changed, 72 insertions(+), 26 deletions(-)
diff -puN arch/i386/kernel/irq.c~irq-check-rate-limit arch/i386/kernel/irq.c
--- 25/arch/i386/kernel/irq.c~irq-check-rate-limit 2003-05-08 00:27:15.000000000 -0700
+++ 25-akpm/arch/i386/kernel/irq.c 2003-05-08 00:27:32.000000000 -0700
@@ -66,8 +66,12 @@
/*
* Controller mappings for all interrupt sources:
*/
-irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned =
- { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}};
+irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
+ [0 ... NR_IRQS-1] = {
+ .handler = &no_irq_type,
+ .lock = SPIN_LOCK_UNLOCKED
+ }
+};
static void register_irq_proc (unsigned int irq);
@@ -209,7 +213,6 @@ int handle_IRQ_event(unsigned int irq,
{
int status = 1; /* Force the "do bottom halves" bit */
int retval = 0;
- struct irqaction *first_action = action;
if (!(action->flags & SA_INTERRUPT))
local_irq_enable();
@@ -222,30 +225,69 @@ int handle_IRQ_event(unsigned int irq,
if (status & SA_SAMPLE_RANDOM)
add_interrupt_randomness(irq);
local_irq_disable();
- if (retval != 1) {
- static int count = 100;
- if (count) {
- count--;
- if (retval) {
- printk("irq event %d: bogus retval mask %x\n",
- irq, retval);
- } else {
- printk("irq %d: nobody cared!\n", irq);
- }
- dump_stack();
- printk("handlers:\n");
- action = first_action;
- do {
- printk("[<%p>]", action->handler);
- print_symbol(" (%s)",
- (unsigned long)action->handler);
- printk("\n");
- action = action->next;
- } while (action);
+ return retval;
+}
+
+static void report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
+{
+ static int count = 100;
+ struct irqaction *action;
+
+ if (count) {
+ count--;
+ if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
+ printk("irq event %d: bogus return value %x\n",
+ irq, action_ret);
+ } else {
+ printk("irq %d: nobody cared!\n", irq);
}
+ dump_stack();
+ printk("handlers:\n");
+ action = desc->action;
+ do {
+ printk("[<%p>]", action->handler);
+ print_symbol(" (%s)",
+ (unsigned long)action->handler);
+ printk("\n");
+ action = action->next;
+ } while (action);
}
+}
- return status;
+/*
+ * If 750 of the previous 1000 interrupts have not been handled then assume
+ * that the IRQ is stuck in some manner. Drop a diagnostic and try to turn the
+ * IRQ off.
+ *
+ * Called under desc->lock
+ */
+static void note_interrupt(int irq, irq_desc_t *desc, irqreturn_t action_ret)
+{
+ if (action_ret != IRQ_HANDLED) {
+ desc->irqs_unhandled++;
+ if (action_ret != IRQ_NONE)
+ report_bad_irq(irq, desc, action_ret);
+ }
+
+ desc->irq_count++;
+ if (desc->irq_count < 1000)
+ return;
+
+ desc->irq_count = 0;
+ if (desc->irqs_unhandled > 750) {
+ /*
+ * The interrupt is stuck
+ */
+ report_bad_irq(irq, desc, action_ret);
+ /*
+ * Now kill the IRQ
+ */
+#if 0
+ desc->status |= IRQ_DISABLED;
+ desc->handler->disable(irq);
+#endif
+ }
+ desc->irqs_unhandled = 0;
}
/*
@@ -418,10 +460,12 @@ asmlinkage unsigned int do_IRQ(struct pt
* SMP environment.
*/
for (;;) {
+ irqreturn_t action_ret;
+
spin_unlock(&desc->lock);
- handle_IRQ_event(irq, ®s, action);
+ action_ret = handle_IRQ_event(irq, ®s, action);
spin_lock(&desc->lock);
-
+ note_interrupt(irq, desc, action_ret);
if (likely(!(desc->status & IRQ_PENDING)))
break;
desc->status &= ~IRQ_PENDING;
diff -puN include/linux/irq.h~irq-check-rate-limit include/linux/irq.h
--- 25/include/linux/irq.h~irq-check-rate-limit 2003-05-08 00:27:15.000000000 -0700
+++ 25-akpm/include/linux/irq.h 2003-05-08 00:27:15.000000000 -0700
@@ -61,6 +61,8 @@ typedef struct {
hw_irq_controller *handler;
struct irqaction *action; /* IRQ action list */
unsigned int depth; /* nested irq disables */
+ unsigned int irq_count; /* For detecting broken interrupts */
+ unsigned int irqs_unhandled;
spinlock_t lock;
} ____cacheline_aligned irq_desc_t;
_
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/