<!-- received="Sun Apr 23 04:14:27 2000 EET DST" -->
<!-- sent="Sat, 22 Apr 2000 22:09:57 -0300" -->
<!-- name="Cesar Eduardo Barros" -->
<!-- email="cesarb@web4u.com.br" -->
<!-- subject="[PATCH] Fix for rtc.c non-atomic mess (try 2)" -->
<!-- id="" -->
<!-- inreplyto="20000420104701.A902@cesarb.uucp.openprojects.net" -->
<title>Linux-kernel mailing list archive 2000-17,: [PATCH] Fix for rtc.c non-atomic mess (try 2)</title>
<body bgcolor="#FFFFFF"><font face="Arial,Helvetica">
<h1>[PATCH] Fix for rtc.c non-atomic mess (try 2)</h1>
<b>Cesar Eduardo Barros</b> (<a href="mailto:cesarb@web4u.com.br"><i>cesarb@web4u.com.br</i></a>)<br>
<i>Sat, 22 Apr 2000 22:09:57 -0300</i>
<p>
<ul>
<li> <b>Messages sorted by:</b> <a href="date.html#17">[ date ]</a><a href="index.html#17">[ thread ]</a><a href="subject.html#17">[ subject ]</a><a href="author.html#17">[ author ]</a>
<!-- next="start" -->
<li> <b>Next message:</b> <a href="0018.html">Alexander Viro: "Re: [PATCH] f_op-&gt;poll() without lock_kernel()"</a>
<li> <b>Previous message:</b> <a href="0016.html">Horst von Brand: "Re: Kernel code analysis (was Re: "movb" for spin-unlock (was Re: namei() query))"</a>
<!-- nextthread="start" -->
<!-- reply="end" -->
</ul>
<hr>
<!-- body="start" -->
Well, based in feedback, I remade the whole patch. Now I added code to avoid<br>
the xchg() race (thanks to Manfred Spraul for hinting me on that one). Couldn't<br>
avoid using spinlocks; sorry :-)<br>
<p>
This time I didn't even try to compile it. Please tell me what I did wrong this<br>
time :-)<br>
<p>
<p>
--- linux-2.3.99-pre5/drivers/char/rtc.c	Fri Apr 14 14:31:08 2000<br>
+++ linux-rtcfix2/drivers/char/rtc.c	Sat Apr 22 21:56:48 2000<br>
@@ -40,6 +40,9 @@<br>
  *	1.10b	Andrew Morton: SMP lock fix<br>
  */<br>
 <br>
+/* If someone thinks this is spinlocking too much, feel free to use a single<br>
+ * global /dev/rtc driver spinlock plus the rtc_lock for the CMOS_* stuff */<br>
+<br>
 #define RTC_VERSION		"1.10b"<br>
 <br>
 #define RTC_IRQ 	8	/* Can't see this changing soon.	*/<br>
@@ -124,8 +127,21 @@<br>
 #define RTC_IS_OPEN		0x01	/* means /dev/rtc is in use	*/<br>
 #define RTC_TIMER_ON		0x02	/* missed irq timer active	*/<br>
 <br>
-static atomic_t rtc_status = ATOMIC_INIT(0); /* bitmapped status byte.	*/<br>
+/*<br>
+ * rtc_status is never changed by rtc_interrupt, and ioctl/open/close is<br>
+ * protected by the big kernel lock. However, ioctl can still disable the timer<br>
+ * in rtc_status and then with del_timer after the interrupt has read<br>
+ * rtc_status but before mod_timer is called, which would then reenable the<br>
+ * timer (but you would need to have an awful timing before you'd trip on it)<br>
+ */<br>
+static spinlock_t rtc_irq_timer_lock = SPIN_LOCK_UNLOCKED;<br>
+static unsigned long rtc_status = 0;	/* bitmapped status byte.	*/<br>
 static unsigned long rtc_freq = 0;	/* Current periodic IRQ rate	*/<br>
+/* Having rtc_lock double as rtc_irq_data_lock is evil */<br>
+static spinlock_t rtc_irq_data_lock = SPIN_LOCK_UNLOCKED;<br>
+/* Making this two variables instead of one might make the interrupt a bit<br>
+ * faster (and exchange a spinlock (2 locked mem accesses) by two variable<br>
+ * atomic updates (2 locked mem accesses)). Or maybe slower. Ideas? */<br>
 static unsigned long rtc_irq_data = 0;	/* our output to the world	*/<br>
 <br>
 /*<br>
@@ -151,6 +167,8 @@<br>
 <br>
 static void rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)<br>
 {<br>
+	unsigned long rtc_data;<br>
+<br>
 	/*<br>
 	 *	Can be an alarm interrupt, update complete interrupt,<br>
 	 *	or a periodic interrupt. We store the status in the<br>
@@ -158,19 +176,29 @@<br>
 	 *	the last read in the remainder of rtc_irq_data.<br>
 	 */<br>
 <br>
+	/* First, ack the interrupt */<br>
 	spin_lock (&amp;rtc_lock);<br>
+	rtc_data = CMOS_READ(RTC_INTR_FLAGS);<br>
+	spin_unlock (&amp;rtc_lock);<br>
+<br>
+	/* Second, avoid a bogus rtc_dropped_irq */<br>
+	spin_lock (&amp;rtc_irq_timer_lock);<br>
+	if (rtc_status &amp; RTC_TIMER_ON)<br>
+		mod_timer(&amp;rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100);<br>
+	spin_unlock (&amp;rtc_irq_timer_lock);<br>
+<br>
+	/* Third, update rtc_irq_data */<br>
+	spin_lock (&amp;rtc_irq_data_lock);<br>
 	rtc_irq_data += 0x100;<br>
 	rtc_irq_data &amp;= ~0xff;<br>
-	rtc_irq_data |= (CMOS_READ(RTC_INTR_FLAGS) &amp; 0xF0);<br>
-	spin_unlock (&amp;rtc_lock);<br>
+	rtc_irq_data |= rtc_data &amp; 0xF0;<br>
+	spin_unlock (&amp;rtc_irq_data_lock);<br>
 <br>
+	/* Now do the rest of the actions */<br>
 	wake_up_interruptible(&amp;rtc_wait);	<br>
 <br>
 	if (rtc_async_queue)<br>
 		kill_fasync (rtc_async_queue, SIGIO, POLL_IN);<br>
-<br>
-	if (atomic_read(&amp;rtc_status) &amp; RTC_TIMER_ON)<br>
-		mod_timer(&amp;rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100);<br>
 }<br>
 #endif<br>
 <br>
@@ -192,6 +220,7 @@<br>
 	DECLARE_WAITQUEUE(wait, current);<br>
 	unsigned long data;<br>
 	ssize_t retval;<br>
+	unsigned long flags;<br>
 	<br>
 	if (count &lt; sizeof(unsigned long))<br>
 		return -EINVAL;<br>
@@ -200,7 +229,18 @@<br>
 <br>
 	current-&gt;state = TASK_INTERRUPTIBLE;<br>
 		<br>
-	while ((data = xchg(&amp;rtc_irq_data, 0)) == 0) {<br>
+	do {<br>
+		/* First make it right. Then make it fast. Putting this whole<br>
+		 * block within the parentheses of a while would be too<br>
+		 * confusing. And no, xchg() is not the answer. */<br>
+		spin_lock_irqsave (&amp;rtc_irq_data_lock, flags);<br>
+		data = rtc_irq_data;<br>
+		rtc_irq_data = 0;<br>
+		spin_unlock_irqrestore (&amp;rtc_irq_data_lock, flags);<br>
+<br>
+		if (data != 0)<br>
+			break;<br>
+<br>
 		if (file-&gt;f_flags &amp; O_NONBLOCK) {<br>
 			retval = -EAGAIN;<br>
 			goto out;<br>
@@ -210,7 +250,7 @@<br>
 			goto out;<br>
 		}<br>
 		schedule();<br>
-	}<br>
+	} while (1);<br>
 <br>
 	retval = put_user(data, (unsigned long *)buf); <br>
 	if (!retval)<br>
@@ -245,11 +285,12 @@<br>
 	case RTC_PIE_OFF:	/* Mask periodic int. enab. bit	*/<br>
 	{<br>
 		mask_rtc_irq_bit(RTC_PIE);<br>
-		if (atomic_read(&amp;rtc_status) &amp; RTC_TIMER_ON) {<br>
+		spin_lock_irqsave (&amp;rtc_irq_timer_lock, flags);<br>
+		if (rtc_status &amp; RTC_TIMER_ON) {<br>
+			rtc_status &amp;= ~RTC_TIMER_ON;<br>
 			del_timer(&amp;rtc_irq_timer);<br>
-			atomic_set(&amp;rtc_status,<br>
-				   atomic_read(&amp;rtc_status) &amp; ~RTC_TIMER_ON);<br>
 		}<br>
+		spin_unlock_irqrestore (&amp;rtc_irq_timer_lock, flags);<br>
 		return 0;<br>
 	}<br>
 	case RTC_PIE_ON:	/* Allow periodic ints		*/<br>
@@ -262,11 +303,13 @@<br>
 		if ((rtc_freq &gt; 64) &amp;&amp; (!capable(CAP_SYS_RESOURCE)))<br>
 			return -EACCES;<br>
 <br>
-		if (!(atomic_read(&amp;rtc_status) &amp; RTC_TIMER_ON)) {<br>
-			atomic_set(&amp;rtc_status,<br>
-				   atomic_read(&amp;rtc_status) | RTC_TIMER_ON);<br>
+		/* No need to spinlock here if RTC_TIMER_ON is only enabled at<br>
+		 * the end. The mb() should avoid trigger-happy compilers */<br>
+		if (!(rtc_status &amp; RTC_TIMER_ON)) {<br>
 			rtc_irq_timer.expires = jiffies + HZ/rtc_freq + 2*HZ/100;<br>
 			add_timer(&amp;rtc_irq_timer);<br>
+			mb ();	/* FIXME is this really needed? */<br>
+			rtc_status |= RTC_TIMER_ON;<br>
 		}<br>
 		set_rtc_irq_bit(RTC_PIE);<br>
 		return 0;<br>
@@ -491,16 +534,18 @@<br>
 {<br>
 	unsigned long flags;<br>
 <br>
-	if(atomic_read(&amp;rtc_status) &amp; RTC_IS_OPEN)<br>
+	/* If someday somebody decides to remove the kernel_lock on open and<br>
+	 * close and ioctl this is gonna get open to races again */<br>
+	if(rtc_status &amp; RTC_IS_OPEN)<br>
 		return -EBUSY;<br>
 <br>
 	MOD_INC_USE_COUNT;<br>
 <br>
-	atomic_set(&amp;rtc_status, atomic_read(&amp;rtc_status) | RTC_IS_OPEN);<br>
+	rtc_status |= RTC_IS_OPEN;<br>
 <br>
-	spin_lock_irqsave (&amp;rtc_lock, flags);<br>
+	spin_lock_irqsave (&amp;rtc_irq_data_lock, flags);<br>
 	rtc_irq_data = 0;<br>
-	spin_unlock_irqrestore (&amp;rtc_lock, flags);<br>
+	spin_unlock_irqrestore (&amp;rtc_irq_data_lock, flags);<br>
 	return 0;<br>
 }<br>
 <br>
@@ -530,10 +575,12 @@<br>
 	CMOS_READ(RTC_INTR_FLAGS);<br>
 	spin_unlock_irqrestore(&amp;rtc_lock, flags);<br>
 <br>
-	if (atomic_read(&amp;rtc_status) &amp; RTC_TIMER_ON) {<br>
-		atomic_set(&amp;rtc_status, atomic_read(&amp;rtc_status) &amp; ~RTC_TIMER_ON);<br>
+	spin_lock_irqsave (&amp;rtc_irq_timer_lock, flags);<br>
+	if (rtc_status &amp; RTC_TIMER_ON) {<br>
+		rtc_status &amp;= ~RTC_TIMER_ON;<br>
 		del_timer(&amp;rtc_irq_timer);<br>
 	}<br>
+	spin_unlock_irqrestore (&amp;rtc_irq_timer_lock, flags);<br>
 <br>
 	if (file-&gt;f_flags &amp; FASYNC) {<br>
 		rtc_fasync (-1, file, 0);<br>
@@ -542,10 +589,11 @@<br>
 #endif<br>
 	MOD_DEC_USE_COUNT;<br>
 <br>
-	spin_lock_irqsave (&amp;rtc_lock, flags);<br>
+	/* FIXME: why is this outside the #ifndef __alpha__? */<br>
+	spin_lock_irqsave (&amp;rtc_irq_data_lock, flags);<br>
 	rtc_irq_data = 0;<br>
-	spin_unlock_irqrestore (&amp;rtc_lock, flags);<br>
-	atomic_set(&amp;rtc_status, atomic_read(&amp;rtc_status) &amp; ~RTC_IS_OPEN);<br>
+	spin_unlock_irqrestore (&amp;rtc_irq_data_lock, flags);<br>
+	rtc_status &amp;= ~RTC_IS_OPEN;<br>
 	return 0;<br>
 }<br>
 <br>
@@ -556,9 +604,9 @@<br>
 <br>
 	poll_wait(file, &amp;rtc_wait, wait);<br>
 <br>
-	spin_lock_irqsave (&amp;rtc_lock, flags);<br>
+	spin_lock_irqsave (&amp;rtc_irq_data_lock, flags);<br>
 	l = rtc_irq_data;<br>
-	spin_unlock_irqrestore (&amp;rtc_lock, flags);<br>
+	spin_unlock_irqrestore (&amp;rtc_irq_data_lock, flags);<br>
 <br>
 	if (l != 0)<br>
 		return POLLIN | POLLRDNORM;<br>
@@ -698,9 +746,14 @@<br>
 static void __exit rtc_exit (void)<br>
 {<br>
 	/* interrupts and maybe timer disabled at this point by rtc_release */<br>
+	/* FIXME: Maybe??? */<br>
 <br>
-	if (atomic_read(&amp;rtc_status) &amp; RTC_TIMER_ON)<br>
+#if 0<br>
+	spin_lock_irqsave (&amp;rtc_irq_timer_lock, flags);<br>
+	if (rtc_status &amp; RTC_TIMER_ON)<br>
 		del_timer(&amp;rtc_irq_timer);<br>
+	spin_unlock_irqrestore (&amp;rtc_irq_timer_lock, flags);<br>
+#endif<br>
 <br>
 	remove_proc_entry ("driver/rtc", NULL);<br>
 	misc_deregister(&amp;rtc_dev);<br>
@@ -734,16 +787,38 @@<br>
 <br>
 static void rtc_dropped_irq(unsigned long data)<br>
 {<br>
+	unsigned long rtc_data;<br>
 	unsigned long flags;<br>
 <br>
 	printk(KERN_INFO "rtc: lost some interrupts at %ldHz.\n", rtc_freq);<br>
-	mod_timer(&amp;rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100);<br>
 <br>
-	spin_lock_irqsave(&amp;rtc_lock, flags);<br>
+	/* First, ack the (missed) interrupt */<br>
+	spin_lock_irqsave (&amp;rtc_lock, flags);<br>
+	rtc_data = CMOS_READ(RTC_INTR_FLAGS);<br>
+	spin_unlock_irqrestore (&amp;rtc_lock, flags);<br>
+<br>
+	/* Second, avoid a bogus rtc_dropped_irq */<br>
+	spin_lock_irqsave (&amp;rtc_irq_timer_lock, flags);<br>
+	/* Just in case someone disabled the timer from behind our back... */<br>
+	if (rtc_status &amp; RTC_TIMER_ON)<br>
+		mod_timer(&amp;rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100);<br>
+	spin_unlock_irqrestore (&amp;rtc_irq_timer_lock, flags);<br>
+<br>
+	/* Third, update rtc_irq_data */<br>
+	/* FIXME: what happens if the real interrupt comes before we get here?<br>
+	 * The order of the status would get a bit missed up... Maybe I should<br>
+	 * ack within this block? */<br>
+	spin_lock_irqsave(&amp;rtc_irq_data_lock, flags);<br>
 	rtc_irq_data += ((rtc_freq/HZ)&lt;&lt;8);<br>
 	rtc_irq_data &amp;= ~0xff;<br>
-	rtc_irq_data |= (CMOS_READ(RTC_INTR_FLAGS) &amp; 0xF0);	/* restart */<br>
-	spin_unlock_irqrestore(&amp;rtc_lock, flags);<br>
+	rtc_irq_data |= rtc_data &amp; 0xF0;<br>
+	spin_unlock_irqrestore(&amp;rtc_irq_data_lock, flags);<br>
+<br>
+	/* Now we have new data */<br>
+	wake_up_interruptible(&amp;rtc_wait);<br>
+<br>
+	if (rtc_async_queue)<br>
+		kill_fasync (rtc_async_queue, SIGIO, POLL_IN);<br>
 }<br>
 #endif<br>
 <br>
@@ -955,8 +1030,12 @@<br>
 	val &amp;=  ~bit;<br>
 	CMOS_WRITE(val, RTC_CONTROL);<br>
 	CMOS_READ(RTC_INTR_FLAGS);<br>
-	rtc_irq_data = 0;<br>
 	spin_unlock_irqrestore(&amp;rtc_lock, flags);<br>
+<br>
+	/* FIXME is doing this outside the rtc_lock ok? */<br>
+	spin_lock_irqsave (&amp;rtc_irq_data_lock, flags);<br>
+	rtc_irq_data = 0;<br>
+	spin_unlock_irqrestore (&amp;rtc_irq_data_lock, flags);<br>
 }<br>
 <br>
 static void set_rtc_irq_bit(unsigned char bit)<br>
@@ -969,7 +1048,11 @@<br>
 	val |= bit;<br>
 	CMOS_WRITE(val, RTC_CONTROL);<br>
 	CMOS_READ(RTC_INTR_FLAGS);<br>
-	rtc_irq_data = 0;<br>
 	spin_unlock_irqrestore(&amp;rtc_lock, flags);<br>
+<br>
+	/* FIXME is doing this outside the rtc_lock ok? */<br>
+	spin_lock_irqsave (&amp;rtc_irq_data_lock, flags);<br>
+	rtc_irq_data = 0;<br>
+	spin_unlock_irqrestore (&amp;rtc_irq_data_lock, flags);<br>
 }<br>
 #endif<br>
<p>
<pre>
-- 
Cesar Eduardo Barros
<a href="mailto:cesarb@web4u.com.br">cesarb@web4u.com.br</a>
<a href="mailto:cesarb@dcc.ufrj.br">cesarb@dcc.ufrj.br</a>
<p>
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at <a href="http://www.tux.org/lkml/">http://www.tux.org/lkml/</a>
</pre>
<!-- body="end" -->
<hr>
<p>
<ul>
<!-- next="start" -->
<li> <b>Next message:</b> <a href="0018.html">Alexander Viro: "Re: [PATCH] f_op-&gt;poll() without lock_kernel()"</a>
<li> <b>Previous message:</b> <a href="0016.html">Horst von Brand: "Re: Kernel code analysis (was Re: "movb" for spin-unlock (was Re: namei() query))"</a>
<!-- nextthread="start" -->
<!-- reply="end" -->
</ul>
</font></body>
