<!-- received="Wed Jul 19 05:22:14 2000 EET DST" -->
<!-- sent="Wed, 19 Jul 2000 04:12:40 +0200" -->
<!-- name="Roger Larsson" -->
<!-- email="roger.larsson@norran.net" -->
<!-- subject="[PATCH--] test5-pre1 vmfix (rev 2.1) + one rescheduling bugfix?" -->
<!-- id="" -->
<!-- inreplyto="" -->
<title>Linux-kernel mailing list archive 2000-29,: [PATCH--] test5-pre1 vmfix (rev 2.1) + one rescheduling bugfix?</title>
<body bgcolor="#FFFFFF"><font face="Arial,Helvetica">
<h1>[PATCH--] test5-pre1 vmfix (rev 2.1) + one rescheduling bugfix?</h1>
<b>Roger Larsson</b> (<a href="mailto:roger.larsson@norran.net"><i>roger.larsson@norran.net</i></a>)<br>
<i>Wed, 19 Jul 2000 04:12:40 +0200</i>
<p>
<ul>
<li> <b>Messages sorted by:</b> <a href="date.html#406">[ date ]</a><a href="index.html#406">[ thread ]</a><a href="subject.html#406">[ subject ]</a><a href="author.html#406">[ author ]</a>
<!-- next="start" -->
<li> <b>Next message:</b> <a href="0407.html">Mike A. Harris: "chattr and ext2 extended attributes"</a>
<li> <b>Previous message:</b> <a href="0405.html">dixonbp@us.ibm.com: "NFS locking does not call the filesystem lock operation"</a>
<!-- nextthread="start" -->
<li> <b>Next in thread:</b> <a href="0408.html">Zdenek Kabelac: "Re: [PATCH--] test5-pre1 vmfix (rev 2.1) + one rescheduling bugfix?"</a>
<li> <b>Reply:</b> <a href="0408.html">Zdenek Kabelac: "Re: [PATCH--] test5-pre1 vmfix (rev 2.1) + one rescheduling bugfix?"</a>
<li> <b>Reply:</b> <a href="0409.html">Zdenek Kabelac: "Re: [PATCH--] test5-pre1 vmfix (rev 2.1) + one rescheduling bugfix?"</a>
<!-- reply="end" -->
</ul>
<hr>
<!-- body="start" -->
This is a multi-part message in MIME format.<br>
--------------7CB5DBEDA0302B78AD73DA44<br>
Content-Type: text/plain; charset=us-ascii<br>
Content-Transfer-Encoding: 7bit<br>
<p>
Hi,<br>
<p>
[Intermediate release - indicator of problem with previous]<br>
<p>
Since I am responsible for messing up some aspects of vm<br>
(when fixing others)<br>
here is a patch that tries to solve the introduced problems.<br>
<p>
* no more periodic wake up of kswapd - not needed anymore<br>
* no more freeing all zones to (free_pages &gt; pages_high)<br>
* always wakes kswapd up after try_to_free_pages<br>
* kswapd starts when all zones get zone_wake_kswapd<br>
  (runs once for each zone that hits zone_wake_kswapd)<br>
* removed test for more than pages_high in alloc_pages,<br>
  zones will mostly be in the range [pages_high...pages_low]<br>
* Up to 10% better throughput than 2.4.0-test4, YMMV<br>
* Tested mostly with streaming tests. On a non HIGHMEM config.<br>
<p>
+ New: zone_wake_kswapd == 2 indicates that another alloc was<br>
  done while free_pages &lt; pages_low.<br>
+ New-BUGFIX: runs kswapd while zone_wake_kswapd == 2 or<br>
  low_on_memory (Quintela)<br>
+ New-BUGFIX? checks if __GFP_IO before rescheduling<br>
  (will become separate patch if correct, thanks Quintela)<br>
<p>
<p>
- Kills mmap002. The previously released version did not, although<br>
  it should have had this problem too - why? Will<br>
  investigate further...<br>
- Since kswapd does not wake up periodically anymore, the<br>
  latencies might be worse... Will investigate it<br>
  further when other stuff works.<br>
<p>
Note: logic of function keep_kswapd_awake has changed.<br>
<p>
/RogerL<br>
<p>
<p>
<pre>
--
Home page:
  <a href="http://www.norran.net/nra02596/">http://www.norran.net/nra02596/</a>
--------------7CB5DBEDA0302B78AD73DA44
Content-Type: text/plain; charset=us-ascii;
 name="patch-2.4.0-test5-1-vmfix.21"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="patch-2.4.0-test5-1-vmfix.21"
<p>
--- linux/mm/vmscan.c.orig	Sat Jul 15 23:44:34 2000
+++ linux/mm/vmscan.c	Wed Jul 19 03:44:12 2000
@@ -419,7 +419,7 @@ out:
 }
 
 /*
- * Check if there is any memory pressure (free_pages &lt; pages_low)
+ * Check if there is any memory pressure (zone_wake_kswapd == 2)
  */
 static inline int memory_pressure(void)
 {
@@ -430,7 +430,7 @@ static inline int memory_pressure(void)
 		for(i = 0; i &lt; MAX_NR_ZONES; i++) {
 			zone_t *zone = pgdat-&gt;node_zones+ i;
 			if (zone-&gt;size &amp;&amp;
-			    zone-&gt;free_pages &lt; zone-&gt;pages_low)
+			    zone-&gt;zone_wake_kswapd == 2)
 				return 1;
 		}
 		pgdat = pgdat-&gt;node_next;
@@ -440,24 +440,31 @@ static inline int memory_pressure(void)
 }
 
 /*
- * Check if there recently has been memory pressure (zone_wake_kswapd)
+ * Keep kswapd awake if any zone has recently been critical (low_on_memory)
+ * or has current memory pressure (zone_wake_kswapd == 2), or if
+ * all zones have recently had memory pressure (zone_wake_kswapd set).
  */
 static inline int keep_kswapd_awake(void)
 {
+	int all_recent = 1;
 	pg_data_t *pgdat = pgdat_list;
 
 	do {
 		int i;
 		for(i = 0; i &lt; MAX_NR_ZONES; i++) {
 			zone_t *zone = pgdat-&gt;node_zones+ i;
-			if (zone-&gt;size &amp;&amp;
-			    zone-&gt;zone_wake_kswapd)
-				return 1;
+			if (zone-&gt;size) {
+				if (zone-&gt;zone_wake_kswapd == 2 ||
+				    zone-&gt;low_on_memory)
+					return 1;
+				if (!zone-&gt;zone_wake_kswapd)
+					all_recent = 0;
+			}
 		}
 		pgdat = pgdat-&gt;node_next;
 	} while (pgdat);
 
-	return 0;
+	return all_recent;
 }
 
 /*
@@ -484,7 +491,7 @@ static int do_try_to_free_pages(unsigned
 
 	priority = 64;
 	do {
-		if (current-&gt;need_resched) {
+		if ((gfp_mask &amp; __GFP_IO) &amp;&amp; current-&gt;need_resched) {
 			schedule();
 			/* time has passed - pressure too? */
 			if (!memory_pressure())
@@ -496,9 +503,7 @@ static int do_try_to_free_pages(unsigned
 				goto done;
 		}
 
-		/* not (been) low on memory - it is
-		 * pointless to try to swap out.
-		 */
+		/* check if mission completed */
 		if (!keep_kswapd_awake())
 			goto done;
 
@@ -596,10 +601,7 @@ int kswapd(void *unused)
 
 	for (;;) {
 		if (!keep_kswapd_awake()) {
-			/* wake up regulary to do an early attempt too free
-			 * pages - pages will not actually be freed.
-			 */
-			interruptible_sleep_on_timeout(&amp;kswapd_wait, HZ);
+			interruptible_sleep_on(&amp;kswapd_wait);
 		}
 
 		do_try_to_free_pages(GFP_KSWAPD);
@@ -628,24 +630,30 @@ int try_to_free_pages(unsigned int gfp_m
 	if (gfp_mask &amp; __GFP_WAIT) {
 		current-&gt;state = TASK_RUNNING;
 		current-&gt;flags |= PF_MEMALLOC;
-		retval = do_try_to_free_pages(gfp_mask);
+		do {
+			retval = do_try_to_free_pages(gfp_mask);
+		} while (!retval);
 		current-&gt;flags &amp;= ~PF_MEMALLOC;
 	}
-	else {
-		/* make sure kswapd runs */
-		if (waitqueue_active(&amp;kswapd_wait))
-			wake_up_interruptible(&amp;kswapd_wait);
-	}
+
+	/* someone needed memory that kswapd had not provided
+	 * make sure kswapd runs, should not happen often */
+	if (waitqueue_active(&amp;kswapd_wait))
+		wake_up_interruptible(&amp;kswapd_wait);
 
 	return retval;
 }
 
 static int __init kswapd_init(void)
 {
-	printk("Starting kswapd v1.6\n");
+	printk("Starting kswapd v1.7\n");
 	swap_setup();
 	kernel_thread(kswapd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
 	return 0;
 }
 
 module_init(kswapd_init)
+
+
+
+
--- linux/mm/page_alloc.c.orig	Sat Jul 15 23:44:46 2000
+++ linux/mm/page_alloc.c	Wed Jul 19 03:45:36 2000
@@ -141,9 +141,12 @@ void __free_pages_ok (struct page *page,
 
 	spin_unlock_irqrestore(&amp;zone-&gt;lock, flags);
 
-	if (zone-&gt;free_pages &gt; zone-&gt;pages_high) {
-		zone-&gt;zone_wake_kswapd = 0;
-		zone-&gt;low_on_memory = 0;
+	if (zone-&gt;free_pages &gt; zone-&gt;pages_low) {
+		zone-&gt;zone_wake_kswapd = 1;
+		if (zone-&gt;free_pages &gt; zone-&gt;pages_high) {
+			zone-&gt;zone_wake_kswapd = 0;
+			zone-&gt;low_on_memory = 0;
+		}
 	}
 }
 
@@ -217,7 +220,7 @@ static struct page * rmqueue(zone_t *zon
  */
 struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
 {
-	zone_t **zone = zonelist-&gt;zones;
+	zone_t **zone;
 	extern wait_queue_head_t kswapd_wait;
 
 	/*
@@ -228,21 +231,6 @@ struct page * __alloc_pages(zonelist_t *
 	 * in a higher zone fails.
 	 */
 
-	for (;;) {
-		zone_t *z = *(zone++);
-		if (!z)
-			break;
-		if (!z-&gt;size)
-			BUG();
-
-		/* If there are zones with a lot of free memory allocate from them */
-		if (z-&gt;free_pages &gt; z-&gt;pages_high) {
-			struct page *page = rmqueue(z, order);
-			if (page)
-				return page;
-		}
-	}
-
 	zone = zonelist-&gt;zones;
 	for (;;) {
 		zone_t *z = *(zone++);
@@ -256,6 +244,16 @@ struct page * __alloc_pages(zonelist_t *
 			struct page *page = rmqueue(z, order);
 			if (z-&gt;free_pages &lt; z-&gt;pages_low) {
 				z-&gt;zone_wake_kswapd = 1;
+
+				/* Usually zone_wake_kswapd is set to 2 
+				 * on second alloc below pages_low
+				 * but if this was a big one
+				 * - do not let it pass unnoticed 
+				 */
+				if (z-&gt;free_pages &lt; z-&gt;pages_low - MAX_ORDER) {
+					z-&gt;zone_wake_kswapd = 2;
+				}
+
 				if (waitqueue_active(&amp;kswapd_wait))
 					wake_up_interruptible(&amp;kswapd_wait);
 			}
@@ -264,6 +262,21 @@ struct page * __alloc_pages(zonelist_t *
 		}
 	}
 
+	/* Three possibilities to get here
+	 * - Previous alloc_pages resulted in last zone set to have
+	 *   zone_wake_kswapd and start it. kswapd has not been able
+	 *   to release enough pages so that one zone does not have
+	 *   zone_wake_kswapd set.
+	 * - Different sets of zones (zonelist)
+	 *   previous did not have all zones with zone_wake_kswapd but
+	 *   this one has... should kswapd be woken up? it will run once.
+	 * - SMP race, kswapd went to sleep slightly after it was running
+	 *   in 'if (waitqueue_active(...))' above.
+	 * + anyway the test is very cheap to do...
+	 */
+	if (waitqueue_active(&amp;kswapd_wait))
+		wake_up_interruptible(&amp;kswapd_wait);
+
 	/*
 	 * Ok, we don't have any zones that don't need some
 	 * balancing.. See if we have any that aren't critical..
@@ -275,8 +288,17 @@ struct page * __alloc_pages(zonelist_t *
 			break;
 		if (!z-&gt;low_on_memory) {
 			struct page *page = rmqueue(z, order);
-			if (z-&gt;free_pages &lt; z-&gt;pages_min)
-				z-&gt;low_on_memory = 1;
+			if (z-&gt;free_pages &lt; z-&gt;pages_low) {
+				z-&gt;zone_wake_kswapd = 2; /* future: ++ */
+
+				if (z-&gt;free_pages &lt; z-&gt;pages_min)
+					z-&gt;low_on_memory = 1;
+
+				/* make kswapd notice new condition */
+				if (waitqueue_active(&amp;kswapd_wait))
+					wake_up_interruptible(&amp;kswapd_wait);
+			}
+
 			if (page)
 				return page;
 		}
<p>
--------------7CB5DBEDA0302B78AD73DA44--
<p>
<p>
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at <a href="http://www.tux.org/lkml/">http://www.tux.org/lkml/</a>
</pre>
<!-- body="end" -->
<hr>
<p>
<ul>
<!-- next="start" -->
<li> <b>Next message:</b> <a href="0407.html">Mike A. Harris: "chattr and ext2 extended attributes"</a>
<li> <b>Previous message:</b> <a href="0405.html">dixonbp@us.ibm.com: "NFS locking does not call the filesystem lock operation"</a>
<!-- nextthread="start" -->
<li> <b>Next in thread:</b> <a href="0408.html">Zdenek Kabelac: "Re: [PATCH--] test5-pre1 vmfix (rev 2.1) + one rescheduling bugfix?"</a>
<li> <b>Reply:</b> <a href="0408.html">Zdenek Kabelac: "Re: [PATCH--] test5-pre1 vmfix (rev 2.1) + one rescheduling bugfix?"</a>
<li> <b>Reply:</b> <a href="0409.html">Zdenek Kabelac: "Re: [PATCH--] test5-pre1 vmfix (rev 2.1) + one rescheduling bugfix?"</a>
<!-- reply="end" -->
</ul>
</font></body>
