Hi,
I'm currently scanning the parameter space of IDLE_NODE_REBALANCE_TICK
and BUSY_NODE_REBALANCE_TICK with the help of tunable rebalance
rates. The patch basically does:
-#define IDLE_NODE_REBALANCE_TICK (IDLE_REBALANCE_TICK * 2)
-#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * 2)
+int idle_nodebalance_rate =3D 10;
+int busy_nodebalance_rate =3D 10;
+#define IDLE_NODE_REBALANCE_TICK (IDLE_REBALANCE_TICK * idle_nodebalance=
_rate)
+#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * busy_nodebalance=
_rate)
and makes the variables accessible in /proc/sys/kernel
We might want to leave these tunable in case it turns out that
different platforms need significantly different values. Right now
it's just a tool for tuning.
Regards,
Erich
--------------Boundary-00=_98WW86ZNRLCFVPI8Z61H
Content-Type: text/x-diff;
charset="iso-8859-15";
name="tunable-balance-rate-2.5.59"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="tunable-balance-rate-2.5.59"
diff -urNp 2.5.59-B0/include/linux/sysctl.h 2.5.59-B0-tune/include/linux/sysctl.h
--- 2.5.59-B0/include/linux/sysctl.h 2003-01-17 03:22:16.000000000 +0100
+++ 2.5.59-B0-tune/include/linux/sysctl.h 2003-01-18 12:49:17.000000000 +0100
@@ -129,6 +129,8 @@ enum
KERN_CADPID=54, /* int: PID of the process to notify on CAD */
KERN_PIDMAX=55, /* int: PID # limit */
KERN_CORE_PATTERN=56, /* string: pattern for core-file names */
+ KERN_NODBALI=57, /* int: idle cross-node balance rate */
+ KERN_NODBALB=58, /* int: busy cross-node balance rate */
};
diff -urNp 2.5.59-B0/kernel/sched.c 2.5.59-B0-tune/kernel/sched.c
--- 2.5.59-B0/kernel/sched.c 2003-01-18 11:50:23.000000000 +0100
+++ 2.5.59-B0-tune/kernel/sched.c 2003-01-18 12:01:03.000000000 +0100
@@ -984,12 +984,14 @@ out:
* busy-rebalance every 200 msecs. idle-rebalance every 1 msec. (or on
* systems with HZ=100, every 10 msecs.)
*
- * On NUMA, do a node-rebalance every 400 msecs.
+ * On NUMA, do a node-rebalance every 10ms (idle) or 2 secs (busy).
*/
#define IDLE_REBALANCE_TICK (HZ/1000 ?: 1)
#define BUSY_REBALANCE_TICK (HZ/5 ?: 1)
-#define IDLE_NODE_REBALANCE_TICK (IDLE_REBALANCE_TICK * 2)
-#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * 2)
+int idle_nodebalance_rate = 10;
+int busy_nodebalance_rate = 10;
+#define IDLE_NODE_REBALANCE_TICK (IDLE_REBALANCE_TICK * idle_nodebalance_rate)
+#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * busy_nodebalance_rate)
#if CONFIG_NUMA
static void balance_node(runqueue_t *this_rq, int idle, int this_cpu)
diff -urNp 2.5.59-B0/kernel/sysctl.c 2.5.59-B0-tune/kernel/sysctl.c
--- 2.5.59-B0/kernel/sysctl.c 2003-01-17 03:21:39.000000000 +0100
+++ 2.5.59-B0-tune/kernel/sysctl.c 2003-01-18 11:59:53.000000000 +0100
@@ -55,6 +55,8 @@ extern char core_pattern[];
extern int cad_pid;
extern int pid_max;
extern int sysctl_lower_zone_protection;
+extern int idle_nodebalance_rate;
+extern int busy_nodebalance_rate;
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
static int maxolduid = 65535;
@@ -261,6 +263,10 @@ static ctl_table kern_table[] = {
#endif
{KERN_PIDMAX, "pid_max", &pid_max, sizeof (int),
0600, NULL, &proc_dointvec},
+ {KERN_NODBALI, "idle_nodebalance_rate", &idle_nodebalance_rate,
+ sizeof (int), 0600, NULL, &proc_dointvec},
+ {KERN_NODBALB, "busy_nodebalance_rate", &busy_nodebalance_rate,
+ sizeof (int), 0600, NULL, &proc_dointvec},
{0}
};
--------------Boundary-00=_98WW86ZNRLCFVPI8Z61H--
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/