LWN.net Logo

tunable rebalance rates for sched-2.5.59-B0

From:  Erich Focht <efocht@ess.nec.de>
To:  Michael Hohnbaum <hohnbaum@us.ibm.com>, "Martin J. Bligh" <mbligh@aracnet.com>, Ingo Molnar <mingo@elte.hu>
Subject:  [patch] tunable rebalance rates for sched-2.5.59-B0
Date:  Sat, 18 Jan 2003 14:31:21 +0100
Cc:  Matthew Dobson <colpatch@us.ibm.com>, Christoph Hellwig <hch@infradead.org>, Robert Love <rml@tech9.net>, Andrew Theurer <habanero@us.ibm.com>, Linus Torvalds <torvalds@transmeta.com>, linux-kernel <linux-kernel@vger.kernel.org>, lse-tech <lse-tech@lists.sourceforge.net>

Hi,

I'm currently scanning the parameter space of IDLE_NODE_REBALANCE_TICK
and BUSY_NODE_REBALANCE_TICK with the help of tunable rebalance
rates. The patch basically does:

-#define IDLE_NODE_REBALANCE_TICK (IDLE_REBALANCE_TICK * 2)
-#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * 2)
+int idle_nodebalance_rate = 10;
+int busy_nodebalance_rate = 10;
+#define IDLE_NODE_REBALANCE_TICK (IDLE_REBALANCE_TICK * idle_nodebalance_rate)
+#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * busy_nodebalance_rate)

and makes the variables accessible in /proc/sys/kernel

We might want to leave these tunable in case it turns out that
different platforms need significantly different values. Right now
it's just a tool for tuning.

Regards,
Erich

diff -urNp 2.5.59-B0/include/linux/sysctl.h 2.5.59-B0-tune/include/linux/sysctl.h
--- 2.5.59-B0/include/linux/sysctl.h	2003-01-17 03:22:16.000000000 +0100
+++ 2.5.59-B0-tune/include/linux/sysctl.h	2003-01-18 12:49:17.000000000 +0100
@@ -129,6 +129,8 @@ enum
 	KERN_CADPID=54,		/* int: PID of the process to notify on CAD */
 	KERN_PIDMAX=55,		/* int: PID # limit */
   	KERN_CORE_PATTERN=56,	/* string: pattern for core-file names */
+  	KERN_NODBALI=57,	/* int: idle cross-node balance rate */
+  	KERN_NODBALB=58,	/* int: busy cross-node balance rate */
 };
 
 
diff -urNp 2.5.59-B0/kernel/sched.c 2.5.59-B0-tune/kernel/sched.c
--- 2.5.59-B0/kernel/sched.c	2003-01-18 11:50:23.000000000 +0100
+++ 2.5.59-B0-tune/kernel/sched.c	2003-01-18 12:01:03.000000000 +0100
@@ -984,12 +984,14 @@ out:
  * busy-rebalance every 200 msecs. idle-rebalance every 1 msec. (or on
  * systems with HZ=100, every 10 msecs.)
  *
- * On NUMA, do a node-rebalance every 400 msecs.
+ * On NUMA, do a node-rebalance every 10ms (idle) or 2 secs (busy).
  */
 #define IDLE_REBALANCE_TICK (HZ/1000 ?: 1)
 #define BUSY_REBALANCE_TICK (HZ/5 ?: 1)
-#define IDLE_NODE_REBALANCE_TICK (IDLE_REBALANCE_TICK * 2)
-#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * 2)
+int idle_nodebalance_rate = 10;
+int busy_nodebalance_rate = 10;
+#define IDLE_NODE_REBALANCE_TICK (IDLE_REBALANCE_TICK * idle_nodebalance_rate)
+#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * busy_nodebalance_rate)
 
 #if CONFIG_NUMA
 static void balance_node(runqueue_t *this_rq, int idle, int this_cpu)
diff -urNp 2.5.59-B0/kernel/sysctl.c 2.5.59-B0-tune/kernel/sysctl.c
--- 2.5.59-B0/kernel/sysctl.c	2003-01-17 03:21:39.000000000 +0100
+++ 2.5.59-B0-tune/kernel/sysctl.c	2003-01-18 11:59:53.000000000 +0100
@@ -55,6 +55,8 @@ extern char core_pattern[];
 extern int cad_pid;
 extern int pid_max;
 extern int sysctl_lower_zone_protection;
+extern int idle_nodebalance_rate;
+extern int busy_nodebalance_rate;
 
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 static int maxolduid = 65535;
@@ -261,6 +263,10 @@ static ctl_table kern_table[] = {
 #endif
 	{KERN_PIDMAX, "pid_max", &pid_max, sizeof (int),
 	 0600, NULL, &proc_dointvec},
+	{KERN_NODBALI, "idle_nodebalance_rate", &idle_nodebalance_rate,
+	 sizeof (int), 0600, NULL, &proc_dointvec},
+	{KERN_NODBALB, "busy_nodebalance_rate", &busy_nodebalance_rate,
+	 sizeof (int), 0600, NULL, &proc_dointvec},
 	{0}
 };
 

Copyright © 2003, Eklektix, Inc.
Comments and public postings are copyrighted by their creators.
Linux is a registered trademark of Linus Torvalds