More scheduling-related merges. This time the code comes from Ingo's J0
patch, Rusty, and myself. Again, I am only including bug fixes and
cleanups that should be obviously correct, although most of these are a
bit less critical than the last patch I sent.
I am still refraining from anything controversial, especially the
load_balance and sched/idle_tick changes, which I'll leave to Ingo and
Davide; I hope everyone can agree on a solution.
Itemized Changes:
- init and smp_boot fixes (the idle-startup rendezvous is modeled below,
  after the init/main.c diff)
- order nested runqueue locks by address, not by rq->cpu (see the sketch
  below)
- remove the stale RT event comment
- make lock_task_rq return the rq instead of passing it in
- make lock_task_rq and unlock_task_rq inline functions instead of #defines
- grab the runqueue spin_lock later in wake_up_forked_process
- remove the RUN_CHILD_FIRST #ifs; it is clear this works now
- misc cleanups
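
A note on the lock ordering: every runqueue lives in the static
runqueues[] array, so comparing runqueue addresses gives a total order
that all lockers agree on, and the rq->cpu field becomes unnecessary.
A minimal sketch of the idiom, assuming kernel context (double_lock is
a hypothetical name; the patch open-codes the same comparison in
double_rq_lock() and load_balance()):

	/*
	 * Always take the lower-addressed lock first.  Two CPUs can
	 * then never each hold one runqueue lock while spinning on
	 * the other, which is the same deadlock-free guarantee the
	 * old rq->cpu ordering provided.
	 */
	static inline void double_lock(runqueue_t *rq1, runqueue_t *rq2)
	{
		if (rq1 == rq2) {
			spin_lock(&rq1->lock);
		} else if (rq1 < rq2) {
			spin_lock(&rq1->lock);
			spin_lock(&rq2->lock);
		} else {
			spin_lock(&rq2->lock);
			spin_lock(&rq1->lock);
		}
	}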
Robert Love
diff -urN linux-2.5.3-pre1/arch/i386/kernel/smpboot.c linux/arch/i386/kernel/smpboot.c
--- linux-2.5.3-pre1/arch/i386/kernel/smpboot.c Tue Jan 15 18:05:21 2002
+++ linux/arch/i386/kernel/smpboot.c Wed Jan 16 14:39:11 2002
@@ -462,6 +462,7 @@
* things done here to the most necessary things.
*/
cpu_init();
+ init_idle();
smp_callin();
while (!atomic_read(&smp_commenced))
rep_nop();
@@ -470,8 +471,8 @@
* the local TLBs too.
*/
local_flush_tlb();
- init_idle();
+ idle_startup_done();
return cpu_idle();
}
diff -urN linux-2.5.3-pre1/include/asm-i386/smplock.h linux/include/asm-i386/smplock.h
--- linux-2.5.3-pre1/include/asm-i386/smplock.h Tue Jan 15 18:05:20 2002
+++ linux/include/asm-i386/smplock.h Wed Jan 16 15:41:34 2002
@@ -19,8 +19,8 @@
do { \
if (unlikely(task->lock_depth >= 0)) { \
spin_unlock(&kernel_flag); \
- release_irqlock(cpu); \
- __sti(); \
+ if (global_irq_holder == (cpu)) \
+ BUG(); \
} \
} while (0)
diff -urN linux-2.5.3-pre1/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.5.3-pre1/include/linux/sched.h Tue Jan 15 18:05:20 2002
+++ linux/include/linux/sched.h Wed Jan 16 15:41:29 2002
@@ -136,8 +136,11 @@
extern rwlock_t tasklist_lock;
extern spinlock_t mmlist_lock;
+typedef struct task_struct task_t;
+
extern void sched_init(void);
extern void init_idle(void);
+extern void idle_startup_done(void);
extern void show_state(void);
extern void cpu_init (void);
extern void trap_init(void);
@@ -221,7 +224,6 @@
extern struct user_struct root_user;
#define INIT_USER (&root_user)
-typedef struct task_struct task_t;
typedef struct prio_array prio_array_t;
struct task_struct {
diff -urN linux-2.5.3-pre1/init/main.c linux/init/main.c
--- linux-2.5.3-pre1/init/main.c Tue Jan 15 18:05:20 2002
+++ linux/init/main.c Wed Jan 16 14:38:29 2002
@@ -290,8 +290,6 @@
extern void setup_arch(char **);
extern void cpu_idle(void);
-unsigned long wait_init_idle;
-
#ifndef CONFIG_SMP
#ifdef CONFIG_X86_LOCAL_APIC
@@ -305,6 +303,16 @@
#else
+static unsigned long __initdata wait_init_idle;
+
+void __init idle_startup_done(void)
+{
+ clear_bit(smp_processor_id(), &wait_init_idle);
+ while (wait_init_idle) {
+ cpu_relax();
+ barrier();
+ }
+}
/* Called by boot processor to activate the rest. */
static void __init smp_init(void)
@@ -315,6 +323,7 @@
smp_threads_ready=1;
smp_commence();
+ idle_startup_done();
}
#endif
@@ -411,6 +420,7 @@
check_bugs();
printk("POSIX conformance testing by UNIFIX\n");
+ init_idle();
/*
* We count on the initial thread going ok
* Like idlers init is an unlocked kernel thread, which will
@@ -418,14 +428,6 @@
*/
smp_init();
- /*
- * Finally, we wait for all other CPU's, and initialize this
- * thread that will become the idle thread for the boot CPU.
- * After this, the scheduler is fully initialized, and we can
- * start creating and running new threads.
- */
- init_idle();
-
/* Do the rest non-__init'ed, we're now alive */
rest_init();
}
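
An aside on the wait_init_idle logic the hunk above moves into
init/main.c: it is a simple rendezvous barrier. Each CPU clears its own
bit and then spins until the mask drains, so no idle thread proceeds
until all of them have checked in. A small user-space model of the idea
(the pthread scaffolding and the NCPUS constant are illustrative, not
part of the patch):

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	#define NCPUS 4

	/* One bit per CPU, all set "at boot". */
	static atomic_ulong wait_init_idle = (1UL << NCPUS) - 1;

	static void *cpu_thread(void *arg)
	{
		long cpu = (long)arg;

		/* Check in, then wait for every other CPU to check in. */
		atomic_fetch_and(&wait_init_idle, ~(1UL << cpu));
		while (atomic_load(&wait_init_idle))
			;	/* the kernel spins with cpu_relax() here */

		printf("cpu %ld past the rendezvous\n", cpu);
		return NULL;
	}

	int main(void)
	{
		pthread_t threads[NCPUS];
		long i;

		for (i = 0; i < NCPUS; i++)
			pthread_create(&threads[i], NULL, cpu_thread, (void *)i);
		for (i = 0; i < NCPUS; i++)
			pthread_join(threads[i], NULL);
		return 0;
	}

Build with -pthread. The kernel version uses clear_bit(), cpu_relax(),
and a plain unsigned long, but the rendezvous structure is the same.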
diff -urN linux-2.5.3-pre1/kernel/fork.c linux/kernel/fork.c
--- linux-2.5.3-pre1/kernel/fork.c Tue Jan 15 18:05:20 2002
+++ linux/kernel/fork.c Wed Jan 16 15:30:48 2002
@@ -746,23 +746,16 @@
if (p->ptrace & PT_PTRACED)
send_sig(SIGSTOP, p, 1);
-#define RUN_CHILD_FIRST 1
-#if RUN_CHILD_FIRST
wake_up_forked_process(p); /* do this last */
-#else
- wake_up_process(p); /* do this last */
-#endif
++total_forks;
if (clone_flags & CLONE_VFORK)
wait_for_completion(&vfork);
-#if RUN_CHILD_FIRST
else
/*
* Let the child process run first, to avoid most of the
* COW overhead when the child exec()s afterwards.
*/
current->need_resched = 1;
-#endif
fork_out:
return retval;
diff -urN linux-2.5.3-pre1/kernel/sched.c linux/kernel/sched.c
--- linux-2.5.3-pre1/kernel/sched.c Tue Jan 15 18:05:20 2002
+++ linux/kernel/sched.c Wed Jan 16 15:40:20 2002
@@ -40,14 +40,9 @@
*
* Locking rule: those places that want to lock multiple runqueues
* (such as the load balancing or the process migration code), lock
- * acquire operations must be ordered by rq->cpu.
- *
- * The RT event id is used to avoid calling into the the RT scheduler
- * if there is a RT task active in an SMP system but there is no
- * RT scheduling activity otherwise.
+ * acquire operations must be ordered by ascending &runqueue.
*/
static struct runqueue {
- int cpu;
spinlock_t lock;
unsigned long nr_running, nr_switches;
task_t *curr, *idle;
@@ -55,25 +50,31 @@
char __pad [SMP_CACHE_BYTES];
} runqueues [NR_CPUS] __cacheline_aligned;
-#define this_rq() (runqueues + smp_processor_id())
-#define task_rq(p) (runqueues + (p)->cpu)
#define cpu_rq(cpu) (runqueues + (cpu))
-#define cpu_curr(cpu) (runqueues[(cpu)].curr)
+#define this_rq() cpu_rq(smp_processor_id())
+#define task_rq(p) cpu_rq((p)->cpu)
+#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define rt_task(p) ((p)->policy != SCHED_OTHER)
-#define lock_task_rq(rq,p,flags) \
-do { \
-repeat_lock_task: \
- rq = task_rq(p); \
- spin_lock_irqsave(&rq->lock, flags); \
- if (unlikely((rq)->cpu != (p)->cpu)) { \
- spin_unlock_irqrestore(&rq->lock, flags); \
- goto repeat_lock_task; \
- } \
-} while (0)
+static inline runqueue_t *lock_task_rq(task_t *p, unsigned long *flags)
+{
+ struct runqueue *rq;
+
+repeat_lock_task:
+ rq = task_rq(p);
+ spin_lock_irqsave(&rq->lock, *flags);
+ if (unlikely(rq != task_rq(p))) {
+ spin_unlock_irqrestore(&rq->lock, *flags);
+ goto repeat_lock_task;
+ }
+ return rq;
+}
-#define unlock_task_rq(rq,p,flags) \
- spin_unlock_irqrestore(&rq->lock, flags)
+static inline void unlock_task_rq(runqueue_t *rq, task_t *p,
+ unsigned long *flags)
+{
+ spin_unlock_irqrestore(&rq->lock, *flags);
+}
/*
* Adding/removing a task to/from a priority array:
@@ -147,12 +148,12 @@
cpu_relax();
barrier();
}
- lock_task_rq(rq, p, flags);
+ rq = lock_task_rq(p, &flags);
if (unlikely(rq->curr == p)) {
- unlock_task_rq(rq, p, flags);
+ unlock_task_rq(rq, p, &flags);
goto repeat;
}
- unlock_task_rq(rq, p, flags);
+ unlock_task_rq(rq, p, &flags);
}
/*
@@ -185,7 +186,7 @@
int success = 0;
runqueue_t *rq;
- lock_task_rq(rq, p, flags);
+ rq = lock_task_rq(p, &flags);
p->state = TASK_RUNNING;
if (!p->array) {
activate_task(p, rq);
@@ -193,7 +194,7 @@
resched_task(rq->curr);
success = 1;
}
- unlock_task_rq(rq, p, flags);
+ unlock_task_rq(rq, p, &flags);
return success;
}
@@ -206,13 +207,13 @@
{
runqueue_t *rq = this_rq();
- spin_lock_irq(&rq->lock);
p->state = TASK_RUNNING;
if (!rt_task(p)) {
p->prio += MAX_USER_PRIO/10;
if (p->prio > MAX_PRIO-1)
p->prio = MAX_PRIO-1;
}
+ spin_lock_irq(&rq->lock);
activate_task(p, rq);
spin_unlock_irq(&rq->lock);
}
@@ -333,7 +334,7 @@
if (max_load <= this_rq->nr_running)
return;
prev_max_load = max_load;
- if (busiest->cpu < this_rq->cpu) {
+ if (busiest < this_rq) {
spin_unlock(&this_rq->lock);
spin_lock(&busiest->lock);
spin_lock(&this_rq->lock);
@@ -731,7 +732,7 @@
* We have to be careful, if called from sys_setpriority(),
* the task might be in the middle of scheduling on another CPU.
*/
- lock_task_rq(rq, p, flags);
+ rq = lock_task_rq(p, &flags);
if (rt_task(p)) {
p->__nice = nice;
goto out_unlock;
@@ -745,7 +746,7 @@
if (array) {
enqueue_task(p, array);
/*
- * If the task is runnable and lowered its priority,
+ * If the task is running and lowered its priority,
* or increased its priority then reschedule its CPU:
*/
if ((nice < p->__nice) ||
@@ -753,7 +754,7 @@
resched_task(rq->curr);
}
out_unlock:
- unlock_task_rq(rq, p, flags);
+ unlock_task_rq(rq, p, &flags);
}
#ifndef __alpha__
@@ -830,7 +831,7 @@
* To be able to change p->policy safely, the apropriate
* runqueue lock must be held.
*/
- lock_task_rq(rq,p,flags);
+ rq = lock_task_rq(p, &flags);
if (policy < 0)
policy = p->policy;
@@ -873,7 +874,7 @@
activate_task(p, task_rq(p));
out_unlock:
- unlock_task_rq(rq,p,flags);
+ unlock_task_rq(rq, p, &flags);
out_unlock_tasklist:
read_unlock_irq(&tasklist_lock);
@@ -1116,14 +1117,12 @@
read_unlock(&tasklist_lock);
}
-extern unsigned long wait_init_idle;
-
static inline void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
{
if (rq1 == rq2)
spin_lock(&rq1->lock);
else {
- if (rq1->cpu < rq2->cpu) {
+ if (rq1 < rq2) {
spin_lock(&rq1->lock);
spin_lock(&rq2->lock);
} else {
@@ -1154,14 +1153,9 @@
current->array = NULL;
current->prio = MAX_PRIO;
current->state = TASK_RUNNING;
- clear_bit(smp_processor_id(), &wait_init_idle);
double_rq_unlock(this_rq, rq);
- while (wait_init_idle) {
- cpu_relax();
- barrier();
- }
current->need_resched = 1;
- __sti();
+ __restore_flags(flags);
}
extern void init_timervecs(void);
-