From 743f5cdaf5ccb9fefc7c3ac68ea4676637b8782f Mon Sep 17 00:00:00 2001
From: Leon Woestenberg <leon.woestenberg@gmail.com>
Date: Sun, 7 Oct 2007 14:43:24 +0000
Subject: linux-efika: Moved from 2.6.20.11-cfs to .20.20-cfs. Needed
 div64_32() symbol weakening in lib.

---
 .../linux/linux-efika-2.6.20.11/.mtn2git_empty     |    0
 .../sched-cfs-v9-v2.6.20.11.patch                  | 5590 --------------------
 .../linux/linux-efika-2.6.20.20/.mtn2git_empty     |    0
 .../sched-cfs-v9-v2.6.20.11.patch                  | 5590 ++++++++++++++++++++
 .../weaken-div64_32-symbol.patch                   |   23 +
 packages/linux/linux-efika-2.6.20/defconfig        |   23 +-
 packages/linux/linux-efika_2.6.20.11.bb            |   85 -
 packages/linux/linux-efika_2.6.20.20.bb            |   83 +
 8 files changed, 5712 insertions(+), 5682 deletions(-)
 delete mode 100644 packages/linux/linux-efika-2.6.20.11/.mtn2git_empty
 delete mode 100644 packages/linux/linux-efika-2.6.20.11/sched-cfs-v9-v2.6.20.11.patch
 create mode 100644 packages/linux/linux-efika-2.6.20.20/.mtn2git_empty
 create mode 100644 packages/linux/linux-efika-2.6.20.20/sched-cfs-v9-v2.6.20.11.patch
 create mode 100644 packages/linux/linux-efika-2.6.20.20/weaken-div64_32-symbol.patch
 delete mode 100644 packages/linux/linux-efika_2.6.20.11.bb
 create mode 100644 packages/linux/linux-efika_2.6.20.20.bb

(limited to 'packages')
diff --git a/packages/linux/linux-efika-2.6.20.11/.mtn2git_empty b/packages/linux/linux-efika-2.6.20.11/.mtn2git_empty
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/packages/linux/linux-efika-2.6.20.11/sched-cfs-v9-v2.6.20.11.patch b/packages/linux/linux-efika-2.6.20.11/sched-cfs-v9-v2.6.20.11.patch
deleted file mode 100644
index 29071a99ac..0000000000
--- a/packages/linux/linux-efika-2.6.20.11/sched-cfs-v9-v2.6.20.11.patch
+++ /dev/null
@@ -1,5590 +0,0 @@
-This is the Complete Fair Scheduler (CFS) v9 patch for
-linux 2.6.20.10 patch (rediffed cleanly against .11).
-
-http://people.redhat.com/mingo/cfs-scheduler/
-
-Index: linux-cfs-2.6.20.8.q/Documentation/kernel-parameters.txt
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/Documentation/kernel-parameters.txt
-+++ linux-cfs-2.6.20.8.q/Documentation/kernel-parameters.txt
-@@ -914,49 +914,6 @@ and is between 256 and 4096 characters. 
- 
- 	mga=		[HW,DRM]
- 
--	migration_cost=
--			[KNL,SMP] debug: override scheduler migration costs
--			Format: <level-1-usecs>,<level-2-usecs>,...
--			This debugging option can be used to override the
--			default scheduler migration cost matrix. The numbers
--			are indexed by 'CPU domain distance'.
--			E.g. migration_cost=1000,2000,3000 on an SMT NUMA
--			box will set up an intra-core migration cost of
--			1 msec, an inter-core migration cost of 2 msecs,
--			and an inter-node migration cost of 3 msecs.
--
--			WARNING: using the wrong values here can break
--			scheduler performance, so it's only for scheduler
--			development purposes, not production environments.
--
--	migration_debug=
--			[KNL,SMP] migration cost auto-detect verbosity
--			Format=<0|1|2>
--			If a system's migration matrix reported at bootup
--			seems erroneous then this option can be used to
--			increase verbosity of the detection process.
--			We default to 0 (no extra messages), 1 will print
--			some more information, and 2 will be really
--			verbose (probably only useful if you also have a
--			serial console attached to the system).
--
--	migration_factor=
--			[KNL,SMP] multiply/divide migration costs by a factor
--			Format=<percent>
--			This debug option can be used to proportionally
--			increase or decrease the auto-detected migration
--			costs for all entries of the migration matrix.
--			E.g. migration_factor=150 will increase migration
--			costs by 50%. (and thus the scheduler will be less
--			eager migrating cache-hot tasks)
--			migration_factor=80 will decrease migration costs
--			by 20%. (thus the scheduler will be more eager to
--			migrate tasks)
--
--			WARNING: using the wrong values here can break
--			scheduler performance, so it's only for scheduler
--			development purposes, not production environments.
--
- 	mousedev.tap_time=
- 			[MOUSE] Maximum time between finger touching and
- 			leaving touchpad surface for touch to be considered
-Index: linux-cfs-2.6.20.8.q/Documentation/sched-design-CFS.txt
-===================================================================
---- /dev/null
-+++ linux-cfs-2.6.20.8.q/Documentation/sched-design-CFS.txt
-@@ -0,0 +1,107 @@
-+[announce] [patch] Modular Scheduler Core and Completely Fair Scheduler [CFS]
-+
-+i'm pleased to announce the first release of the "Modular Scheduler Core
-+and Completely Fair Scheduler [CFS]" patchset:
-+
-+   http://redhat.com/~mingo/cfs-scheduler/
-+
-+This project is a complete rewrite of the Linux task scheduler. My goal
-+is to address various feature requests and to fix deficiencies in the
-+vanilla scheduler that were suggested/found in the past few years, both
-+for desktop scheduling and for server scheduling workloads.
-+
-+[ QuickStart: apply the patch, recompile, reboot. The new scheduler
-+  will be active by default and all tasks will default to the
-+  SCHED_NORMAL interactive scheduling class. ]
-+
-+Highlights are:
-+
-+ - the introduction of Scheduling Classes: an extensible hierarchy of
-+   scheduler modules. These modules encapsulate scheduling policy
-+   details and are handled by the scheduler core without the core
-+   code assuming about them too much.
-+
-+ - sched_fair.c implements the 'CFS desktop scheduler': it is a
-+   replacement for the vanilla scheduler's SCHED_OTHER interactivity
-+   code.
-+
-+   i'd like to give credit to Con Kolivas for the general approach here:
-+   he has proven via RSDL/SD that 'fair scheduling' is possible and that
-+   it results in better desktop scheduling. Kudos Con!
-+
-+   The CFS patch uses a completely different approach and implementation
-+   from RSDL/SD. My goal was to make CFS's interactivity quality exceed
-+   that of RSDL/SD, which is a high standard to meet :-) Testing
-+   feedback is welcome to decide this one way or another. [ and, in any
-+   case, all of SD's logic could be added via a kernel/sched_sd.c module
-+   as well, if Con is interested in such an approach. ]
-+
-+   CFS's design is quite radical: it does not use runqueues, it uses a
-+   time-ordered rbtree to build a 'timeline' of future task execution,
-+   and thus has no 'array switch' artifacts (by which both the vanilla
-+   scheduler and RSDL/SD are affected).
-+
-+   CFS uses nanosecond granularity accounting and does not rely on any
-+   jiffies or other HZ detail. Thus the CFS scheduler has no notion of
-+   'timeslices' and has no heuristics whatsoever. There is only one
-+   central tunable:
-+
-+         /proc/sys/kernel/sched_granularity_ns
-+
-+   which can be used to tune the scheduler from 'desktop' (low
-+   latencies) to 'server' (good batching) workloads. It defaults to a
-+   setting suitable for desktop workloads. SCHED_BATCH is handled by the
-+   CFS scheduler module too.
-+
-+   due to its design, the CFS scheduler is not prone to any of the
-+   'attacks' that exist today against the heuristics of the stock
-+   scheduler: fiftyp.c, thud.c, chew.c, ring-test.c, massive_intr.c all
-+   work fine and do not impact interactivity and produce the expected
-+   behavior.
-+
-+   the CFS scheduler has a much stronger handling of nice levels and
-+   SCHED_BATCH: both types of workloads should be isolated much more
-+   agressively than under the vanilla scheduler.
-+
-+   ( another rdetail: due to nanosec accounting and timeline sorting,
-+     sched_yield() support is very simple under CFS, and in fact under
-+     CFS sched_yield() behaves much better than under any other
-+     scheduler i have tested so far. )
-+
-+ - sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler
-+   way than the vanilla scheduler does. It uses 100 runqueues (for all
-+   100 RT priority levels, instead of 140 in the vanilla scheduler)
-+   and it needs no expired array.
-+
-+ - reworked/sanitized SMP load-balancing: the runqueue-walking
-+   assumptions are gone from the load-balancing code now, and
-+   iterators of the scheduling modules are used. The balancing code got
-+   quite a bit simpler as a result.
-+
-+the core scheduler got smaller by more than 700 lines:
-+
-+ kernel/sched.c | 1454 ++++++++++++++++------------------------------------------------
-+ 1 file changed, 372 insertions(+), 1082 deletions(-)
-+
-+and even adding all the scheduling modules, the total size impact is
-+relatively small:
-+
-+ 18 files changed, 1454 insertions(+), 1133 deletions(-)
-+
-+most of the increase is due to extensive comments. The kernel size
-+impact is in fact a small negative:
-+
-+   text    data     bss     dec     hex filename
-+  23366    4001      24   27391    6aff kernel/sched.o.vanilla
-+  24159    2705      56   26920    6928 kernel/sched.o.CFS
-+
-+(this is mainly due to the benefit of getting rid of the expired array
-+and its data structure overhead.)
-+
-+thanks go to Thomas Gleixner and Arjan van de Ven for review of this
-+patchset.
-+
-+as usual, any sort of feedback, bugreports, fixes and suggestions are
-+more than welcome,
-+
-+	Ingo
-Index: linux-cfs-2.6.20.8.q/Makefile
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/Makefile
-+++ linux-cfs-2.6.20.8.q/Makefile
-@@ -1,7 +1,7 @@
- VERSION = 2
- PATCHLEVEL = 6
- SUBLEVEL = 20
--EXTRAVERSION = .11
-+EXTRAVERSION = .11-cfs-v9
- NAME = Homicidal Dwarf Hamster
- 
- # *DOCUMENTATION*
-Index: linux-cfs-2.6.20.8.q/arch/i386/kernel/smpboot.c
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/arch/i386/kernel/smpboot.c
-+++ linux-cfs-2.6.20.8.q/arch/i386/kernel/smpboot.c
-@@ -1132,18 +1132,6 @@ exit:
- }
- #endif
- 
--static void smp_tune_scheduling(void)
--{
--	unsigned long cachesize;       /* kB   */
--
--	if (cpu_khz) {
--		cachesize = boot_cpu_data.x86_cache_size;
--
--		if (cachesize > 0)
--			max_cache_size = cachesize * 1024;
--	}
--}
--
- /*
-  * Cycle through the processors sending APIC IPIs to boot each.
-  */
-@@ -1172,7 +1160,6 @@ static void __init smp_boot_cpus(unsigne
- 	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
- 
- 	current_thread_info()->cpu = 0;
--	smp_tune_scheduling();
- 
- 	set_cpu_sibling_map(0);
- 
-Index: linux-cfs-2.6.20.8.q/arch/i386/kernel/syscall_table.S
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/arch/i386/kernel/syscall_table.S
-+++ linux-cfs-2.6.20.8.q/arch/i386/kernel/syscall_table.S
-@@ -319,3 +319,4 @@ ENTRY(sys_call_table)
- 	.long sys_move_pages
- 	.long sys_getcpu
- 	.long sys_epoll_pwait
-+	.long sys_sched_yield_to	/* 320 */
-Index: linux-cfs-2.6.20.8.q/arch/i386/kernel/tsc.c
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/arch/i386/kernel/tsc.c
-+++ linux-cfs-2.6.20.8.q/arch/i386/kernel/tsc.c
-@@ -61,6 +61,8 @@ static inline int check_tsc_unstable(voi
- 
- void mark_tsc_unstable(void)
- {
-+	sched_clock_unstable_event();
-+
- 	tsc_unstable = 1;
- }
- EXPORT_SYMBOL_GPL(mark_tsc_unstable);
-@@ -107,13 +109,7 @@ unsigned long long sched_clock(void)
- {
- 	unsigned long long this_offset;
- 
--	/*
--	 * in the NUMA case we dont use the TSC as they are not
--	 * synchronized across all CPUs.
--	 */
--#ifndef CONFIG_NUMA
--	if (!cpu_khz || check_tsc_unstable())
--#endif
-+	if (!cpu_khz || !cpu_has_tsc)
- 		/* no locking but a rare wrong value is not a big deal */
- 		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
- 
-Index: linux-cfs-2.6.20.8.q/arch/ia64/kernel/setup.c
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/arch/ia64/kernel/setup.c
-+++ linux-cfs-2.6.20.8.q/arch/ia64/kernel/setup.c
-@@ -773,7 +773,6 @@ static void __cpuinit
- get_max_cacheline_size (void)
- {
- 	unsigned long line_size, max = 1;
--	unsigned int cache_size = 0;
- 	u64 l, levels, unique_caches;
-         pal_cache_config_info_t cci;
-         s64 status;
-@@ -803,8 +802,6 @@ get_max_cacheline_size (void)
- 		line_size = 1 << cci.pcci_line_size;
- 		if (line_size > max)
- 			max = line_size;
--		if (cache_size < cci.pcci_cache_size)
--			cache_size = cci.pcci_cache_size;
- 		if (!cci.pcci_unified) {
- 			status = ia64_pal_cache_config_info(l,
- 						    /* cache_type (instruction)= */ 1,
-@@ -821,9 +818,6 @@ get_max_cacheline_size (void)
- 			ia64_i_cache_stride_shift = cci.pcci_stride;
- 	}
-   out:
--#ifdef CONFIG_SMP
--	max_cache_size = max(max_cache_size, cache_size);
--#endif
- 	if (max > ia64_max_cacheline_size)
- 		ia64_max_cacheline_size = max;
- }
-Index: linux-cfs-2.6.20.8.q/arch/mips/kernel/smp.c
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/arch/mips/kernel/smp.c
-+++ linux-cfs-2.6.20.8.q/arch/mips/kernel/smp.c
-@@ -245,7 +245,6 @@ void __init smp_prepare_cpus(unsigned in
- {
- 	init_new_context(current, &init_mm);
- 	current_thread_info()->cpu = 0;
--	smp_tune_scheduling();
- 	plat_prepare_cpus(max_cpus);
- #ifndef CONFIG_HOTPLUG_CPU
- 	cpu_present_map = cpu_possible_map;
-Index: linux-cfs-2.6.20.8.q/arch/sparc/kernel/smp.c
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/arch/sparc/kernel/smp.c
-+++ linux-cfs-2.6.20.8.q/arch/sparc/kernel/smp.c
-@@ -69,16 +69,6 @@ void __cpuinit smp_store_cpu_info(int id
- 	cpu_data(id).prom_node = cpu_node;
- 	cpu_data(id).mid = cpu_get_hwmid(cpu_node);
- 
--	/* this is required to tune the scheduler correctly */
--	/* is it possible to have CPUs with different cache sizes? */
--	if (id == boot_cpu_id) {
--		int cache_line,cache_nlines;
--		cache_line = 0x20;
--		cache_line = prom_getintdefault(cpu_node, "ecache-line-size", cache_line);
--		cache_nlines = 0x8000;
--		cache_nlines = prom_getintdefault(cpu_node, "ecache-nlines", cache_nlines);
--		max_cache_size = cache_line * cache_nlines;
--	}
- 	if (cpu_data(id).mid < 0)
- 		panic("No MID found for CPU%d at node 0x%08d", id, cpu_node);
- }
-Index: linux-cfs-2.6.20.8.q/arch/sparc64/kernel/smp.c
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/arch/sparc64/kernel/smp.c
-+++ linux-cfs-2.6.20.8.q/arch/sparc64/kernel/smp.c
-@@ -1293,41 +1293,6 @@ int setup_profiling_timer(unsigned int m
- 	return 0;
- }
- 
--static void __init smp_tune_scheduling(void)
--{
--	struct device_node *dp;
--	int instance;
--	unsigned int def, smallest = ~0U;
--
--	def = ((tlb_type == hypervisor) ?
--	       (3 * 1024 * 1024) :
--	       (4 * 1024 * 1024));
--
--	instance = 0;
--	while (!cpu_find_by_instance(instance, &dp, NULL)) {
--		unsigned int val;
--
--		val = of_getintprop_default(dp, "ecache-size", def);
--		if (val < smallest)
--			smallest = val;
--
--		instance++;
--	}
--
--	/* Any value less than 256K is nonsense.  */
--	if (smallest < (256U * 1024U))
--		smallest = 256 * 1024;
--
--	max_cache_size = smallest;
--
--	if (smallest < 1U * 1024U * 1024U)
--		printk(KERN_INFO "Using max_cache_size of %uKB\n",
--		       smallest / 1024U);
--	else
--		printk(KERN_INFO "Using max_cache_size of %uMB\n",
--		       smallest / 1024U / 1024U);
--}
--
- /* Constrain the number of cpus to max_cpus.  */
- void __init smp_prepare_cpus(unsigned int max_cpus)
- {
-@@ -1363,7 +1328,6 @@ void __init smp_prepare_cpus(unsigned in
- 	}
- 
- 	smp_store_cpu_info(boot_cpu_id);
--	smp_tune_scheduling();
- }
- 
- /* Set this up early so that things like the scheduler can init
-Index: linux-cfs-2.6.20.8.q/fs/proc/array.c
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/fs/proc/array.c
-+++ linux-cfs-2.6.20.8.q/fs/proc/array.c
-@@ -165,7 +165,6 @@ static inline char * task_state(struct t
- 	rcu_read_lock();
- 	buffer += sprintf(buffer,
- 		"State:\t%s\n"
--		"SleepAVG:\t%lu%%\n"
- 		"Tgid:\t%d\n"
- 		"Pid:\t%d\n"
- 		"PPid:\t%d\n"
-@@ -173,9 +172,8 @@ static inline char * task_state(struct t
- 		"Uid:\t%d\t%d\t%d\t%d\n"
- 		"Gid:\t%d\t%d\t%d\t%d\n",
- 		get_task_state(p),
--		(p->sleep_avg/1024)*100/(1020000000/1024),
--	       	p->tgid, p->pid,
--	       	pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
-+		p->tgid, p->pid,
-+		pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
- 		pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
- 		p->uid, p->euid, p->suid, p->fsuid,
- 		p->gid, p->egid, p->sgid, p->fsgid);
-@@ -312,6 +310,11 @@ int proc_pid_status(struct task_struct *
- 	return buffer - orig;
- }
- 
-+int proc_pid_sched(struct task_struct *task, char *buffer)
-+{
-+	return sched_print_task_state(task, buffer) - buffer;
-+}
-+
- static int do_task_stat(struct task_struct *task, char * buffer, int whole)
- {
- 	unsigned long vsize, eip, esp, wchan = ~0UL;
-Index: linux-cfs-2.6.20.8.q/fs/proc/base.c
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/fs/proc/base.c
-+++ linux-cfs-2.6.20.8.q/fs/proc/base.c
-@@ -1839,6 +1839,7 @@ static struct pid_entry tgid_base_stuff[
- 	INF("environ",    S_IRUSR, pid_environ),
- 	INF("auxv",       S_IRUSR, pid_auxv),
- 	INF("status",     S_IRUGO, pid_status),
-+	INF("sched",      S_IRUGO, pid_sched),
- 	INF("cmdline",    S_IRUGO, pid_cmdline),
- 	INF("stat",       S_IRUGO, tgid_stat),
- 	INF("statm",      S_IRUGO, pid_statm),
-@@ -2121,6 +2122,7 @@ static struct pid_entry tid_base_stuff[]
- 	INF("environ",   S_IRUSR, pid_environ),
- 	INF("auxv",      S_IRUSR, pid_auxv),
- 	INF("status",    S_IRUGO, pid_status),
-+	INF("sched",     S_IRUGO, pid_sched),
- 	INF("cmdline",   S_IRUGO, pid_cmdline),
- 	INF("stat",      S_IRUGO, tid_stat),
- 	INF("statm",     S_IRUGO, pid_statm),
-Index: linux-cfs-2.6.20.8.q/fs/proc/internal.h
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/fs/proc/internal.h
-+++ linux-cfs-2.6.20.8.q/fs/proc/internal.h
-@@ -36,6 +36,7 @@ extern int proc_exe_link(struct inode *,
- extern int proc_tid_stat(struct task_struct *,  char *);
- extern int proc_tgid_stat(struct task_struct *, char *);
- extern int proc_pid_status(struct task_struct *, char *);
-+extern int proc_pid_sched(struct task_struct *, char *);
- extern int proc_pid_statm(struct task_struct *, char *);
- 
- extern struct file_operations proc_maps_operations;
-Index: linux-cfs-2.6.20.8.q/include/asm-generic/bitops/sched.h
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/include/asm-generic/bitops/sched.h
-+++ linux-cfs-2.6.20.8.q/include/asm-generic/bitops/sched.h
-@@ -6,28 +6,23 @@
- 
- /*
-  * Every architecture must define this function. It's the fastest
-- * way of searching a 140-bit bitmap where the first 100 bits are
-- * unlikely to be set. It's guaranteed that at least one of the 140
-- * bits is cleared.
-+ * way of searching a 100-bit bitmap.  It's guaranteed that at least
-+ * one of the 100 bits is cleared.
-  */
- static inline int sched_find_first_bit(const unsigned long *b)
- {
- #if BITS_PER_LONG == 64
--	if (unlikely(b[0]))
-+	if (b[0])
- 		return __ffs(b[0]);
--	if (likely(b[1]))
--		return __ffs(b[1]) + 64;
--	return __ffs(b[2]) + 128;
-+	return __ffs(b[1]) + 64;
- #elif BITS_PER_LONG == 32
--	if (unlikely(b[0]))
-+	if (b[0])
- 		return __ffs(b[0]);
--	if (unlikely(b[1]))
-+	if (b[1])
- 		return __ffs(b[1]) + 32;
--	if (unlikely(b[2]))
-+	if (b[2])
- 		return __ffs(b[2]) + 64;
--	if (b[3])
--		return __ffs(b[3]) + 96;
--	return __ffs(b[4]) + 128;
-+	return __ffs(b[3]) + 96;
- #else
- #error BITS_PER_LONG not defined
- #endif
-Index: linux-cfs-2.6.20.8.q/include/asm-i386/topology.h
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/include/asm-i386/topology.h
-+++ linux-cfs-2.6.20.8.q/include/asm-i386/topology.h
-@@ -85,7 +85,6 @@ static inline int node_to_first_cpu(int 
- 	.idle_idx		= 1,			\
- 	.newidle_idx		= 2,			\
- 	.wake_idx		= 1,			\
--	.per_cpu_gain		= 100,			\
- 	.flags			= SD_LOAD_BALANCE	\
- 				| SD_BALANCE_EXEC	\
- 				| SD_BALANCE_FORK	\
-Index: linux-cfs-2.6.20.8.q/include/asm-i386/unistd.h
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/include/asm-i386/unistd.h
-+++ linux-cfs-2.6.20.8.q/include/asm-i386/unistd.h
-@@ -325,10 +325,11 @@
- #define __NR_move_pages		317
- #define __NR_getcpu		318
- #define __NR_epoll_pwait	319
-+#define __NR_sched_yield_to	320
- 
- #ifdef __KERNEL__
- 
--#define NR_syscalls 320
-+#define NR_syscalls 321
- 
- #define __ARCH_WANT_IPC_PARSE_VERSION
- #define __ARCH_WANT_OLD_READDIR
-Index: linux-cfs-2.6.20.8.q/include/asm-ia64/topology.h
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/include/asm-ia64/topology.h
-+++ linux-cfs-2.6.20.8.q/include/asm-ia64/topology.h
-@@ -65,7 +65,6 @@ void build_cpu_to_node_map(void);
- 	.max_interval		= 4,			\
- 	.busy_factor		= 64,			\
- 	.imbalance_pct		= 125,			\
--	.per_cpu_gain		= 100,			\
- 	.cache_nice_tries	= 2,			\
- 	.busy_idx		= 2,			\
- 	.idle_idx		= 1,			\
-@@ -97,7 +96,6 @@ void build_cpu_to_node_map(void);
- 	.newidle_idx		= 0, /* unused */	\
- 	.wake_idx		= 1,			\
- 	.forkexec_idx		= 1,			\
--	.per_cpu_gain		= 100,			\
- 	.flags			= SD_LOAD_BALANCE	\
- 				| SD_BALANCE_EXEC	\
- 				| SD_BALANCE_FORK	\
-Index: linux-cfs-2.6.20.8.q/include/asm-mips/mach-ip27/topology.h
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/include/asm-mips/mach-ip27/topology.h
-+++ linux-cfs-2.6.20.8.q/include/asm-mips/mach-ip27/topology.h
-@@ -28,7 +28,6 @@ extern unsigned char __node_distances[MA
- 	.busy_factor		= 32,			\
- 	.imbalance_pct		= 125,			\
- 	.cache_nice_tries	= 1,			\
--	.per_cpu_gain		= 100,			\
- 	.flags			= SD_LOAD_BALANCE	\
- 				| SD_BALANCE_EXEC	\
- 				| SD_WAKE_BALANCE,	\
-Index: linux-cfs-2.6.20.8.q/include/asm-powerpc/topology.h
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/include/asm-powerpc/topology.h
-+++ linux-cfs-2.6.20.8.q/include/asm-powerpc/topology.h
-@@ -57,7 +57,6 @@ static inline int pcibus_to_node(struct 
- 	.busy_factor		= 32,			\
- 	.imbalance_pct		= 125,			\
- 	.cache_nice_tries	= 1,			\
--	.per_cpu_gain		= 100,			\
- 	.busy_idx		= 3,			\
- 	.idle_idx		= 1,			\
- 	.newidle_idx		= 2,			\
-Index: linux-cfs-2.6.20.8.q/include/asm-x86_64/topology.h
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/include/asm-x86_64/topology.h
-+++ linux-cfs-2.6.20.8.q/include/asm-x86_64/topology.h
-@@ -43,7 +43,6 @@ extern int __node_distance(int, int);
- 	.newidle_idx		= 0, 			\
- 	.wake_idx		= 1,			\
- 	.forkexec_idx		= 1,			\
--	.per_cpu_gain		= 100,			\
- 	.flags			= SD_LOAD_BALANCE	\
- 				| SD_BALANCE_FORK	\
- 				| SD_BALANCE_EXEC	\
-Index: linux-cfs-2.6.20.8.q/include/asm-x86_64/unistd.h
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/include/asm-x86_64/unistd.h
-+++ linux-cfs-2.6.20.8.q/include/asm-x86_64/unistd.h
-@@ -619,8 +619,10 @@ __SYSCALL(__NR_sync_file_range, sys_sync
- __SYSCALL(__NR_vmsplice, sys_vmsplice)
- #define __NR_move_pages		279
- __SYSCALL(__NR_move_pages, sys_move_pages)
-+#define __NR_sched_yield_to	280
-+__SYSCALL(__NR_sched_yield_to, sys_sched_yield_to)
- 
--#define __NR_syscall_max __NR_move_pages
-+#define __NR_syscall_max __NR_sched_yield_to
- 
- #ifndef __NO_STUBS
- #define __ARCH_WANT_OLD_READDIR
-Index: linux-cfs-2.6.20.8.q/include/linux/hardirq.h
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/include/linux/hardirq.h
-+++ linux-cfs-2.6.20.8.q/include/linux/hardirq.h
-@@ -79,6 +79,19 @@
- #endif
- 
- #ifdef CONFIG_PREEMPT
-+# define PREEMPT_CHECK_OFFSET 1
-+#else
-+# define PREEMPT_CHECK_OFFSET 0
-+#endif
-+
-+/*
-+ * Check whether we were atomic before we did preempt_disable():
-+ * (used by the scheduler)
-+ */
-+#define in_atomic_preempt_off() \
-+		((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
-+
-+#ifdef CONFIG_PREEMPT
- # define preemptible()	(preempt_count() == 0 && !irqs_disabled())
- # define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1)
- #else
-Index: linux-cfs-2.6.20.8.q/include/linux/ktime.h
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/include/linux/ktime.h
-+++ linux-cfs-2.6.20.8.q/include/linux/ktime.h
-@@ -274,4 +274,6 @@ extern void ktime_get_ts(struct timespec
- /* Get the real (wall-) time in timespec format: */
- #define ktime_get_real_ts(ts)	getnstimeofday(ts)
- 
-+extern ktime_t ktime_get(void);
-+
- #endif
-Index: linux-cfs-2.6.20.8.q/include/linux/sched.h
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/include/linux/sched.h
-+++ linux-cfs-2.6.20.8.q/include/linux/sched.h
-@@ -2,7 +2,6 @@
- #define _LINUX_SCHED_H
- 
- #include <linux/auxvec.h>	/* For AT_VECTOR_SIZE */
--
- /*
-  * cloning flags:
-  */
-@@ -37,6 +36,8 @@
- 
- #ifdef __KERNEL__
- 
-+#include <linux/rbtree.h>	/* For run_node */
-+
- struct sched_param {
- 	int sched_priority;
- };
-@@ -196,13 +197,13 @@ extern void init_idle(struct task_struct
- extern cpumask_t nohz_cpu_mask;
- 
- /*
-- * Only dump TASK_* tasks. (-1 for all tasks)
-+ * Only dump TASK_* tasks. (0 for all tasks)
-  */
- extern void show_state_filter(unsigned long state_filter);
- 
- static inline void show_state(void)
- {
--	show_state_filter(-1);
-+	show_state_filter(0);
- }
- 
- extern void show_regs(struct pt_regs *);
-@@ -464,7 +465,7 @@ struct signal_struct {
- 	 * from jiffies_to_ns(utime + stime) if sched_clock uses something
- 	 * other than jiffies.)
- 	 */
--	unsigned long long sched_time;
-+	unsigned long long sum_sched_runtime;
- 
- 	/*
- 	 * We don't bother to synchronize most readers of this at all,
-@@ -524,6 +525,7 @@ struct signal_struct {
- #define MAX_RT_PRIO		MAX_USER_RT_PRIO
- 
- #define MAX_PRIO		(MAX_RT_PRIO + 40)
-+#define DEFAULT_PRIO		(MAX_RT_PRIO + 20)
- 
- #define rt_prio(prio)		unlikely((prio) < MAX_RT_PRIO)
- #define rt_task(p)		rt_prio((p)->prio)
-@@ -635,7 +637,14 @@ enum idle_type
- /*
-  * sched-domains (multiprocessor balancing) declarations:
-  */
--#define SCHED_LOAD_SCALE	128UL	/* increase resolution of load */
-+
-+/*
-+ * Increase resolution of nice-level calculations:
-+ */
-+#define SCHED_LOAD_SHIFT	10
-+#define SCHED_LOAD_SCALE	(1UL << SCHED_LOAD_SHIFT)
-+
-+#define SCHED_LOAD_SCALE_FUZZ	(SCHED_LOAD_SCALE >> 5)
- 
- #ifdef CONFIG_SMP
- #define SD_LOAD_BALANCE		1	/* Do load balancing on this domain. */
-@@ -684,7 +693,6 @@ struct sched_domain {
- 	unsigned int imbalance_pct;	/* No balance until over watermark */
- 	unsigned long long cache_hot_time; /* Task considered cache hot (ns) */
- 	unsigned int cache_nice_tries;	/* Leave cache hot tasks for # tries */
--	unsigned int per_cpu_gain;	/* CPU % gained by adding domain cpus */
- 	unsigned int busy_idx;
- 	unsigned int idle_idx;
- 	unsigned int newidle_idx;
-@@ -733,12 +741,6 @@ struct sched_domain {
- extern int partition_sched_domains(cpumask_t *partition1,
- 				    cpumask_t *partition2);
- 
--/*
-- * Maximum cache size the migration-costs auto-tuning code will
-- * search from:
-- */
--extern unsigned int max_cache_size;
--
- #endif	/* CONFIG_SMP */
- 
- 
-@@ -789,14 +791,28 @@ struct mempolicy;
- struct pipe_inode_info;
- struct uts_namespace;
- 
--enum sleep_type {
--	SLEEP_NORMAL,
--	SLEEP_NONINTERACTIVE,
--	SLEEP_INTERACTIVE,
--	SLEEP_INTERRUPTED,
--};
-+struct rq;
- 
--struct prio_array;
-+struct sched_class {
-+	struct sched_class *next;
-+
-+	void (*enqueue_task) (struct rq *rq, struct task_struct *p,
-+			      int wakeup, u64 now);
-+	void (*dequeue_task) (struct rq *rq, struct task_struct *p,
-+			      int sleep, u64 now);
-+	void (*yield_task) (struct rq *rq, struct task_struct *p,
-+			    struct task_struct *p_to);
-+
-+	void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
-+
-+	struct task_struct * (*pick_next_task) (struct rq *rq, u64 now);
-+	void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now);
-+
-+	struct task_struct * (*load_balance_start) (struct rq *rq);
-+	struct task_struct * (*load_balance_next) (struct rq *rq);
-+	void (*task_tick) (struct rq *rq, struct task_struct *p);
-+	void (*task_new) (struct rq *rq, struct task_struct *p);
-+};
- 
- struct task_struct {
- 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
-@@ -813,26 +829,45 @@ struct task_struct {
- #endif
- #endif
- 	int load_weight;	/* for niceness load balancing purposes */
-+	int load_shift;
-+
- 	int prio, static_prio, normal_prio;
-+	int on_rq;
- 	struct list_head run_list;
--	struct prio_array *array;
-+	struct rb_node run_node;
- 
- 	unsigned short ioprio;
- #ifdef CONFIG_BLK_DEV_IO_TRACE
- 	unsigned int btrace_seq;
- #endif
--	unsigned long sleep_avg;
--	unsigned long long timestamp, last_ran;
--	unsigned long long sched_time; /* sched_clock time spent running */
--	enum sleep_type sleep_type;
-+	/* CFS scheduling class statistics fields: */
-+	u64 wait_start_fair;
-+	u64 wait_start;
-+	u64 exec_start;
-+	u64 sleep_start;
-+	u64 block_start;
-+	u64 sleep_max;
-+	u64 block_max;
-+	u64 exec_max;
-+	u64 wait_max;
-+	u64 last_ran;
-+
-+	s64 wait_runtime;
-+	u64 sum_exec_runtime;
-+	s64 fair_key;
-+	s64 sum_wait_runtime;
- 
- 	unsigned long policy;
- 	cpumask_t cpus_allowed;
--	unsigned int time_slice, first_time_slice;
-+	unsigned int time_slice;
-+	struct sched_class *sched_class;
-+
-+	s64 min_wait_runtime;
- 
- #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
- 	struct sched_info sched_info;
- #endif
-+	u64 nr_switches;
- 
- 	struct list_head tasks;
- 	/*
-@@ -1195,8 +1230,9 @@ static inline int set_cpus_allowed(struc
- #endif
- 
- extern unsigned long long sched_clock(void);
-+extern void sched_clock_unstable_event(void);
- extern unsigned long long
--current_sched_time(const struct task_struct *current_task);
-+current_sched_runtime(const struct task_struct *current_task);
- 
- /* sched_exec is called by processes performing an exec */
- #ifdef CONFIG_SMP
-@@ -1212,6 +1248,13 @@ static inline void idle_task_exit(void) 
- #endif
- 
- extern void sched_idle_next(void);
-+extern char * sched_print_task_state(struct task_struct *p, char *buffer);
-+
-+extern unsigned int sysctl_sched_granularity;
-+extern unsigned int sysctl_sched_wakeup_granularity;
-+extern unsigned int sysctl_sched_sleep_history_max;
-+extern unsigned int sysctl_sched_child_runs_first;
-+extern unsigned int sysctl_sched_load_smoothing;
- 
- #ifdef CONFIG_RT_MUTEXES
- extern int rt_mutex_getprio(struct task_struct *p);
-@@ -1290,8 +1333,7 @@ extern void FASTCALL(wake_up_new_task(st
- #else
-  static inline void kick_process(struct task_struct *tsk) { }
- #endif
--extern void FASTCALL(sched_fork(struct task_struct * p, int clone_flags));
--extern void FASTCALL(sched_exit(struct task_struct * p));
-+extern void sched_fork(struct task_struct * p, int clone_flags);
- 
- extern int in_group_p(gid_t);
- extern int in_egroup_p(gid_t);
-Index: linux-cfs-2.6.20.8.q/include/linux/topology.h
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/include/linux/topology.h
-+++ linux-cfs-2.6.20.8.q/include/linux/topology.h
-@@ -96,7 +96,6 @@
- 	.busy_factor		= 64,			\
- 	.imbalance_pct		= 110,			\
- 	.cache_nice_tries	= 0,			\
--	.per_cpu_gain		= 25,			\
- 	.busy_idx		= 0,			\
- 	.idle_idx		= 0,			\
- 	.newidle_idx		= 1,			\
-@@ -128,7 +127,6 @@
- 	.busy_factor		= 64,			\
- 	.imbalance_pct		= 125,			\
- 	.cache_nice_tries	= 1,			\
--	.per_cpu_gain		= 100,			\
- 	.busy_idx		= 2,			\
- 	.idle_idx		= 1,			\
- 	.newidle_idx		= 2,			\
-@@ -159,7 +157,6 @@
- 	.busy_factor		= 64,			\
- 	.imbalance_pct		= 125,			\
- 	.cache_nice_tries	= 1,			\
--	.per_cpu_gain		= 100,			\
- 	.busy_idx		= 2,			\
- 	.idle_idx		= 1,			\
- 	.newidle_idx		= 2,			\
-@@ -193,7 +190,6 @@
- 	.newidle_idx		= 0, /* unused */	\
- 	.wake_idx		= 0, /* unused */	\
- 	.forkexec_idx		= 0, /* unused */	\
--	.per_cpu_gain		= 100,			\
- 	.flags			= SD_LOAD_BALANCE	\
- 				| SD_SERIALIZE,	\
- 	.last_balance		= jiffies,		\
-Index: linux-cfs-2.6.20.8.q/init/main.c
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/init/main.c
-+++ linux-cfs-2.6.20.8.q/init/main.c
-@@ -422,7 +422,7 @@ static void noinline rest_init(void)
- 
- 	/*
- 	 * The boot idle thread must execute schedule()
--	 * at least one to get things moving:
-+	 * at least once to get things moving:
- 	 */
- 	preempt_enable_no_resched();
- 	schedule();
-Index: linux-cfs-2.6.20.8.q/kernel/exit.c
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/kernel/exit.c
-+++ linux-cfs-2.6.20.8.q/kernel/exit.c
-@@ -112,7 +112,7 @@ static void __exit_signal(struct task_st
- 		sig->maj_flt += tsk->maj_flt;
- 		sig->nvcsw += tsk->nvcsw;
- 		sig->nivcsw += tsk->nivcsw;
--		sig->sched_time += tsk->sched_time;
-+		sig->sum_sched_runtime += tsk->sum_exec_runtime;
- 		sig = NULL; /* Marker for below. */
- 	}
- 
-@@ -170,7 +170,6 @@ repeat:
- 		zap_leader = (leader->exit_signal == -1);
- 	}
- 
--	sched_exit(p);
- 	write_unlock_irq(&tasklist_lock);
- 	proc_flush_task(p);
- 	release_thread(p);
-Index: linux-cfs-2.6.20.8.q/kernel/fork.c
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/kernel/fork.c
-+++ linux-cfs-2.6.20.8.q/kernel/fork.c
-@@ -874,7 +874,7 @@ static inline int copy_signal(unsigned l
- 	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
- 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
- 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
--	sig->sched_time = 0;
-+	sig->sum_sched_runtime = 0;
- 	INIT_LIST_HEAD(&sig->cpu_timers[0]);
- 	INIT_LIST_HEAD(&sig->cpu_timers[1]);
- 	INIT_LIST_HEAD(&sig->cpu_timers[2]);
-@@ -1037,7 +1037,7 @@ static struct task_struct *copy_process(
- 
- 	p->utime = cputime_zero;
- 	p->stime = cputime_zero;
-- 	p->sched_time = 0;
-+
- 	p->rchar = 0;		/* I/O counter: bytes read */
- 	p->wchar = 0;		/* I/O counter: bytes written */
- 	p->syscr = 0;		/* I/O counter: read syscalls */
-Index: linux-cfs-2.6.20.8.q/kernel/hrtimer.c
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/kernel/hrtimer.c
-+++ linux-cfs-2.6.20.8.q/kernel/hrtimer.c
-@@ -45,7 +45,7 @@
-  *
-  * returns the time in ktime_t format
-  */
--static ktime_t ktime_get(void)
-+ktime_t ktime_get(void)
- {
- 	struct timespec now;
- 
-Index: linux-cfs-2.6.20.8.q/kernel/posix-cpu-timers.c
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/kernel/posix-cpu-timers.c
-+++ linux-cfs-2.6.20.8.q/kernel/posix-cpu-timers.c
-@@ -161,7 +161,7 @@ static inline cputime_t virt_ticks(struc
- }
- static inline unsigned long long sched_ns(struct task_struct *p)
- {
--	return (p == current) ? current_sched_time(p) : p->sched_time;
-+	return (p == current) ? current_sched_runtime(p) : p->sum_exec_runtime;
- }
- 
- int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
-@@ -246,10 +246,10 @@ static int cpu_clock_sample_group_locked
- 		} while (t != p);
- 		break;
- 	case CPUCLOCK_SCHED:
--		cpu->sched = p->signal->sched_time;
-+		cpu->sched = p->signal->sum_sched_runtime;
- 		/* Add in each other live thread.  */
- 		while ((t = next_thread(t)) != p) {
--			cpu->sched += t->sched_time;
-+			cpu->sched += t->sum_exec_runtime;
- 		}
- 		cpu->sched += sched_ns(p);
- 		break;
-@@ -417,7 +417,7 @@ int posix_cpu_timer_del(struct k_itimer 
-  */
- static void cleanup_timers(struct list_head *head,
- 			   cputime_t utime, cputime_t stime,
--			   unsigned long long sched_time)
-+			   unsigned long long sum_exec_runtime)
- {
- 	struct cpu_timer_list *timer, *next;
- 	cputime_t ptime = cputime_add(utime, stime);
-@@ -446,10 +446,10 @@ static void cleanup_timers(struct list_h
- 	++head;
- 	list_for_each_entry_safe(timer, next, head, entry) {
- 		list_del_init(&timer->entry);
--		if (timer->expires.sched < sched_time) {
-+		if (timer->expires.sched < sum_exec_runtime) {
- 			timer->expires.sched = 0;
- 		} else {
--			timer->expires.sched -= sched_time;
-+			timer->expires.sched -= sum_exec_runtime;
- 		}
- 	}
- }
-@@ -462,7 +462,7 @@ static void cleanup_timers(struct list_h
- void posix_cpu_timers_exit(struct task_struct *tsk)
- {
- 	cleanup_timers(tsk->cpu_timers,
--		       tsk->utime, tsk->stime, tsk->sched_time);
-+		       tsk->utime, tsk->stime, tsk->sum_exec_runtime);
- 
- }
- void posix_cpu_timers_exit_group(struct task_struct *tsk)
-@@ -470,7 +470,7 @@ void posix_cpu_timers_exit_group(struct 
- 	cleanup_timers(tsk->signal->cpu_timers,
- 		       cputime_add(tsk->utime, tsk->signal->utime),
- 		       cputime_add(tsk->stime, tsk->signal->stime),
--		       tsk->sched_time + tsk->signal->sched_time);
-+		       tsk->sum_exec_runtime + tsk->signal->sum_sched_runtime);
- }
- 
- 
-@@ -531,7 +531,7 @@ static void process_timer_rebalance(stru
- 		nsleft = max_t(unsigned long long, nsleft, 1);
- 		do {
- 			if (likely(!(t->flags & PF_EXITING))) {
--				ns = t->sched_time + nsleft;
-+				ns = t->sum_exec_runtime + nsleft;
- 				if (t->it_sched_expires == 0 ||
- 				    t->it_sched_expires > ns) {
- 					t->it_sched_expires = ns;
-@@ -999,7 +999,7 @@ static void check_thread_timers(struct t
- 		struct cpu_timer_list *t = list_entry(timers->next,
- 						      struct cpu_timer_list,
- 						      entry);
--		if (!--maxfire || tsk->sched_time < t->expires.sched) {
-+		if (!--maxfire || tsk->sum_exec_runtime < t->expires.sched) {
- 			tsk->it_sched_expires = t->expires.sched;
- 			break;
- 		}
-@@ -1019,7 +1019,7 @@ static void check_process_timers(struct 
- 	int maxfire;
- 	struct signal_struct *const sig = tsk->signal;
- 	cputime_t utime, stime, ptime, virt_expires, prof_expires;
--	unsigned long long sched_time, sched_expires;
-+	unsigned long long sum_sched_runtime, sched_expires;
- 	struct task_struct *t;
- 	struct list_head *timers = sig->cpu_timers;
- 
-@@ -1039,12 +1039,12 @@ static void check_process_timers(struct 
- 	 */
- 	utime = sig->utime;
- 	stime = sig->stime;
--	sched_time = sig->sched_time;
-+	sum_sched_runtime = sig->sum_sched_runtime;
- 	t = tsk;
- 	do {
- 		utime = cputime_add(utime, t->utime);
- 		stime = cputime_add(stime, t->stime);
--		sched_time += t->sched_time;
-+		sum_sched_runtime += t->sum_exec_runtime;
- 		t = next_thread(t);
- 	} while (t != tsk);
- 	ptime = cputime_add(utime, stime);
-@@ -1085,7 +1085,7 @@ static void check_process_timers(struct 
- 		struct cpu_timer_list *t = list_entry(timers->next,
- 						      struct cpu_timer_list,
- 						      entry);
--		if (!--maxfire || sched_time < t->expires.sched) {
-+		if (!--maxfire || sum_sched_runtime < t->expires.sched) {
- 			sched_expires = t->expires.sched;
- 			break;
- 		}
-@@ -1177,7 +1177,7 @@ static void check_process_timers(struct 
- 		virt_left = cputime_sub(virt_expires, utime);
- 		virt_left = cputime_div_non_zero(virt_left, nthreads);
- 		if (sched_expires) {
--			sched_left = sched_expires - sched_time;
-+			sched_left = sched_expires - sum_sched_runtime;
- 			do_div(sched_left, nthreads);
- 			sched_left = max_t(unsigned long long, sched_left, 1);
- 		} else {
-@@ -1203,7 +1203,7 @@ static void check_process_timers(struct 
- 				t->it_virt_expires = ticks;
- 			}
- 
--			sched = t->sched_time + sched_left;
-+			sched = t->sum_exec_runtime + sched_left;
- 			if (sched_expires && (t->it_sched_expires == 0 ||
- 					      t->it_sched_expires > sched)) {
- 				t->it_sched_expires = sched;
-@@ -1295,7 +1295,7 @@ void run_posix_cpu_timers(struct task_st
- 
- 	if (UNEXPIRED(prof) && UNEXPIRED(virt) &&
- 	    (tsk->it_sched_expires == 0 ||
--	     tsk->sched_time < tsk->it_sched_expires))
-+	     tsk->sum_exec_runtime < tsk->it_sched_expires))
- 		return;
- 
- #undef	UNEXPIRED
-Index: linux-cfs-2.6.20.8.q/kernel/sched.c
-===================================================================
---- linux-cfs-2.6.20.8.q.orig/kernel/sched.c
-+++ linux-cfs-2.6.20.8.q/kernel/sched.c
-@@ -89,110 +89,13 @@
-  */
- #define MIN_TIMESLICE		max(5 * HZ / 1000, 1)
- #define DEF_TIMESLICE		(100 * HZ / 1000)
--#define ON_RUNQUEUE_WEIGHT	 30
--#define CHILD_PENALTY		 95
--#define PARENT_PENALTY		100
--#define EXIT_WEIGHT		  3
--#define PRIO_BONUS_RATIO	 25
--#define MAX_BONUS		(MAX_USER_PRIO * PRIO_BONUS_RATIO / 100)
--#define INTERACTIVE_DELTA	  2
--#define MAX_SLEEP_AVG		(DEF_TIMESLICE * MAX_BONUS)
--#define STARVATION_LIMIT	(MAX_SLEEP_AVG)
--#define NS_MAX_SLEEP_AVG	(JIFFIES_TO_NS(MAX_SLEEP_AVG))
--
--/*
-- * If a task is 'interactive' then we reinsert it in the active
-- * array after it has expired its current timeslice. (it will not
-- * continue to run immediately, it will still roundrobin with
-- * other interactive tasks.)
-- *
-- * This part scales the interactivity limit depending on niceness.
-- *
-- * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
-- * Here are a few examples of different nice levels:
-- *
-- *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
-- *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
-- *  TASK_INTERACTIVE(  0): [1,1,1,1,0,0,0,0,0,0,0]
-- *  TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
-- *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
-- *
-- * (the X axis represents the possible -5 ... 0 ... +5 dynamic
-- *  priority range a task can explore, a value of '1' means the
-- *  task is rated interactive.)
-- *
-- * Ie. nice +19 tasks can never get 'interactive' enough to be
-- * reinserted into the active array. And only heavily CPU-hog nice -20
-- * tasks will be expired. Default nice 0 tasks are somewhere between,
-- * it takes some effort for them to get interactive, but it's not
-- * too hard.
-- */
--
--#define CURRENT_BONUS(p) \
--	(NS_TO_JIFFIES((p)->sleep_avg) * MAX_BONUS / \
--		MAX_SLEEP_AVG)
--
--#define GRANULARITY	(10 * HZ / 1000 ? : 1)
--
--#ifdef CONFIG_SMP
--#define TIMESLICE_GRANULARITY(p)	(GRANULARITY * \
--		(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \
--			num_online_cpus())
--#else
--#define TIMESLICE_GRANULARITY(p)	(GRANULARITY * \
--		(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)))
--#endif
--
--#define SCALE(v1,v1_max,v2_max) \
--	(v1) * (v2_max) / (v1_max)
--
--#define DELTA(p) \
--	(SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \
--		INTERACTIVE_DELTA)
--
--#define TASK_INTERACTIVE(p) \
--	((p)->prio <= (p)->static_prio - DELTA(p))
--
--#define INTERACTIVE_SLEEP(p) \
--	(JIFFIES_TO_NS(MAX_SLEEP_AVG * \
--		(MAX_BONUS / 2 + DELTA((p)) + 1) / MAX_BONUS - 1))
--
--#define TASK_PREEMPTS_CURR(p, rq) \
--	((p)->prio < (rq)->curr->prio)
--
--#define SCALE_PRIO(x, prio) \
--	max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE)
--
--static unsigned int static_prio_timeslice(int static_prio)
--{
--	if (static_prio < NICE_TO_PRIO(0))
--		return SCALE_PRIO(DEF_TIMESLICE * 4, static_prio);
--	else
--		return SCALE_PRIO(DEF_TIMESLICE, static_prio);
--}
--
--/*
-- * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
-- * to time slice values: [800ms ... 100ms ... 5ms]
-- *
-- * The higher a thread's priority, the bigger timeslices
-- * it gets during one round of execution. But even the lowest
-- * priority thread gets MIN_TIMESLICE worth of execution time.
-- */
--
--static inline unsigned int task_timeslice(struct task_struct *p)
--{
--	return static_prio_timeslice(p->static_prio);
--}
- 
- /*
-- * These are the runqueue data structures:
-+ * This is the priority-queue data structure of the RT scheduling class:
-  */
--
- struct prio_array {
--	unsigned int nr_active;
--	DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */
--	struct list_head queue[MAX_PRIO];
-+	DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
-+	struct list_head queue[MAX_RT_PRIO];
- };
- 
- /*
-@@ -209,12 +112,13 @@ struct rq {
- 	 * nr_running and cpu_load should be in the same cacheline because
- 	 * remote CPUs use both these fields when doing load calculation.
- 	 */
--	unsigned long nr_running;
-+	long nr_running;
- 	unsigned long raw_weighted_load;
--#ifdef CONFIG_SMP
--	unsigned long cpu_load[3];
--#endif
--	unsigned long long nr_switches;
-+	#define CPU_LOAD_IDX_MAX 5
-+	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
-+
-+	u64 nr_switches;
-+	unsigned long nr_load_updates;
- 
- 	/*
- 	 * This is part of a global counter where only the total sum
-@@ -224,14 +128,29 @@ struct rq {
- 	 */
- 	unsigned long nr_uninterruptible;
- 
--	unsigned long expired_timestamp;
--	/* Cached timestamp set by update_cpu_clock() */
--	unsigned long long most_recent_timestamp;
- 	struct task_struct *curr, *idle;
- 	unsigned long next_balance;
- 	struct mm_struct *prev_mm;
--	struct prio_array *active, *expired, arrays[2];
--	int best_expired_prio;
-+
-+	u64 clock, prev_clock_raw;
-+	s64 clock_max_delta;
-+	u64 fair_clock, prev_fair_clock;
-+	u64 exec_clock, prev_exec_clock;
-+	u64 wait_runtime;
-+
-+	unsigned int clock_warps;
-+	unsigned int clock_unstable_events;
-+
-+	struct sched_class *load_balance_class;
-+
-+	struct prio_array active;
-+	int rt_load_balance_idx;
-+	struct list_head *rt_load_balance_head, *rt_load_balance_curr;
-+
-+	struct rb_root tasks_timeline;
-+	struct rb_node *rb_leftmost;
-+	struct rb_node *rb_load_balance_curr;
-+
- 	atomic_t nr_iowait;
- 
- #ifdef CONFIG_SMP
-@@ -268,7 +187,107 @@ struct rq {
- 	struct lock_class_key rq_lock_key;
- };
- 
--static DEFINE_PER_CPU(struct rq, runqueues);
-+static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp;
-+
-+static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
-+{
-+	rq->curr->sched_class->check_preempt_curr(rq, p);
-+}
-+
-+#define SCALE_PRIO(x, prio) \
-+	max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE)
-+
-+/*
-+ * static_prio_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
-+ * to time slice values: [800ms ... 100ms ... 5ms]
-+ */
-+static unsigned int static_prio_timeslice(int static_prio)
-+{
-+	if (static_prio == NICE_TO_PRIO(19))
-+		return 1;
-+
-+	if (static_prio < NICE_TO_PRIO(0))
-+		return SCALE_PRIO(DEF_TIMESLICE * 4, static_prio);
-+	else
-+		return SCALE_PRIO(DEF_TIMESLICE, static_prio);
-+}
-+
-+/*
-+ * Print out various scheduling related per-task fields:
-+ */
-+char * sched_print_task_state(struct task_struct *p, char *buffer)
-+{
-+	struct rq *this_rq = &per_cpu(runqueues, raw_smp_processor_id());
-+	unsigned long long t0, t1;
-+
-+#define P(F) \
-+	buffer += sprintf(buffer, "%-25s:%20Ld\n", #F, (long long)p->F)
-+
-+	P(wait_start);
-+	P(wait_start_fair);
-+	P(exec_start);
-+	P(sleep_start);
-+	P(block_start);
-+	P(sleep_max);
-+	P(block_max);
-+	P(exec_max);
-+	P(wait_max);
-+	P(min_wait_runtime);
-+	P(last_ran);
-+	P(wait_runtime);
-+	P(sum_exec_runtime);
-+#undef P
-+
-+	t0 = sched_clock();
-+	t1 = sched_clock();
-+	buffer += sprintf(buffer, "%-25s:%20Ld\n", "clock-delta",
-+				(long long)t1-t0);
-+	buffer += sprintf(buffer, "%-25s:%20Ld\n", "rq-wait_runtime",
-+				(long long)this_rq->wait_runtime);
-+	buffer += sprintf(buffer, "%-25s:%20Ld\n", "rq-exec_clock",
-+				(long long)this_rq->exec_clock);
-+	buffer += sprintf(buffer, "%-25s:%20Ld\n", "rq-fair_clock",
-+				(long long)this_rq->fair_clock);
-+	buffer += sprintf(buffer, "%-25s:%20Ld\n", "rq-clock",
-+				(long long)this_rq->clock);
-+	buffer += sprintf(buffer, "%-25s:%20Ld\n", "rq-prev_clock_raw",
-+				(long long)this_rq->prev_clock_raw);
-+	buffer += sprintf(buffer, "%-25s:%20Ld\n", "rq-clock_max_delta",
-+				(long long)this_rq->clock_max_delta);
-+	buffer += sprintf(buffer, "%-25s:%20u\n", "rq-clock_warps",
-+				this_rq->clock_warps);
-+	buffer += sprintf(buffer, "%-25s:%20u\n", "rq-clock_unstable_events",
-+				this_rq->clock_unstable_events);
-+	return buffer;
-+}
-+
-+/*
-+ * Per-runqueue clock, as finegrained as the platform can give us:
-+ */
-+static inline unsigned long long __rq_clock(struct rq *rq)
-+{
-+	u64 now = sched_clock();
-+	u64 clock = rq->clock;
-+	u64 prev_raw = rq->prev_clock_raw;
-+	s64 delta = now - prev_raw;
-+
-+	/*
-+	 * Protect against sched_clock() occasionally going backwards:
-+	 */
-+	if (unlikely(delta < 0)) {
-+		clock++;
-+		rq->clock_warps++;
-+	} else {
-+		if (unlikely(delta > rq->clock_max_delta))
-+			rq->clock_max_delta = delta;
-+		clock += delta;
-+	}
-+
-+	rq->prev_clock_raw = now;
-+	rq->clock = clock;
-+
-+	return clock;
-+}
- 
- static inline int cpu_of(struct rq *rq)
- {
-@@ -279,6 +298,16 @@ static inline int cpu_of(struct rq *rq)
- #endif
- }
- 
-+static inline unsigned long long rq_clock(struct rq *rq)
-+{
-+	int this_cpu = smp_processor_id();
-+
-+	if (this_cpu == cpu_of(rq))
-+		return __rq_clock(rq);
-+
-+	return rq->clock;
-+}
-+
- /*
-  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
-  * See detach_destroy_domains: synchronize_sched for details.
-@@ -423,134 +452,6 @@ static inline void task_rq_unlock(struct
- 	spin_unlock_irqrestore(&rq->lock, *flags);
- }
- 
--#ifdef CONFIG_SCHEDSTATS
--/*
-- * bump this up when changing the output format or the meaning of an existing
-- * format, so that tools can adapt (or abort)
-- */
--#define SCHEDSTAT_VERSION 14
--
--static int show_schedstat(struct seq_file *seq, void *v)
--{
--	int cpu;
--
--	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
--	seq_printf(seq, "timestamp %lu\n", jiffies);
--	for_each_online_cpu(cpu) {
--		struct rq *rq = cpu_rq(cpu);
--#ifdef CONFIG_SMP
--		struct sched_domain *sd;
--		int dcnt = 0;
--#endif
--
--		/* runqueue-specific stats */
--		seq_printf(seq,
--		    "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
--		    cpu, rq->yld_both_empty,
--		    rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt,
--		    rq->sched_switch, rq->sched_cnt, rq->sched_goidle,
--		    rq->ttwu_cnt, rq->ttwu_local,
--		    rq->rq_sched_info.cpu_time,
--		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt);
--
--		seq_printf(seq, "\n");
--
--#ifdef CONFIG_SMP
--		/* domain-specific stats */
--		preempt_disable();
--		for_each_domain(cpu, sd) {
--			enum idle_type itype;
--			char mask_str[NR_CPUS];
--
--			cpumask_scnprintf(mask_str, NR_CPUS, sd->span);
--			seq_printf(seq, "domain%d %s", dcnt++, mask_str);
--			for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES;
--					itype++) {
--				seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu "
--						"%lu",
--				    sd->lb_cnt[itype],
--				    sd->lb_balanced[itype],
--				    sd->lb_failed[itype],
--				    sd->lb_imbalance[itype],
--				    sd->lb_gained[itype],
--				    sd->lb_hot_gained[itype],
--				    sd->lb_nobusyq[itype],
--				    sd->lb_nobusyg[itype]);
--			}
--			seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu"
--			    " %lu %lu %lu\n",
--			    sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
--			    sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed,
--			    sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed,
--			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
--			    sd->ttwu_move_balance);
--		}
--		preempt_enable();
--#endif
--	}
--	return 0;
--}
--
--static int schedstat_open(struct inode *inode, struct file *file)
--{
--	unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
--	char *buf = kmalloc(size, GFP_KERNEL);
--	struct seq_file *m;
--	int res;
--
--	if (!buf)
--		return -ENOMEM;
--	res = single_open(file, show_schedstat, NULL);
--	if (!res) {
--		m = file->private_data;
--		m->buf = buf;
--		m->size = size;
--	} else
--		kfree(buf);
--	return res;
--}
--
--const struct file_operations proc_schedstat_operations = {
--	.open    = schedstat_open,
--	.read    = seq_read,
--	.llseek  = seq_lseek,
--	.release = single_release,
--};
--
--/*
-- * Expects runqueue lock to be held for atomicity of update
-- */
--static inline void
--rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
--{
--	if (rq) {
--		rq->rq_sched_info.run_delay += delta_jiffies;
--		rq->rq_sched_info.pcnt++;
--	}
--}
--
--/*
-- * Expects runqueue lock to be held for atomicity of update
-- */
--static inline void
--rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
--{
--	if (rq)
--		rq->rq_sched_info.cpu_time += delta_jiffies;
--}
--# define schedstat_inc(rq, field)	do { (rq)->field++; } while (0)
--# define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
--#else /* !CONFIG_SCHEDSTATS */
--static inline void
--rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
--{}
--static inline void
--rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
--{}
--# define schedstat_inc(rq, field)	do { } while (0)
--# define schedstat_add(rq, field, amt)	do { } while (0)
--#endif
--
- /*
-  * this_rq_lock - lock this runqueue and disable interrupts.
-  */
-@@ -566,178 +467,60 @@ static inline struct rq *this_rq_lock(vo
- 	return rq;
- }
- 
--#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
--/*
-- * Called when a process is dequeued from the active array and given
-- * the cpu.  We should note that with the exception of interactive
-- * tasks, the expired queue will become the active queue after the active
-- * queue is empty, without explicitly dequeuing and requeuing tasks in the
-- * expired queue.  (Interactive tasks may be requeued directly to the
-- * active queue, thus delaying tasks in the expired queue from running;
-- * see scheduler_tick()).
-- *
-- * This function is only called from sched_info_arrive(), rather than
-- * dequeue_task(). Even though a task may be queued and dequeued multiple
-- * times as it is shuffled about, we're really interested in knowing how
-- * long it was from the *first* time it was queued to the time that it
-- * finally hit a cpu.
-- */
--static inline void sched_info_dequeued(struct task_struct *t)
--{
--	t->sched_info.last_queued = 0;
--}
--
- /*
-- * Called when a task finally hits the cpu.  We can now calculate how
-- * long it was waiting to run.  We also note when it began so that we
-- * can keep stats on how long its timeslice is.
-+ * CPU frequency is/was unstable - start new by setting prev_clock_raw:
-  */
--static void sched_info_arrive(struct task_struct *t)
-+void sched_clock_unstable_event(void)
- {
--	unsigned long now = jiffies, delta_jiffies = 0;
--
--	if (t->sched_info.last_queued)
--		delta_jiffies = now - t->sched_info.last_queued;
--	sched_info_dequeued(t);
--	t->sched_info.run_delay += delta_jiffies;
--	t->sched_info.last_arrival = now;
--	t->sched_info.pcnt++;
-+	unsigned long flags;
-+	struct rq *rq;
- 
--	rq_sched_info_arrive(task_rq(t), delta_jiffies);
-+	rq = task_rq_lock(current, &flags);
-+	rq->prev_clock_raw = sched_clock();
-+	rq->clock_unstable_events++;
-+	task_rq_unlock(rq, &flags);
- }
- 
- /*
-- * Called when a process is queued into either the active or expired
-- * array.  The time is noted and later used to determine how long we
-- * had to wait for us to reach the cpu.  Since the expired queue will
-- * become the active queue after active queue is empty, without dequeuing
-- * and requeuing any tasks, we are interested in queuing to either. It
-- * is unusual but not impossible for tasks to be dequeued and immediately
-- * requeued in the same or another array: this can happen in sched_yield(),
-- * set_user_nice(), and even load_balance() as it moves tasks from runqueue
-- * to runqueue.
-+ * resched_task - mark a task 'to be rescheduled now'.
-  *
-- * This function is only called from enqueue_task(), but also only updates
-- * the timestamp if it is already not set.  It's assumed that
-- * sched_info_dequeued() will clear that stamp when appropriate.
-- */
--static inline void sched_info_queued(struct task_struct *t)
--{
--	if (unlikely(sched_info_on()))
--		if (!t->sched_info.last_queued)
--			t->sched_info.last_queued = jiffies;
--}
--
--/*
-- * Called when a process ceases being the active-running process, either
-- * voluntarily or involuntarily.  Now we can calculate how long we ran.
-+ * On UP this means the setting of the need_resched flag, on SMP it
-+ * might also involve a cross-CPU call to trigger the scheduler on
-+ * the target CPU.
-  */
--static inline void sched_info_depart(struct task_struct *t)
--{
--	unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival;
-+#ifdef CONFIG_SMP
- 
--	t->sched_info.cpu_time += delta_jiffies;
--	rq_sched_info_depart(task_rq(t), delta_jiffies);
--}
-+#ifndef tsk_is_polling
-+#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
-+#endif
- 
--/*
-- * Called when tasks are switched involuntarily due, typically, to expiring
-- * their time slice.  (This may also be called when switching to or from
-- * the idle task.)  We are only called when prev != next.
-- */
--static inline void
--__sched_info_switch(struct task_struct *prev, struct task_struct *next)
-+static void resched_task(struct task_struct *p)
- {
--	struct rq *rq = task_rq(prev);
--
--	/*
--	 * prev now departs the cpu.  It's not interesting to record
--	 * stats about how efficient we were at scheduling the idle
--	 * process, however.
--	 */
--	if (prev != rq->idle)
--		sched_info_depart(prev);
-+	int cpu;
- 
--	if (next != rq->idle)
--		sched_info_arrive(next);
--}
--static inline void
--sched_info_switch(struct task_struct *prev, struct task_struct *next)
--{
--	if (unlikely(sched_info_on()))
--		__sched_info_switch(prev, next);
--}
--#else
--#define sched_info_queued(t)		do { } while (0)
--#define sched_info_switch(t, next)	do { } while (0)
--#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
-+	assert_spin_locked(&task_rq(p)->lock);
- 
--/*
-- * Adding/removing a task to/from a priority array:
-- */
--static void dequeue_task(struct task_struct *p, struct prio_array *array)
--{
--	array->nr_active--;
--	list_del(&p->run_list);
--	if (list_empty(array->queue + p->prio))
--		__clear_bit(p->prio, array->bitmap);
--}
-+	if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
-+		return;
- 
--static void enqueue_task(struct task_struct *p, struct prio_array *array)
--{
--	sched_info_queued(p);
--	list_add_tail(&p->run_list, array->queue + p->prio);
--	__set_bit(p->prio, array->bitmap);
--	array->nr_active++;
--	p->array = array;
--}
-+	set_tsk_thread_flag(p, TIF_NEED_RESCHED);
- 
--/*
-- * Put task to the end of the run list without the overhead of dequeue
-- * followed by enqueue.
-- */
--static void requeue_task(struct task_struct *p, struct prio_array *array)
--{
--	list_move_tail(&p->run_list, array->queue + p->prio);
--}
-+	cpu = task_cpu(p);
-+	if (cpu == smp_processor_id())
-+		return;
- 
--static inline void
--enqueue_task_head(struct task_struct *p, struct prio_array *array)
--{
--	list_add(&p->run_list, array->queue + p->prio);
--	__set_bit(p->prio, array->bitmap);
--	array->nr_active++;
--	p->array = array;
-+	/* NEED_RESCHED must be visible before we test polling */
-+	smp_mb();
-+	if (!tsk_is_polling(p))
-+		smp_send_reschedule(cpu);
- }
--
--/*
-- * __normal_prio - return the priority that is based on the static
-- * priority but is modified by bonuses/penalties.
-- *
-- * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
-- * into the -5 ... 0 ... +5 bonus/penalty range.
-- *
-- * We use 25% of the full 0...39 priority range so that:
-- *
-- * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
-- * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
-- *
-- * Both properties are important to certain workloads.
-- */
--
--static inline int __normal_prio(struct task_struct *p)
-+#else
-+static inline void resched_task(struct task_struct *p)
- {
--	int bonus, prio;
--
--	bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
--
--	prio = p->static_prio - bonus;
--	if (prio < MAX_RT_PRIO)
--		prio = MAX_RT_PRIO;
--	if (prio > MAX_PRIO-1)
--		prio = MAX_PRIO-1;
--	return prio;
-+	assert_spin_locked(&task_rq(p)->lock);
-+	set_tsk_need_resched(p);
- }
-+#endif
- 
- /*
-  * To aid in avoiding the subversion of "niceness" due to uneven distribution
-@@ -761,22 +544,33 @@ static inline int __normal_prio(struct t
- #define RTPRIO_TO_LOAD_WEIGHT(rp) \
- 	(PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp))
- 
-+/*
-+ * Nice levels are logarithmic. These are the load shifts assigned
-+ * to nice levels, where a step of every 2 nice levels means a
-+ * multiplicator of 2:
-+ */
-+const int prio_to_load_shift[40] = {
-+/* -20 */ 20, 19, 19, 18, 18, 17, 17, 16, 16, 15,
-+/* -10 */ 15, 14, 14, 13, 13, 12, 12, 11, 11, 10,
-+/*   0 */ 10,  9,  9,  8,  8,  7,  7,  6,  6,  5,
-+/*  10 */  5,  4,  4,  3,  3,  2,  2,  1,  1,  0
-+};
-+
-+static int get_load_shift(struct task_struct *p)
-+{
-+	int prio = p->static_prio;
-+
-+	if (rt_prio(prio) || p->policy == SCHED_BATCH)
-+		return 0;
-+
-+	return prio_to_load_shift[prio - MAX_RT_PRIO];
-+}
-+
- static void set_load_weight(struct task_struct *p)
- {
--	if (has_rt_policy(p)) {
--#ifdef CONFIG_SMP
--		if (p == task_rq(p)->migration_thread)
--			/*
--			 * The migration thread does the actual balancing.
--			 * Giving its load any weight will skew balancing
--			 * adversely.
--			 */
--			p->load_weight = 0;
--		else
--#endif
--			p->load_weight = RTPRIO_TO_LOAD_WEIGHT(p->rt_priority);
--	} else
--		p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio);
-+	p->load_shift = get_load_shift(p);
-+	p->load_weight = 1 << p->load_shift;
-+	p->wait_runtime = 0;
- }
- 
- static inline void
-@@ -803,6 +597,40 @@ static inline void dec_nr_running(struct
- 	dec_raw_weighted_load(rq, p);
- }
- 
-+static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
-+
-+#include "sched_stats.h"
-+#include "sched_rt.c"
-+#include "sched_fair.c"
-+#include "sched_debug.c"
-+
-+#define sched_class_highest (&rt_sched_class)
-+
-+static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
-+{
-+	u64 now = rq_clock(rq);
-+
-+	sched_info_queued(p);
-+	p->sched_class->enqueue_task(rq, p, wakeup, now);
-+	p->on_rq = 1;
-+}
-+
-+static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
-+{
-+	u64 now = rq_clock(rq);
-+
-+	p->sched_class->dequeue_task(rq, p, sleep, now);
-+	p->on_rq = 0;
-+}
-+
-+/*
-+ * __normal_prio - return the priority that is based on the static prio
-+ */
-+static inline int __normal_prio(struct task_struct *p)
-+{
-+	return p->static_prio;
-+}
-+
- /*
-  * Calculate the expected normal priority: i.e. priority
-  * without taking RT-inheritance into account. Might be
-@@ -842,210 +670,31 @@ static int effective_prio(struct task_st
- }
- 
- /*
-- * __activate_task - move a task to the runqueue.
-+ * activate_task - move a task to the runqueue.
-  */
--static void __activate_task(struct task_struct *p, struct rq *rq)
-+static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
- {
--	struct prio_array *target = rq->active;
--
--	if (batch_task(p))
--		target = rq->expired;
--	enqueue_task(p, target);
-+	enqueue_task(rq, p, wakeup);
- 	inc_nr_running(p, rq);
- }
- 
- /*
-- * __activate_idle_task - move idle task to the _front_ of runqueue.
-+ * activate_idle_task - move idle task to the _front_ of runqueue.
-  */
--static inline void __activate_idle_task(struct task_struct *p, struct rq *rq)
-+static inline void activate_idle_task(struct task_struct *p, struct rq *rq)
- {
--	enqueue_task_head(p, rq->active);
-+	enqueue_task(rq, p, 0);
- 	inc_nr_running(p, rq);
- }
- 
- /*
-- * Recalculate p->normal_prio and p->prio after having slept,
-- * updating the sleep-average too:
-- */
--static int recalc_task_prio(struct task_struct *p, unsigned long long now)
--{
--	/* Caller must always ensure 'now >= p->timestamp' */
--	unsigned long sleep_time = now - p->timestamp;
--
--	if (batch_task(p))
--		sleep_time = 0;
--
--	if (likely(sleep_time > 0)) {
--		/*
--		 * This ceiling is set to the lowest priority that would allow
--		 * a task to be reinserted into the active array on timeslice
--		 * completion.
--		 */
--		unsigned long ceiling = INTERACTIVE_SLEEP(p);
--
--		if (p->mm && sleep_time > ceiling && p->sleep_avg < ceiling) {
--			/*
--			 * Prevents user tasks from achieving best priority
--			 * with one single large enough sleep.
--			 */
--			p->sleep_avg = ceiling;
--			/*
--			 * Using INTERACTIVE_SLEEP() as a ceiling places a
--			 * nice(0) task 1ms sleep away from promotion, and
--			 * gives it 700ms to round-robin with no chance of
--			 * being demoted.  This is more than generous, so
--			 * mark this sleep as non-interactive to prevent the
--			 * on-runqueue bonus logic from intervening should
--			 * this task not receive cpu immediately.
--			 */
--			p->sleep_type = SLEEP_NONINTERACTIVE;
--		} else {
--			/*
--			 * Tasks waking from uninterruptible sleep are
--			 * limited in their sleep_avg rise as they
--			 * are likely to be waiting on I/O
--			 */
--			if (p->sleep_type == SLEEP