summaryrefslogtreecommitdiff
path: root/packages
diff options
context:
space:
mode:
authorLeon Woestenberg <leon.woestenberg@gmail.com>2007-05-06 15:33:37 +0000
committerLeon Woestenberg <leon.woestenberg@gmail.com>2007-05-06 15:33:37 +0000
commit7c8380efe7ac5b7d7aaf01f91599bb5e64542710 (patch)
treea0a76753f49c2f41304cf099c6d85c39ff546497 /packages
parent33de73cb673b87938d734673c5fec3e5957250a7 (diff)
parent4e64c8dfdb92fa358349c24969c617039ab88cd3 (diff)
merge of '61b65ffb0a858399f89407df40b6469b203bda44'
and 'ed3f0331cfddd29d423591aa25379dc841a37572'
Diffstat (limited to 'packages')
-rw-r--r--packages/linux/linux-efika-2.6.20.11/.mtn2git_empty0
-rw-r--r--packages/linux/linux-efika-2.6.20.11/sched-cfs-v9-v2.6.20.11.patch5590
-rw-r--r--packages/linux/linux-efika_2.6.20.11.bb86
3 files changed, 5676 insertions, 0 deletions
diff --git a/packages/linux/linux-efika-2.6.20.11/.mtn2git_empty b/packages/linux/linux-efika-2.6.20.11/.mtn2git_empty
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/packages/linux/linux-efika-2.6.20.11/.mtn2git_empty
diff --git a/packages/linux/linux-efika-2.6.20.11/sched-cfs-v9-v2.6.20.11.patch b/packages/linux/linux-efika-2.6.20.11/sched-cfs-v9-v2.6.20.11.patch
new file mode 100644
index 0000000000..29071a99ac
--- /dev/null
+++ b/packages/linux/linux-efika-2.6.20.11/sched-cfs-v9-v2.6.20.11.patch
@@ -0,0 +1,5590 @@
+This is the Completely Fair Scheduler (CFS) v9 patch for
+linux 2.6.20.10 patch (rediffed cleanly against .11).
+
+http://people.redhat.com/mingo/cfs-scheduler/
+
+Index: linux-cfs-2.6.20.8.q/Documentation/kernel-parameters.txt
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/Documentation/kernel-parameters.txt
++++ linux-cfs-2.6.20.8.q/Documentation/kernel-parameters.txt
+@@ -914,49 +914,6 @@ and is between 256 and 4096 characters.
+
+ mga= [HW,DRM]
+
+- migration_cost=
+- [KNL,SMP] debug: override scheduler migration costs
+- Format: <level-1-usecs>,<level-2-usecs>,...
+- This debugging option can be used to override the
+- default scheduler migration cost matrix. The numbers
+- are indexed by 'CPU domain distance'.
+- E.g. migration_cost=1000,2000,3000 on an SMT NUMA
+- box will set up an intra-core migration cost of
+- 1 msec, an inter-core migration cost of 2 msecs,
+- and an inter-node migration cost of 3 msecs.
+-
+- WARNING: using the wrong values here can break
+- scheduler performance, so it's only for scheduler
+- development purposes, not production environments.
+-
+- migration_debug=
+- [KNL,SMP] migration cost auto-detect verbosity
+- Format=<0|1|2>
+- If a system's migration matrix reported at bootup
+- seems erroneous then this option can be used to
+- increase verbosity of the detection process.
+- We default to 0 (no extra messages), 1 will print
+- some more information, and 2 will be really
+- verbose (probably only useful if you also have a
+- serial console attached to the system).
+-
+- migration_factor=
+- [KNL,SMP] multiply/divide migration costs by a factor
+- Format=<percent>
+- This debug option can be used to proportionally
+- increase or decrease the auto-detected migration
+- costs for all entries of the migration matrix.
+- E.g. migration_factor=150 will increase migration
+- costs by 50%. (and thus the scheduler will be less
+- eager migrating cache-hot tasks)
+- migration_factor=80 will decrease migration costs
+- by 20%. (thus the scheduler will be more eager to
+- migrate tasks)
+-
+- WARNING: using the wrong values here can break
+- scheduler performance, so it's only for scheduler
+- development purposes, not production environments.
+-
+ mousedev.tap_time=
+ [MOUSE] Maximum time between finger touching and
+ leaving touchpad surface for touch to be considered
+Index: linux-cfs-2.6.20.8.q/Documentation/sched-design-CFS.txt
+===================================================================
+--- /dev/null
++++ linux-cfs-2.6.20.8.q/Documentation/sched-design-CFS.txt
+@@ -0,0 +1,107 @@
++[announce] [patch] Modular Scheduler Core and Completely Fair Scheduler [CFS]
++
++i'm pleased to announce the first release of the "Modular Scheduler Core
++and Completely Fair Scheduler [CFS]" patchset:
++
++ http://redhat.com/~mingo/cfs-scheduler/
++
++This project is a complete rewrite of the Linux task scheduler. My goal
++is to address various feature requests and to fix deficiencies in the
++vanilla scheduler that were suggested/found in the past few years, both
++for desktop scheduling and for server scheduling workloads.
++
++[ QuickStart: apply the patch, recompile, reboot. The new scheduler
++ will be active by default and all tasks will default to the
++ SCHED_NORMAL interactive scheduling class. ]
++
++Highlights are:
++
++ - the introduction of Scheduling Classes: an extensible hierarchy of
++ scheduler modules. These modules encapsulate scheduling policy
++ details and are handled by the scheduler core without the core
++ code assuming about them too much.
++
++ - sched_fair.c implements the 'CFS desktop scheduler': it is a
++ replacement for the vanilla scheduler's SCHED_OTHER interactivity
++ code.
++
++ i'd like to give credit to Con Kolivas for the general approach here:
++ he has proven via RSDL/SD that 'fair scheduling' is possible and that
++ it results in better desktop scheduling. Kudos Con!
++
++ The CFS patch uses a completely different approach and implementation
++ from RSDL/SD. My goal was to make CFS's interactivity quality exceed
++ that of RSDL/SD, which is a high standard to meet :-) Testing
++ feedback is welcome to decide this one way or another. [ and, in any
++ case, all of SD's logic could be added via a kernel/sched_sd.c module
++ as well, if Con is interested in such an approach. ]
++
++ CFS's design is quite radical: it does not use runqueues, it uses a
++ time-ordered rbtree to build a 'timeline' of future task execution,
++ and thus has no 'array switch' artifacts (by which both the vanilla
++ scheduler and RSDL/SD are affected).
++
++ CFS uses nanosecond granularity accounting and does not rely on any
++ jiffies or other HZ detail. Thus the CFS scheduler has no notion of
++ 'timeslices' and has no heuristics whatsoever. There is only one
++ central tunable:
++
++ /proc/sys/kernel/sched_granularity_ns
++
++ which can be used to tune the scheduler from 'desktop' (low
++ latencies) to 'server' (good batching) workloads. It defaults to a
++ setting suitable for desktop workloads. SCHED_BATCH is handled by the
++ CFS scheduler module too.
++
++ due to its design, the CFS scheduler is not prone to any of the
++ 'attacks' that exist today against the heuristics of the stock
++ scheduler: fiftyp.c, thud.c, chew.c, ring-test.c, massive_intr.c all
++ work fine and do not impact interactivity and produce the expected
++ behavior.
++
++ the CFS scheduler has a much stronger handling of nice levels and
++ SCHED_BATCH: both types of workloads should be isolated much more
++ aggressively than under the vanilla scheduler.
++
++ ( another detail: due to nanosec accounting and timeline sorting,
++ sched_yield() support is very simple under CFS, and in fact under
++ CFS sched_yield() behaves much better than under any other
++ scheduler i have tested so far. )
++
++ - sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler
++ way than the vanilla scheduler does. It uses 100 runqueues (for all
++ 100 RT priority levels, instead of 140 in the vanilla scheduler)
++ and it needs no expired array.
++
++ - reworked/sanitized SMP load-balancing: the runqueue-walking
++ assumptions are gone from the load-balancing code now, and
++ iterators of the scheduling modules are used. The balancing code got
++ quite a bit simpler as a result.
++
++the core scheduler got smaller by more than 700 lines:
++
++ kernel/sched.c | 1454 ++++++++++++++++------------------------------------------------
++ 1 file changed, 372 insertions(+), 1082 deletions(-)
++
++and even adding all the scheduling modules, the total size impact is
++relatively small:
++
++ 18 files changed, 1454 insertions(+), 1133 deletions(-)
++
++most of the increase is due to extensive comments. The kernel size
++impact is in fact a small negative:
++
++ text data bss dec hex filename
++ 23366 4001 24 27391 6aff kernel/sched.o.vanilla
++ 24159 2705 56 26920 6928 kernel/sched.o.CFS
++
++(this is mainly due to the benefit of getting rid of the expired array
++and its data structure overhead.)
++
++thanks go to Thomas Gleixner and Arjan van de Ven for review of this
++patchset.
++
++as usual, any sort of feedback, bugreports, fixes and suggestions are
++more than welcome,
++
++ Ingo
+Index: linux-cfs-2.6.20.8.q/Makefile
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/Makefile
++++ linux-cfs-2.6.20.8.q/Makefile
+@@ -1,7 +1,7 @@
+ VERSION = 2
+ PATCHLEVEL = 6
+ SUBLEVEL = 20
+-EXTRAVERSION = .11
++EXTRAVERSION = .11-cfs-v9
+ NAME = Homicidal Dwarf Hamster
+
+ # *DOCUMENTATION*
+Index: linux-cfs-2.6.20.8.q/arch/i386/kernel/smpboot.c
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/arch/i386/kernel/smpboot.c
++++ linux-cfs-2.6.20.8.q/arch/i386/kernel/smpboot.c
+@@ -1132,18 +1132,6 @@ exit:
+ }
+ #endif
+
+-static void smp_tune_scheduling(void)
+-{
+- unsigned long cachesize; /* kB */
+-
+- if (cpu_khz) {
+- cachesize = boot_cpu_data.x86_cache_size;
+-
+- if (cachesize > 0)
+- max_cache_size = cachesize * 1024;
+- }
+-}
+-
+ /*
+ * Cycle through the processors sending APIC IPIs to boot each.
+ */
+@@ -1172,7 +1160,6 @@ static void __init smp_boot_cpus(unsigne
+ x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
+
+ current_thread_info()->cpu = 0;
+- smp_tune_scheduling();
+
+ set_cpu_sibling_map(0);
+
+Index: linux-cfs-2.6.20.8.q/arch/i386/kernel/syscall_table.S
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/arch/i386/kernel/syscall_table.S
++++ linux-cfs-2.6.20.8.q/arch/i386/kernel/syscall_table.S
+@@ -319,3 +319,4 @@ ENTRY(sys_call_table)
+ .long sys_move_pages
+ .long sys_getcpu
+ .long sys_epoll_pwait
++ .long sys_sched_yield_to /* 320 */
+Index: linux-cfs-2.6.20.8.q/arch/i386/kernel/tsc.c
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/arch/i386/kernel/tsc.c
++++ linux-cfs-2.6.20.8.q/arch/i386/kernel/tsc.c
+@@ -61,6 +61,8 @@ static inline int check_tsc_unstable(voi
+
+ void mark_tsc_unstable(void)
+ {
++ sched_clock_unstable_event();
++
+ tsc_unstable = 1;
+ }
+ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
+@@ -107,13 +109,7 @@ unsigned long long sched_clock(void)
+ {
+ unsigned long long this_offset;
+
+- /*
+- * in the NUMA case we dont use the TSC as they are not
+- * synchronized across all CPUs.
+- */
+-#ifndef CONFIG_NUMA
+- if (!cpu_khz || check_tsc_unstable())
+-#endif
++ if (!cpu_khz || !cpu_has_tsc)
+ /* no locking but a rare wrong value is not a big deal */
+ return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
+
+Index: linux-cfs-2.6.20.8.q/arch/ia64/kernel/setup.c
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/arch/ia64/kernel/setup.c
++++ linux-cfs-2.6.20.8.q/arch/ia64/kernel/setup.c
+@@ -773,7 +773,6 @@ static void __cpuinit
+ get_max_cacheline_size (void)
+ {
+ unsigned long line_size, max = 1;
+- unsigned int cache_size = 0;
+ u64 l, levels, unique_caches;
+ pal_cache_config_info_t cci;
+ s64 status;
+@@ -803,8 +802,6 @@ get_max_cacheline_size (void)
+ line_size = 1 << cci.pcci_line_size;
+ if (line_size > max)
+ max = line_size;
+- if (cache_size < cci.pcci_cache_size)
+- cache_size = cci.pcci_cache_size;
+ if (!cci.pcci_unified) {
+ status = ia64_pal_cache_config_info(l,
+ /* cache_type (instruction)= */ 1,
+@@ -821,9 +818,6 @@ get_max_cacheline_size (void)
+ ia64_i_cache_stride_shift = cci.pcci_stride;
+ }
+ out:
+-#ifdef CONFIG_SMP
+- max_cache_size = max(max_cache_size, cache_size);
+-#endif
+ if (max > ia64_max_cacheline_size)
+ ia64_max_cacheline_size = max;
+ }
+Index: linux-cfs-2.6.20.8.q/arch/mips/kernel/smp.c
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/arch/mips/kernel/smp.c
++++ linux-cfs-2.6.20.8.q/arch/mips/kernel/smp.c
+@@ -245,7 +245,6 @@ void __init smp_prepare_cpus(unsigned in
+ {
+ init_new_context(current, &init_mm);
+ current_thread_info()->cpu = 0;
+- smp_tune_scheduling();
+ plat_prepare_cpus(max_cpus);
+ #ifndef CONFIG_HOTPLUG_CPU
+ cpu_present_map = cpu_possible_map;
+Index: linux-cfs-2.6.20.8.q/arch/sparc/kernel/smp.c
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/arch/sparc/kernel/smp.c
++++ linux-cfs-2.6.20.8.q/arch/sparc/kernel/smp.c
+@@ -69,16 +69,6 @@ void __cpuinit smp_store_cpu_info(int id
+ cpu_data(id).prom_node = cpu_node;
+ cpu_data(id).mid = cpu_get_hwmid(cpu_node);
+
+- /* this is required to tune the scheduler correctly */
+- /* is it possible to have CPUs with different cache sizes? */
+- if (id == boot_cpu_id) {
+- int cache_line,cache_nlines;
+- cache_line = 0x20;
+- cache_line = prom_getintdefault(cpu_node, "ecache-line-size", cache_line);
+- cache_nlines = 0x8000;
+- cache_nlines = prom_getintdefault(cpu_node, "ecache-nlines", cache_nlines);
+- max_cache_size = cache_line * cache_nlines;
+- }
+ if (cpu_data(id).mid < 0)
+ panic("No MID found for CPU%d at node 0x%08d", id, cpu_node);
+ }
+Index: linux-cfs-2.6.20.8.q/arch/sparc64/kernel/smp.c
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/arch/sparc64/kernel/smp.c
++++ linux-cfs-2.6.20.8.q/arch/sparc64/kernel/smp.c
+@@ -1293,41 +1293,6 @@ int setup_profiling_timer(unsigned int m
+ return 0;
+ }
+
+-static void __init smp_tune_scheduling(void)
+-{
+- struct device_node *dp;
+- int instance;
+- unsigned int def, smallest = ~0U;
+-
+- def = ((tlb_type == hypervisor) ?
+- (3 * 1024 * 1024) :
+- (4 * 1024 * 1024));
+-
+- instance = 0;
+- while (!cpu_find_by_instance(instance, &dp, NULL)) {
+- unsigned int val;
+-
+- val = of_getintprop_default(dp, "ecache-size", def);
+- if (val < smallest)
+- smallest = val;
+-
+- instance++;
+- }
+-
+- /* Any value less than 256K is nonsense. */
+- if (smallest < (256U * 1024U))
+- smallest = 256 * 1024;
+-
+- max_cache_size = smallest;
+-
+- if (smallest < 1U * 1024U * 1024U)
+- printk(KERN_INFO "Using max_cache_size of %uKB\n",
+- smallest / 1024U);
+- else
+- printk(KERN_INFO "Using max_cache_size of %uMB\n",
+- smallest / 1024U / 1024U);
+-}
+-
+ /* Constrain the number of cpus to max_cpus. */
+ void __init smp_prepare_cpus(unsigned int max_cpus)
+ {
+@@ -1363,7 +1328,6 @@ void __init smp_prepare_cpus(unsigned in
+ }
+
+ smp_store_cpu_info(boot_cpu_id);
+- smp_tune_scheduling();
+ }
+
+ /* Set this up early so that things like the scheduler can init
+Index: linux-cfs-2.6.20.8.q/fs/proc/array.c
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/fs/proc/array.c
++++ linux-cfs-2.6.20.8.q/fs/proc/array.c
+@@ -165,7 +165,6 @@ static inline char * task_state(struct t
+ rcu_read_lock();
+ buffer += sprintf(buffer,
+ "State:\t%s\n"
+- "SleepAVG:\t%lu%%\n"
+ "Tgid:\t%d\n"
+ "Pid:\t%d\n"
+ "PPid:\t%d\n"
+@@ -173,9 +172,8 @@ static inline char * task_state(struct t
+ "Uid:\t%d\t%d\t%d\t%d\n"
+ "Gid:\t%d\t%d\t%d\t%d\n",
+ get_task_state(p),
+- (p->sleep_avg/1024)*100/(1020000000/1024),
+- p->tgid, p->pid,
+- pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
++ p->tgid, p->pid,
++ pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
+ pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
+ p->uid, p->euid, p->suid, p->fsuid,
+ p->gid, p->egid, p->sgid, p->fsgid);
+@@ -312,6 +310,11 @@ int proc_pid_status(struct task_struct *
+ return buffer - orig;
+ }
+
++int proc_pid_sched(struct task_struct *task, char *buffer)
++{
++ return sched_print_task_state(task, buffer) - buffer;
++}
++
+ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
+ {
+ unsigned long vsize, eip, esp, wchan = ~0UL;
+Index: linux-cfs-2.6.20.8.q/fs/proc/base.c
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/fs/proc/base.c
++++ linux-cfs-2.6.20.8.q/fs/proc/base.c
+@@ -1839,6 +1839,7 @@ static struct pid_entry tgid_base_stuff[
+ INF("environ", S_IRUSR, pid_environ),
+ INF("auxv", S_IRUSR, pid_auxv),
+ INF("status", S_IRUGO, pid_status),
++ INF("sched", S_IRUGO, pid_sched),
+ INF("cmdline", S_IRUGO, pid_cmdline),
+ INF("stat", S_IRUGO, tgid_stat),
+ INF("statm", S_IRUGO, pid_statm),
+@@ -2121,6 +2122,7 @@ static struct pid_entry tid_base_stuff[]
+ INF("environ", S_IRUSR, pid_environ),
+ INF("auxv", S_IRUSR, pid_auxv),
+ INF("status", S_IRUGO, pid_status),
++ INF("sched", S_IRUGO, pid_sched),
+ INF("cmdline", S_IRUGO, pid_cmdline),
+ INF("stat", S_IRUGO, tid_stat),
+ INF("statm", S_IRUGO, pid_statm),
+Index: linux-cfs-2.6.20.8.q/fs/proc/internal.h
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/fs/proc/internal.h
++++ linux-cfs-2.6.20.8.q/fs/proc/internal.h
+@@ -36,6 +36,7 @@ extern int proc_exe_link(struct inode *,
+ extern int proc_tid_stat(struct task_struct *, char *);
+ extern int proc_tgid_stat(struct task_struct *, char *);
+ extern int proc_pid_status(struct task_struct *, char *);
++extern int proc_pid_sched(struct task_struct *, char *);
+ extern int proc_pid_statm(struct task_struct *, char *);
+
+ extern struct file_operations proc_maps_operations;
+Index: linux-cfs-2.6.20.8.q/include/asm-generic/bitops/sched.h
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/include/asm-generic/bitops/sched.h
++++ linux-cfs-2.6.20.8.q/include/asm-generic/bitops/sched.h
+@@ -6,28 +6,23 @@
+
+ /*
+ * Every architecture must define this function. It's the fastest
+- * way of searching a 140-bit bitmap where the first 100 bits are
+- * unlikely to be set. It's guaranteed that at least one of the 140
+- * bits is cleared.
++ * way of searching a 100-bit bitmap. It's guaranteed that at least
++ * one of the 100 bits is cleared.
+ */
+ static inline int sched_find_first_bit(const unsigned long *b)
+ {
+ #if BITS_PER_LONG == 64
+- if (unlikely(b[0]))
++ if (b[0])
+ return __ffs(b[0]);
+- if (likely(b[1]))
+- return __ffs(b[1]) + 64;
+- return __ffs(b[2]) + 128;
++ return __ffs(b[1]) + 64;
+ #elif BITS_PER_LONG == 32
+- if (unlikely(b[0]))
++ if (b[0])
+ return __ffs(b[0]);
+- if (unlikely(b[1]))
++ if (b[1])
+ return __ffs(b[1]) + 32;
+- if (unlikely(b[2]))
++ if (b[2])
+ return __ffs(b[2]) + 64;
+- if (b[3])
+- return __ffs(b[3]) + 96;
+- return __ffs(b[4]) + 128;
++ return __ffs(b[3]) + 96;
+ #else
+ #error BITS_PER_LONG not defined
+ #endif
+Index: linux-cfs-2.6.20.8.q/include/asm-i386/topology.h
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/include/asm-i386/topology.h
++++ linux-cfs-2.6.20.8.q/include/asm-i386/topology.h
+@@ -85,7 +85,6 @@ static inline int node_to_first_cpu(int
+ .idle_idx = 1, \
+ .newidle_idx = 2, \
+ .wake_idx = 1, \
+- .per_cpu_gain = 100, \
+ .flags = SD_LOAD_BALANCE \
+ | SD_BALANCE_EXEC \
+ | SD_BALANCE_FORK \
+Index: linux-cfs-2.6.20.8.q/include/asm-i386/unistd.h
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/include/asm-i386/unistd.h
++++ linux-cfs-2.6.20.8.q/include/asm-i386/unistd.h
+@@ -325,10 +325,11 @@
+ #define __NR_move_pages 317
+ #define __NR_getcpu 318
+ #define __NR_epoll_pwait 319
++#define __NR_sched_yield_to 320
+
+ #ifdef __KERNEL__
+
+-#define NR_syscalls 320
++#define NR_syscalls 321
+
+ #define __ARCH_WANT_IPC_PARSE_VERSION
+ #define __ARCH_WANT_OLD_READDIR
+Index: linux-cfs-2.6.20.8.q/include/asm-ia64/topology.h
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/include/asm-ia64/topology.h
++++ linux-cfs-2.6.20.8.q/include/asm-ia64/topology.h
+@@ -65,7 +65,6 @@ void build_cpu_to_node_map(void);
+ .max_interval = 4, \
+ .busy_factor = 64, \
+ .imbalance_pct = 125, \
+- .per_cpu_gain = 100, \
+ .cache_nice_tries = 2, \
+ .busy_idx = 2, \
+ .idle_idx = 1, \
+@@ -97,7 +96,6 @@ void build_cpu_to_node_map(void);
+ .newidle_idx = 0, /* unused */ \
+ .wake_idx = 1, \
+ .forkexec_idx = 1, \
+- .per_cpu_gain = 100, \
+ .flags = SD_LOAD_BALANCE \
+ | SD_BALANCE_EXEC \
+ | SD_BALANCE_FORK \
+Index: linux-cfs-2.6.20.8.q/include/asm-mips/mach-ip27/topology.h
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/include/asm-mips/mach-ip27/topology.h
++++ linux-cfs-2.6.20.8.q/include/asm-mips/mach-ip27/topology.h
+@@ -28,7 +28,6 @@ extern unsigned char __node_distances[MA
+ .busy_factor = 32, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = 1, \
+- .per_cpu_gain = 100, \
+ .flags = SD_LOAD_BALANCE \
+ | SD_BALANCE_EXEC \
+ | SD_WAKE_BALANCE, \
+Index: linux-cfs-2.6.20.8.q/include/asm-powerpc/topology.h
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/include/asm-powerpc/topology.h
++++ linux-cfs-2.6.20.8.q/include/asm-powerpc/topology.h
+@@ -57,7 +57,6 @@ static inline int pcibus_to_node(struct
+ .busy_factor = 32, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = 1, \
+- .per_cpu_gain = 100, \
+ .busy_idx = 3, \
+ .idle_idx = 1, \
+ .newidle_idx = 2, \
+Index: linux-cfs-2.6.20.8.q/include/asm-x86_64/topology.h
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/include/asm-x86_64/topology.h
++++ linux-cfs-2.6.20.8.q/include/asm-x86_64/topology.h
+@@ -43,7 +43,6 @@ extern int __node_distance(int, int);
+ .newidle_idx = 0, \
+ .wake_idx = 1, \
+ .forkexec_idx = 1, \
+- .per_cpu_gain = 100, \
+ .flags = SD_LOAD_BALANCE \
+ | SD_BALANCE_FORK \
+ | SD_BALANCE_EXEC \
+Index: linux-cfs-2.6.20.8.q/include/asm-x86_64/unistd.h
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/include/asm-x86_64/unistd.h
++++ linux-cfs-2.6.20.8.q/include/asm-x86_64/unistd.h
+@@ -619,8 +619,10 @@ __SYSCALL(__NR_sync_file_range, sys_sync
+ __SYSCALL(__NR_vmsplice, sys_vmsplice)
+ #define __NR_move_pages 279
+ __SYSCALL(__NR_move_pages, sys_move_pages)
++#define __NR_sched_yield_to 280
++__SYSCALL(__NR_sched_yield_to, sys_sched_yield_to)
+
+-#define __NR_syscall_max __NR_move_pages
++#define __NR_syscall_max __NR_sched_yield_to
+
+ #ifndef __NO_STUBS
+ #define __ARCH_WANT_OLD_READDIR
+Index: linux-cfs-2.6.20.8.q/include/linux/hardirq.h
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/include/linux/hardirq.h
++++ linux-cfs-2.6.20.8.q/include/linux/hardirq.h
+@@ -79,6 +79,19 @@
+ #endif
+
+ #ifdef CONFIG_PREEMPT
++# define PREEMPT_CHECK_OFFSET 1
++#else
++# define PREEMPT_CHECK_OFFSET 0
++#endif
++
++/*
++ * Check whether we were atomic before we did preempt_disable():
++ * (used by the scheduler)
++ */
++#define in_atomic_preempt_off() \
++ ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
++
++#ifdef CONFIG_PREEMPT
+ # define preemptible() (preempt_count() == 0 && !irqs_disabled())
+ # define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1)
+ #else
+Index: linux-cfs-2.6.20.8.q/include/linux/ktime.h
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/include/linux/ktime.h
++++ linux-cfs-2.6.20.8.q/include/linux/ktime.h
+@@ -274,4 +274,6 @@ extern void ktime_get_ts(struct timespec
+ /* Get the real (wall-) time in timespec format: */
+ #define ktime_get_real_ts(ts) getnstimeofday(ts)
+
++extern ktime_t ktime_get(void);
++
+ #endif
+Index: linux-cfs-2.6.20.8.q/include/linux/sched.h
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/include/linux/sched.h
++++ linux-cfs-2.6.20.8.q/include/linux/sched.h
+@@ -2,7 +2,6 @@
+ #define _LINUX_SCHED_H
+
+ #include <linux/auxvec.h> /* For AT_VECTOR_SIZE */
+-
+ /*
+ * cloning flags:
+ */
+@@ -37,6 +36,8 @@
+
+ #ifdef __KERNEL__
+
++#include <linux/rbtree.h> /* For run_node */
++
+ struct sched_param {
+ int sched_priority;
+ };
+@@ -196,13 +197,13 @@ extern void init_idle(struct task_struct
+ extern cpumask_t nohz_cpu_mask;
+
+ /*
+- * Only dump TASK_* tasks. (-1 for all tasks)
++ * Only dump TASK_* tasks. (0 for all tasks)
+ */
+ extern void show_state_filter(unsigned long state_filter);
+
+ static inline void show_state(void)
+ {
+- show_state_filter(-1);
++ show_state_filter(0);
+ }
+
+ extern void show_regs(struct pt_regs *);
+@@ -464,7 +465,7 @@ struct signal_struct {
+ * from jiffies_to_ns(utime + stime) if sched_clock uses something
+ * other than jiffies.)
+ */
+- unsigned long long sched_time;
++ unsigned long long sum_sched_runtime;
+
+ /*
+ * We don't bother to synchronize most readers of this at all,
+@@ -524,6 +525,7 @@ struct signal_struct {
+ #define MAX_RT_PRIO MAX_USER_RT_PRIO
+
+ #define MAX_PRIO (MAX_RT_PRIO + 40)
++#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
+
+ #define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO)
+ #define rt_task(p) rt_prio((p)->prio)
+@@ -635,7 +637,14 @@ enum idle_type
+ /*
+ * sched-domains (multiprocessor balancing) declarations:
+ */
+-#define SCHED_LOAD_SCALE 128UL /* increase resolution of load */
++
++/*
++ * Increase resolution of nice-level calculations:
++ */
++#define SCHED_LOAD_SHIFT 10
++#define SCHED_LOAD_SCALE (1UL << SCHED_LOAD_SHIFT)
++
++#define SCHED_LOAD_SCALE_FUZZ (SCHED_LOAD_SCALE >> 5)
+
+ #ifdef CONFIG_SMP
+ #define SD_LOAD_BALANCE 1 /* Do load balancing on this domain. */
+@@ -684,7 +693,6 @@ struct sched_domain {
+ unsigned int imbalance_pct; /* No balance until over watermark */
+ unsigned long long cache_hot_time; /* Task considered cache hot (ns) */
+ unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */
+- unsigned int per_cpu_gain; /* CPU % gained by adding domain cpus */
+ unsigned int busy_idx;
+ unsigned int idle_idx;
+ unsigned int newidle_idx;
+@@ -733,12 +741,6 @@ struct sched_domain {
+ extern int partition_sched_domains(cpumask_t *partition1,
+ cpumask_t *partition2);
+
+-/*
+- * Maximum cache size the migration-costs auto-tuning code will
+- * search from:
+- */
+-extern unsigned int max_cache_size;
+-
+ #endif /* CONFIG_SMP */
+
+
+@@ -789,14 +791,28 @@ struct mempolicy;
+ struct pipe_inode_info;
+ struct uts_namespace;
+
+-enum sleep_type {
+- SLEEP_NORMAL,
+- SLEEP_NONINTERACTIVE,
+- SLEEP_INTERACTIVE,
+- SLEEP_INTERRUPTED,
+-};
++struct rq;
+
+-struct prio_array;
++struct sched_class {
++ struct sched_class *next;
++
++ void (*enqueue_task) (struct rq *rq, struct task_struct *p,
++ int wakeup, u64 now);
++ void (*dequeue_task) (struct rq *rq, struct task_struct *p,
++ int sleep, u64 now);
++ void (*yield_task) (struct rq *rq, struct task_struct *p,
++ struct task_struct *p_to);
++
++ void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
++
++ struct task_struct * (*pick_next_task) (struct rq *rq, u64 now);
++ void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now);
++
++ struct task_struct * (*load_balance_start) (struct rq *rq);
++ struct task_struct * (*load_balance_next) (struct rq *rq);
++ void (*task_tick) (struct rq *rq, struct task_struct *p);
++ void (*task_new) (struct rq *rq, struct task_struct *p);
++};
+
+ struct task_struct {
+ volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
+@@ -813,26 +829,45 @@ struct task_struct {
+ #endif
+ #endif
+ int load_weight; /* for niceness load balancing purposes */
++ int load_shift;
++
+ int prio, static_prio, normal_prio;
++ int on_rq;
+ struct list_head run_list;
+- struct prio_array *array;
++ struct rb_node run_node;
+
+ unsigned short ioprio;
+ #ifdef CONFIG_BLK_DEV_IO_TRACE
+ unsigned int btrace_seq;
+ #endif
+- unsigned long sleep_avg;
+- unsigned long long timestamp, last_ran;
+- unsigned long long sched_time; /* sched_clock time spent running */
+- enum sleep_type sleep_type;
++ /* CFS scheduling class statistics fields: */
++ u64 wait_start_fair;
++ u64 wait_start;
++ u64 exec_start;
++ u64 sleep_start;
++ u64 block_start;
++ u64 sleep_max;
++ u64 block_max;
++ u64 exec_max;
++ u64 wait_max;
++ u64 last_ran;
++
++ s64 wait_runtime;
++ u64 sum_exec_runtime;
++ s64 fair_key;
++ s64 sum_wait_runtime;
+
+ unsigned long policy;
+ cpumask_t cpus_allowed;
+- unsigned int time_slice, first_time_slice;
++ unsigned int time_slice;
++ struct sched_class *sched_class;
++
++ s64 min_wait_runtime;
+
+ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+ struct sched_info sched_info;
+ #endif
++ u64 nr_switches;
+
+ struct list_head tasks;
+ /*
+@@ -1195,8 +1230,9 @@ static inline int set_cpus_allowed(struc
+ #endif
+
+ extern unsigned long long sched_clock(void);
++extern void sched_clock_unstable_event(void);
+ extern unsigned long long
+-current_sched_time(const struct task_struct *current_task);
++current_sched_runtime(const struct task_struct *current_task);
+
+ /* sched_exec is called by processes performing an exec */
+ #ifdef CONFIG_SMP
+@@ -1212,6 +1248,13 @@ static inline void idle_task_exit(void)
+ #endif
+
+ extern void sched_idle_next(void);
++extern char * sched_print_task_state(struct task_struct *p, char *buffer);
++
++extern unsigned int sysctl_sched_granularity;
++extern unsigned int sysctl_sched_wakeup_granularity;
++extern unsigned int sysctl_sched_sleep_history_max;
++extern unsigned int sysctl_sched_child_runs_first;
++extern unsigned int sysctl_sched_load_smoothing;
+
+ #ifdef CONFIG_RT_MUTEXES
+ extern int rt_mutex_getprio(struct task_struct *p);
+@@ -1290,8 +1333,7 @@ extern void FASTCALL(wake_up_new_task(st
+ #else
+ static inline void kick_process(struct task_struct *tsk) { }
+ #endif
+-extern void FASTCALL(sched_fork(struct task_struct * p, int clone_flags));
+-extern void FASTCALL(sched_exit(struct task_struct * p));
++extern void sched_fork(struct task_struct * p, int clone_flags);
+
+ extern int in_group_p(gid_t);
+ extern int in_egroup_p(gid_t);
+Index: linux-cfs-2.6.20.8.q/include/linux/topology.h
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/include/linux/topology.h
++++ linux-cfs-2.6.20.8.q/include/linux/topology.h
+@@ -96,7 +96,6 @@
+ .busy_factor = 64, \
+ .imbalance_pct = 110, \
+ .cache_nice_tries = 0, \
+- .per_cpu_gain = 25, \
+ .busy_idx = 0, \
+ .idle_idx = 0, \
+ .newidle_idx = 1, \
+@@ -128,7 +127,6 @@
+ .busy_factor = 64, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = 1, \
+- .per_cpu_gain = 100, \
+ .busy_idx = 2, \
+ .idle_idx = 1, \
+ .newidle_idx = 2, \
+@@ -159,7 +157,6 @@
+ .busy_factor = 64, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = 1, \
+- .per_cpu_gain = 100, \
+ .busy_idx = 2, \
+ .idle_idx = 1, \
+ .newidle_idx = 2, \
+@@ -193,7 +190,6 @@
+ .newidle_idx = 0, /* unused */ \
+ .wake_idx = 0, /* unused */ \
+ .forkexec_idx = 0, /* unused */ \
+- .per_cpu_gain = 100, \
+ .flags = SD_LOAD_BALANCE \
+ | SD_SERIALIZE, \
+ .last_balance = jiffies, \
+Index: linux-cfs-2.6.20.8.q/init/main.c
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/init/main.c
++++ linux-cfs-2.6.20.8.q/init/main.c
+@@ -422,7 +422,7 @@ static void noinline rest_init(void)
+
+ /*
+ * The boot idle thread must execute schedule()
+- * at least one to get things moving:
++ * at least once to get things moving:
+ */
+ preempt_enable_no_resched();
+ schedule();
+Index: linux-cfs-2.6.20.8.q/kernel/exit.c
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/kernel/exit.c
++++ linux-cfs-2.6.20.8.q/kernel/exit.c
+@@ -112,7 +112,7 @@ static void __exit_signal(struct task_st
+ sig->maj_flt += tsk->maj_flt;
+ sig->nvcsw += tsk->nvcsw;
+ sig->nivcsw += tsk->nivcsw;
+- sig->sched_time += tsk->sched_time;
++ sig->sum_sched_runtime += tsk->sum_exec_runtime;
+ sig = NULL; /* Marker for below. */
+ }
+
+@@ -170,7 +170,6 @@ repeat:
+ zap_leader = (leader->exit_signal == -1);
+ }
+
+- sched_exit(p);
+ write_unlock_irq(&tasklist_lock);
+ proc_flush_task(p);
+ release_thread(p);
+Index: linux-cfs-2.6.20.8.q/kernel/fork.c
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/kernel/fork.c
++++ linux-cfs-2.6.20.8.q/kernel/fork.c
+@@ -874,7 +874,7 @@ static inline int copy_signal(unsigned l
+ sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
+ sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
+ sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
+- sig->sched_time = 0;
++ sig->sum_sched_runtime = 0;
+ INIT_LIST_HEAD(&sig->cpu_timers[0]);
+ INIT_LIST_HEAD(&sig->cpu_timers[1]);
+ INIT_LIST_HEAD(&sig->cpu_timers[2]);
+@@ -1037,7 +1037,7 @@ static struct task_struct *copy_process(
+
+ p->utime = cputime_zero;
+ p->stime = cputime_zero;
+- p->sched_time = 0;
++
+ p->rchar = 0; /* I/O counter: bytes read */
+ p->wchar = 0; /* I/O counter: bytes written */
+ p->syscr = 0; /* I/O counter: read syscalls */
+Index: linux-cfs-2.6.20.8.q/kernel/hrtimer.c
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/kernel/hrtimer.c
++++ linux-cfs-2.6.20.8.q/kernel/hrtimer.c
+@@ -45,7 +45,7 @@
+ *
+ * returns the time in ktime_t format
+ */
+-static ktime_t ktime_get(void)
++ktime_t ktime_get(void)
+ {
+ struct timespec now;
+
+Index: linux-cfs-2.6.20.8.q/kernel/posix-cpu-timers.c
+===================================================================
+--- linux-cfs-2.6.20.8.q.orig/kernel/posix-cpu-timers.c
++++ linux-cfs-2.6.20.8.q/kernel/posix-cpu-timers.c
+@@ -161,7 +161,7 @@ static inline cputime_t virt_ticks(struc
+ }
+ static inline unsigned long long sched_ns(struct task_struct *p)
+ {
+- return (p == current) ? current_sched_time(p) : p->sched_time;
++ return (p == current) ? current_sched_runtime(p) : p->sum_exec_runtime;
+ }
+
+ int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
+@@ -246,10 +246,10 @@ static int cpu_clock_sample_group_locked
+ } while (t != p);
+ break;
+ case CPUCLOCK_SCHED:
+- cpu->sched = p->signal->sched_time;
++ cpu->sched = p->signal->sum_sched_runtime;
+ /* Add in each other live thread. */
+ while ((t = next_thread(t)) != p) {
+- cpu->sched += t->sched_time;
++ cpu->sched += t->sum_exec_runtime;
+ }
+ cpu->sched += sched_ns(p);
+ break;
+@@ -417,7 +417,7 @@ int posix_cpu_timer_del(struct k_itimer
+ */
+ static void cleanup_timers(struct list_head *head,
+ cputime_t utime, cputime_t stime,
+- unsigned long long sched_time)
++ unsigned long long sum_exec_runtime)
+ {
+ struct cpu_timer_list *timer, *next;