diff options
| author | Leon Woestenberg <leon.woestenberg@gmail.com> | 2007-05-06 15:33:37 +0000 |
|---|---|---|
| committer | Leon Woestenberg <leon.woestenberg@gmail.com> | 2007-05-06 15:33:37 +0000 |
| commit | 7c8380efe7ac5b7d7aaf01f91599bb5e64542710 (patch) | |
| tree | a0a76753f49c2f41304cf099c6d85c39ff546497 /packages | |
| parent | 33de73cb673b87938d734673c5fec3e5957250a7 (diff) | |
| parent | 4e64c8dfdb92fa358349c24969c617039ab88cd3 (diff) | |
merge of '61b65ffb0a858399f89407df40b6469b203bda44'
and 'ed3f0331cfddd29d423591aa25379dc841a37572'
Diffstat (limited to 'packages')
| -rw-r--r-- | packages/linux/linux-efika-2.6.20.11/.mtn2git_empty | 0 | ||||
| -rw-r--r-- | packages/linux/linux-efika-2.6.20.11/sched-cfs-v9-v2.6.20.11.patch | 5590 | ||||
| -rw-r--r-- | packages/linux/linux-efika_2.6.20.11.bb | 86 |
3 files changed, 5676 insertions, 0 deletions
diff --git a/packages/linux/linux-efika-2.6.20.11/.mtn2git_empty b/packages/linux/linux-efika-2.6.20.11/.mtn2git_empty new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/packages/linux/linux-efika-2.6.20.11/.mtn2git_empty diff --git a/packages/linux/linux-efika-2.6.20.11/sched-cfs-v9-v2.6.20.11.patch b/packages/linux/linux-efika-2.6.20.11/sched-cfs-v9-v2.6.20.11.patch new file mode 100644 index 0000000000..29071a99ac --- /dev/null +++ b/packages/linux/linux-efika-2.6.20.11/sched-cfs-v9-v2.6.20.11.patch @@ -0,0 +1,5590 @@ +This is the Complete Fair Scheduler (CFS) v9 patch for +linux 2.6.20.10 patch (rediffed cleanly against .11). + +http://people.redhat.com/mingo/cfs-scheduler/ + +Index: linux-cfs-2.6.20.8.q/Documentation/kernel-parameters.txt +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/Documentation/kernel-parameters.txt ++++ linux-cfs-2.6.20.8.q/Documentation/kernel-parameters.txt +@@ -914,49 +914,6 @@ and is between 256 and 4096 characters. + + mga= [HW,DRM] + +- migration_cost= +- [KNL,SMP] debug: override scheduler migration costs +- Format: <level-1-usecs>,<level-2-usecs>,... +- This debugging option can be used to override the +- default scheduler migration cost matrix. The numbers +- are indexed by 'CPU domain distance'. +- E.g. migration_cost=1000,2000,3000 on an SMT NUMA +- box will set up an intra-core migration cost of +- 1 msec, an inter-core migration cost of 2 msecs, +- and an inter-node migration cost of 3 msecs. +- +- WARNING: using the wrong values here can break +- scheduler performance, so it's only for scheduler +- development purposes, not production environments. +- +- migration_debug= +- [KNL,SMP] migration cost auto-detect verbosity +- Format=<0|1|2> +- If a system's migration matrix reported at bootup +- seems erroneous then this option can be used to +- increase verbosity of the detection process. 
+- We default to 0 (no extra messages), 1 will print +- some more information, and 2 will be really +- verbose (probably only useful if you also have a +- serial console attached to the system). +- +- migration_factor= +- [KNL,SMP] multiply/divide migration costs by a factor +- Format=<percent> +- This debug option can be used to proportionally +- increase or decrease the auto-detected migration +- costs for all entries of the migration matrix. +- E.g. migration_factor=150 will increase migration +- costs by 50%. (and thus the scheduler will be less +- eager migrating cache-hot tasks) +- migration_factor=80 will decrease migration costs +- by 20%. (thus the scheduler will be more eager to +- migrate tasks) +- +- WARNING: using the wrong values here can break +- scheduler performance, so it's only for scheduler +- development purposes, not production environments. +- + mousedev.tap_time= + [MOUSE] Maximum time between finger touching and + leaving touchpad surface for touch to be considered +Index: linux-cfs-2.6.20.8.q/Documentation/sched-design-CFS.txt +=================================================================== +--- /dev/null ++++ linux-cfs-2.6.20.8.q/Documentation/sched-design-CFS.txt +@@ -0,0 +1,107 @@ ++[announce] [patch] Modular Scheduler Core and Completely Fair Scheduler [CFS] ++ ++i'm pleased to announce the first release of the "Modular Scheduler Core ++and Completely Fair Scheduler [CFS]" patchset: ++ ++ http://redhat.com/~mingo/cfs-scheduler/ ++ ++This project is a complete rewrite of the Linux task scheduler. My goal ++is to address various feature requests and to fix deficiencies in the ++vanilla scheduler that were suggested/found in the past few years, both ++for desktop scheduling and for server scheduling workloads. ++ ++[ QuickStart: apply the patch, recompile, reboot. The new scheduler ++ will be active by default and all tasks will default to the ++ SCHED_NORMAL interactive scheduling class. 
] ++ ++Highlights are: ++ ++ - the introduction of Scheduling Classes: an extensible hierarchy of ++ scheduler modules. These modules encapsulate scheduling policy ++ details and are handled by the scheduler core without the core ++ code assuming too much about them. ++ ++ - sched_fair.c implements the 'CFS desktop scheduler': it is a ++ replacement for the vanilla scheduler's SCHED_OTHER interactivity ++ code. ++ ++ i'd like to give credit to Con Kolivas for the general approach here: ++ he has proven via RSDL/SD that 'fair scheduling' is possible and that ++ it results in better desktop scheduling. Kudos Con! ++ ++ The CFS patch uses a completely different approach and implementation ++ from RSDL/SD. My goal was to make CFS's interactivity quality exceed ++ that of RSDL/SD, which is a high standard to meet :-) Testing ++ feedback is welcome to decide this one way or another. [ and, in any ++ case, all of SD's logic could be added via a kernel/sched_sd.c module ++ as well, if Con is interested in such an approach. ] ++ ++ CFS's design is quite radical: it does not use runqueues, it uses a ++ time-ordered rbtree to build a 'timeline' of future task execution, ++ and thus has no 'array switch' artifacts (by which both the vanilla ++ scheduler and RSDL/SD are affected). ++ ++ CFS uses nanosecond granularity accounting and does not rely on any ++ jiffies or other HZ detail. Thus the CFS scheduler has no notion of ++ 'timeslices' and has no heuristics whatsoever. There is only one ++ central tunable: ++ ++ /proc/sys/kernel/sched_granularity_ns ++ ++ which can be used to tune the scheduler from 'desktop' (low ++ latencies) to 'server' (good batching) workloads. It defaults to a ++ setting suitable for desktop workloads. SCHED_BATCH is handled by the ++ CFS scheduler module too. 
++ ++ due to its design, the CFS scheduler is not prone to any of the ++ 'attacks' that exist today against the heuristics of the stock ++ scheduler: fiftyp.c, thud.c, chew.c, ring-test.c, massive_intr.c all ++ work fine and do not impact interactivity and produce the expected ++ behavior. ++ ++ the CFS scheduler has a much stronger handling of nice levels and ++ SCHED_BATCH: both types of workloads should be isolated much more ++ aggressively than under the vanilla scheduler. ++ ++ ( another detail: due to nanosec accounting and timeline sorting, ++ sched_yield() support is very simple under CFS, and in fact under ++ CFS sched_yield() behaves much better than under any other ++ scheduler i have tested so far. ) ++ ++ - sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler ++ way than the vanilla scheduler does. It uses 100 runqueues (for all ++ 100 RT priority levels, instead of 140 in the vanilla scheduler) ++ and it needs no expired array. ++ ++ - reworked/sanitized SMP load-balancing: the runqueue-walking ++ assumptions are gone from the load-balancing code now, and ++ iterators of the scheduling modules are used. The balancing code got ++ quite a bit simpler as a result. ++ ++the core scheduler got smaller by more than 700 lines: ++ ++ kernel/sched.c | 1454 ++++++++++++++++------------------------------------------------ ++ 1 file changed, 372 insertions(+), 1082 deletions(-) ++ ++and even adding all the scheduling modules, the total size impact is ++relatively small: ++ ++ 18 files changed, 1454 insertions(+), 1133 deletions(-) ++ ++most of the increase is due to extensive comments. The kernel size ++impact is in fact a small negative: ++ ++ text data bss dec hex filename ++ 23366 4001 24 27391 6aff kernel/sched.o.vanilla ++ 24159 2705 56 26920 6928 kernel/sched.o.CFS ++ ++(this is mainly due to the benefit of getting rid of the expired array ++and its data structure overhead.) 
++ ++thanks go to Thomas Gleixner and Arjan van de Ven for review of this ++patchset. ++ ++as usual, any sort of feedback, bugreports, fixes and suggestions are ++more than welcome, ++ ++ Ingo +Index: linux-cfs-2.6.20.8.q/Makefile +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/Makefile ++++ linux-cfs-2.6.20.8.q/Makefile +@@ -1,7 +1,7 @@ + VERSION = 2 + PATCHLEVEL = 6 + SUBLEVEL = 20 +-EXTRAVERSION = .11 ++EXTRAVERSION = .11-cfs-v9 + NAME = Homicidal Dwarf Hamster + + # *DOCUMENTATION* +Index: linux-cfs-2.6.20.8.q/arch/i386/kernel/smpboot.c +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/arch/i386/kernel/smpboot.c ++++ linux-cfs-2.6.20.8.q/arch/i386/kernel/smpboot.c +@@ -1132,18 +1132,6 @@ exit: + } + #endif + +-static void smp_tune_scheduling(void) +-{ +- unsigned long cachesize; /* kB */ +- +- if (cpu_khz) { +- cachesize = boot_cpu_data.x86_cache_size; +- +- if (cachesize > 0) +- max_cache_size = cachesize * 1024; +- } +-} +- + /* + * Cycle through the processors sending APIC IPIs to boot each. 
+ */ +@@ -1172,7 +1160,6 @@ static void __init smp_boot_cpus(unsigne + x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; + + current_thread_info()->cpu = 0; +- smp_tune_scheduling(); + + set_cpu_sibling_map(0); + +Index: linux-cfs-2.6.20.8.q/arch/i386/kernel/syscall_table.S +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/arch/i386/kernel/syscall_table.S ++++ linux-cfs-2.6.20.8.q/arch/i386/kernel/syscall_table.S +@@ -319,3 +319,4 @@ ENTRY(sys_call_table) + .long sys_move_pages + .long sys_getcpu + .long sys_epoll_pwait ++ .long sys_sched_yield_to /* 320 */ +Index: linux-cfs-2.6.20.8.q/arch/i386/kernel/tsc.c +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/arch/i386/kernel/tsc.c ++++ linux-cfs-2.6.20.8.q/arch/i386/kernel/tsc.c +@@ -61,6 +61,8 @@ static inline int check_tsc_unstable(voi + + void mark_tsc_unstable(void) + { ++ sched_clock_unstable_event(); ++ + tsc_unstable = 1; + } + EXPORT_SYMBOL_GPL(mark_tsc_unstable); +@@ -107,13 +109,7 @@ unsigned long long sched_clock(void) + { + unsigned long long this_offset; + +- /* +- * in the NUMA case we dont use the TSC as they are not +- * synchronized across all CPUs. 
+- */ +-#ifndef CONFIG_NUMA +- if (!cpu_khz || check_tsc_unstable()) +-#endif ++ if (!cpu_khz || !cpu_has_tsc) + /* no locking but a rare wrong value is not a big deal */ + return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); + +Index: linux-cfs-2.6.20.8.q/arch/ia64/kernel/setup.c +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/arch/ia64/kernel/setup.c ++++ linux-cfs-2.6.20.8.q/arch/ia64/kernel/setup.c +@@ -773,7 +773,6 @@ static void __cpuinit + get_max_cacheline_size (void) + { + unsigned long line_size, max = 1; +- unsigned int cache_size = 0; + u64 l, levels, unique_caches; + pal_cache_config_info_t cci; + s64 status; +@@ -803,8 +802,6 @@ get_max_cacheline_size (void) + line_size = 1 << cci.pcci_line_size; + if (line_size > max) + max = line_size; +- if (cache_size < cci.pcci_cache_size) +- cache_size = cci.pcci_cache_size; + if (!cci.pcci_unified) { + status = ia64_pal_cache_config_info(l, + /* cache_type (instruction)= */ 1, +@@ -821,9 +818,6 @@ get_max_cacheline_size (void) + ia64_i_cache_stride_shift = cci.pcci_stride; + } + out: +-#ifdef CONFIG_SMP +- max_cache_size = max(max_cache_size, cache_size); +-#endif + if (max > ia64_max_cacheline_size) + ia64_max_cacheline_size = max; + } +Index: linux-cfs-2.6.20.8.q/arch/mips/kernel/smp.c +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/arch/mips/kernel/smp.c ++++ linux-cfs-2.6.20.8.q/arch/mips/kernel/smp.c +@@ -245,7 +245,6 @@ void __init smp_prepare_cpus(unsigned in + { + init_new_context(current, &init_mm); + current_thread_info()->cpu = 0; +- smp_tune_scheduling(); + plat_prepare_cpus(max_cpus); + #ifndef CONFIG_HOTPLUG_CPU + cpu_present_map = cpu_possible_map; +Index: linux-cfs-2.6.20.8.q/arch/sparc/kernel/smp.c +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/arch/sparc/kernel/smp.c ++++ linux-cfs-2.6.20.8.q/arch/sparc/kernel/smp.c +@@ -69,16 +69,6 
@@ void __cpuinit smp_store_cpu_info(int id + cpu_data(id).prom_node = cpu_node; + cpu_data(id).mid = cpu_get_hwmid(cpu_node); + +- /* this is required to tune the scheduler correctly */ +- /* is it possible to have CPUs with different cache sizes? */ +- if (id == boot_cpu_id) { +- int cache_line,cache_nlines; +- cache_line = 0x20; +- cache_line = prom_getintdefault(cpu_node, "ecache-line-size", cache_line); +- cache_nlines = 0x8000; +- cache_nlines = prom_getintdefault(cpu_node, "ecache-nlines", cache_nlines); +- max_cache_size = cache_line * cache_nlines; +- } + if (cpu_data(id).mid < 0) + panic("No MID found for CPU%d at node 0x%08d", id, cpu_node); + } +Index: linux-cfs-2.6.20.8.q/arch/sparc64/kernel/smp.c +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/arch/sparc64/kernel/smp.c ++++ linux-cfs-2.6.20.8.q/arch/sparc64/kernel/smp.c +@@ -1293,41 +1293,6 @@ int setup_profiling_timer(unsigned int m + return 0; + } + +-static void __init smp_tune_scheduling(void) +-{ +- struct device_node *dp; +- int instance; +- unsigned int def, smallest = ~0U; +- +- def = ((tlb_type == hypervisor) ? +- (3 * 1024 * 1024) : +- (4 * 1024 * 1024)); +- +- instance = 0; +- while (!cpu_find_by_instance(instance, &dp, NULL)) { +- unsigned int val; +- +- val = of_getintprop_default(dp, "ecache-size", def); +- if (val < smallest) +- smallest = val; +- +- instance++; +- } +- +- /* Any value less than 256K is nonsense. */ +- if (smallest < (256U * 1024U)) +- smallest = 256 * 1024; +- +- max_cache_size = smallest; +- +- if (smallest < 1U * 1024U * 1024U) +- printk(KERN_INFO "Using max_cache_size of %uKB\n", +- smallest / 1024U); +- else +- printk(KERN_INFO "Using max_cache_size of %uMB\n", +- smallest / 1024U / 1024U); +-} +- + /* Constrain the number of cpus to max_cpus. 
*/ + void __init smp_prepare_cpus(unsigned int max_cpus) + { +@@ -1363,7 +1328,6 @@ void __init smp_prepare_cpus(unsigned in + } + + smp_store_cpu_info(boot_cpu_id); +- smp_tune_scheduling(); + } + + /* Set this up early so that things like the scheduler can init +Index: linux-cfs-2.6.20.8.q/fs/proc/array.c +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/fs/proc/array.c ++++ linux-cfs-2.6.20.8.q/fs/proc/array.c +@@ -165,7 +165,6 @@ static inline char * task_state(struct t + rcu_read_lock(); + buffer += sprintf(buffer, + "State:\t%s\n" +- "SleepAVG:\t%lu%%\n" + "Tgid:\t%d\n" + "Pid:\t%d\n" + "PPid:\t%d\n" +@@ -173,9 +172,8 @@ static inline char * task_state(struct t + "Uid:\t%d\t%d\t%d\t%d\n" + "Gid:\t%d\t%d\t%d\t%d\n", + get_task_state(p), +- (p->sleep_avg/1024)*100/(1020000000/1024), +- p->tgid, p->pid, +- pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, ++ p->tgid, p->pid, ++ pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, + pid_alive(p) && p->ptrace ? 
rcu_dereference(p->parent)->pid : 0, + p->uid, p->euid, p->suid, p->fsuid, + p->gid, p->egid, p->sgid, p->fsgid); +@@ -312,6 +310,11 @@ int proc_pid_status(struct task_struct * + return buffer - orig; + } + ++int proc_pid_sched(struct task_struct *task, char *buffer) ++{ ++ return sched_print_task_state(task, buffer) - buffer; ++} ++ + static int do_task_stat(struct task_struct *task, char * buffer, int whole) + { + unsigned long vsize, eip, esp, wchan = ~0UL; +Index: linux-cfs-2.6.20.8.q/fs/proc/base.c +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/fs/proc/base.c ++++ linux-cfs-2.6.20.8.q/fs/proc/base.c +@@ -1839,6 +1839,7 @@ static struct pid_entry tgid_base_stuff[ + INF("environ", S_IRUSR, pid_environ), + INF("auxv", S_IRUSR, pid_auxv), + INF("status", S_IRUGO, pid_status), ++ INF("sched", S_IRUGO, pid_sched), + INF("cmdline", S_IRUGO, pid_cmdline), + INF("stat", S_IRUGO, tgid_stat), + INF("statm", S_IRUGO, pid_statm), +@@ -2121,6 +2122,7 @@ static struct pid_entry tid_base_stuff[] + INF("environ", S_IRUSR, pid_environ), + INF("auxv", S_IRUSR, pid_auxv), + INF("status", S_IRUGO, pid_status), ++ INF("sched", S_IRUGO, pid_sched), + INF("cmdline", S_IRUGO, pid_cmdline), + INF("stat", S_IRUGO, tid_stat), + INF("statm", S_IRUGO, pid_statm), +Index: linux-cfs-2.6.20.8.q/fs/proc/internal.h +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/fs/proc/internal.h ++++ linux-cfs-2.6.20.8.q/fs/proc/internal.h +@@ -36,6 +36,7 @@ extern int proc_exe_link(struct inode *, + extern int proc_tid_stat(struct task_struct *, char *); + extern int proc_tgid_stat(struct task_struct *, char *); + extern int proc_pid_status(struct task_struct *, char *); ++extern int proc_pid_sched(struct task_struct *, char *); + extern int proc_pid_statm(struct task_struct *, char *); + + extern struct file_operations proc_maps_operations; +Index: linux-cfs-2.6.20.8.q/include/asm-generic/bitops/sched.h 
+=================================================================== +--- linux-cfs-2.6.20.8.q.orig/include/asm-generic/bitops/sched.h ++++ linux-cfs-2.6.20.8.q/include/asm-generic/bitops/sched.h +@@ -6,28 +6,23 @@ + + /* + * Every architecture must define this function. It's the fastest +- * way of searching a 140-bit bitmap where the first 100 bits are +- * unlikely to be set. It's guaranteed that at least one of the 140 +- * bits is cleared. ++ * way of searching a 100-bit bitmap. It's guaranteed that at least ++ * one of the 100 bits is cleared. + */ + static inline int sched_find_first_bit(const unsigned long *b) + { + #if BITS_PER_LONG == 64 +- if (unlikely(b[0])) ++ if (b[0]) + return __ffs(b[0]); +- if (likely(b[1])) +- return __ffs(b[1]) + 64; +- return __ffs(b[2]) + 128; ++ return __ffs(b[1]) + 64; + #elif BITS_PER_LONG == 32 +- if (unlikely(b[0])) ++ if (b[0]) + return __ffs(b[0]); +- if (unlikely(b[1])) ++ if (b[1]) + return __ffs(b[1]) + 32; +- if (unlikely(b[2])) ++ if (b[2]) + return __ffs(b[2]) + 64; +- if (b[3]) +- return __ffs(b[3]) + 96; +- return __ffs(b[4]) + 128; ++ return __ffs(b[3]) + 96; + #else + #error BITS_PER_LONG not defined + #endif +Index: linux-cfs-2.6.20.8.q/include/asm-i386/topology.h +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/include/asm-i386/topology.h ++++ linux-cfs-2.6.20.8.q/include/asm-i386/topology.h +@@ -85,7 +85,6 @@ static inline int node_to_first_cpu(int + .idle_idx = 1, \ + .newidle_idx = 2, \ + .wake_idx = 1, \ +- .per_cpu_gain = 100, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_EXEC \ + | SD_BALANCE_FORK \ +Index: linux-cfs-2.6.20.8.q/include/asm-i386/unistd.h +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/include/asm-i386/unistd.h ++++ linux-cfs-2.6.20.8.q/include/asm-i386/unistd.h +@@ -325,10 +325,11 @@ + #define __NR_move_pages 317 + #define __NR_getcpu 318 + #define __NR_epoll_pwait 319 ++#define 
__NR_sched_yield_to 320 + + #ifdef __KERNEL__ + +-#define NR_syscalls 320 ++#define NR_syscalls 321 + + #define __ARCH_WANT_IPC_PARSE_VERSION + #define __ARCH_WANT_OLD_READDIR +Index: linux-cfs-2.6.20.8.q/include/asm-ia64/topology.h +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/include/asm-ia64/topology.h ++++ linux-cfs-2.6.20.8.q/include/asm-ia64/topology.h +@@ -65,7 +65,6 @@ void build_cpu_to_node_map(void); + .max_interval = 4, \ + .busy_factor = 64, \ + .imbalance_pct = 125, \ +- .per_cpu_gain = 100, \ + .cache_nice_tries = 2, \ + .busy_idx = 2, \ + .idle_idx = 1, \ +@@ -97,7 +96,6 @@ void build_cpu_to_node_map(void); + .newidle_idx = 0, /* unused */ \ + .wake_idx = 1, \ + .forkexec_idx = 1, \ +- .per_cpu_gain = 100, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_EXEC \ + | SD_BALANCE_FORK \ +Index: linux-cfs-2.6.20.8.q/include/asm-mips/mach-ip27/topology.h +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/include/asm-mips/mach-ip27/topology.h ++++ linux-cfs-2.6.20.8.q/include/asm-mips/mach-ip27/topology.h +@@ -28,7 +28,6 @@ extern unsigned char __node_distances[MA + .busy_factor = 32, \ + .imbalance_pct = 125, \ + .cache_nice_tries = 1, \ +- .per_cpu_gain = 100, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_EXEC \ + | SD_WAKE_BALANCE, \ +Index: linux-cfs-2.6.20.8.q/include/asm-powerpc/topology.h +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/include/asm-powerpc/topology.h ++++ linux-cfs-2.6.20.8.q/include/asm-powerpc/topology.h +@@ -57,7 +57,6 @@ static inline int pcibus_to_node(struct + .busy_factor = 32, \ + .imbalance_pct = 125, \ + .cache_nice_tries = 1, \ +- .per_cpu_gain = 100, \ + .busy_idx = 3, \ + .idle_idx = 1, \ + .newidle_idx = 2, \ +Index: linux-cfs-2.6.20.8.q/include/asm-x86_64/topology.h +=================================================================== +--- 
linux-cfs-2.6.20.8.q.orig/include/asm-x86_64/topology.h ++++ linux-cfs-2.6.20.8.q/include/asm-x86_64/topology.h +@@ -43,7 +43,6 @@ extern int __node_distance(int, int); + .newidle_idx = 0, \ + .wake_idx = 1, \ + .forkexec_idx = 1, \ +- .per_cpu_gain = 100, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_FORK \ + | SD_BALANCE_EXEC \ +Index: linux-cfs-2.6.20.8.q/include/asm-x86_64/unistd.h +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/include/asm-x86_64/unistd.h ++++ linux-cfs-2.6.20.8.q/include/asm-x86_64/unistd.h +@@ -619,8 +619,10 @@ __SYSCALL(__NR_sync_file_range, sys_sync + __SYSCALL(__NR_vmsplice, sys_vmsplice) + #define __NR_move_pages 279 + __SYSCALL(__NR_move_pages, sys_move_pages) ++#define __NR_sched_yield_to 280 ++__SYSCALL(__NR_sched_yield_to, sys_sched_yield_to) + +-#define __NR_syscall_max __NR_move_pages ++#define __NR_syscall_max __NR_sched_yield_to + + #ifndef __NO_STUBS + #define __ARCH_WANT_OLD_READDIR +Index: linux-cfs-2.6.20.8.q/include/linux/hardirq.h +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/include/linux/hardirq.h ++++ linux-cfs-2.6.20.8.q/include/linux/hardirq.h +@@ -79,6 +79,19 @@ + #endif + + #ifdef CONFIG_PREEMPT ++# define PREEMPT_CHECK_OFFSET 1 ++#else ++# define PREEMPT_CHECK_OFFSET 0 ++#endif ++ ++/* ++ * Check whether we were atomic before we did preempt_disable(): ++ * (used by the scheduler) ++ */ ++#define in_atomic_preempt_off() \ ++ ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) ++ ++#ifdef CONFIG_PREEMPT + # define preemptible() (preempt_count() == 0 && !irqs_disabled()) + # define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1) + #else +Index: linux-cfs-2.6.20.8.q/include/linux/ktime.h +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/include/linux/ktime.h ++++ linux-cfs-2.6.20.8.q/include/linux/ktime.h +@@ -274,4 +274,6 @@ extern void ktime_get_ts(struct timespec + 
/* Get the real (wall-) time in timespec format: */ + #define ktime_get_real_ts(ts) getnstimeofday(ts) + ++extern ktime_t ktime_get(void); ++ + #endif +Index: linux-cfs-2.6.20.8.q/include/linux/sched.h +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/include/linux/sched.h ++++ linux-cfs-2.6.20.8.q/include/linux/sched.h +@@ -2,7 +2,6 @@ + #define _LINUX_SCHED_H + + #include <linux/auxvec.h> /* For AT_VECTOR_SIZE */ +- + /* + * cloning flags: + */ +@@ -37,6 +36,8 @@ + + #ifdef __KERNEL__ + ++#include <linux/rbtree.h> /* For run_node */ ++ + struct sched_param { + int sched_priority; + }; +@@ -196,13 +197,13 @@ extern void init_idle(struct task_struct + extern cpumask_t nohz_cpu_mask; + + /* +- * Only dump TASK_* tasks. (-1 for all tasks) ++ * Only dump TASK_* tasks. (0 for all tasks) + */ + extern void show_state_filter(unsigned long state_filter); + + static inline void show_state(void) + { +- show_state_filter(-1); ++ show_state_filter(0); + } + + extern void show_regs(struct pt_regs *); +@@ -464,7 +465,7 @@ struct signal_struct { + * from jiffies_to_ns(utime + stime) if sched_clock uses something + * other than jiffies.) 
+ */ +- unsigned long long sched_time; ++ unsigned long long sum_sched_runtime; + + /* + * We don't bother to synchronize most readers of this at all, +@@ -524,6 +525,7 @@ struct signal_struct { + #define MAX_RT_PRIO MAX_USER_RT_PRIO + + #define MAX_PRIO (MAX_RT_PRIO + 40) ++#define DEFAULT_PRIO (MAX_RT_PRIO + 20) + + #define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO) + #define rt_task(p) rt_prio((p)->prio) +@@ -635,7 +637,14 @@ enum idle_type + /* + * sched-domains (multiprocessor balancing) declarations: + */ +-#define SCHED_LOAD_SCALE 128UL /* increase resolution of load */ ++ ++/* ++ * Increase resolution of nice-level calculations: ++ */ ++#define SCHED_LOAD_SHIFT 10 ++#define SCHED_LOAD_SCALE (1UL << SCHED_LOAD_SHIFT) ++ ++#define SCHED_LOAD_SCALE_FUZZ (SCHED_LOAD_SCALE >> 5) + + #ifdef CONFIG_SMP + #define SD_LOAD_BALANCE 1 /* Do load balancing on this domain. */ +@@ -684,7 +693,6 @@ struct sched_domain { + unsigned int imbalance_pct; /* No balance until over watermark */ + unsigned long long cache_hot_time; /* Task considered cache hot (ns) */ + unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ +- unsigned int per_cpu_gain; /* CPU % gained by adding domain cpus */ + unsigned int busy_idx; + unsigned int idle_idx; + unsigned int newidle_idx; +@@ -733,12 +741,6 @@ struct sched_domain { + extern int partition_sched_domains(cpumask_t *partition1, + cpumask_t *partition2); + +-/* +- * Maximum cache size the migration-costs auto-tuning code will +- * search from: +- */ +-extern unsigned int max_cache_size; +- + #endif /* CONFIG_SMP */ + + +@@ -789,14 +791,28 @@ struct mempolicy; + struct pipe_inode_info; + struct uts_namespace; + +-enum sleep_type { +- SLEEP_NORMAL, +- SLEEP_NONINTERACTIVE, +- SLEEP_INTERACTIVE, +- SLEEP_INTERRUPTED, +-}; ++struct rq; + +-struct prio_array; ++struct sched_class { ++ struct sched_class *next; ++ ++ void (*enqueue_task) (struct rq *rq, struct task_struct *p, ++ int wakeup, u64 now); ++ void (*dequeue_task) 
(struct rq *rq, struct task_struct *p, ++ int sleep, u64 now); ++ void (*yield_task) (struct rq *rq, struct task_struct *p, ++ struct task_struct *p_to); ++ ++ void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); ++ ++ struct task_struct * (*pick_next_task) (struct rq *rq, u64 now); ++ void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now); ++ ++ struct task_struct * (*load_balance_start) (struct rq *rq); ++ struct task_struct * (*load_balance_next) (struct rq *rq); ++ void (*task_tick) (struct rq *rq, struct task_struct *p); ++ void (*task_new) (struct rq *rq, struct task_struct *p); ++}; + + struct task_struct { + volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ +@@ -813,26 +829,45 @@ struct task_struct { + #endif + #endif + int load_weight; /* for niceness load balancing purposes */ ++ int load_shift; ++ + int prio, static_prio, normal_prio; ++ int on_rq; + struct list_head run_list; +- struct prio_array *array; ++ struct rb_node run_node; + + unsigned short ioprio; + #ifdef CONFIG_BLK_DEV_IO_TRACE + unsigned int btrace_seq; + #endif +- unsigned long sleep_avg; +- unsigned long long timestamp, last_ran; +- unsigned long long sched_time; /* sched_clock time spent running */ +- enum sleep_type sleep_type; ++ /* CFS scheduling class statistics fields: */ ++ u64 wait_start_fair; ++ u64 wait_start; ++ u64 exec_start; ++ u64 sleep_start; ++ u64 block_start; ++ u64 sleep_max; ++ u64 block_max; ++ u64 exec_max; ++ u64 wait_max; ++ u64 last_ran; ++ ++ s64 wait_runtime; ++ u64 sum_exec_runtime; ++ s64 fair_key; ++ s64 sum_wait_runtime; + + unsigned long policy; + cpumask_t cpus_allowed; +- unsigned int time_slice, first_time_slice; ++ unsigned int time_slice; ++ struct sched_class *sched_class; ++ ++ s64 min_wait_runtime; + + #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) + struct sched_info sched_info; + #endif ++ u64 nr_switches; + + struct list_head tasks; + /* +@@ -1195,8 +1230,9 @@ static inline int 
set_cpus_allowed(struc + #endif + + extern unsigned long long sched_clock(void); ++extern void sched_clock_unstable_event(void); + extern unsigned long long +-current_sched_time(const struct task_struct *current_task); ++current_sched_runtime(const struct task_struct *current_task); + + /* sched_exec is called by processes performing an exec */ + #ifdef CONFIG_SMP +@@ -1212,6 +1248,13 @@ static inline void idle_task_exit(void) + #endif + + extern void sched_idle_next(void); ++extern char * sched_print_task_state(struct task_struct *p, char *buffer); ++ ++extern unsigned int sysctl_sched_granularity; ++extern unsigned int sysctl_sched_wakeup_granularity; ++extern unsigned int sysctl_sched_sleep_history_max; ++extern unsigned int sysctl_sched_child_runs_first; ++extern unsigned int sysctl_sched_load_smoothing; + + #ifdef CONFIG_RT_MUTEXES + extern int rt_mutex_getprio(struct task_struct *p); +@@ -1290,8 +1333,7 @@ extern void FASTCALL(wake_up_new_task(st + #else + static inline void kick_process(struct task_struct *tsk) { } + #endif +-extern void FASTCALL(sched_fork(struct task_struct * p, int clone_flags)); +-extern void FASTCALL(sched_exit(struct task_struct * p)); ++extern void sched_fork(struct task_struct * p, int clone_flags); + + extern int in_group_p(gid_t); + extern int in_egroup_p(gid_t); +Index: linux-cfs-2.6.20.8.q/include/linux/topology.h +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/include/linux/topology.h ++++ linux-cfs-2.6.20.8.q/include/linux/topology.h +@@ -96,7 +96,6 @@ + .busy_factor = 64, \ + .imbalance_pct = 110, \ + .cache_nice_tries = 0, \ +- .per_cpu_gain = 25, \ + .busy_idx = 0, \ + .idle_idx = 0, \ + .newidle_idx = 1, \ +@@ -128,7 +127,6 @@ + .busy_factor = 64, \ + .imbalance_pct = 125, \ + .cache_nice_tries = 1, \ +- .per_cpu_gain = 100, \ + .busy_idx = 2, \ + .idle_idx = 1, \ + .newidle_idx = 2, \ +@@ -159,7 +157,6 @@ + .busy_factor = 64, \ + .imbalance_pct = 125, \ + 
.cache_nice_tries = 1, \ +- .per_cpu_gain = 100, \ + .busy_idx = 2, \ + .idle_idx = 1, \ + .newidle_idx = 2, \ +@@ -193,7 +190,6 @@ + .newidle_idx = 0, /* unused */ \ + .wake_idx = 0, /* unused */ \ + .forkexec_idx = 0, /* unused */ \ +- .per_cpu_gain = 100, \ + .flags = SD_LOAD_BALANCE \ + | SD_SERIALIZE, \ + .last_balance = jiffies, \ +Index: linux-cfs-2.6.20.8.q/init/main.c +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/init/main.c ++++ linux-cfs-2.6.20.8.q/init/main.c +@@ -422,7 +422,7 @@ static void noinline rest_init(void) + + /* + * The boot idle thread must execute schedule() +- * at least one to get things moving: ++ * at least once to get things moving: + */ + preempt_enable_no_resched(); + schedule(); +Index: linux-cfs-2.6.20.8.q/kernel/exit.c +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/kernel/exit.c ++++ linux-cfs-2.6.20.8.q/kernel/exit.c +@@ -112,7 +112,7 @@ static void __exit_signal(struct task_st + sig->maj_flt += tsk->maj_flt; + sig->nvcsw += tsk->nvcsw; + sig->nivcsw += tsk->nivcsw; +- sig->sched_time += tsk->sched_time; ++ sig->sum_sched_runtime += tsk->sum_exec_runtime; + sig = NULL; /* Marker for below. 
*/ + } + +@@ -170,7 +170,6 @@ repeat: + zap_leader = (leader->exit_signal == -1); + } + +- sched_exit(p); + write_unlock_irq(&tasklist_lock); + proc_flush_task(p); + release_thread(p); +Index: linux-cfs-2.6.20.8.q/kernel/fork.c +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/kernel/fork.c ++++ linux-cfs-2.6.20.8.q/kernel/fork.c +@@ -874,7 +874,7 @@ static inline int copy_signal(unsigned l + sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; + sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; + sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; +- sig->sched_time = 0; ++ sig->sum_sched_runtime = 0; + INIT_LIST_HEAD(&sig->cpu_timers[0]); + INIT_LIST_HEAD(&sig->cpu_timers[1]); + INIT_LIST_HEAD(&sig->cpu_timers[2]); +@@ -1037,7 +1037,7 @@ static struct task_struct *copy_process( + + p->utime = cputime_zero; + p->stime = cputime_zero; +- p->sched_time = 0; ++ + p->rchar = 0; /* I/O counter: bytes read */ + p->wchar = 0; /* I/O counter: bytes written */ + p->syscr = 0; /* I/O counter: read syscalls */ +Index: linux-cfs-2.6.20.8.q/kernel/hrtimer.c +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/kernel/hrtimer.c ++++ linux-cfs-2.6.20.8.q/kernel/hrtimer.c +@@ -45,7 +45,7 @@ + * + * returns the time in ktime_t format + */ +-static ktime_t ktime_get(void) ++ktime_t ktime_get(void) + { + struct timespec now; + +Index: linux-cfs-2.6.20.8.q/kernel/posix-cpu-timers.c +=================================================================== +--- linux-cfs-2.6.20.8.q.orig/kernel/posix-cpu-timers.c ++++ linux-cfs-2.6.20.8.q/kernel/posix-cpu-timers.c +@@ -161,7 +161,7 @@ static inline cputime_t virt_ticks(struc + } + static inline unsigned long long sched_ns(struct task_struct *p) + { +- return (p == current) ? current_sched_time(p) : p->sched_time; ++ return (p == current) ? 
current_sched_runtime(p) : p->sum_exec_runtime; + } + + int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) +@@ -246,10 +246,10 @@ static int cpu_clock_sample_group_locked + } while (t != p); + break; + case CPUCLOCK_SCHED: +- cpu->sched = p->signal->sched_time; ++ cpu->sched = p->signal->sum_sched_runtime; + /* Add in each other live thread. */ + while ((t = next_thread(t)) != p) { +- cpu->sched += t->sched_time; ++ cpu->sched += t->sum_exec_runtime; + } + cpu->sched += sched_ns(p); + break; +@@ -417,7 +417,7 @@ int posix_cpu_timer_del(struct k_itimer + */ + static void cleanup_timers(struct list_head *head, + cputime_t utime, cputime_t stime, +- unsigned long long sched_time) ++ unsigned long long sum_exec_runtime) + { + struct cpu_timer_list *timer, *next; |
