From 743f5cdaf5ccb9fefc7c3ac68ea4676637b8782f Mon Sep 17 00:00:00 2001 From: Leon Woestenberg Date: Sun, 7 Oct 2007 14:43:24 +0000 Subject: linux-efika: Moved from 2.6.20.11-cfs to .20.20-cfs. Needed div64_32() symbol weakening in lib. --- .../linux/linux-efika-2.6.20.11/.mtn2git_empty | 0 .../sched-cfs-v9-v2.6.20.11.patch | 5590 -------------------- .../linux/linux-efika-2.6.20.20/.mtn2git_empty | 0 .../sched-cfs-v9-v2.6.20.11.patch | 5590 ++++++++++++++++++++ .../weaken-div64_32-symbol.patch | 23 + packages/linux/linux-efika-2.6.20/defconfig | 23 +- packages/linux/linux-efika_2.6.20.11.bb | 85 - packages/linux/linux-efika_2.6.20.20.bb | 83 + 8 files changed, 5712 insertions(+), 5682 deletions(-) delete mode 100644 packages/linux/linux-efika-2.6.20.11/.mtn2git_empty delete mode 100644 packages/linux/linux-efika-2.6.20.11/sched-cfs-v9-v2.6.20.11.patch create mode 100644 packages/linux/linux-efika-2.6.20.20/.mtn2git_empty create mode 100644 packages/linux/linux-efika-2.6.20.20/sched-cfs-v9-v2.6.20.11.patch create mode 100644 packages/linux/linux-efika-2.6.20.20/weaken-div64_32-symbol.patch delete mode 100644 packages/linux/linux-efika_2.6.20.11.bb create mode 100644 packages/linux/linux-efika_2.6.20.20.bb (limited to 'packages') diff --git a/packages/linux/linux-efika-2.6.20.11/.mtn2git_empty b/packages/linux/linux-efika-2.6.20.11/.mtn2git_empty deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/packages/linux/linux-efika-2.6.20.11/sched-cfs-v9-v2.6.20.11.patch b/packages/linux/linux-efika-2.6.20.11/sched-cfs-v9-v2.6.20.11.patch deleted file mode 100644 index 29071a99ac..0000000000 --- a/packages/linux/linux-efika-2.6.20.11/sched-cfs-v9-v2.6.20.11.patch +++ /dev/null @@ -1,5590 +0,0 @@ -This is the Completely Fair Scheduler (CFS) v9 patch for -linux 2.6.20.10 patch (rediffed cleanly against .11). 
- -http://people.redhat.com/mingo/cfs-scheduler/ - -Index: linux-cfs-2.6.20.8.q/Documentation/kernel-parameters.txt -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/Documentation/kernel-parameters.txt -+++ linux-cfs-2.6.20.8.q/Documentation/kernel-parameters.txt -@@ -914,49 +914,6 @@ and is between 256 and 4096 characters. - - mga= [HW,DRM] - -- migration_cost= -- [KNL,SMP] debug: override scheduler migration costs -- Format: ,,... -- This debugging option can be used to override the -- default scheduler migration cost matrix. The numbers -- are indexed by 'CPU domain distance'. -- E.g. migration_cost=1000,2000,3000 on an SMT NUMA -- box will set up an intra-core migration cost of -- 1 msec, an inter-core migration cost of 2 msecs, -- and an inter-node migration cost of 3 msecs. -- -- WARNING: using the wrong values here can break -- scheduler performance, so it's only for scheduler -- development purposes, not production environments. -- -- migration_debug= -- [KNL,SMP] migration cost auto-detect verbosity -- Format=<0|1|2> -- If a system's migration matrix reported at bootup -- seems erroneous then this option can be used to -- increase verbosity of the detection process. -- We default to 0 (no extra messages), 1 will print -- some more information, and 2 will be really -- verbose (probably only useful if you also have a -- serial console attached to the system). -- -- migration_factor= -- [KNL,SMP] multiply/divide migration costs by a factor -- Format= -- This debug option can be used to proportionally -- increase or decrease the auto-detected migration -- costs for all entries of the migration matrix. -- E.g. migration_factor=150 will increase migration -- costs by 50%. (and thus the scheduler will be less -- eager migrating cache-hot tasks) -- migration_factor=80 will decrease migration costs -- by 20%. 
(thus the scheduler will be more eager to -- migrate tasks) -- -- WARNING: using the wrong values here can break -- scheduler performance, so it's only for scheduler -- development purposes, not production environments. -- - mousedev.tap_time= - [MOUSE] Maximum time between finger touching and - leaving touchpad surface for touch to be considered -Index: linux-cfs-2.6.20.8.q/Documentation/sched-design-CFS.txt -=================================================================== ---- /dev/null -+++ linux-cfs-2.6.20.8.q/Documentation/sched-design-CFS.txt -@@ -0,0 +1,107 @@ -+[announce] [patch] Modular Scheduler Core and Completely Fair Scheduler [CFS] -+ -+i'm pleased to announce the first release of the "Modular Scheduler Core -+and Completely Fair Scheduler [CFS]" patchset: -+ -+ http://redhat.com/~mingo/cfs-scheduler/ -+ -+This project is a complete rewrite of the Linux task scheduler. My goal -+is to address various feature requests and to fix deficiencies in the -+vanilla scheduler that were suggested/found in the past few years, both -+for desktop scheduling and for server scheduling workloads. -+ -+[ QuickStart: apply the patch, recompile, reboot. The new scheduler -+ will be active by default and all tasks will default to the -+ SCHED_NORMAL interactive scheduling class. ] -+ -+Highlights are: -+ -+ - the introduction of Scheduling Classes: an extensible hierarchy of -+ scheduler modules. These modules encapsulate scheduling policy -+ details and are handled by the scheduler core without the core -+ code assuming about them too much. -+ -+ - sched_fair.c implements the 'CFS desktop scheduler': it is a -+ replacement for the vanilla scheduler's SCHED_OTHER interactivity -+ code. -+ -+ i'd like to give credit to Con Kolivas for the general approach here: -+ he has proven via RSDL/SD that 'fair scheduling' is possible and that -+ it results in better desktop scheduling. Kudos Con! 
-+ -+ The CFS patch uses a completely different approach and implementation -+ from RSDL/SD. My goal was to make CFS's interactivity quality exceed -+ that of RSDL/SD, which is a high standard to meet :-) Testing -+ feedback is welcome to decide this one way or another. [ and, in any -+ case, all of SD's logic could be added via a kernel/sched_sd.c module -+ as well, if Con is interested in such an approach. ] -+ -+ CFS's design is quite radical: it does not use runqueues, it uses a -+ time-ordered rbtree to build a 'timeline' of future task execution, -+ and thus has no 'array switch' artifacts (by which both the vanilla -+ scheduler and RSDL/SD are affected). -+ -+ CFS uses nanosecond granularity accounting and does not rely on any -+ jiffies or other HZ detail. Thus the CFS scheduler has no notion of -+ 'timeslices' and has no heuristics whatsoever. There is only one -+ central tunable: -+ -+ /proc/sys/kernel/sched_granularity_ns -+ -+ which can be used to tune the scheduler from 'desktop' (low -+ latencies) to 'server' (good batching) workloads. It defaults to a -+ setting suitable for desktop workloads. SCHED_BATCH is handled by the -+ CFS scheduler module too. -+ -+ due to its design, the CFS scheduler is not prone to any of the -+ 'attacks' that exist today against the heuristics of the stock -+ scheduler: fiftyp.c, thud.c, chew.c, ring-test.c, massive_intr.c all -+ work fine and do not impact interactivity and produce the expected -+ behavior. -+ -+ the CFS scheduler has a much stronger handling of nice levels and -+ SCHED_BATCH: both types of workloads should be isolated much more -+ aggressively than under the vanilla scheduler. -+ -+ ( another detail: due to nanosec accounting and timeline sorting, -+ sched_yield() support is very simple under CFS, and in fact under -+ CFS sched_yield() behaves much better than under any other -+ scheduler i have tested so far. 
) -+ -+ - sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler -+ way than the vanilla scheduler does. It uses 100 runqueues (for all -+ 100 RT priority levels, instead of 140 in the vanilla scheduler) -+ and it needs no expired array. -+ -+ - reworked/sanitized SMP load-balancing: the runqueue-walking -+ assumptions are gone from the load-balancing code now, and -+ iterators of the scheduling modules are used. The balancing code got -+ quite a bit simpler as a result. -+ -+the core scheduler got smaller by more than 700 lines: -+ -+ kernel/sched.c | 1454 ++++++++++++++++------------------------------------------------ -+ 1 file changed, 372 insertions(+), 1082 deletions(-) -+ -+and even adding all the scheduling modules, the total size impact is -+relatively small: -+ -+ 18 files changed, 1454 insertions(+), 1133 deletions(-) -+ -+most of the increase is due to extensive comments. The kernel size -+impact is in fact a small negative: -+ -+ text data bss dec hex filename -+ 23366 4001 24 27391 6aff kernel/sched.o.vanilla -+ 24159 2705 56 26920 6928 kernel/sched.o.CFS -+ -+(this is mainly due to the benefit of getting rid of the expired array -+and its data structure overhead.) -+ -+thanks go to Thomas Gleixner and Arjan van de Ven for review of this -+patchset. 
-+ -+as usual, any sort of feedback, bugreports, fixes and suggestions are -+more than welcome, -+ -+ Ingo -Index: linux-cfs-2.6.20.8.q/Makefile -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/Makefile -+++ linux-cfs-2.6.20.8.q/Makefile -@@ -1,7 +1,7 @@ - VERSION = 2 - PATCHLEVEL = 6 - SUBLEVEL = 20 --EXTRAVERSION = .11 -+EXTRAVERSION = .11-cfs-v9 - NAME = Homicidal Dwarf Hamster - - # *DOCUMENTATION* -Index: linux-cfs-2.6.20.8.q/arch/i386/kernel/smpboot.c -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/arch/i386/kernel/smpboot.c -+++ linux-cfs-2.6.20.8.q/arch/i386/kernel/smpboot.c -@@ -1132,18 +1132,6 @@ exit: - } - #endif - --static void smp_tune_scheduling(void) --{ -- unsigned long cachesize; /* kB */ -- -- if (cpu_khz) { -- cachesize = boot_cpu_data.x86_cache_size; -- -- if (cachesize > 0) -- max_cache_size = cachesize * 1024; -- } --} -- - /* - * Cycle through the processors sending APIC IPIs to boot each. 
- */ -@@ -1172,7 +1160,6 @@ static void __init smp_boot_cpus(unsigne - x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; - - current_thread_info()->cpu = 0; -- smp_tune_scheduling(); - - set_cpu_sibling_map(0); - -Index: linux-cfs-2.6.20.8.q/arch/i386/kernel/syscall_table.S -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/arch/i386/kernel/syscall_table.S -+++ linux-cfs-2.6.20.8.q/arch/i386/kernel/syscall_table.S -@@ -319,3 +319,4 @@ ENTRY(sys_call_table) - .long sys_move_pages - .long sys_getcpu - .long sys_epoll_pwait -+ .long sys_sched_yield_to /* 320 */ -Index: linux-cfs-2.6.20.8.q/arch/i386/kernel/tsc.c -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/arch/i386/kernel/tsc.c -+++ linux-cfs-2.6.20.8.q/arch/i386/kernel/tsc.c -@@ -61,6 +61,8 @@ static inline int check_tsc_unstable(voi - - void mark_tsc_unstable(void) - { -+ sched_clock_unstable_event(); -+ - tsc_unstable = 1; - } - EXPORT_SYMBOL_GPL(mark_tsc_unstable); -@@ -107,13 +109,7 @@ unsigned long long sched_clock(void) - { - unsigned long long this_offset; - -- /* -- * in the NUMA case we dont use the TSC as they are not -- * synchronized across all CPUs. 
-- */ --#ifndef CONFIG_NUMA -- if (!cpu_khz || check_tsc_unstable()) --#endif -+ if (!cpu_khz || !cpu_has_tsc) - /* no locking but a rare wrong value is not a big deal */ - return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); - -Index: linux-cfs-2.6.20.8.q/arch/ia64/kernel/setup.c -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/arch/ia64/kernel/setup.c -+++ linux-cfs-2.6.20.8.q/arch/ia64/kernel/setup.c -@@ -773,7 +773,6 @@ static void __cpuinit - get_max_cacheline_size (void) - { - unsigned long line_size, max = 1; -- unsigned int cache_size = 0; - u64 l, levels, unique_caches; - pal_cache_config_info_t cci; - s64 status; -@@ -803,8 +802,6 @@ get_max_cacheline_size (void) - line_size = 1 << cci.pcci_line_size; - if (line_size > max) - max = line_size; -- if (cache_size < cci.pcci_cache_size) -- cache_size = cci.pcci_cache_size; - if (!cci.pcci_unified) { - status = ia64_pal_cache_config_info(l, - /* cache_type (instruction)= */ 1, -@@ -821,9 +818,6 @@ get_max_cacheline_size (void) - ia64_i_cache_stride_shift = cci.pcci_stride; - } - out: --#ifdef CONFIG_SMP -- max_cache_size = max(max_cache_size, cache_size); --#endif - if (max > ia64_max_cacheline_size) - ia64_max_cacheline_size = max; - } -Index: linux-cfs-2.6.20.8.q/arch/mips/kernel/smp.c -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/arch/mips/kernel/smp.c -+++ linux-cfs-2.6.20.8.q/arch/mips/kernel/smp.c -@@ -245,7 +245,6 @@ void __init smp_prepare_cpus(unsigned in - { - init_new_context(current, &init_mm); - current_thread_info()->cpu = 0; -- smp_tune_scheduling(); - plat_prepare_cpus(max_cpus); - #ifndef CONFIG_HOTPLUG_CPU - cpu_present_map = cpu_possible_map; -Index: linux-cfs-2.6.20.8.q/arch/sparc/kernel/smp.c -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/arch/sparc/kernel/smp.c -+++ linux-cfs-2.6.20.8.q/arch/sparc/kernel/smp.c -@@ -69,16 +69,6 
@@ void __cpuinit smp_store_cpu_info(int id - cpu_data(id).prom_node = cpu_node; - cpu_data(id).mid = cpu_get_hwmid(cpu_node); - -- /* this is required to tune the scheduler correctly */ -- /* is it possible to have CPUs with different cache sizes? */ -- if (id == boot_cpu_id) { -- int cache_line,cache_nlines; -- cache_line = 0x20; -- cache_line = prom_getintdefault(cpu_node, "ecache-line-size", cache_line); -- cache_nlines = 0x8000; -- cache_nlines = prom_getintdefault(cpu_node, "ecache-nlines", cache_nlines); -- max_cache_size = cache_line * cache_nlines; -- } - if (cpu_data(id).mid < 0) - panic("No MID found for CPU%d at node 0x%08d", id, cpu_node); - } -Index: linux-cfs-2.6.20.8.q/arch/sparc64/kernel/smp.c -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/arch/sparc64/kernel/smp.c -+++ linux-cfs-2.6.20.8.q/arch/sparc64/kernel/smp.c -@@ -1293,41 +1293,6 @@ int setup_profiling_timer(unsigned int m - return 0; - } - --static void __init smp_tune_scheduling(void) --{ -- struct device_node *dp; -- int instance; -- unsigned int def, smallest = ~0U; -- -- def = ((tlb_type == hypervisor) ? -- (3 * 1024 * 1024) : -- (4 * 1024 * 1024)); -- -- instance = 0; -- while (!cpu_find_by_instance(instance, &dp, NULL)) { -- unsigned int val; -- -- val = of_getintprop_default(dp, "ecache-size", def); -- if (val < smallest) -- smallest = val; -- -- instance++; -- } -- -- /* Any value less than 256K is nonsense. */ -- if (smallest < (256U * 1024U)) -- smallest = 256 * 1024; -- -- max_cache_size = smallest; -- -- if (smallest < 1U * 1024U * 1024U) -- printk(KERN_INFO "Using max_cache_size of %uKB\n", -- smallest / 1024U); -- else -- printk(KERN_INFO "Using max_cache_size of %uMB\n", -- smallest / 1024U / 1024U); --} -- - /* Constrain the number of cpus to max_cpus. 
*/ - void __init smp_prepare_cpus(unsigned int max_cpus) - { -@@ -1363,7 +1328,6 @@ void __init smp_prepare_cpus(unsigned in - } - - smp_store_cpu_info(boot_cpu_id); -- smp_tune_scheduling(); - } - - /* Set this up early so that things like the scheduler can init -Index: linux-cfs-2.6.20.8.q/fs/proc/array.c -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/fs/proc/array.c -+++ linux-cfs-2.6.20.8.q/fs/proc/array.c -@@ -165,7 +165,6 @@ static inline char * task_state(struct t - rcu_read_lock(); - buffer += sprintf(buffer, - "State:\t%s\n" -- "SleepAVG:\t%lu%%\n" - "Tgid:\t%d\n" - "Pid:\t%d\n" - "PPid:\t%d\n" -@@ -173,9 +172,8 @@ static inline char * task_state(struct t - "Uid:\t%d\t%d\t%d\t%d\n" - "Gid:\t%d\t%d\t%d\t%d\n", - get_task_state(p), -- (p->sleep_avg/1024)*100/(1020000000/1024), -- p->tgid, p->pid, -- pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, -+ p->tgid, p->pid, -+ pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, - pid_alive(p) && p->ptrace ? 
rcu_dereference(p->parent)->pid : 0, - p->uid, p->euid, p->suid, p->fsuid, - p->gid, p->egid, p->sgid, p->fsgid); -@@ -312,6 +310,11 @@ int proc_pid_status(struct task_struct * - return buffer - orig; - } - -+int proc_pid_sched(struct task_struct *task, char *buffer) -+{ -+ return sched_print_task_state(task, buffer) - buffer; -+} -+ - static int do_task_stat(struct task_struct *task, char * buffer, int whole) - { - unsigned long vsize, eip, esp, wchan = ~0UL; -Index: linux-cfs-2.6.20.8.q/fs/proc/base.c -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/fs/proc/base.c -+++ linux-cfs-2.6.20.8.q/fs/proc/base.c -@@ -1839,6 +1839,7 @@ static struct pid_entry tgid_base_stuff[ - INF("environ", S_IRUSR, pid_environ), - INF("auxv", S_IRUSR, pid_auxv), - INF("status", S_IRUGO, pid_status), -+ INF("sched", S_IRUGO, pid_sched), - INF("cmdline", S_IRUGO, pid_cmdline), - INF("stat", S_IRUGO, tgid_stat), - INF("statm", S_IRUGO, pid_statm), -@@ -2121,6 +2122,7 @@ static struct pid_entry tid_base_stuff[] - INF("environ", S_IRUSR, pid_environ), - INF("auxv", S_IRUSR, pid_auxv), - INF("status", S_IRUGO, pid_status), -+ INF("sched", S_IRUGO, pid_sched), - INF("cmdline", S_IRUGO, pid_cmdline), - INF("stat", S_IRUGO, tid_stat), - INF("statm", S_IRUGO, pid_statm), -Index: linux-cfs-2.6.20.8.q/fs/proc/internal.h -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/fs/proc/internal.h -+++ linux-cfs-2.6.20.8.q/fs/proc/internal.h -@@ -36,6 +36,7 @@ extern int proc_exe_link(struct inode *, - extern int proc_tid_stat(struct task_struct *, char *); - extern int proc_tgid_stat(struct task_struct *, char *); - extern int proc_pid_status(struct task_struct *, char *); -+extern int proc_pid_sched(struct task_struct *, char *); - extern int proc_pid_statm(struct task_struct *, char *); - - extern struct file_operations proc_maps_operations; -Index: linux-cfs-2.6.20.8.q/include/asm-generic/bitops/sched.h 
-=================================================================== ---- linux-cfs-2.6.20.8.q.orig/include/asm-generic/bitops/sched.h -+++ linux-cfs-2.6.20.8.q/include/asm-generic/bitops/sched.h -@@ -6,28 +6,23 @@ - - /* - * Every architecture must define this function. It's the fastest -- * way of searching a 140-bit bitmap where the first 100 bits are -- * unlikely to be set. It's guaranteed that at least one of the 140 -- * bits is cleared. -+ * way of searching a 100-bit bitmap. It's guaranteed that at least -+ * one of the 100 bits is cleared. - */ - static inline int sched_find_first_bit(const unsigned long *b) - { - #if BITS_PER_LONG == 64 -- if (unlikely(b[0])) -+ if (b[0]) - return __ffs(b[0]); -- if (likely(b[1])) -- return __ffs(b[1]) + 64; -- return __ffs(b[2]) + 128; -+ return __ffs(b[1]) + 64; - #elif BITS_PER_LONG == 32 -- if (unlikely(b[0])) -+ if (b[0]) - return __ffs(b[0]); -- if (unlikely(b[1])) -+ if (b[1]) - return __ffs(b[1]) + 32; -- if (unlikely(b[2])) -+ if (b[2]) - return __ffs(b[2]) + 64; -- if (b[3]) -- return __ffs(b[3]) + 96; -- return __ffs(b[4]) + 128; -+ return __ffs(b[3]) + 96; - #else - #error BITS_PER_LONG not defined - #endif -Index: linux-cfs-2.6.20.8.q/include/asm-i386/topology.h -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/include/asm-i386/topology.h -+++ linux-cfs-2.6.20.8.q/include/asm-i386/topology.h -@@ -85,7 +85,6 @@ static inline int node_to_first_cpu(int - .idle_idx = 1, \ - .newidle_idx = 2, \ - .wake_idx = 1, \ -- .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_EXEC \ - | SD_BALANCE_FORK \ -Index: linux-cfs-2.6.20.8.q/include/asm-i386/unistd.h -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/include/asm-i386/unistd.h -+++ linux-cfs-2.6.20.8.q/include/asm-i386/unistd.h -@@ -325,10 +325,11 @@ - #define __NR_move_pages 317 - #define __NR_getcpu 318 - #define __NR_epoll_pwait 319 -+#define 
__NR_sched_yield_to 320 - - #ifdef __KERNEL__ - --#define NR_syscalls 320 -+#define NR_syscalls 321 - - #define __ARCH_WANT_IPC_PARSE_VERSION - #define __ARCH_WANT_OLD_READDIR -Index: linux-cfs-2.6.20.8.q/include/asm-ia64/topology.h -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/include/asm-ia64/topology.h -+++ linux-cfs-2.6.20.8.q/include/asm-ia64/topology.h -@@ -65,7 +65,6 @@ void build_cpu_to_node_map(void); - .max_interval = 4, \ - .busy_factor = 64, \ - .imbalance_pct = 125, \ -- .per_cpu_gain = 100, \ - .cache_nice_tries = 2, \ - .busy_idx = 2, \ - .idle_idx = 1, \ -@@ -97,7 +96,6 @@ void build_cpu_to_node_map(void); - .newidle_idx = 0, /* unused */ \ - .wake_idx = 1, \ - .forkexec_idx = 1, \ -- .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_EXEC \ - | SD_BALANCE_FORK \ -Index: linux-cfs-2.6.20.8.q/include/asm-mips/mach-ip27/topology.h -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/include/asm-mips/mach-ip27/topology.h -+++ linux-cfs-2.6.20.8.q/include/asm-mips/mach-ip27/topology.h -@@ -28,7 +28,6 @@ extern unsigned char __node_distances[MA - .busy_factor = 32, \ - .imbalance_pct = 125, \ - .cache_nice_tries = 1, \ -- .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_EXEC \ - | SD_WAKE_BALANCE, \ -Index: linux-cfs-2.6.20.8.q/include/asm-powerpc/topology.h -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/include/asm-powerpc/topology.h -+++ linux-cfs-2.6.20.8.q/include/asm-powerpc/topology.h -@@ -57,7 +57,6 @@ static inline int pcibus_to_node(struct - .busy_factor = 32, \ - .imbalance_pct = 125, \ - .cache_nice_tries = 1, \ -- .per_cpu_gain = 100, \ - .busy_idx = 3, \ - .idle_idx = 1, \ - .newidle_idx = 2, \ -Index: linux-cfs-2.6.20.8.q/include/asm-x86_64/topology.h -=================================================================== ---- 
linux-cfs-2.6.20.8.q.orig/include/asm-x86_64/topology.h -+++ linux-cfs-2.6.20.8.q/include/asm-x86_64/topology.h -@@ -43,7 +43,6 @@ extern int __node_distance(int, int); - .newidle_idx = 0, \ - .wake_idx = 1, \ - .forkexec_idx = 1, \ -- .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_FORK \ - | SD_BALANCE_EXEC \ -Index: linux-cfs-2.6.20.8.q/include/asm-x86_64/unistd.h -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/include/asm-x86_64/unistd.h -+++ linux-cfs-2.6.20.8.q/include/asm-x86_64/unistd.h -@@ -619,8 +619,10 @@ __SYSCALL(__NR_sync_file_range, sys_sync - __SYSCALL(__NR_vmsplice, sys_vmsplice) - #define __NR_move_pages 279 - __SYSCALL(__NR_move_pages, sys_move_pages) -+#define __NR_sched_yield_to 280 -+__SYSCALL(__NR_sched_yield_to, sys_sched_yield_to) - --#define __NR_syscall_max __NR_move_pages -+#define __NR_syscall_max __NR_sched_yield_to - - #ifndef __NO_STUBS - #define __ARCH_WANT_OLD_READDIR -Index: linux-cfs-2.6.20.8.q/include/linux/hardirq.h -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/include/linux/hardirq.h -+++ linux-cfs-2.6.20.8.q/include/linux/hardirq.h -@@ -79,6 +79,19 @@ - #endif - - #ifdef CONFIG_PREEMPT -+# define PREEMPT_CHECK_OFFSET 1 -+#else -+# define PREEMPT_CHECK_OFFSET 0 -+#endif -+ -+/* -+ * Check whether we were atomic before we did preempt_disable(): -+ * (used by the scheduler) -+ */ -+#define in_atomic_preempt_off() \ -+ ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) -+ -+#ifdef CONFIG_PREEMPT - # define preemptible() (preempt_count() == 0 && !irqs_disabled()) - # define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1) - #else -Index: linux-cfs-2.6.20.8.q/include/linux/ktime.h -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/include/linux/ktime.h -+++ linux-cfs-2.6.20.8.q/include/linux/ktime.h -@@ -274,4 +274,6 @@ extern void ktime_get_ts(struct timespec - 
/* Get the real (wall-) time in timespec format: */ - #define ktime_get_real_ts(ts) getnstimeofday(ts) - -+extern ktime_t ktime_get(void); -+ - #endif -Index: linux-cfs-2.6.20.8.q/include/linux/sched.h -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/include/linux/sched.h -+++ linux-cfs-2.6.20.8.q/include/linux/sched.h -@@ -2,7 +2,6 @@ - #define _LINUX_SCHED_H - - #include /* For AT_VECTOR_SIZE */ -- - /* - * cloning flags: - */ -@@ -37,6 +36,8 @@ - - #ifdef __KERNEL__ - -+#include /* For run_node */ -+ - struct sched_param { - int sched_priority; - }; -@@ -196,13 +197,13 @@ extern void init_idle(struct task_struct - extern cpumask_t nohz_cpu_mask; - - /* -- * Only dump TASK_* tasks. (-1 for all tasks) -+ * Only dump TASK_* tasks. (0 for all tasks) - */ - extern void show_state_filter(unsigned long state_filter); - - static inline void show_state(void) - { -- show_state_filter(-1); -+ show_state_filter(0); - } - - extern void show_regs(struct pt_regs *); -@@ -464,7 +465,7 @@ struct signal_struct { - * from jiffies_to_ns(utime + stime) if sched_clock uses something - * other than jiffies.) 
- */ -- unsigned long long sched_time; -+ unsigned long long sum_sched_runtime; - - /* - * We don't bother to synchronize most readers of this at all, -@@ -524,6 +525,7 @@ struct signal_struct { - #define MAX_RT_PRIO MAX_USER_RT_PRIO - - #define MAX_PRIO (MAX_RT_PRIO + 40) -+#define DEFAULT_PRIO (MAX_RT_PRIO + 20) - - #define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO) - #define rt_task(p) rt_prio((p)->prio) -@@ -635,7 +637,14 @@ enum idle_type - /* - * sched-domains (multiprocessor balancing) declarations: - */ --#define SCHED_LOAD_SCALE 128UL /* increase resolution of load */ -+ -+/* -+ * Increase resolution of nice-level calculations: -+ */ -+#define SCHED_LOAD_SHIFT 10 -+#define SCHED_LOAD_SCALE (1UL << SCHED_LOAD_SHIFT) -+ -+#define SCHED_LOAD_SCALE_FUZZ (SCHED_LOAD_SCALE >> 5) - - #ifdef CONFIG_SMP - #define SD_LOAD_BALANCE 1 /* Do load balancing on this domain. */ -@@ -684,7 +693,6 @@ struct sched_domain { - unsigned int imbalance_pct; /* No balance until over watermark */ - unsigned long long cache_hot_time; /* Task considered cache hot (ns) */ - unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ -- unsigned int per_cpu_gain; /* CPU % gained by adding domain cpus */ - unsigned int busy_idx; - unsigned int idle_idx; - unsigned int newidle_idx; -@@ -733,12 +741,6 @@ struct sched_domain { - extern int partition_sched_domains(cpumask_t *partition1, - cpumask_t *partition2); - --/* -- * Maximum cache size the migration-costs auto-tuning code will -- * search from: -- */ --extern unsigned int max_cache_size; -- - #endif /* CONFIG_SMP */ - - -@@ -789,14 +791,28 @@ struct mempolicy; - struct pipe_inode_info; - struct uts_namespace; - --enum sleep_type { -- SLEEP_NORMAL, -- SLEEP_NONINTERACTIVE, -- SLEEP_INTERACTIVE, -- SLEEP_INTERRUPTED, --}; -+struct rq; - --struct prio_array; -+struct sched_class { -+ struct sched_class *next; -+ -+ void (*enqueue_task) (struct rq *rq, struct task_struct *p, -+ int wakeup, u64 now); -+ void (*dequeue_task) 
(struct rq *rq, struct task_struct *p, -+ int sleep, u64 now); -+ void (*yield_task) (struct rq *rq, struct task_struct *p, -+ struct task_struct *p_to); -+ -+ void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); -+ -+ struct task_struct * (*pick_next_task) (struct rq *rq, u64 now); -+ void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now); -+ -+ struct task_struct * (*load_balance_start) (struct rq *rq); -+ struct task_struct * (*load_balance_next) (struct rq *rq); -+ void (*task_tick) (struct rq *rq, struct task_struct *p); -+ void (*task_new) (struct rq *rq, struct task_struct *p); -+}; - - struct task_struct { - volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ -@@ -813,26 +829,45 @@ struct task_struct { - #endif - #endif - int load_weight; /* for niceness load balancing purposes */ -+ int load_shift; -+ - int prio, static_prio, normal_prio; -+ int on_rq; - struct list_head run_list; -- struct prio_array *array; -+ struct rb_node run_node; - - unsigned short ioprio; - #ifdef CONFIG_BLK_DEV_IO_TRACE - unsigned int btrace_seq; - #endif -- unsigned long sleep_avg; -- unsigned long long timestamp, last_ran; -- unsigned long long sched_time; /* sched_clock time spent running */ -- enum sleep_type sleep_type; -+ /* CFS scheduling class statistics fields: */ -+ u64 wait_start_fair; -+ u64 wait_start; -+ u64 exec_start; -+ u64 sleep_start; -+ u64 block_start; -+ u64 sleep_max; -+ u64 block_max; -+ u64 exec_max; -+ u64 wait_max; -+ u64 last_ran; -+ -+ s64 wait_runtime; -+ u64 sum_exec_runtime; -+ s64 fair_key; -+ s64 sum_wait_runtime; - - unsigned long policy; - cpumask_t cpus_allowed; -- unsigned int time_slice, first_time_slice; -+ unsigned int time_slice; -+ struct sched_class *sched_class; -+ -+ s64 min_wait_runtime; - - #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) - struct sched_info sched_info; - #endif -+ u64 nr_switches; - - struct list_head tasks; - /* -@@ -1195,8 +1230,9 @@ static inline int 
set_cpus_allowed(struc - #endif - - extern unsigned long long sched_clock(void); -+extern void sched_clock_unstable_event(void); - extern unsigned long long --current_sched_time(const struct task_struct *current_task); -+current_sched_runtime(const struct task_struct *current_task); - - /* sched_exec is called by processes performing an exec */ - #ifdef CONFIG_SMP -@@ -1212,6 +1248,13 @@ static inline void idle_task_exit(void) - #endif - - extern void sched_idle_next(void); -+extern char * sched_print_task_state(struct task_struct *p, char *buffer); -+ -+extern unsigned int sysctl_sched_granularity; -+extern unsigned int sysctl_sched_wakeup_granularity; -+extern unsigned int sysctl_sched_sleep_history_max; -+extern unsigned int sysctl_sched_child_runs_first; -+extern unsigned int sysctl_sched_load_smoothing; - - #ifdef CONFIG_RT_MUTEXES - extern int rt_mutex_getprio(struct task_struct *p); -@@ -1290,8 +1333,7 @@ extern void FASTCALL(wake_up_new_task(st - #else - static inline void kick_process(struct task_struct *tsk) { } - #endif --extern void FASTCALL(sched_fork(struct task_struct * p, int clone_flags)); --extern void FASTCALL(sched_exit(struct task_struct * p)); -+extern void sched_fork(struct task_struct * p, int clone_flags); - - extern int in_group_p(gid_t); - extern int in_egroup_p(gid_t); -Index: linux-cfs-2.6.20.8.q/include/linux/topology.h -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/include/linux/topology.h -+++ linux-cfs-2.6.20.8.q/include/linux/topology.h -@@ -96,7 +96,6 @@ - .busy_factor = 64, \ - .imbalance_pct = 110, \ - .cache_nice_tries = 0, \ -- .per_cpu_gain = 25, \ - .busy_idx = 0, \ - .idle_idx = 0, \ - .newidle_idx = 1, \ -@@ -128,7 +127,6 @@ - .busy_factor = 64, \ - .imbalance_pct = 125, \ - .cache_nice_tries = 1, \ -- .per_cpu_gain = 100, \ - .busy_idx = 2, \ - .idle_idx = 1, \ - .newidle_idx = 2, \ -@@ -159,7 +157,6 @@ - .busy_factor = 64, \ - .imbalance_pct = 125, \ - 
.cache_nice_tries = 1, \ -- .per_cpu_gain = 100, \ - .busy_idx = 2, \ - .idle_idx = 1, \ - .newidle_idx = 2, \ -@@ -193,7 +190,6 @@ - .newidle_idx = 0, /* unused */ \ - .wake_idx = 0, /* unused */ \ - .forkexec_idx = 0, /* unused */ \ -- .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE \ - | SD_SERIALIZE, \ - .last_balance = jiffies, \ -Index: linux-cfs-2.6.20.8.q/init/main.c -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/init/main.c -+++ linux-cfs-2.6.20.8.q/init/main.c -@@ -422,7 +422,7 @@ static void noinline rest_init(void) - - /* - * The boot idle thread must execute schedule() -- * at least one to get things moving: -+ * at least once to get things moving: - */ - preempt_enable_no_resched(); - schedule(); -Index: linux-cfs-2.6.20.8.q/kernel/exit.c -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/kernel/exit.c -+++ linux-cfs-2.6.20.8.q/kernel/exit.c -@@ -112,7 +112,7 @@ static void __exit_signal(struct task_st - sig->maj_flt += tsk->maj_flt; - sig->nvcsw += tsk->nvcsw; - sig->nivcsw += tsk->nivcsw; -- sig->sched_time += tsk->sched_time; -+ sig->sum_sched_runtime += tsk->sum_exec_runtime; - sig = NULL; /* Marker for below. 
*/ - } - -@@ -170,7 +170,6 @@ repeat: - zap_leader = (leader->exit_signal == -1); - } - -- sched_exit(p); - write_unlock_irq(&tasklist_lock); - proc_flush_task(p); - release_thread(p); -Index: linux-cfs-2.6.20.8.q/kernel/fork.c -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/kernel/fork.c -+++ linux-cfs-2.6.20.8.q/kernel/fork.c -@@ -874,7 +874,7 @@ static inline int copy_signal(unsigned l - sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; - sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; - sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; -- sig->sched_time = 0; -+ sig->sum_sched_runtime = 0; - INIT_LIST_HEAD(&sig->cpu_timers[0]); - INIT_LIST_HEAD(&sig->cpu_timers[1]); - INIT_LIST_HEAD(&sig->cpu_timers[2]); -@@ -1037,7 +1037,7 @@ static struct task_struct *copy_process( - - p->utime = cputime_zero; - p->stime = cputime_zero; -- p->sched_time = 0; -+ - p->rchar = 0; /* I/O counter: bytes read */ - p->wchar = 0; /* I/O counter: bytes written */ - p->syscr = 0; /* I/O counter: read syscalls */ -Index: linux-cfs-2.6.20.8.q/kernel/hrtimer.c -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/kernel/hrtimer.c -+++ linux-cfs-2.6.20.8.q/kernel/hrtimer.c -@@ -45,7 +45,7 @@ - * - * returns the time in ktime_t format - */ --static ktime_t ktime_get(void) -+ktime_t ktime_get(void) - { - struct timespec now; - -Index: linux-cfs-2.6.20.8.q/kernel/posix-cpu-timers.c -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/kernel/posix-cpu-timers.c -+++ linux-cfs-2.6.20.8.q/kernel/posix-cpu-timers.c -@@ -161,7 +161,7 @@ static inline cputime_t virt_ticks(struc - } - static inline unsigned long long sched_ns(struct task_struct *p) - { -- return (p == current) ? current_sched_time(p) : p->sched_time; -+ return (p == current) ? 
current_sched_runtime(p) : p->sum_exec_runtime; - } - - int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) -@@ -246,10 +246,10 @@ static int cpu_clock_sample_group_locked - } while (t != p); - break; - case CPUCLOCK_SCHED: -- cpu->sched = p->signal->sched_time; -+ cpu->sched = p->signal->sum_sched_runtime; - /* Add in each other live thread. */ - while ((t = next_thread(t)) != p) { -- cpu->sched += t->sched_time; -+ cpu->sched += t->sum_exec_runtime; - } - cpu->sched += sched_ns(p); - break; -@@ -417,7 +417,7 @@ int posix_cpu_timer_del(struct k_itimer - */ - static void cleanup_timers(struct list_head *head, - cputime_t utime, cputime_t stime, -- unsigned long long sched_time) -+ unsigned long long sum_exec_runtime) - { - struct cpu_timer_list *timer, *next; - cputime_t ptime = cputime_add(utime, stime); -@@ -446,10 +446,10 @@ static void cleanup_timers(struct list_h - ++head; - list_for_each_entry_safe(timer, next, head, entry) { - list_del_init(&timer->entry); -- if (timer->expires.sched < sched_time) { -+ if (timer->expires.sched < sum_exec_runtime) { - timer->expires.sched = 0; - } else { -- timer->expires.sched -= sched_time; -+ timer->expires.sched -= sum_exec_runtime; - } - } - } -@@ -462,7 +462,7 @@ static void cleanup_timers(struct list_h - void posix_cpu_timers_exit(struct task_struct *tsk) - { - cleanup_timers(tsk->cpu_timers, -- tsk->utime, tsk->stime, tsk->sched_time); -+ tsk->utime, tsk->stime, tsk->sum_exec_runtime); - - } - void posix_cpu_timers_exit_group(struct task_struct *tsk) -@@ -470,7 +470,7 @@ void posix_cpu_timers_exit_group(struct - cleanup_timers(tsk->signal->cpu_timers, - cputime_add(tsk->utime, tsk->signal->utime), - cputime_add(tsk->stime, tsk->signal->stime), -- tsk->sched_time + tsk->signal->sched_time); -+ tsk->sum_exec_runtime + tsk->signal->sum_sched_runtime); - } - - -@@ -531,7 +531,7 @@ static void process_timer_rebalance(stru - nsleft = max_t(unsigned long long, nsleft, 1); - do { - if 
(likely(!(t->flags & PF_EXITING))) { -- ns = t->sched_time + nsleft; -+ ns = t->sum_exec_runtime + nsleft; - if (t->it_sched_expires == 0 || - t->it_sched_expires > ns) { - t->it_sched_expires = ns; -@@ -999,7 +999,7 @@ static void check_thread_timers(struct t - struct cpu_timer_list *t = list_entry(timers->next, - struct cpu_timer_list, - entry); -- if (!--maxfire || tsk->sched_time < t->expires.sched) { -+ if (!--maxfire || tsk->sum_exec_runtime < t->expires.sched) { - tsk->it_sched_expires = t->expires.sched; - break; - } -@@ -1019,7 +1019,7 @@ static void check_process_timers(struct - int maxfire; - struct signal_struct *const sig = tsk->signal; - cputime_t utime, stime, ptime, virt_expires, prof_expires; -- unsigned long long sched_time, sched_expires; -+ unsigned long long sum_sched_runtime, sched_expires; - struct task_struct *t; - struct list_head *timers = sig->cpu_timers; - -@@ -1039,12 +1039,12 @@ static void check_process_timers(struct - */ - utime = sig->utime; - stime = sig->stime; -- sched_time = sig->sched_time; -+ sum_sched_runtime = sig->sum_sched_runtime; - t = tsk; - do { - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); -- sched_time += t->sched_time; -+ sum_sched_runtime += t->sum_exec_runtime; - t = next_thread(t); - } while (t != tsk); - ptime = cputime_add(utime, stime); -@@ -1085,7 +1085,7 @@ static void check_process_timers(struct - struct cpu_timer_list *t = list_entry(timers->next, - struct cpu_timer_list, - entry); -- if (!--maxfire || sched_time < t->expires.sched) { -+ if (!--maxfire || sum_sched_runtime < t->expires.sched) { - sched_expires = t->expires.sched; - break; - } -@@ -1177,7 +1177,7 @@ static void check_process_timers(struct - virt_left = cputime_sub(virt_expires, utime); - virt_left = cputime_div_non_zero(virt_left, nthreads); - if (sched_expires) { -- sched_left = sched_expires - sched_time; -+ sched_left = sched_expires - sum_sched_runtime; - do_div(sched_left, nthreads); - sched_left = 
max_t(unsigned long long, sched_left, 1); - } else { -@@ -1203,7 +1203,7 @@ static void check_process_timers(struct - t->it_virt_expires = ticks; - } - -- sched = t->sched_time + sched_left; -+ sched = t->sum_exec_runtime + sched_left; - if (sched_expires && (t->it_sched_expires == 0 || - t->it_sched_expires > sched)) { - t->it_sched_expires = sched; -@@ -1295,7 +1295,7 @@ void run_posix_cpu_timers(struct task_st - - if (UNEXPIRED(prof) && UNEXPIRED(virt) && - (tsk->it_sched_expires == 0 || -- tsk->sched_time < tsk->it_sched_expires)) -+ tsk->sum_exec_runtime < tsk->it_sched_expires)) - return; - - #undef UNEXPIRED -Index: linux-cfs-2.6.20.8.q/kernel/sched.c -=================================================================== ---- linux-cfs-2.6.20.8.q.orig/kernel/sched.c -+++ linux-cfs-2.6.20.8.q/kernel/sched.c -@@ -89,110 +89,13 @@ - */ - #define MIN_TIMESLICE max(5 * HZ / 1000, 1) - #define DEF_TIMESLICE (100 * HZ / 1000) --#define ON_RUNQUEUE_WEIGHT 30 --#define CHILD_PENALTY 95 --#define PARENT_PENALTY 100 --#define EXIT_WEIGHT 3 --#define PRIO_BONUS_RATIO 25 --#define MAX_BONUS (MAX_USER_PRIO * PRIO_BONUS_RATIO / 100) --#define INTERACTIVE_DELTA 2 --#define MAX_SLEEP_AVG (DEF_TIMESLICE * MAX_BONUS) --#define STARVATION_LIMIT (MAX_SLEEP_AVG) --#define NS_MAX_SLEEP_AVG (JIFFIES_TO_NS(MAX_SLEEP_AVG)) -- --/* -- * If a task is 'interactive' then we reinsert it in the active -- * array after it has expired its current timeslice. (it will not -- * continue to run immediately, it will still roundrobin with -- * other interactive tasks.) -- * -- * This part scales the interactivity limit depending on niceness. -- * -- * We scale it linearly, offset by the INTERACTIVE_DELTA delta. 
-- * Here are a few examples of different nice levels: -- * -- * TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0] -- * TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0] -- * TASK_INTERACTIVE( 0): [1,1,1,1,0,0,0,0,0,0,0] -- * TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0] -- * TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0] -- * -- * (the X axis represents the possible -5 ... 0 ... +5 dynamic -- * priority range a task can explore, a value of '1' means the -- * task is rated interactive.) -- * -- * Ie. nice +19 tasks can never get 'interactive' enough to be -- * reinserted into the active array. And only heavily CPU-hog nice -20 -- * tasks will be expired. Default nice 0 tasks are somewhere between, -- * it takes some effort for them to get interactive, but it's not -- * too hard. -- */ -- --#define CURRENT_BONUS(p) \ -- (NS_TO_JIFFIES((p)->sleep_avg) * MAX_BONUS / \ -- MAX_SLEEP_AVG) -- --#define GRANULARITY (10 * HZ / 1000 ? : 1) -- --#ifdef CONFIG_SMP --#define TIMESLICE_GRANULARITY(p) (GRANULARITY * \ -- (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \ -- num_online_cpus()) --#else --#define TIMESLICE_GRANULARITY(p) (GRANULARITY * \ -- (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? 
: 1) - 1))) --#endif -- --#define SCALE(v1,v1_max,v2_max) \ -- (v1) * (v2_max) / (v1_max) -- --#define DELTA(p) \ -- (SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \ -- INTERACTIVE_DELTA) -- --#define TASK_INTERACTIVE(p) \ -- ((p)->prio <= (p)->static_prio - DELTA(p)) -- --#define INTERACTIVE_SLEEP(p) \ -- (JIFFIES_TO_NS(MAX_SLEEP_AVG * \ -- (MAX_BONUS / 2 + DELTA((p)) + 1) / MAX_BONUS - 1)) -- --#define TASK_PREEMPTS_CURR(p, rq) \ -- ((p)->prio < (rq)->curr->prio) -- --#define SCALE_PRIO(x, prio) \ -- max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE) -- --static unsigned int static_prio_timeslice(int static_prio) --{ -- if (static_prio < NICE_TO_PRIO(0)) -- return SCALE_PRIO(DEF_TIMESLICE * 4, static_prio); -- else -- return SCALE_PRIO(DEF_TIMESLICE, static_prio); --} -- --/* -- * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ] -- * to time slice values: [800ms ... 100ms ... 5ms] -- * -- * The higher a thread's priority, the bigger timeslices -- * it gets during one round of execution. But even the lowest -- * priority thread gets MIN_TIMESLICE worth of execution time. -- */ -- --static inline unsigned int task_timeslice(struct task_struct *p) --{ -- return static_prio_timeslice(p->static_prio); --} - - /* -- * These are the runqueue data structures: -+ * This is the priority-queue data structure of the RT scheduling class: - */ -- - struct prio_array { -- unsigned int nr_active; -- DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */ -- struct list_head queue[MAX_PRIO]; -+ DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */ -+ struct list_head queue[MAX_RT_PRIO]; - }; - - /* -@@ -209,12 +112,13 @@ struct rq { - * nr_running and cpu_load should be in the same cacheline because - * remote CPUs use both these fields when doing load calculation. 
- */ -- unsigned long nr_running; -+ long nr_running; - unsigned long raw_weighted_load; --#ifdef CONFIG_SMP -- unsigned long cpu_load[3]; --#endif -- unsigned long long nr_switches; -+ #define CPU_LOAD_IDX_MAX 5 -+ unsigned long cpu_load[CPU_LOAD_IDX_MAX]; -+ -+ u64 nr_switches; -+ unsigned long nr_load_updates; - - /* - * This is part of a global counter where only the total sum -@@ -224,14 +128,29 @@ struct rq { - */ - unsigned long nr_uninterruptible; - -- unsigned long expired_timestamp; -- /* Cached timestamp set by update_cpu_clock() */ -- unsigned long long most_recent_timestamp; - struct task_struct *curr, *idle; - unsigned long next_balance; - struct mm_struct *prev_mm; -- struct prio_array *active, *expired, arrays[2]; -- int best_expired_prio; -+ -+ u64 clock, prev_clock_raw; -+ s64 clock_max_delta; -+ u64 fair_clock, prev_fair_clock; -+ u64 exec_clock, prev_exec_clock; -+ u64 wait_runtime; -+ -+ unsigned int clock_warps; -+ unsigned int clock_unstable_events; -+ -+ struct sched_class *load_balance_class; -+ -+ struct prio_array active; -+ int rt_load_balance_idx; -+ struct list_head *rt_load_balance_head, *rt_load_balance_curr; -+ -+ struct rb_root tasks_timeline; -+ struct rb_node *rb_leftmost; -+ struct rb_node *rb_load_balance_curr; -+ - atomic_t nr_iowait; - - #ifdef CONFIG_SMP -@@ -268,7 +187,107 @@ struct rq { - struct lock_class_key rq_lock_key; - }; - --static DEFINE_PER_CPU(struct rq, runqueues); -+static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp; -+ -+static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) -+{ -+ rq->curr->sched_class->check_preempt_curr(rq, p); -+} -+ -+#define SCALE_PRIO(x, prio) \ -+ max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE) -+ -+/* -+ * static_prio_timeslice() scales user-nice values [ -20 ... 0 ... 19 ] -+ * to time slice values: [800ms ... 100ms ... 
5ms] -+ */ -+static unsigned int static_prio_timeslice(int static_prio) -+{ -+ if (static_prio == NICE_TO_PRIO(19)) -+ return 1; -+ -+ if (static_prio < NICE_TO_PRIO(0)) -+ return SCALE_PRIO(DEF_TIMESLICE * 4, static_prio); -+ else -+ return SCALE_PRIO(DEF_TIMESLICE, static_prio); -+} -+ -+/* -+ * Print out various scheduling related per-task fields: -+ */ -+char * sched_print_task_state(struct task_struct *p, char *buffer) -+{ -+ struct rq *this_rq = &per_cpu(runqueues, raw_smp_processor_id()); -+ unsigned long long t0, t1; -+ -+#define P(F) \ -+ buffer += sprintf(buffer, "%-25s:%20Ld\n", #F, (long long)p->F) -+ -+ P(wait_start); -+ P(wait_start_fair); -+ P(exec_start); -+ P(sleep_start); -+ P(block_start); -+ P(sleep_max); -+ P(block_max); -+ P(exec_max); -+ P(wait_max); -+ P(min_wait_runtime); -+ P(last_ran); -+ P(wait_runtime); -+ P(sum_exec_runtime); -+#undef P -+ -+ t0 = sched_clock(); -+ t1 = sched_clock(); -+ buffer += sprintf(buffer, "%-25s:%20Ld\n", "clock-delta", -+ (long long)t1-t0); -+ buffer += sprintf(buffer, "%-25s:%20Ld\n", "rq-wait_runtime", -+ (long long)this_rq->wait_runtime); -+ buffer += sprintf(buffer, "%-25s:%20Ld\n", "rq-exec_clock", -+ (long long)this_rq->exec_clock); -+ buffer += sprintf(buffer, "%-25s:%20Ld\n", "rq-fair_clock", -+ (long long)this_rq->fair_clock); -+ buffer += sprintf(buffer, "%-25s:%20Ld\n", "rq-clock", -+ (long long)this_rq->clock); -+ buffer += sprintf(buffer, "%-25s:%20Ld\n", "rq-prev_clock_raw", -+ (long long)this_rq->prev_clock_raw); -+ buffer += sprintf(buffer, "%-25s:%20Ld\n", "rq-clock_max_delta", -+ (long long)this_rq->clock_max_delta); -+ buffer += sprintf(buffer, "%-25s:%20u\n", "rq-clock_warps", -+ this_rq->clock_warps); -+ buffer += sprintf(buffer, "%-25s:%20u\n", "rq-clock_unstable_events", -+ this_rq->clock_unstable_events); -+ return buffer; -+} -+ -+/* -+ * Per-runqueue clock, as finegrained as the platform can give us: -+ */ -+static inline unsigned long long __rq_clock(struct rq *rq) -+{ -+ u64 now = 
sched_clock(); -+ u64 clock = rq->clock; -+ u64 prev_raw = rq->prev_clock_raw; -+ s64 delta = now - prev_raw; -+ -+ /* -+ * Protect against sched_clock() occasionally going backwards: -+ */ -+ if (unlikely(delta < 0)) { -+ clock++; -+ rq->clock_warps++; -+ } else { -+ if (unlikely(delta > rq->clock_max_delta)) -+ rq->clock_max_delta = delta; -+ clock += delta; -+ } -+ -+ rq->prev_clock_raw = now; -+ rq->clock = clock; -+ -+ return clock; -+} - - static inline int cpu_of(struct rq *rq) - { -@@ -279,6 +298,16 @@ static inline int cpu_of(struct rq *rq) - #endif - } - -+static inline unsigned long long rq_clock(struct rq *rq) -+{ -+ int this_cpu = smp_processor_id(); -+ -+ if (this_cpu == cpu_of(rq)) -+ return __rq_clock(rq); -+ -+ return rq->clock; -+} -+ - /* - * The domain tree (rq->sd) is protected by RCU's quiescent state transition. - * See detach_destroy_domains: synchronize_sched for details. -@@ -423,134 +452,6 @@ static inline void task_rq_unlock(struct - spin_unlock_irqrestore(&rq->lock, *flags); - } - --#ifdef CONFIG_SCHEDSTATS --/* -- * bump this up when changing the output format or the meaning of an existing -- * format, so that tools can adapt (or abort) -- */ --#define SCHEDSTAT_VERSION 14 -- --static int show_schedstat(struct seq_file *seq, void *v) --{ -- int cpu; -- -- seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); -- seq_printf(seq, "timestamp %lu\n", jiffies); -- for_each_online_cpu(cpu) { -- struct rq *rq = cpu_rq(cpu); --#ifdef CONFIG_SMP -- struct sched_domain *sd; -- int dcnt = 0; --#endif -- -- /* runqueue-specific stats */ -- seq_printf(seq, -- "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", -- cpu, rq->yld_both_empty, -- rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt, -- rq->sched_switch, rq->sched_cnt, rq->sched_goidle, -- rq->ttwu_cnt, rq->ttwu_local, -- rq->rq_sched_info.cpu_time, -- rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt); -- -- seq_printf(seq, "\n"); -- --#ifdef CONFIG_SMP -- /* domain-specific stats */ -- 
preempt_disable(); -- for_each_domain(cpu, sd) { -- enum idle_type itype; -- char mask_str[NR_CPUS]; -- -- cpumask_scnprintf(mask_str, NR_CPUS, sd->span); -- seq_printf(seq, "domain%d %s", dcnt++, mask_str); -- for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES; -- itype++) { -- seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu " -- "%lu", -- sd->lb_cnt[itype], -- sd->lb_balanced[itype], -- sd->lb_failed[itype], -- sd->lb_imbalance[itype], -- sd->lb_gained[itype], -- sd->lb_hot_gained[itype], -- sd->lb_nobusyq[itype], -- sd->lb_nobusyg[itype]); -- } -- seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu" -- " %lu %lu %lu\n", -- sd->alb_cnt, sd->alb_failed, sd->alb_pushed, -- sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed, -- sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed, -- sd->ttwu_wake_remote, sd->ttwu_move_affine, -- sd->ttwu_move_balance); -- } -- preempt_enable(); --#endif -- } -- return 0; --} -- --static int schedstat_open(struct inode *inode, struct file *file) --{ -- unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); -- char *buf = kmalloc(size, GFP_KERNEL); -- struct seq_file *m; -- int res; -- -- if (!buf) -- return -ENOMEM; -- res = single_open(file, show_schedstat, NULL); -- if (!res) { -- m = file->private_data; -- m->buf = buf; -- m->size = size; -- } else -- kfree(buf); -- return res; --} -- --const struct file_operations proc_schedstat_operations = { -- .open = schedstat_open, -- .read = seq_read, -- .llseek = seq_lseek, -- .release = single_release, --}; -- --/* -- * Expects runqueue lock to be held for atomicity of update -- */ --static inline void --rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies) --{ -- if (rq) { -- rq->rq_sched_info.run_delay += delta_jiffies; -- rq->rq_sched_info.pcnt++; -- } --} -- --/* -- * Expects runqueue lock to be held for atomicity of update -- */ --static inline void --rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies) --{ -- if (rq) -- rq->rq_sched_info.cpu_time += delta_jiffies; 
--} --# define schedstat_inc(rq, field) do { (rq)->field++; } while (0) --# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) --#else /* !CONFIG_SCHEDSTATS */ --static inline void --rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies) --{} --static inline void --rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies) --{} --# define schedstat_inc(rq, field) do { } while (0) --# define schedstat_add(rq, field, amt) do { } while (0) --#endif -- - /* - * this_rq_lock - lock this runqueue and disable interrupts. - */ -@@ -566,178 +467,60 @@ static inline struct rq *this_rq_lock(vo - return rq; - } - --#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) --/* -- * Called when a process is dequeued from the active array and given -- * the cpu. We should note that with the exception of interactive -- * tasks, the expired queue will become the active queue after the active -- * queue is empty, without explicitly dequeuing and requeuing tasks in the -- * expired queue. (Interactive tasks may be requeued directly to the -- * active queue, thus delaying tasks in the expired queue from running; -- * see scheduler_tick()). -- * -- * This function is only called from sched_info_arrive(), rather than -- * dequeue_task(). Even though a task may be queued and dequeued multiple -- * times as it is shuffled about, we're really interested in knowing how -- * long it was from the *first* time it was queued to the time that it -- * finally hit a cpu. -- */ --static inline void sched_info_dequeued(struct task_struct *t) --{ -- t->sched_info.last_queued = 0; --} -- - /* -- * Called when a task finally hits the cpu. We can now calculate how -- * long it was waiting to run. We also note when it began so that we -- * can keep stats on how long its timeslice is. 
-+ * CPU frequency is/was unstable - start new by setting prev_clock_raw: - */ --static void sched_info_arrive(struct task_struct *t) -+void sched_clock_unstable_event(void) - { -- unsigned long now = jiffies, delta_jiffies = 0; -- -- if (t->sched_info.last_queued) -- delta_jiffies = now - t->sched_info.last_queued; -- sched_info_dequeued(t); -- t->sched_info.run_delay += delta_jiffies; -- t->sched_info.last_arrival = now; -- t->sched_info.pcnt++; -+ unsigned long flags; -+ struct rq *rq; - -- rq_sched_info_arrive(task_rq(t), delta_jiffies); -+ rq = task_rq_lock(current, &flags); -+ rq->prev_clock_raw = sched_clock(); -+ rq->clock_unstable_events++; -+ task_rq_unlock(rq, &flags); - } - - /* -- * Called when a process is queued into either the active or expired -- * array. The time is noted and later used to determine how long we -- * had to wait for us to reach the cpu. Since the expired queue will -- * become the active queue after active queue is empty, without dequeuing -- * and requeuing any tasks, we are interested in queuing to either. It -- * is unusual but not impossible for tasks to be dequeued and immediately -- * requeued in the same or another array: this can happen in sched_yield(), -- * set_user_nice(), and even load_balance() as it moves tasks from runqueue -- * to runqueue. -+ * resched_task - mark a task 'to be rescheduled now'. - * -- * This function is only called from enqueue_task(), but also only updates -- * the timestamp if it is already not set. It's assumed that -- * sched_info_dequeued() will clear that stamp when appropriate. -- */ --static inline void sched_info_queued(struct task_struct *t) --{ -- if (unlikely(sched_info_on())) -- if (!t->sched_info.last_queued) -- t->sched_info.last_queued = jiffies; --} -- --/* -- * Called when a process ceases being the active-running process, either -- * voluntarily or involuntarily. Now we can calculate how long we ran. 
-+ * On UP this means the setting of the need_resched flag, on SMP it -+ * might also involve a cross-CPU call to trigger the scheduler on -+ * the target CPU. - */ --static inline void sched_info_depart(struct task_struct *t) --{ -- unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival; -+#ifdef CONFIG_SMP - -- t->sched_info.cpu_time += delta_jiffies; -- rq_sched_info_depart(task_rq(t), delta_jiffies); --} -+#ifndef tsk_is_polling -+#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) -+#endif - --/* -- * Called when tasks are switched involuntarily due, typically, to expiring -- * their time slice. (This may also be called when switching to or from -- * the idle task.) We are only called when prev != next. -- */ --static inline void --__sched_info_switch(struct task_struct *prev, struct task_struct *next) -+static void resched_task(struct task_struct *p) - { -- struct rq *rq = task_rq(prev); -- -- /* -- * prev now departs the cpu. It's not interesting to record -- * stats about how efficient we were at scheduling the idle -- * process, however. 
-- */ -- if (prev != rq->idle) -- sched_info_depart(prev); -+ int cpu; - -- if (next != rq->idle) -- sched_info_arrive(next); --} --static inline void --sched_info_switch(struct task_struct *prev, struct task_struct *next) --{ -- if (unlikely(sched_info_on())) -- __sched_info_switch(prev, next); --} --#else --#define sched_info_queued(t) do { } while (0) --#define sched_info_switch(t, next) do { } while (0) --#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ -+ assert_spin_locked(&task_rq(p)->lock); - --/* -- * Adding/removing a task to/from a priority array: -- */ --static void dequeue_task(struct task_struct *p, struct prio_array *array) --{ -- array->nr_active--; -- list_del(&p->run_list); -- if (list_empty(array->queue + p->prio)) -- __clear_bit(p->prio, array->bitmap); --} -+ if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) -+ return; - --static void enqueue_task(struct task_struct *p, struct prio_array *array) --{ -- sched_info_queued(p); -- list_add_tail(&p->run_list, array->queue + p->prio); -- __set_bit(p->prio, array->bitmap); -- array->nr_active++; -- p->array = array; --} -+ set_tsk_thread_flag(p, TIF_NEED_RESCHED); - --/* -- * Put task to the end of the run list without the overhead of dequeue -- * followed by enqueue. -- */ --static void requeue_task(struct task_struct *p, struct prio_array *array) --{ -- list_move_tail(&p->run_list, array->queue + p->prio); --} -+ cpu = task_cpu(p); -+ if (cpu == smp_processor_id()) -+ return; - --static inline void --enqueue_task_head(struct task_struct *p, struct prio_array *array) --{ -- list_add(&p->run_list, array->queue + p->prio); -- __set_bit(p->prio, array->bitmap); -- array->nr_active++; -- p->array = array; -+ /* NEED_RESCHED must be visible before we test polling */ -+ smp_mb(); -+ if (!tsk_is_polling(p)) -+ smp_send_reschedule(cpu); - } -- --/* -- * __normal_prio - return the priority that is based on the static -- * priority but is modified by bonuses/penalties. 
-- * -- * We scale the actual sleep average [0 .... MAX_SLEEP_AVG] -- * into the -5 ... 0 ... +5 bonus/penalty range. -- * -- * We use 25% of the full 0...39 priority range so that: -- * -- * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs. -- * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks. -- * -- * Both properties are important to certain workloads. -- */ -- --static inline int __normal_prio(struct task_struct *p) -+#else -+static inline void resched_task(struct task_struct *p) - { -- int bonus, prio; -- -- bonus = CURRENT_BONUS(p) - MAX_BONUS / 2; -- -- prio = p->static_prio - bonus; -- if (prio < MAX_RT_PRIO) -- prio = MAX_RT_PRIO; -- if (prio > MAX_PRIO-1) -- prio = MAX_PRIO-1; -- return prio; -+ assert_spin_locked(&task_rq(p)->lock); -+ set_tsk_need_resched(p); - } -+#endif - - /* - * To aid in avoiding the subversion of "niceness" due to uneven distribution -@@ -761,22 +544,33 @@ static inline int __normal_prio(struct t - #define RTPRIO_TO_LOAD_WEIGHT(rp) \ - (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp)) - -+/* -+ * Nice levels are logarithmic. These are the load shifts assigned -+ * to nice levels, where a step of every 2 nice levels means a -+ * multiplicator of 2: -+ */ -+const int prio_to_load_shift[40] = { -+/* -20 */ 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, -+/* -10 */ 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, -+/* 0 */ 10, 9, 9, 8, 8, 7, 7, 6, 6, 5, -+/* 10 */ 5, 4, 4, 3, 3, 2, 2, 1, 1, 0 -+}; -+ -+static int get_load_shift(struct task_struct *p) -+{ -+ int prio = p->static_prio; -+ -+ if (rt_prio(prio) || p->policy == SCHED_BATCH) -+ return 0; -+ -+ return prio_to_load_shift[prio - MAX_RT_PRIO]; -+} -+ - static void set_load_weight(struct task_struct *p) - { -- if (has_rt_policy(p)) { --#ifdef CONFIG_SMP -- if (p == task_rq(p)->migration_thread) -- /* -- * The migration thread does the actual balancing. -- * Giving its load any weight will skew balancing -- * adversely. 
-- */ -- p->load_weight = 0; -- else --#endif -- p->load_weight = RTPRIO_TO_LOAD_WEIGHT(p->rt_priority); -- } else -- p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio); -+ p->load_shift = get_load_shift(p); -+ p->load_weight = 1 << p->load_shift; -+ p->wait_runtime = 0; - } - - static inline void -@@ -803,6 +597,40 @@ static inline void dec_nr_running(struct - dec_raw_weighted_load(rq, p); - } - -+static void activate_task(struct rq *rq, struct task_struct *p, int wakeup); -+ -+#include "sched_stats.h" -+#include "sched_rt.c" -+#include "sched_fair.c" -+#include "sched_debug.c" -+ -+#define sched_class_highest (&rt_sched_class) -+ -+static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup) -+{ -+ u64 now = rq_clock(rq); -+ -+ sched_info_queued(p); -+ p->sched_class->enqueue_task(rq, p, wakeup, now); -+ p->on_rq = 1; -+} -+ -+static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep) -+{ -+ u64 now = rq_clock(rq); -+ -+ p->sched_class->dequeue_task(rq, p, sleep, now); -+ p->on_rq = 0; -+} -+ -+/* -+ * __normal_prio - return the priority that is based on the static prio -+ */ -+static inline int __normal_prio(struct task_struct *p) -+{ -+ return p->static_prio; -+} -+ - /* - * Calculate the expected normal priority: i.e. priority - * without taking RT-inheritance into account. Might be -@@ -842,210 +670,31 @@ static int effective_prio(struct task_st - } - - /* -- * __activate_task - move a task to the runqueue. -+ * activate_task - move a task to the runqueue. - */ --static void __activate_task(struct task_struct *p, struct rq *rq) -+static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) - { -- struct prio_array *target = rq->active; -- -- if (batch_task(p)) -- target = rq->expired; -- enqueue_task(p, target); -+ enqueue_task(rq, p, wakeup); - inc_nr_running(p, rq); - } - - /* -- * __activate_idle_task - move idle task to the _front_ of runqueue. 
-+ * activate_idle_task - move idle task to the _front_ of runqueue. - */ --static inline void __activate_idle_task(struct task_struct *p, struct rq *rq) -+static inline void activate_idle_task(struct task_struct *p, struct rq *rq) - { -- enqueue_task_head(p, rq->active); -+ enqueue_task(rq, p, 0); - inc_nr_running(p, rq); - } - - /* -- * Recalculate p->normal_prio and p->prio after having slept, -- * updating the sleep-average too: -- */ --static int recalc_task_prio(struct task_struct *p, unsigned long long now) --{ -- /* Caller must always ensure 'now >= p->timestamp' */ -- unsigned long sleep_time = now - p->timestamp; -- -- if (batch_task(p)) -- sleep_time = 0; -- -- if (likely(sleep_time > 0)) { -- /* -- * This ceiling is set to the lowest priority that would allow -- * a task to be reinserted into the active array on timeslice -- * completion. -- */ -- unsigned long ceiling = INTERACTIVE_SLEEP(p); -- -- if (p->mm && sleep_time > ceiling && p->sleep_avg < ceiling) { -- /* -- * Prevents user tasks from achieving best priority -- * with one single large enough sleep. -- */ -- p->sleep_avg = ceiling; -- /* -- * Using INTERACTIVE_SLEEP() as a ceiling places a -- * nice(0) task 1ms sleep away from promotion, and -- * gives it 700ms to round-robin with no chance of -- * being demoted. This is more than generous, so -- * mark this sleep as non-interactive to prevent the -- * on-runqueue bonus logic from intervening should -- * this task not receive cpu immediately. -- */ -- p->sleep_type = SLEEP_NONINTERACTIVE; -- } else { -- /* -- * Tasks waking from uninterruptible sleep are -- * limited in their sleep_avg rise as they -- * are likely to be waiting on I/O -- */ -- if (p->sleep_type == SLEEP