diff options
| author | Marcin Juszkiewicz <marcin@juszkiewicz.com.pl> | 2009-10-21 12:22:20 +0200 |
|---|---|---|
| committer | Marcin Juszkiewicz <marcin@juszkiewicz.com.pl> | 2009-12-14 11:34:58 +0100 |
| commit | 9d1b79b7848e13e1bf80b736671f76144cc508d4 (patch) | |
| tree | c5e1ff0f4b3c2f08444666fdf8348aa8610fc98d | |
| parent | 575cf43aa9df4192aa9125258545e7943a45f4d5 (diff) | |
linux 2.6.23: keep sched-cfs locally updated to 2.6.23.17
| -rw-r--r-- | recipes/linux/linux-2.6.23/sched-cfs-v2.6.23.12-v24.1.patch | 8567 | ||||
| -rw-r--r-- | recipes/linux/linux_2.6.23.bb | 4 |
2 files changed, 8569 insertions, 2 deletions
diff --git a/recipes/linux/linux-2.6.23/sched-cfs-v2.6.23.12-v24.1.patch b/recipes/linux/linux-2.6.23/sched-cfs-v2.6.23.12-v24.1.patch new file mode 100644 index 0000000000..77ee5c8f1d --- /dev/null +++ b/recipes/linux/linux-2.6.23/sched-cfs-v2.6.23.12-v24.1.patch @@ -0,0 +1,8567 @@ +--- + Documentation/sched-design-CFS.txt | 67 + + Makefile | 2 + arch/i386/Kconfig | 11 + drivers/kvm/kvm.h | 10 + fs/pipe.c | 9 + fs/proc/array.c | 21 + fs/proc/base.c | 2 + fs/proc/proc_misc.c | 15 + include/linux/cgroup.h | 12 + include/linux/cpuset.h | 5 + include/linux/kernel.h | 7 + include/linux/kernel_stat.h | 3 + include/linux/nodemask.h | 94 + + include/linux/sched.h | 174 ++ + include/linux/taskstats.h | 7 + include/linux/topology.h | 5 + init/Kconfig | 26 + init/main.c | 3 + kernel/delayacct.c | 8 + kernel/exit.c | 6 + kernel/fork.c | 5 + kernel/ksysfs.c | 8 + kernel/sched.c | 2310 +++++++++++++++++++++++-------------- + kernel/sched_debug.c | 289 +++- + kernel/sched_fair.c | 885 ++++++-------- + kernel/sched_idletask.c | 26 + kernel/sched_rt.c | 54 + kernel/sched_stats.h | 40 + kernel/sysctl.c | 40 + kernel/timer.c | 7 + kernel/tsacct.c | 4 + kernel/user.c | 249 +++ + mm/memory_hotplug.c | 7 + mm/page_alloc.c | 50 + mm/vmscan.c | 4 + net/unix/af_unix.c | 4 + 36 files changed, 2883 insertions(+), 1586 deletions(-) + +--- linux-2.6.23.orig/Documentation/sched-design-CFS.txt ++++ linux-2.6.23/Documentation/sched-design-CFS.txt +@@ -115,5 +115,72 @@ Some implementation details: + - reworked/sanitized SMP load-balancing: the runqueue-walking + assumptions are gone from the load-balancing code now, and + iterators of the scheduling modules are used. The balancing code got + quite a bit simpler as a result. + ++ ++Group scheduler extension to CFS ++================================ ++ ++Normally the scheduler operates on individual tasks and strives to provide ++fair CPU time to each task. 
Sometimes, it may be desirable to group tasks ++and provide fair CPU time to each such task group. For example, it may ++be desirable to first provide fair CPU time to each user on the system ++and then to each task belonging to a user. ++ ++CONFIG_FAIR_GROUP_SCHED strives to achieve exactly that. It lets ++SCHED_NORMAL/BATCH tasks be grouped and divides CPU time fairly among such ++groups. At present, there are two (mutually exclusive) mechanisms to group ++tasks for CPU bandwidth control purposes: ++ ++ - Based on user id (CONFIG_FAIR_USER_SCHED) ++ In this option, tasks are grouped according to their user id. ++ - Based on "cgroup" pseudo filesystem (CONFIG_FAIR_CGROUP_SCHED) ++ This option lets the administrator create arbitrary groups ++ of tasks, using the "cgroup" pseudo filesystem. See ++ Documentation/cgroups.txt for more information about this ++ filesystem. ++ ++Only one of these options to group tasks can be chosen and not both. ++ ++Group scheduler tunables: ++ ++When CONFIG_FAIR_USER_SCHED is defined, a directory is created in sysfs for ++each new user and a "cpu_share" file is added in that directory. ++ ++ # cd /sys/kernel/uids ++ # cat 512/cpu_share # Display user 512's CPU share ++ 1024 ++ # echo 2048 > 512/cpu_share # Modify user 512's CPU share ++ # cat 512/cpu_share # Display user 512's CPU share ++ 2048 ++ # ++ ++CPU bandwidth between two users is divided in the ratio of their CPU shares. ++For example: if you would like user "root" to get twice the bandwidth of user ++"guest", then set the cpu_share for both the users such that "root"'s ++cpu_share is twice "guest"'s cpu_share. ++ ++ ++When CONFIG_FAIR_CGROUP_SCHED is defined, a "cpu.shares" file is created ++for each group created using the pseudo filesystem. 
See example steps ++below to create task groups and modify their CPU share using the "cgroups" ++pseudo filesystem ++ ++ # mkdir /dev/cpuctl ++ # mount -t cgroup -ocpu none /dev/cpuctl ++ # cd /dev/cpuctl ++ ++ # mkdir multimedia # create "multimedia" group of tasks ++ # mkdir browser # create "browser" group of tasks ++ ++ # #Configure the multimedia group to receive twice the CPU bandwidth ++ # #that of browser group ++ ++ # echo 2048 > multimedia/cpu.shares ++ # echo 1024 > browser/cpu.shares ++ ++ # firefox & # Launch firefox and move it to "browser" group ++ # echo <firefox_pid> > browser/tasks ++ ++ # #Launch gmplayer (or your favourite movie player) ++ # echo <movie_player_pid> > multimedia/tasks +--- linux-2.6.23.orig/Makefile ++++ linux-2.6.23/Makefile +@@ -1,9 +1,9 @@ + VERSION = 2 + PATCHLEVEL = 6 + SUBLEVEL = 23 +-EXTRAVERSION = .17 ++EXTRAVERSION = .17-cfs-v24.1 + NAME = Arr Matey! A Hairy Bilge Rat! + + # *DOCUMENTATION* + # To see a list of typical targets execute "make help" + # More info can be located in ./README +--- linux-2.6.23.orig/arch/i386/Kconfig ++++ linux-2.6.23/arch/i386/Kconfig +@@ -212,10 +212,21 @@ config X86_ES7000 + Only choose this option if you have such a system, otherwise you + should say N here. + + endchoice + ++config SCHED_NO_NO_OMIT_FRAME_POINTER ++ bool "Single-depth WCHAN output" ++ default y ++ help ++ Calculate simpler /proc/<PID>/wchan values. If this option ++ is disabled then wchan values will recurse back to the ++ caller function. This provides more accurate wchan values, ++ at the expense of slightly more scheduling overhead. ++ ++ If in doubt, say "Y". 
++ + config PARAVIRT + bool "Paravirtualization support (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on !(X86_VISWS || X86_VOYAGER) + help +--- linux-2.6.23.orig/drivers/kvm/kvm.h ++++ linux-2.6.23/drivers/kvm/kvm.h +@@ -623,10 +623,20 @@ void __kvm_mmu_free_some_pages(struct kv + int kvm_mmu_load(struct kvm_vcpu *vcpu); + void kvm_mmu_unload(struct kvm_vcpu *vcpu); + + int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run); + ++static inline void kvm_guest_enter(void) ++{ ++ current->flags |= PF_VCPU; ++} ++ ++static inline void kvm_guest_exit(void) ++{ ++ current->flags &= ~PF_VCPU; ++} ++ + static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, + u32 error_code) + { + return vcpu->mmu.page_fault(vcpu, gva, error_code); + } +--- linux-2.6.23.orig/fs/pipe.c ++++ linux-2.6.23/fs/pipe.c +@@ -43,12 +43,11 @@ void pipe_wait(struct pipe_inode_info *p + + /* + * Pipes are system-local resources, so sleeping on them + * is considered a noninteractive wait: + */ +- prepare_to_wait(&pipe->wait, &wait, +- TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE); ++ prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); + if (pipe->inode) + mutex_unlock(&pipe->inode->i_mutex); + schedule(); + finish_wait(&pipe->wait, &wait); + if (pipe->inode) +@@ -381,11 +380,11 @@ redo: + } + mutex_unlock(&inode->i_mutex); + + /* Signal writers asynchronously that there is more room. 
*/ + if (do_wakeup) { +- wake_up_interruptible(&pipe->wait); ++ wake_up_interruptible_sync(&pipe->wait); + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); + } + if (ret > 0) + file_accessed(filp); + return ret; +@@ -554,11 +553,11 @@ redo2: + pipe->waiting_writers--; + } + out: + mutex_unlock(&inode->i_mutex); + if (do_wakeup) { +- wake_up_interruptible(&pipe->wait); ++ wake_up_interruptible_sync(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + } + if (ret > 0) + file_update_time(filp); + return ret; +@@ -648,11 +647,11 @@ pipe_release(struct inode *inode, int de + pipe->writers -= decw; + + if (!pipe->readers && !pipe->writers) { + free_pipe_info(inode); + } else { +- wake_up_interruptible(&pipe->wait); ++ wake_up_interruptible_sync(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); + } + mutex_unlock(&inode->i_mutex); + +--- linux-2.6.23.orig/fs/proc/array.c ++++ linux-2.6.23/fs/proc/array.c +@@ -365,15 +365,22 @@ static cputime_t task_stime(struct task_ + * grows monotonically - apps rely on that): + */ + stime = nsec_to_clock_t(p->se.sum_exec_runtime) - + cputime_to_clock_t(task_utime(p)); + +- p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); ++ if (stime >= 0) ++ p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); ++ + return p->prev_stime; + } + #endif + ++static cputime_t task_gtime(struct task_struct *p) ++{ ++ return p->gtime; ++} ++ + static int do_task_stat(struct task_struct *task, char *buffer, int whole) + { + unsigned long vsize, eip, esp, wchan = ~0UL; + long priority, nice; + int tty_pgrp = -1, tty_nr = 0; +@@ -385,10 +392,11 @@ static int do_task_stat(struct task_stru + struct mm_struct *mm; + unsigned long long start_time; + unsigned long cmin_flt = 0, cmaj_flt = 0; + unsigned long min_flt = 0, maj_flt = 0; + cputime_t cutime, cstime, utime, stime; ++ cputime_t cgtime, gtime; + unsigned long rsslim = 0; + char 
tcomm[sizeof(task->comm)]; + unsigned long flags; + + state = *get_task_state(task); +@@ -403,10 +411,11 @@ static int do_task_stat(struct task_stru + get_task_comm(tcomm, task); + + sigemptyset(&sigign); + sigemptyset(&sigcatch); + cutime = cstime = utime = stime = cputime_zero; ++ cgtime = gtime = cputime_zero; + + rcu_read_lock(); + if (lock_task_sighand(task, &flags)) { + struct signal_struct *sig = task->signal; + +@@ -420,27 +429,30 @@ static int do_task_stat(struct task_stru + + cmin_flt = sig->cmin_flt; + cmaj_flt = sig->cmaj_flt; + cutime = sig->cutime; + cstime = sig->cstime; ++ cgtime = sig->cgtime; + rsslim = sig->rlim[RLIMIT_RSS].rlim_cur; + + /* add up live thread stats at the group level */ + if (whole) { + struct task_struct *t = task; + do { + min_flt += t->min_flt; + maj_flt += t->maj_flt; + utime = cputime_add(utime, task_utime(t)); + stime = cputime_add(stime, task_stime(t)); ++ gtime = cputime_add(gtime, task_gtime(t)); + t = next_thread(t); + } while (t != task); + + min_flt += sig->min_flt; + maj_flt += sig->maj_flt; + utime = cputime_add(utime, sig->utime); + stime = cputime_add(stime, sig->stime); ++ gtime = cputime_add(gtime, sig->gtime); + } + + sid = signal_session(sig); + pgid = process_group(task); + ppid = rcu_dereference(task->real_parent)->tgid; +@@ -454,10 +466,11 @@ static int do_task_stat(struct task_stru + if (!whole) { + min_flt = task->min_flt; + maj_flt = task->maj_flt; + utime = task_utime(task); + stime = task_stime(task); ++ gtime = task_gtime(task); + } + + /* scale priority and nice values from timeslices to -20..20 */ + /* to make it look like a "normal" Unix priority/nice value */ + priority = task_prio(task); +@@ -471,11 +484,11 @@ static int do_task_stat(struct task_stru + /* convert nsec -> ticks */ + start_time = nsec_to_clock_t(start_time); + + res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \ + %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ +-%lu %lu %lu %lu %lu %lu %lu %lu %d 
%d %u %u %llu\n", ++%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n", + task->pid, + tcomm, + state, + ppid, + pgid, +@@ -516,11 +529,13 @@ static int do_task_stat(struct task_stru + 0UL, + task->exit_signal, + task_cpu(task), + task->rt_priority, + task->policy, +- (unsigned long long)delayacct_blkio_ticks(task)); ++ (unsigned long long)delayacct_blkio_ticks(task), ++ cputime_to_clock_t(gtime), ++ cputime_to_clock_t(cgtime)); + if (mm) + mmput(mm); + return res; + } + +--- linux-2.6.23.orig/fs/proc/base.c ++++ linux-2.6.23/fs/proc/base.c +@@ -302,11 +302,11 @@ static int proc_pid_wchan(struct task_st + static int proc_pid_schedstat(struct task_struct *task, char *buffer) + { + return sprintf(buffer, "%llu %llu %lu\n", + task->sched_info.cpu_time, + task->sched_info.run_delay, +- task->sched_info.pcnt); ++ task->sched_info.pcount); + } + #endif + + /* The badness from the OOM killer */ + unsigned long badness(struct task_struct *p, unsigned long uptime); +--- linux-2.6.23.orig/fs/proc/proc_misc.c ++++ linux-2.6.23/fs/proc/proc_misc.c +@@ -441,20 +441,22 @@ static const struct file_operations proc + static int show_stat(struct seq_file *p, void *v) + { + int i; + unsigned long jif; + cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; ++ cputime64_t guest; + u64 sum = 0; + struct timespec boottime; + unsigned int *per_irq_sum; + + per_irq_sum = kzalloc(sizeof(unsigned int)*NR_IRQS, GFP_KERNEL); + if (!per_irq_sum) + return -ENOMEM; + + user = nice = system = idle = iowait = + irq = softirq = steal = cputime64_zero; ++ guest = cputime64_zero; + getboottime(&boottime); + jif = boottime.tv_sec; + + for_each_possible_cpu(i) { + int j; +@@ -465,26 +467,28 @@ static int show_stat(struct seq_file *p, + idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); + iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); + irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); + softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); + steal = 
cputime64_add(steal, kstat_cpu(i).cpustat.steal); ++ guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); + for (j = 0; j < NR_IRQS; j++) { + unsigned int temp = kstat_cpu(i).irqs[j]; + sum += temp; + per_irq_sum[j] += temp; + } + } + +- seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu\n", ++ seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + (unsigned long long)cputime64_to_clock_t(user), + (unsigned long long)cputime64_to_clock_t(nice), + (unsigned long long)cputime64_to_clock_t(system), + (unsigned long long)cputime64_to_clock_t(idle), + (unsigned long long)cputime64_to_clock_t(iowait), + (unsigned long long)cputime64_to_clock_t(irq), + (unsigned long long)cputime64_to_clock_t(softirq), +- (unsigned long long)cputime64_to_clock_t(steal)); ++ (unsigned long long)cputime64_to_clock_t(steal), ++ (unsigned long long)cputime64_to_clock_t(guest)); + for_each_online_cpu(i) { + + /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ + user = kstat_cpu(i).cpustat.user; + nice = kstat_cpu(i).cpustat.nice; +@@ -492,20 +496,23 @@ static int show_stat(struct seq_file *p, + idle = kstat_cpu(i).cpustat.idle; + iowait = kstat_cpu(i).cpustat.iowait; + irq = kstat_cpu(i).cpustat.irq; + softirq = kstat_cpu(i).cpustat.softirq; + steal = kstat_cpu(i).cpustat.steal; +- seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu\n", ++ guest = kstat_cpu(i).cpustat.guest; ++ seq_printf(p, ++ "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + i, + (unsigned long long)cputime64_to_clock_t(user), + (unsigned long long)cputime64_to_clock_t(nice), + (unsigned long long)cputime64_to_clock_t(system), + (unsigned long long)cputime64_to_clock_t(idle), + (unsigned long long)cputime64_to_clock_t(iowait), + (unsigned long long)cputime64_to_clock_t(irq), + (unsigned long long)cputime64_to_clock_t(softirq), +- (unsigned long long)cputime64_to_clock_t(steal)); ++ (unsigned long long)cputime64_to_clock_t(steal), ++ (unsigned long 
long)cputime64_to_clock_t(guest)); + } + seq_printf(p, "intr %llu", (unsigned long long)sum); + + #ifndef CONFIG_SMP + /* Touches too many cache lines on SMP setups */ +--- /dev/null ++++ linux-2.6.23/include/linux/cgroup.h +@@ -0,0 +1,12 @@ ++#ifndef _LINUX_CGROUP_H ++#define _LINUX_CGROUP_H ++ ++/* ++ * Control groups are not backported - we use a few compatibility ++ * defines to be able to use the upstream sched.c as-is: ++ */ ++#define task_pid_nr(task) (task)->pid ++#define task_pid_vnr(task) (task)->pid ++#define find_task_by_vpid(pid) find_task_by_pid(pid) ++ ++#endif +--- linux-2.6.23.orig/include/linux/cpuset.h ++++ linux-2.6.23/include/linux/cpuset.h +@@ -144,8 +144,13 @@ static inline int cpuset_do_slab_mem_spr + return 0; + } + + static inline void cpuset_track_online_nodes(void) {} + ++static inline cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p) ++{ ++ return cpu_possible_map; ++} ++ + #endif /* !CONFIG_CPUSETS */ + + #endif /* _LINUX_CPUSET_H */ +--- linux-2.6.23.orig/include/linux/kernel.h ++++ linux-2.6.23/include/linux/kernel.h +@@ -59,10 +59,17 @@ extern const char linux_proc_banner[]; + #define KERN_WARNING "<4>" /* warning conditions */ + #define KERN_NOTICE "<5>" /* normal but significant condition */ + #define KERN_INFO "<6>" /* informational */ + #define KERN_DEBUG "<7>" /* debug-level messages */ + ++/* ++ * Annotation for a "continued" line of log printout (only done after a ++ * line that had no enclosing \n). Only to be used by core/arch code ++ * during early bootup (a continued line is not SMP-safe otherwise). 
++ */ ++#define KERN_CONT "" ++ + extern int console_printk[]; + + #define console_loglevel (console_printk[0]) + #define default_message_loglevel (console_printk[1]) + #define minimum_console_loglevel (console_printk[2]) +--- linux-2.6.23.orig/include/linux/kernel_stat.h ++++ linux-2.6.23/include/linux/kernel_stat.h +@@ -21,10 +21,11 @@ struct cpu_usage_stat { + cputime64_t softirq; + cputime64_t irq; + cputime64_t idle; + cputime64_t iowait; + cputime64_t steal; ++ cputime64_t guest; + }; + + struct kernel_stat { + struct cpu_usage_stat cpustat; + unsigned int irqs[NR_IRQS]; +@@ -50,9 +51,11 @@ static inline int kstat_irqs(int irq) + + return sum; + } + + extern void account_user_time(struct task_struct *, cputime_t); ++extern void account_user_time_scaled(struct task_struct *, cputime_t); + extern void account_system_time(struct task_struct *, int, cputime_t); ++extern void account_system_time_scaled(struct task_struct *, cputime_t); + extern void account_steal_time(struct task_struct *, cputime_t); + + #endif /* _LINUX_KERNEL_STAT_H */ +--- linux-2.6.23.orig/include/linux/nodemask.h ++++ linux-2.6.23/include/linux/nodemask.h +@@ -336,46 +336,108 @@ static inline void __nodes_remap(nodemas + if (!nodes_empty(mask)) \ + for ((node) = 0; (node) < 1; (node)++) + #endif /* MAX_NUMNODES */ + + /* ++ * Bitmasks that are kept for all the nodes. ++ */ ++enum node_states { ++ N_POSSIBLE, /* The node could become online at some point */ ++ N_ONLINE, /* The node is online */ ++ N_NORMAL_MEMORY, /* The node has regular memory */ ++#ifdef CONFIG_HIGHMEM ++ N_HIGH_MEMORY, /* The node has regular or high memory */ ++#else ++ N_HIGH_MEMORY = N_NORMAL_MEMORY, ++#endif ++ N_CPU, /* The node has one or more cpus */ ++ NR_NODE_STATES ++}; ++ ++/* + * The following particular system nodemasks and operations + * on them manage all possible and online nodes. 
+ */ + +-extern nodemask_t node_online_map; +-extern nodemask_t node_possible_map; ++extern nodemask_t node_states[NR_NODE_STATES]; + + #if MAX_NUMNODES > 1 +-#define num_online_nodes() nodes_weight(node_online_map) +-#define num_possible_nodes() nodes_weight(node_possible_map) +-#define node_online(node) node_isset((node), node_online_map) +-#define node_possible(node) node_isset((node), node_possible_map) +-#define first_online_node first_node(node_online_map) +-#define next_online_node(nid) next_node((nid), node_online_map) ++static inline int node_state(int node, enum node_states state) ++{ ++ return node_isset(node, node_states[state]); ++} ++ ++static inline void node_set_state(int node, enum node_states state) ++{ ++ __node_set(node, &node_states[state]); ++} ++ ++static inline void node_clear_state(int node, enum node_states state) ++{ ++ __node_clear(node, &node_states[state]); ++} ++ ++static inline int num_node_state(enum node_states state) ++{ ++ return nodes_weight(node_states[state]); ++} ++ ++#define for_each_node_state(__node, __state) \ ++ for_each_node_mask((__node), node_states[__state]) ++ ++#define first_online_node first_node(node_states[N_ONLINE]) ++#define next_online_node(nid) next_node((nid), node_states[N_ONLINE]) ++ + extern int nr_node_ids; + #else +-#define num_online_nodes() 1 +-#define num_possible_nodes() 1 +-#define node_online(node) ((node) == 0) +-#define node_possible(node) ((node) == 0) ++ ++static inline int node_state(int node, enum node_states state) ++{ ++ return node == 0; ++} ++ ++static inline void node_set_state(int node, enum node_states state) ++{ ++} ++ ++static inline void node_clear_state(int node, enum node_states state) ++{ ++} ++ ++static inline int num_node_state(enum node_states state) ++{ ++ return 1; ++} ++ ++#define for_each_node_state(node, __state) \ ++ for ( (node) = 0; (node) == 0; (node) = 1) ++ + #define first_online_node 0 + #define next_online_node(nid) (MAX_NUMNODES) + #define nr_node_ids 1 ++ + 
#endif + ++#define node_online_map node_states[N_ONLINE] ++#define node_possible_map node_states[N_POSSIBLE] ++ + #define any_online_node(mask) \ + ({ \ + int node; \ + for_each_node_mask(node, (mask)) \ + if (node_online(node)) \ + break; \ + node; \ + }) + +-#define node_set_online(node) set_bit((node), node_online_map.bits) +-#define node_set_offline(node) clear_bit((node), node_online_map.bits) ++#define num_online_nodes() num_node_state(N_ONLINE) ++#define num_possible_nodes() num_node_state(N_POSSIBLE) ++#define node_online(node) node_state((node), N_ONLINE) ++#define node_possible(node) node_state((node), N_POSSIBLE) ++ ++#define node_set_online(node) node_set_state((node), N_ONLINE) ++#define node_set_offline(node) node_clear_state((node), N_ONLINE) + +-#define for_each_node(node) for_each_node_mask((node), node_possible_map) +-#define for_each_online_node(node) for_each_node_mask((node), node_online_map) ++#define for_each_node(node) for_each_node_state(node, N_POSSIBLE) ++#define for_each_online_node(node) for_each_node_state(node, N_ONLINE) + + #endif /* __LINUX_NODEMASK_H */ +--- linux-2.6.23.orig/include/linux/sched.h ++++ linux-2.6.23/include/linux/sched.h +@@ -1,10 +1,21 @@ + #ifndef _LINUX_SCHED_H + #define _LINUX_SCHED_H + + #include <linux/auxvec.h> /* For AT_VECTOR_SIZE */ + ++/* backporting helper macro: */ ++#define cpu_sibling_map(cpu) cpu_sibling_map[cpu] ++ ++/* ++ * * Control groups are not backported - we use a few compatibility ++ * * defines to be able to use the upstream sched.c as-is: ++ * */ ++#define task_pid_nr(task) (task)->pid ++#define task_pid_vnr(task) (task)->pid ++#define find_task_by_vpid(pid) find_task_by_pid(pid) ++ + /* + * cloning flags: + */ + #define CSIGNAL 0x000000ff /* signal mask to be sent at exit */ + #define CLONE_VM 0x00000100 /* set if VM shared between processes */ +@@ -84,10 +95,11 @@ struct sched_param { + #include <linux/param.h> + #include <linux/resource.h> + #include <linux/timer.h> + #include 
<linux/hrtimer.h> + #include <linux/task_io_accounting.h> ++#include <linux/kobject.h> + + #include <asm/processor.h> + + struct exec_domain; + struct futex_pi_state; +@@ -133,10 +145,11 @@ extern unsigned long nr_active(void); + extern unsigned long nr_iowait(void); + extern unsigned long weighted_cpuload(const int cpu); + + struct seq_file; + struct cfs_rq; ++struct task_group; + #ifdef CONFIG_SCHED_DEBUG + extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); + extern void proc_sched_set_task(struct task_struct *p); + extern void + print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); +@@ -171,12 +184,11 @@ print_cfs_rq(struct seq_file *m, int cpu + #define TASK_TRACED 8 + /* in tsk->exit_state */ + #define EXIT_ZOMBIE 16 + #define EXIT_DEAD 32 + /* in tsk->state again */ +-#define TASK_NONINTERACTIVE 64 +-#define TASK_DEAD 128 ++#define TASK_DEAD 64 + + #define __set_task_state(tsk, state_value) \ + do { (tsk)->state = (state_value); } while (0) + #define set_task_state(tsk, state_value) \ + set_mb((tsk)->state, (state_value)) +@@ -276,10 +288,14 @@ static inline void touch_all_softlockup_ + #endif + + + /* Attach to any functions which should be ignored in wchan output. */ + #define __sched __attribute__((__section__(".sched.text"))) ++ ++/* Linker adds these: start and end of __sched functions */ ++extern char __sched_text_start[], __sched_text_end[]; ++ + /* Is this address in the __sched functions? */ + extern int in_sched_functions(unsigned long addr); + + #define MAX_SCHEDULE_TIMEOUT LONG_MAX + extern signed long FASTCALL(schedule_timeout(signed long timeout)); +@@ -513,10 +529,12 @@ struct signal_struct { + * and for reaped dead child processes forked by this group. + * Live threads maintain their own counters and add to these + * in __exit_signal, except for the group leader. 
+ */ + cputime_t utime, stime, cutime, cstime; ++ cputime_t gtime; ++ cputime_t cgtime; + unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; + unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; + unsigned long inblock, oublock, cinblock, coublock; + + /* +@@ -593,12 +611,27 @@ struct user_struct { + #endif + + /* Hash table maintenance information */ + struct hlist_node uidhash_node; + uid_t uid; ++ ++#ifdef CONFIG_FAIR_USER_SCHED ++ struct task_group *tg; ++#ifdef CONFIG_SYSFS ++ struct kset kset; ++ struct subsys_attribute user_attr; ++ struct work_struct work; ++#endif ++#endif + }; + ++#ifdef CONFIG_FAIR_USER_SCHED ++extern int uids_kobject_init(void); ++#else ++static inline int uids_kobject_init(void) { return 0; } ++#endif ++ + extern struct user_struct *find_user(uid_t); + + extern struct user_struct root_user; + #define INIT_USER (&root_user) + +@@ -606,17 +639,21 @@ struct backing_dev_info; + struct reclaim_state; + + #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) + struct sched_info { + /* cumulative counters */ +- unsigned long pcnt; /* # of times run on this cpu */ ++ unsigned long pcount; /* # of times run on this cpu */ + unsigned long long cpu_time, /* time spent on the cpu */ + run_delay; /* time spent waiting on a runqueue */ + + /* timestamps */ + unsigned long long last_arrival,/* when we last ran on a cpu */ + last_queued; /* when we were last queued to run */ ++#ifdef CONFIG_SCHEDSTATS ++ /* BKL stats */ ++ unsigned int bkl_count; ++#endif + }; + #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ + + #ifdef CONFIG_SCHEDSTATS + extern const struct file_operations proc_schedstat_operations; +@@ -747,43 +784,42 @@ struct sched_domain { + unsigned int balance_interval; /* initialise to 1. units in ms. 
*/ + unsigned int nr_balance_failed; /* initialise to 0 */ + + #ifdef CONFIG_SCHEDSTATS + /* load_balance() stats */ +- unsigned long lb_cnt[CPU_MAX_IDLE_TYPES]; +- unsigned long lb_failed[CPU_MAX_IDLE_TYPES]; +- unsigned long lb_balanced[CPU_MAX_IDLE_TYPES]; +- unsigned long lb_imbalance[CPU_MAX_IDLE_TYPES]; +- unsigned long lb_gained[CPU_MAX_IDLE_TYPES]; +- unsigned long lb_hot_gained[CPU_MAX_IDLE_TYPES]; +- unsigned long lb_nobusyg[CPU_MAX_IDLE_TYPES]; +- unsigned long lb_nobusyq[CPU_MAX_IDLE_TYPES]; ++ unsigned int lb_count[CPU_MAX_IDLE_TYPES]; ++ unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; ++ unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; ++ unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; ++ unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; ++ unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; ++ unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; ++ unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; + + /* Active load balancing */ +- unsigned long alb_cnt; +- unsigned long alb_failed; +- unsigned long alb_pushed; ++ unsigned int alb_count; ++ unsigned int alb_failed; ++ unsigned int alb_pushed; + + /* SD_BALANCE_EXEC stats */ +- unsigned long sbe_cnt; +- unsigned long sbe_balanced; +- unsigned long sbe_pushed; ++ unsigned int sbe_count; ++ unsigned int sbe_balanced; ++ unsigned int sbe_pushed; + + /* SD_BALANCE_FORK stats */ +- unsigned long sbf_cnt; +- unsigned long sbf_balanced; +- unsigned long sbf_pushed; ++ unsigned int sbf_count; ++ unsigned int sbf_balanced; ++ unsigned int sbf_pushed; + + /* try_to_wake_up() stats */ +- unsigned long ttwu_wake_remote; +- unsigned long ttwu_move_affine; +- unsigned long ttwu_move_balance; ++ unsigned int ttwu_wake_remote; ++ unsigned int ttwu_move_affine; ++ unsigned int ttwu_move_balance; + #endif + }; + +-extern int partition_sched_domains(cpumask_t *partition1, +- cpumask_t *partition2); ++extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new); + + #endif /* CONFIG_SMP */ + + /* + * A runqueue laden with a single nice 0 
task scores a weighted_cpuload of +@@ -851,27 +887,32 @@ struct uts_namespace; + + struct rq; + struct sched_domain; + + struct sched_class { +- struct sched_class *next; ++ const struct sched_class *next; + + void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup); + void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep); +- void (*yield_task) (struct rq *rq, struct task_struct *p); ++ void (*yield_task) (struct rq *rq); + + void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); + + struct task_struct * (*pick_next_task) (struct rq *rq); + void (*put_prev_task) (struct rq *rq, struct task_struct *p); + ++#ifdef CONFIG_SMP + unsigned long (*load_balance) (struct rq *this_rq, int this_cpu, +- struct rq *busiest, +- unsigned long max_nr_move, unsigned long max_load_move, ++ struct rq *busiest, unsigned long max_load_move, + struct sched_domain *sd, enum cpu_idle_type idle, + int *all_pinned, int *this_best_prio); + ++ int (*move_one_task) (struct rq *this_rq, int this_cpu, ++ struct rq *busiest, struct sched_domain *sd, ++ enum cpu_idle_type idle); ++#endif ++ + void (*set_curr_task) (struct rq *rq); + void (*task_tick) (struct rq *rq, struct task_struct *p); + void (*task_new) (struct rq *rq, struct task_struct *p); + }; + +@@ -885,46 +926,52 @@ struct load_weight { + * Current field usage histogram: + * + * 4 se->block_start + * 4 se->run_node + * 4 se->sleep_start +- * 4 se->sleep_start_fair + * 6 se->load.weight +- * 7 se->delta_fair +- * 15 se->wait_runtime + */ + struct sched_entity { +- long wait_runtime; +- unsigned long delta_fair_run; +- unsigned long delta_fair_sleep; +- unsigned long delta_exec; +- s64 fair_key; + struct load_weight load; /* for load-balancing */ + struct rb_node run_node; + unsigned int on_rq; + + u64 exec_start; + u64 sum_exec_runtime; ++ u64 vruntime; + u64 prev_sum_exec_runtime; +- u64 wait_start_fair; +- u64 sleep_start_fair; + + #ifdef CONFIG_SCHEDSTATS + u64 wait_start; + u64 wait_max; +- 
s64 sum_wait_runtime; + + u64 sleep_start; + u64 sleep_max; + s64 sum_sleep_runtime; + + u64 block_start; + u64 block_max; + u64 exec_max; ++ u64 slice_max; + +- unsigned long wait_runtime_overruns; +- unsigned long wait_runtime_underruns; ++ u64 nr_migrations; ++ u64 nr_migrations_cold; ++ u64 nr_failed_migrations_affine; ++ u64 nr_failed_migrations_running; ++ u64 nr_failed_migrations_hot; ++ u64 nr_forced_migrations; ++ u64 nr_forced2_migrations; ++ ++ u64 nr_wakeups; ++ u64 nr_wakeups_sync; ++ u64 nr_wakeups_migrate; ++ u64 nr_wakeups_local; ++ u64 nr_wakeups_remote; ++ u64 nr_wakeups_affine; ++ u64 nr_wakeups_affine_attempts; ++ u64 nr_wakeups_passive; ++ u64 nr_wakeups_idle; + #endif + + #ifdef CONFIG_FAIR_GROUP_SCHED + struct sched_entity *parent; + /* rq on which this entity is (to be) queued: */ +@@ -949,11 +996,11 @@ struct task_struct { + #endif + #endif + + int prio, static_prio, normal_prio; + struct list_head run_list; +- struct sched_class *sched_class; ++ const struct sched_class *sched_class; + struct sched_entity se; + + #ifdef CONFIG_PREEMPT_NOTIFIERS + /* list of struct preempt_notifier: */ + struct hlist_head preempt_notifiers; +@@ -1019,11 +1066,12 @@ struct task_struct { + struct completion *vfork_done; /* for vfork() */ + int __user *set_child_tid; /* CLONE_CHILD_SETTID */ + int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ + + unsigned int rt_priority; +- cputime_t utime, stime; ++ cputime_t utime, stime, utimescaled, stimescaled; ++ cputime_t gtime; + cputime_t prev_utime, prev_stime; + unsigned long nvcsw, nivcsw; /* context switch counts */ + struct timespec start_time; /* monotonic time */ + struct timespec real_start_time; /* boot based time */ + /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ +@@ -1312,10 +1360,11 @@ static inline void put_task_struct(struc + #define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */ + /* Not implemented yet, only for 486*/ + #define 
PF_STARTING 0x00000002 /* being created */ + #define PF_EXITING 0x00000004 /* getting shut down */ + #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ ++#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ + #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ + #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ + #define PF_DUMPCORE 0x00000200 /* dumped core */ + #define PF_SIGNALED 0x00000400 /* killed by a signal */ + #define PF_MEMALLOC 0x00000800 /* Allocating memory */ +@@ -1399,19 +1448,30 @@ extern void idle_task_exit(void); + static inline void idle_task_exit(void) {} + #endif + + extern void sched_idle_next(void); + ++#ifdef CONFIG_SCHED_DEBUG + extern unsigned int sysctl_sched_latency; + extern unsigned int sysctl_sched_min_granularity; + extern unsigned int sysctl_sched_wakeup_granularity; + extern unsigned int sysctl_sched_batch_wakeup_granularity; +-extern unsigned int sysctl_sched_stat_granularity; +-extern unsigned int sysctl_sched_runtime_limit; +-extern unsigned int sysctl_sched_compat_yield; + extern unsigned int sysctl_sched_child_runs_first; + extern unsigned int sysctl_sched_features; ++extern unsigned int sysctl_sched_migration_cost; ++extern unsigned int sysctl_sched_nr_migrate; ++#ifdef CONFIG_FAIR_GROUP_SCHED ++extern unsigned int sysctl_sched_min_bal_int_shares; ++extern unsigned int sysctl_sched_max_bal_int_shares; ++#endif ++ ++int sched_nr_latency_handler(struct ctl_table *table, int write, ++ struct file *file, void __user *buffer, size_t *length, ++ loff_t *ppos); ++#endif ++ ++extern unsigned int sysctl_sched_compat_yield; + + #ifdef CONFIG_RT_MUTEXES + extern int rt_mutex_getprio(struct task_struct *p); + extern void rt_mutex_setprio(struct task_struct *p, int prio); + extern void rt_mutex_adjust_pi(struct task_struct *p); +@@ -1841,10 +1901,22 @@ extern long sched_getaffinity(pid_t pid, + + extern int sched_mc_power_savings, sched_smt_power_savings; + + extern void normalize_rt_tasks(void); + 
++#ifdef CONFIG_FAIR_GROUP_SCHED ++ ++extern struct task_group init_task_group; |
