diff --git a/COPYING.SWsoft b/COPYING.SWsoft new file mode 100644 index 0000000..059256d --- /dev/null +++ b/COPYING.SWsoft @@ -0,0 +1,350 @@ + +Nothing in this license should be construed as a grant by SWsoft of any rights +beyond the rights specified in the GNU General Public License, and nothing in +this license should be construed as a waiver by SWsoft of its patent, copyright +and/or trademark rights, beyond the waiver required by the GNU General Public +License. This license is expressly inapplicable to any product that is not +within the scope of the GNU General Public License + +---------------------------------------- + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. 
+ + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. 
+ +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/Makefile b/Makefile index 16e3fbb..fcffc7e 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 27 EXTRAVERSION = +VZVERSION = 037test001 NAME = Rotary Wombat # *DOCUMENTATION* @@ -347,7 +348,7 @@ KBUILD_AFLAGS := -D__ASSEMBLY__ KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null) KERNELVERSION = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) -export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION +export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION VZVERSION export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC export CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS @@ -1002,7 +1003,8 @@ define filechk_utsrelease.h echo '"$(KERNELRELEASE)" exceeds $(uts_len) characters' >&2; \ exit 1; \ fi; \ - (echo \#define UTS_RELEASE \"$(KERNELRELEASE)\";) + (echo \#define UTS_RELEASE \"$(KERNELRELEASE)\"; \ + echo \#define VZVERSION \"$(VZVERSION)\";) endef define filechk_version.h diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index e9842f6..643f220 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -191,7 +191,7 @@ int __cpuexit __cpu_disable(void) local_flush_tlb_all(); read_lock(&tasklist_lock); - for_each_process(p) { + for_each_process_all(p) { if (p->mm) cpu_clear(cpu, p->mm->cpu_vm_mask); } diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 48e496f..8a2572b 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -612,6 +612,7 @@ source "arch/ia64/kvm/Kconfig" source "lib/Kconfig" +source "kernel/bc/Kconfig" # # Use the generic interrupt handling code in kernel/irq/: # @@ -639,6 +640,8 @@ source "arch/ia64/hp/sim/Kconfig" source "arch/ia64/Kconfig.debug" +source "kernel/Kconfig.openvz" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c index 4f0c30c..067cb28 100644 --- 
a/arch/ia64/ia32/binfmt_elf32.c +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -17,6 +17,8 @@ #include #include +#include + #include "ia32priv.h" #include "elfcore32.h" @@ -132,6 +134,12 @@ ia64_elf32_init (struct pt_regs *regs) up_write(&current->mm->mmap_sem); } + if (ub_memory_charge(current->mm, PAGE_ALIGN(IA32_LDT_ENTRIES * + IA32_LDT_ENTRY_SIZE), + VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE, + NULL, UB_SOFT)) + goto skip; + /* * Install LDT as anonymous memory. This gives us all-zero segment descriptors * until a task modifies them via modify_ldt(). @@ -152,7 +160,12 @@ ia64_elf32_init (struct pt_regs *regs) } } up_write(&current->mm->mmap_sem); - } + } else + ub_memory_uncharge(current->mm, PAGE_ALIGN(IA32_LDT_ENTRIES * + IA32_LDT_ENTRY_SIZE), + VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE, NULL); + +skip: ia64_psr(regs)->ac = 0; /* turn off alignment checking */ regs->loadrs = 0; diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h index b9ac1a6..9504729 100644 --- a/arch/ia64/include/asm/pgalloc.h +++ b/arch/ia64/include/asm/pgalloc.h @@ -20,11 +20,13 @@ #include #include +#include + #include static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return quicklist_alloc(0, GFP_KERNEL, NULL); + return quicklist_alloc(0, GFP_KERNEL_UBC|__GFP_SOFT_UBC, NULL); } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) @@ -41,7 +43,7 @@ pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return quicklist_alloc(0, GFP_KERNEL, NULL); + return quicklist_alloc(0, GFP_KERNEL_UBC|__GFP_SOFT_UBC, NULL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -59,7 +61,7 @@ pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return quicklist_alloc(0, GFP_KERNEL, NULL); + return quicklist_alloc(0, GFP_KERNEL_UBC|__GFP_SOFT_UBC, NULL); } static inline void 
pmd_free(struct mm_struct *mm, pmd_t *pmd) @@ -87,7 +89,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr) struct page *page; void *pg; - pg = quicklist_alloc(0, GFP_KERNEL, NULL); + pg = quicklist_alloc(0, GFP_KERNEL_UBC|__GFP_SOFT_UBC, NULL); if (!pg) return NULL; page = virt_to_page(pg); diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index f88fa05..695c23f 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -361,7 +361,7 @@ struct thread_struct { regs->loadrs = 0; \ regs->r8 = get_dumpable(current->mm); /* set "don't zap registers" flag */ \ regs->r12 = new_sp - 16; /* allocate 16 byte scratch area */ \ - if (unlikely(!get_dumpable(current->mm))) { \ + if (unlikely(!get_dumpable(current->mm) || !current->mm->vps_dumpable)) { \ /* \ * Zap scratch regs to avoid leaking bits between processes with different \ * uid/privileges. \ diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index d535833..5b5eb9c 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -308,6 +308,16 @@ #define __NR_dup3 1316 #define __NR_pipe2 1317 #define __NR_inotify_init1 1318 +#define __NR_fairsched_vcpus 1499 +#define __NR_fairsched_mknod 1500 +#define __NR_fairsched_rmnod 1501 +#define __NR_fairsched_chwt 1502 +#define __NR_fairsched_mvpr 1503 +#define __NR_fairsched_rate 1504 +#define __NR_getluid 1505 +#define __NR_setluid 1506 +#define __NR_setublimit 1507 +#define __NR_ubstat 1508 #ifdef __KERNEL__ diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 0dd6c14..d96ff73 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -510,6 +510,74 @@ GLOBAL_ENTRY(clone) br.ret.sptk.many rp END(clone) +GLOBAL_ENTRY(ia64_ret_from_resume) + PT_REGS_UNWIND_INFO(0) +{ /* + * Some versions of gas generate bad unwind info if the first instruction of a + * procedure doesn't go into the first slot of a bundle. 
This is a workaround. + */ + nop.m 0 + nop.i 0 + /* + * We need to call schedule_tail() to complete the scheduling process. + * Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the + * address of the previously executing task. + */ + br.call.sptk.many rp=ia64_invoke_schedule_tail +} + br.call.sptk.many rp=ia64_invoke_resume + ;; + adds sp=256,sp + ;; + /* Return from interrupt, we are all right. */ +(pNonSys) br ia64_leave_kernel + ;; + /* Tricky part follows. We must restore correct syscall + * register frame before doing normal syscall exit job. + * It would be the most natural to keep sw->ar_pfs correct, + * then we would be here with correct register frame. + * Unfortunately, IA64 has a feature. Registers were in backstore + * after context switch, and the first br.ret does _NOT_ fetch + * output registers. + * It is quite natural: look, if caller has output regs in his + * frame, they should be consumed. If callee does not have (enough of) + * input/local registers (1 in this case), the situation is unusual. + * Practical evidence: they are filled with some random crap. + * The only case, when this is essential in mainstream kernel + * is sys_clone(). The result is that new process gets some kernel + * information in its register frame. Which is a security problem, btw. + * + * So, we set sw->ar_pfs to pretend the whole frame is of local + * regs. And we have to repartition the frame manually, using + * information from pt->cr_ifs (the register is invalid in this + * case, but it holds correct pfm). + */ + adds r3=PT(CR_IFS)+16,sp + ;; + ld8 r2=[r3],-(PT(CR_IFS)-PT(R8)) + ;; + extr.u r2=r2,0,37 + mov r8=ar.ec + ;; + extr.u r8=r8,0,5 + ;; + shl r8=r8,52 + ;; + or r2=r2,r8 + ;; + mov ar.pfs=r2 + ;; + movl r2=ia64_leave_syscall + ;; + mov rp=r2 + /* Plus, we should fetch r8 and r10 from pt_regs. Something else? 
*/ + ld8 r8=[r3],PT(R10)-PT(R8) + ;; + ld8 r10=[r3] + ;; + br.ret.sptk.many rp +END(ia64_ret_from_resume) + /* * Invoke a system call, but do some tracing before and after the call. * We MUST preserve the current register frame throughout this routine @@ -1264,6 +1332,34 @@ GLOBAL_ENTRY(ia64_invoke_schedule_tail) br.ret.sptk.many rp END(ia64_invoke_schedule_tail) +GLOBAL_ENTRY(ia64_invoke_resume) + alloc loc1=ar.pfs,0,3,1,0 + mov loc0=rp + adds out0=16,sp + ;; + ld8 r8=[out0] + ;; + cmp.eq p6,p0=r8,r0 + ;; +(p6) br.cond.sptk 1f + ;; + mov loc2=gp + ;; + ld8 r10=[r8],8 + ;; + ld8 gp=[r8] + ;; + mov b7=r10 + ;; + br.call.sptk.many rp=b7 + ;; + mov gp=loc2 +1: + mov ar.pfs=loc1 + mov rp=loc0 + br.ret.sptk.many rp +END(ia64_invoke_resume) + /* * Setup stack and call do_notify_resume_user(), keeping interrupts * disabled. @@ -1698,5 +1794,18 @@ sys_call_table: data8 sys_pipe2 data8 sys_inotify_init1 +.rept 1499-1313 + data8 sys_ni_syscall +.endr + data8 sys_fairsched_vcpus + data8 sys_fairsched_mknod // 1500 + data8 sys_fairsched_rmnod + data8 sys_fairsched_chwt + data8 sys_fairsched_mvpr + data8 sys_fairsched_rate + data8 sys_getluid // 1505 + data8 sys_setluid + data8 sys_setublimit + data8 sys_ubstat .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index c1625c7..634b102 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -90,53 +90,6 @@ ENTRY(fsys_getpid) FSYS_RETURN END(fsys_getpid) -ENTRY(fsys_getppid) - .prologue - .altrp b6 - .body - add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16 - ;; - ld8 r17=[r17] // r17 = current->group_leader - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - ;; - - ld4 r9=[r9] - add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent - ;; - and r9=TIF_ALLWORK_MASK,r9 - -1: ld8 r18=[r17] // r18 = current->group_leader->real_parent - ;; - cmp.ne p8,p0=0,r9 - add 
r8=IA64_TASK_TGID_OFFSET,r18 // r8 = &current->group_leader->real_parent->tgid - ;; - - /* - * The .acq is needed to ensure that the read of tgid has returned its data before - * we re-check "real_parent". - */ - ld4.acq r8=[r8] // r8 = current->group_leader->real_parent->tgid -#ifdef CONFIG_SMP - /* - * Re-read current->group_leader->real_parent. - */ - ld8 r19=[r17] // r19 = current->group_leader->real_parent -(p8) br.spnt.many fsys_fallback_syscall - ;; - cmp.ne p6,p0=r18,r19 // did real_parent change? - mov r19=0 // i must not leak kernel bits... -(p6) br.cond.spnt.few 1b // yes -> redo the read of tgid and the check - ;; - mov r17=0 // i must not leak kernel bits... - mov r18=0 // i must not leak kernel bits... -#else - mov r17=0 // i must not leak kernel bits... - mov r18=0 // i must not leak kernel bits... - mov r19=0 // i must not leak kernel bits... -#endif - FSYS_RETURN -END(fsys_getppid) - ENTRY(fsys_set_tid_address) .prologue .altrp b6 @@ -767,7 +720,7 @@ fsyscall_table: data8 0 // chown data8 0 // lseek // 1040 data8 fsys_getpid // getpid - data8 fsys_getppid // getppid + data8 0 // getppid data8 0 // mount data8 0 // umount data8 0 // setuid // 1045 diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index 66e491d..4308d48 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -1097,7 +1097,7 @@ GLOBAL_ENTRY(start_kernel_thread) mov out1 = r11;; br.call.sptk.many rp = kernel_thread_helper;; mov out0 = r8 - br.call.sptk.many rp = sys_exit;; + br.call.sptk.many rp = do_exit;; 1: br.sptk.few 1b // not reached END(start_kernel_thread) diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 6da1f20..24950d6 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -75,6 +75,8 @@ EXPORT_SYMBOL(xor_ia64_4); EXPORT_SYMBOL(xor_ia64_5); #endif +EXPORT_SYMBOL(empty_zero_page); + #include EXPORT_SYMBOL(ia64_pal_call_phys_stacked); EXPORT_SYMBOL(ia64_pal_call_phys_static); diff --git 
a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 7dd96c1..d849ed0 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1608,10 +1608,10 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi } printk("\n\n"); if (read_trylock(&tasklist_lock)) { - do_each_thread (g, t) { + do_each_thread_all (g, t) { printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); show_stack(t, NULL); - } while_each_thread (g, t); + } while_each_thread_all (g, t); read_unlock(&tasklist_lock); } /* FIXME: This will not restore zapped printk locks. */ diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index fc8f350..057bbb3 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -4176,12 +4176,12 @@ pfm_check_task_exist(pfm_context_t *ctx) read_lock(&tasklist_lock); - do_each_thread (g, t) { + do_each_thread_ve (g, t) { if (t->thread.pfm_context == ctx) { ret = 0; goto out; } - } while_each_thread (g, t); + } while_each_thread_ve (g, t); out: read_unlock(&tasklist_lock); diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 3ab8373..af4e88a 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -391,6 +392,9 @@ ia64_load_extra (struct task_struct *task) #endif } +extern char ia64_ret_from_resume; +EXPORT_SYMBOL(ia64_ret_from_resume); + /* * Copy the state of an ia-64 thread. 
* @@ -464,7 +468,6 @@ copy_thread (int nr, unsigned long clone_flags, child_ptregs->r12 = user_stack_base + user_stack_size - 16; child_ptregs->ar_bspstore = user_stack_base; child_ptregs->ar_rnat = 0; - child_ptregs->loadrs = 0; } } else { /* @@ -676,16 +679,25 @@ out: return error; } +extern void start_kernel_thread (void); +EXPORT_SYMBOL(start_kernel_thread); + pid_t kernel_thread (int (*fn)(void *), void *arg, unsigned long flags) { - extern void start_kernel_thread (void); unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread; struct { struct switch_stack sw; struct pt_regs pt; } regs; + /* Don't allow kernel_thread() inside VE */ + if (!ve_allow_kthreads && !ve_is_super(get_exec_env())) { + printk("kernel_thread call inside container\n"); + dump_stack(); + return -EPERM; + } + memset(&regs, 0, sizeof(regs)); regs.pt.cr_iip = helper_fptr[0]; /* set entry point (IP) */ regs.pt.r1 = helper_fptr[1]; /* set GP */ diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 2a9943b..e44debf 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -10,6 +10,7 @@ * Derived from the x86 and Alpha versions. 
*/ #include +#include #include #include #include @@ -105,6 +106,8 @@ ia64_get_scratch_nat_bits (struct pt_regs *pt, unsigned long scratch_unat) # undef GET_BITS } +EXPORT_SYMBOL(ia64_get_scratch_nat_bits); +EXPORT_SYMBOL(__ia64_save_fpu); /* * Set the NaT bits for the scratch registers according to NAT and @@ -461,6 +464,7 @@ ia64_peek (struct task_struct *child, struct switch_stack *child_stack, *val = ret; return 0; } +EXPORT_SYMBOL(ia64_peek); long ia64_poke (struct task_struct *child, struct switch_stack *child_stack, @@ -525,6 +529,7 @@ ia64_get_user_rbs_end (struct task_struct *child, struct pt_regs *pt, *cfmp = cfm; return (unsigned long) ia64_rse_skip_regs(bspstore, ndirty); } +EXPORT_SYMBOL(ia64_get_user_rbs_end); /* * Synchronize (i.e, write) the RSE backing store living in kernel @@ -820,20 +825,20 @@ access_nat_bits (struct task_struct *child, struct pt_regs *pt, if (write_access) { nat_bits = *data; scratch_unat = ia64_put_scratch_nat_bits(pt, nat_bits); - if (unw_set_ar(info, UNW_AR_UNAT, scratch_unat) < 0) { - dprintk("ptrace: failed to set ar.unat\n"); - return -1; - } + if (info->pri_unat_loc) + *info->pri_unat_loc = scratch_unat; + else + info->sw->caller_unat = scratch_unat; for (regnum = 4; regnum <= 7; ++regnum) { unw_get_gr(info, regnum, &dummy, &nat); unw_set_gr(info, regnum, dummy, (nat_bits >> regnum) & 1); } } else { - if (unw_get_ar(info, UNW_AR_UNAT, &scratch_unat) < 0) { - dprintk("ptrace: failed to read ar.unat\n"); - return -1; - } + if (info->pri_unat_loc) + scratch_unat = *info->pri_unat_loc; + else + scratch_unat = info->sw->caller_unat; nat_bits = ia64_get_scratch_nat_bits(pt, scratch_unat); for (regnum = 4; regnum <= 7; ++regnum) { unw_get_gr(info, regnum, &dummy, &nat); diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 19c5a78..cc6c4e6 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -464,6 +465,12 @@ 
ia64_do_signal (struct sigscratch *scr, long in_syscall) if (!user_mode(&scr->pt)) return; + if (try_to_freeze() && !signal_pending(current)) { + if ((long) scr->pt.r10 != -1) + restart = 0; + goto no_signal; + } + if (current_thread_info()->status & TS_RESTORE_SIGMASK) oldset = &current->saved_sigmask; else @@ -519,8 +526,10 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall) if (IS_IA32_PROCESS(&scr->pt)) { scr->pt.r8 = scr->pt.r1; scr->pt.cr_iip -= 2; - } else + } else { ia64_decrement_ip(&scr->pt); + scr->pt.r10 = 0; + } restart = 0; /* don't restart twice if handle_signal() fails... */ } } @@ -542,6 +551,7 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall) } /* Did we come from a system call? */ +no_signal: if (restart) { /* Restart the system call - no handlers present */ if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR @@ -561,6 +571,7 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall) ia64_decrement_ip(&scr->pt); if (errno == ERESTART_RESTARTBLOCK) scr->pt.r15 = __NR_restart_syscall; + scr->pt.r10 = 0; } } } diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index bcbb6d8..40c8320 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -204,7 +204,7 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un /* Careful about overflows..
*/ len = PAGE_ALIGN(len); - if (!len || len > TASK_SIZE) { + if (len > TASK_SIZE) { addr = -EINVAL; goto out; } diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 65c10a4..9f0cdde 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -41,6 +41,8 @@ struct fsyscall_gtod_data_t fsyscall_gtod_data = { struct itc_jitter_data_t itc_jitter_data; volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */ +unsigned int cpu_khz; /* TSC clocks / usec, not used here */ +EXPORT_SYMBOL(cpu_khz); #ifdef CONFIG_IA64_DEBUG_IRQ @@ -358,6 +360,8 @@ ia64_init_itm (void) /* avoid softlock up message when cpu is unplug and plugged again. */ touch_softlockup_watchdog(); + cpu_khz = local_cpu_data->proc_freq / 1000; + /* Setup the CPU local timer tick */ ia64_cpu_local_tick(); diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index ff0e7c1..7288a9f 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -1291,7 +1291,7 @@ within_logging_rate_limit (void) { static unsigned long count, last_time; - if (time_after(jiffies, last_time + 5 * HZ)) + if (time_after(jiffies, last_time + 60 * HZ)) count = 0; if (count < 5) { last_time = jiffies; diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 23088be..da13815 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -148,7 +148,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re if ((vma->vm_flags & mask) != mask) goto bad_area; - survive: /* * If for any reason at all we couldn't handle the fault, make * sure we exit gracefully rather than endlessly redo the @@ -276,13 +275,13 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re out_of_memory: up_read(&mm->mmap_sem); - if (is_global_init(current)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; + if (user_mode(regs)) { + /* + * A 0-order allocation always succeeds unless something really + * fatal happens:
beancounter overdraft or OOM. + */ + force_sig(SIGKILL, current); + return; } - printk(KERN_CRIT "VM: killing process %s\n", current->comm); - if (user_mode(regs)) - do_group_exit(SIGKILL); goto no_context; } diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 200100e..226b5cc 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -37,6 +37,8 @@ #include #include +#include + DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); extern void ia64_tlb_init (void); @@ -111,6 +113,10 @@ ia64_init_addr_space (void) ia64_set_rbs_bot(); + if (ub_memory_charge(current->mm, PAGE_SIZE, VM_DATA_DEFAULT_FLAGS, + NULL, UB_SOFT)) + goto skip; + /* * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore * the problem. When the process attempts to write to the register backing store @@ -127,11 +133,16 @@ ia64_init_addr_space (void) if (insert_vm_struct(current->mm, vma)) { up_write(&current->mm->mmap_sem); kmem_cache_free(vm_area_cachep, vma); + ub_memory_uncharge(current->mm, PAGE_SIZE, + VM_DATA_DEFAULT_FLAGS, NULL); return; } up_write(&current->mm->mmap_sem); - } + } else + ub_memory_uncharge(current->mm, PAGE_SIZE, + VM_DATA_DEFAULT_FLAGS, NULL); +skip: /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ if (!(current->personality & MMAP_PAGE_ZERO)) { vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 587da5e..a9d6b81 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -833,8 +833,12 @@ source "arch/powerpc/sysdev/qe_lib/Kconfig" source "lib/Kconfig" +source "kernel/bc/Kconfig" + source "arch/powerpc/Kconfig.debug" +source "kernel/Kconfig.openvz" + source "security/Kconfig" config KEYS_COMPAT diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h index 812a1d8..c0f7a7f 100644 --- a/arch/powerpc/include/asm/pgalloc-64.h +++ b/arch/powerpc/include/asm/pgalloc-64.h @@ -26,7 +26,8 @@ extern struct kmem_cache
*pgtable_cache[]; static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL); + return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], + GFP_KERNEL_UBC | __GFP_SOFT_UBC); } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) @@ -42,7 +43,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM], - GFP_KERNEL|__GFP_REPEAT); + GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -88,10 +89,15 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd); } +static inline pte_t *do_pte_alloc(gfp_t flags) +{ + return (pte_t *)__get_free_page(flags); +} + static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); + return do_pte_alloc(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -100,7 +106,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, struct page *page; pte_t *pte; - pte = pte_alloc_one_kernel(mm, address); + pte = do_pte_alloc(GFP_KERNEL_UBC | __GFP_REPEAT | __GFP_ZERO); if (!pte) return NULL; page = virt_to_page(pte); diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index f6cc7a4..34fc004 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -322,3 +322,19 @@ SYSCALL_SPU(epoll_create1) SYSCALL_SPU(dup3) SYSCALL_SPU(pipe2) SYSCALL(inotify_init1) +SYS_SKIP(319, 400) +SYSCALL(ni_syscall) +SYS_SKIP_END() +SYSCALL(fairsched_mknod) /* 400 */ +SYSCALL(fairsched_rmnod) +SYSCALL(fairsched_chwt) +SYSCALL(fairsched_mvpr) +SYSCALL(fairsched_rate) +SYSCALL(fairsched_vcpus) +SYS_SKIP(406, 410) 
+SYSCALL(ni_syscall) +SYS_SKIP_END() +SYSCALL(getluid) /* 410 */ +SYSCALL(setluid) +SYSCALL(setublimit) +SYSCALL(ubstat) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index e07d0c7..3fea592 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -342,9 +342,14 @@ #define __NR_pipe2 317 #define __NR_inotify_init1 318 +#define __NR_getluid 410 +#define __NR_setluid 411 +#define __NR_setublimit 412 +#define __NR_ubstat 413 + #ifdef __KERNEL__ -#define __NR_syscalls 319 +#define __NR_syscalls 414 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 7a6dfbc..28c26b4 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -835,7 +835,7 @@ _GLOBAL(abs) * Create a kernel thread * kernel_thread(fn, arg, flags) */ -_GLOBAL(kernel_thread) +_GLOBAL(ppc_kernel_thread) stwu r1,-16(r1) stw r30,8(r1) stw r31,12(r1) diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 4dd70cf..2e10116 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -415,7 +415,7 @@ _GLOBAL(scom970_write) * Create a kernel thread * kernel_thread(fn, arg, flags) */ -_GLOBAL(kernel_thread) +_GLOBAL(ppc_kernel_thread) std r29,-24(r1) std r30,-16(r1) stdu r1,-STACK_FRAME_OVERHEAD(r1) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 957bded..ca7410c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -50,6 +50,8 @@ #include #include +#include + extern unsigned long _get_SP(void); #ifndef CONFIG_SMP @@ -501,8 +503,9 @@ void show_regs(struct pt_regs * regs) printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, regs->ctr); - printk("REGS: %p TRAP: %04lx %s (%s)\n", - regs, regs->trap, print_tainted(), init_utsname()->release); + printk("REGS: %p TRAP: %04lx %s (%s %s)\n", + regs, regs->trap, 
print_tainted(), init_utsname()->release, + VZVERSION); printk("MSR: "REG" ", regs->msr); printbits(regs->msr, msr_bits); printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); @@ -1057,6 +1060,20 @@ void dump_stack(void) } EXPORT_SYMBOL(dump_stack); +long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) +{ + extern long ppc_kernel_thread(int (*fn)(void *), void *arg, + unsigned long flags); + + if (!ve_is_super(get_exec_env())) { + printk("kernel_thread call inside container\n"); + dump_stack(); + return -EPERM; + } + + return ppc_kernel_thread(fn, arg, flags); +} + #ifdef CONFIG_PPC64 void ppc64_runlatch_on(void) { diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 93219c3..a9e16bb 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -43,5 +43,9 @@ .p2align 3 #endif +#define SYS_SKIP(from, to) .rept to - from \ + SYSCALL(sys_ni_syscall) \ + .endr + _GLOBAL(sys_call_table) #include diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 565b7a2..8400dec 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -309,7 +309,6 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - survive: ret = handle_mm_fault(mm, vma, address, is_write); if (unlikely(ret & VM_FAULT_ERROR)) { if (ret & VM_FAULT_OOM) @@ -349,14 +348,12 @@ bad_area_nosemaphore: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_global_init(current)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } - printk("VM: killing process %s\n", current->comm); if (user_mode(regs)) - do_group_exit(SIGKILL); + /* + * A 0-order allocation always succeeds unless something really + * fatal happens: beancounter overdraft or OOM.
Den + */ + force_sig(SIGKILL, current); return SIGKILL; do_sigbus: diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 036fe2f..807473a 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -168,8 +168,8 @@ struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; void pgtable_cache_init(void) { - pgtable_cache[0] = kmem_cache_create(pgtable_cache_name[0], PGD_TABLE_SIZE, PGD_TABLE_SIZE, SLAB_PANIC, pgd_ctor); - pgtable_cache[1] = kmem_cache_create(pgtable_cache_name[1], PMD_TABLE_SIZE, PMD_TABLE_SIZE, SLAB_PANIC, pmd_ctor); + pgtable_cache[0] = kmem_cache_create(pgtable_cache_name[0], PGD_TABLE_SIZE, PGD_TABLE_SIZE, SLAB_PANIC|SLAB_UBC|SLAB_NO_CHARGE, pgd_ctor); + pgtable_cache[1] = kmem_cache_create(pgtable_cache_name[1], PMD_TABLE_SIZE, PMD_TABLE_SIZE, SLAB_PANIC|SLAB_UBC|SLAB_NO_CHARGE, pmd_ctor); } #ifdef CONFIG_SPARSEMEM_VMEMMAP diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 2001abd..ea128b6 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -83,7 +83,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *ret; - ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER); + ret = (pgd_t *)__get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC | + __GFP_ZERO, PGDIR_ORDER); return ret; } @@ -117,6 +118,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) #else gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO; #endif + flags |= (__GFP_UBC | __GFP_SOFT_UBC); ptepage = alloc_pages(flags, 0); if (!ptepage) diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index 19f6bfd..4f23f43 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -46,6 +46,8 @@ static void *spu_syscall_table[] = { #define PPC_SYS_SPU(func) ppc_##func, #define SYSX_SPU(f, f3264, f32) f, +#define SYS_SKIP(from, to) [from ... 
to] = sys_ni_syscall, + #include }; diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8d41908..2e2f811 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -587,6 +587,8 @@ source "fs/Kconfig" source "arch/s390/Kconfig.debug" +source "kernel/Kconfig.openvz" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 00b9b4d..6194a6a 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -573,8 +573,19 @@ out: */ int __cpuinit start_secondary(void *cpuvoid) { - /* Setup the cpu */ - cpu_init(); + /* Setup the cpu */ + cpu_init(); + +#ifdef CONFIG_VE + /* TSC reset. kill whatever might rely on old values */ + VE_TASK_INFO(current)->wakeup_stamp = 0; + /* + * Cosmetic: sleep_time won't be changed afterwards for the idle + * thread; keep it 0 rather than -cycles. + */ + VE_TASK_INFO(idle)->sleep_time = 0; +#endif + preempt_disable(); /* Enable TOD clock interrupts on the secondary cpu. */ init_cpu_timer(); @@ -831,6 +842,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) for_each_possible_cpu(cpu) if (cpu != smp_processor_id()) smp_create_idle(cpu); + +#ifdef CONFIG_VE + /* TSC reset. 
kill whatever might rely on old values */ + VE_TASK_INFO(current)->wakeup_stamp = 0; +#endif } void __init smp_prepare_boot_cpu(void) diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index b9dbd2d..d0a8c54 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -670,7 +670,7 @@ asids_proc_info(char *buf, char **start, off_t fpos, int length, int *eof, void int len=0; struct task_struct *p; read_lock(&tasklist_lock); - for_each_process(p) { + for_each_process_ve(p) { int pid = p->pid; if (!pid) diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index 5bdfa2c..dd89e73 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -16,7 +16,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return quicklist_alloc(0, GFP_KERNEL, NULL); + return quicklist_alloc(0, GFP_KERNEL_UBC, NULL); } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) @@ -28,7 +28,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return quicklist_alloc(0, GFP_KERNEL, NULL); + return quicklist_alloc(0, GFP_KERNEL_UBC|__GFP_REPEAT, NULL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) @@ -48,7 +48,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, struct page *page; void *pg; - pg = quicklist_alloc(0, GFP_KERNEL, NULL); + pg = quicklist_alloc(0, GFP_KERNEL_UBC, NULL); if (!pg) return NULL; page = virt_to_page(pg); diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index c0a737d..5ef8b1d 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -163,14 +163,14 @@ register struct thread_info *current_thread_info_reg asm("g6"); struct thread_info *ret; \ \ ret = (struct thread_info *) \ - __get_free_pages(GFP_KERNEL, __THREAD_INFO_ORDER); \ + 
__get_free_pages(GFP_KERNEL_UBC, __THREAD_INFO_ORDER); \ if (ret) \ memset(ret, 0, PAGE_SIZE<<__THREAD_INFO_ORDER); \ ret; \ }) #else #define alloc_thread_info(tsk) \ - ((struct thread_info *)__get_free_pages(GFP_KERNEL, __THREAD_INFO_ORDER)) + ((struct thread_info *)__get_free_pages(GFP_KERNEL_UBC, __THREAD_INFO_ORDER)) #endif #define free_thread_info(ti) \ @@ -237,6 +237,7 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define TIF_ABI_PENDING 12 #define TIF_MEMDIE 13 #define TIF_POLLING_NRFLAG 14 +#define TIF_FREEZE 15 /* Freeze request (atomic PF_FREEZE) */ #define _TIF_SYSCALL_TRACE (1<comm, task_pid_nr(current), str, ++die_counter); + printk("VE:EXCVE %d:%d, CPU %d, VCPU %d:%d\n", + VEID(VE_TASK_INFO(current)->owner_env), VEID(get_exec_env()), + smp_processor_id(), + task_vsched_id(current), task_cpu(current)); notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV); __asm__ __volatile__("flushw"); show_regs(regs); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ed92864..518d26d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1777,6 +1777,7 @@ config SYSVIPC_COMPAT endmenu +source "kernel/Kconfig.openvz" source "net/Kconfig" @@ -1795,3 +1796,5 @@ source "crypto/Kconfig" source "arch/x86/kvm/Kconfig" source "lib/Kconfig" + +source "kernel/bc/Kconfig" diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index ffc1bb4..0c8651c 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -612,7 +612,7 @@ ia32_sys_call_table: .quad stub32_iopl /* 110 */ .quad sys_vhangup .quad quiet_ni_syscall /* old "idle" system call */ - .quad sys32_vm86_warning /* vm86old */ + .quad quiet_ni_syscall /* vm86old */ .quad compat_sys_wait4 .quad sys_swapoff /* 115 */ .quad compat_sys_sysinfo @@ -665,7 +665,7 @@ ia32_sys_call_table: .quad sys_mremap .quad sys_setresuid16 .quad sys_getresuid16 /* 165 */ - .quad sys32_vm86_warning /* vm86 */ + .quad quiet_ni_syscall /* vm86 */ .quad quiet_ni_syscall /* query_module */ .quad 
sys_poll .quad compat_sys_nfsservctl diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index d3c6408..3b2163f 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -817,20 +817,6 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, advice); } -long sys32_vm86_warning(void) -{ - struct task_struct *me = current; - static char lastcomm[sizeof(me->comm)]; - - if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) { - compat_printk(KERN_INFO - "%s: vm86 mode not supported on 64 bit kernel\n", - me->comm); - strncpy(lastcomm, me->comm, sizeof(lastcomm)); - } - return -ENOSYS; -} - long sys32_lookup_dcookie(u32 addr_low, u32 addr_high, char __user *buf, size_t len) { diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 109792b..37f57b0 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -225,6 +225,7 @@ ENTRY(ret_from_fork) GET_THREAD_INFO(%ebp) popl %eax CFI_ADJUST_CFA_OFFSET -4 +ret_from_fork_tail: pushl $0x0202 # Reset kernel eflags CFI_ADJUST_CFA_OFFSET 4 popfl @@ -233,6 +234,25 @@ ENTRY(ret_from_fork) CFI_ENDPROC END(ret_from_fork) +ENTRY(i386_ret_from_resume) + CFI_STARTPROC + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + call schedule_tail + GET_THREAD_INFO(%ebp) + popl %eax + CFI_ADJUST_CFA_OFFSET -4 + movl (%esp),%eax + testl %eax,%eax + jz 1f + pushl %esp + call *%eax + addl $4,%esp +1: + addl $256,%esp + jmp ret_from_fork_tail + CFI_ENDPROC + /* * Return to user mode is not as complex as all this looks, * but we want the default path for a system call return to diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 89434d4..f422ac6 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -279,7 +279,12 @@ ENTRY(ret_from_fork) popf # reset kernel eflags CFI_ADJUST_CFA_OFFSET -4 call schedule_tail +ret_from_fork_tail: GET_THREAD_INFO(%rcx) + btr $TIF_RESUME,TI_flags(%rcx) + jc x86_64_ret_from_resume + +ret_from_fork_check: testl 
$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) jnz rff_trace rff_action: @@ -295,6 +300,19 @@ rff_trace: call syscall_trace_leave GET_THREAD_INFO(%rcx) jmp rff_action + +x86_64_ret_from_resume: + movq (%rsp),%rax + testq %rax,%rax + jz 1f + movq %rsp,%rdi + call *%rax +1: + addq $256,%rsp + cmpq $0,ORIG_RAX(%rsp) + jge ret_from_fork_tail + RESTORE_REST + jmp int_ret_from_sys_call CFI_ENDPROC END(ret_from_fork) @@ -1155,7 +1173,7 @@ ENTRY(kernel_thread) xorl %r9d,%r9d # clone now - call do_fork + call do_fork_kthread movq %rax,RAX(%rsp) xorl %edi,%edi diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index eb9ddd8..ee119de 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -144,6 +144,7 @@ int init_fpu(struct task_struct *tsk) set_stopped_child_used_math(tsk); return 0; } +EXPORT_SYMBOL(init_fpu); int fpregs_active(struct task_struct *target, const struct user_regset *regset) { diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index b68e21f..c0d3285 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include @@ -38,9 +40,9 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) & (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1)); if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount * LDT_ENTRY_SIZE); + newldt = ub_vmalloc(mincount * LDT_ENTRY_SIZE); else - newldt = (void *)__get_free_page(GFP_KERNEL); + newldt = (void *)__get_free_page(GFP_KERNEL_UBC); if (!newldt) return -ENOMEM; @@ -110,6 +112,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) } return retval; } +EXPORT_SYMBOL_GPL(init_new_context); /* * No need to lock the MM as we are the last user diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index abb78a2..2f01bd2 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -378,6 +378,21 @@ void 
touch_nmi_watchdog(void) } EXPORT_SYMBOL(touch_nmi_watchdog); +void smp_show_regs(struct pt_regs *regs, void *info) +{ + static DEFINE_SPINLOCK(show_regs_lock); + + if (regs == NULL) + return; + + spin_lock(&show_regs_lock); + bust_spinlocks(1); + printk("----------- IPI show regs -----------"); + show_regs(regs); + bust_spinlocks(0); + spin_unlock(&show_regs_lock); +} + notrace __kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { @@ -423,10 +438,10 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) if (!touched && __get_cpu_var(last_irq_sum) == sum) { /* * Ayiee, looks like this CPU is stuck ... - * wait a few IRQs (5 seconds) before doing the oops ... + * wait a few IRQs (30 seconds) before doing the oops ... */ local_inc(&__get_cpu_var(alert_counter)); - if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz) + if (local_read(&__get_cpu_var(alert_counter)) == 30 * nmi_hz) /* * die_nmi will return ONLY if NOTIFY_STOP happens.. */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 31f40b2..f269e6b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include #include @@ -58,6 +60,9 @@ #include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); +EXPORT