diff -urN a/CREDITS b/CREDITS --- a/CREDITS +++ b/CREDITS @@ -52,6 +52,12 @@ S: Buenos Aires S: Argentina +N: Dan Aloni +E: da-x@colinux.org +D: Cooperative Linux +D: Various kernel patches +S: Israel + N: Tim Alpaerts E: tim_alpaerts@toyota-motor-europe.com D: 802.2 class II logical link control layer, diff -urN a/Makefile b/Makefile --- a/Makefile +++ b/Makefile @@ -319,6 +319,11 @@ AS = $(CROSS_COMPILE)as LD = $(CROSS_COMPILE)ld CC = $(CROSS_COMPILE)gcc +ifeq ($(GCCTRACE),Y) +CC = $(COLINUX_ROOT)/bin/tracewrapper.py $(CROSS_COMPILE)gcc +else +CC = $(CROSS_COMPILE)gcc +endif CPP = $(CC) -E AR = $(CROSS_COMPILE)ar NM = $(CROSS_COMPILE)nm diff -urN a/arch/i386/Kconfig b/arch/i386/Kconfig --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -205,6 +205,7 @@ config M586TSC bool "Pentium-Classic" + depends on !COOPERATIVE help Select this for a Pentium Classic processor with the RDTSC (Read Time Stamp Counter) instruction for benchmarking. @@ -543,6 +544,10 @@ If you have a system with several CPUs, you do not need to say Y here: the IO-APIC will be used automatically.
+config X86_UP_COPIC + bool 'Cooperative PIC (COPIC) support' + depends on COOPERATIVE + config X86_LOCAL_APIC bool depends on !SMP && X86_UP_APIC @@ -555,7 +560,7 @@ config X86_TSC bool - depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ + depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ && !COOPERATIVE default y config X86_MCE @@ -882,6 +887,10 @@ source kernel/power/Kconfig +config COOPERATIVE + bool 'Cooperative Mode' + default y + source "drivers/acpi/Kconfig" menu "APM (Advanced Power Management) BIOS Support" diff -urN a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o +obj-$(CONFIG_COOPERATIVE) += cooperative.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o smpboot.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o diff -urN a/arch/i386/kernel/cooperative.c b/arch/i386/kernel/cooperative.c --- a/arch/i386/kernel/cooperative.c +++ b/arch/i386/kernel/cooperative.c @@ -0,0 +1,340 @@ +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/interrupt.h> +#include <linux/mm.h> + +#include <linux/cooperative_internal.h> +#include <asm/cooperative_internal.h> +#include <asm/smp.h> +#include <asm/desc.h> +#include <asm/mmu_context.h> +#include <asm/debugreg.h> +#include <asm/i387.h> + +CO_TRACE_STOP; + + +/* + * The next asm code is the first Linux code that runs in the + * coLinux kernel context. It receives %ecx which contains the + * address of the passage page. The passage page code sets %ecx + * to this value in its context restore part. 
+ */ + +asm( + "" + ".section .text\n" + ".globl co_start\n" + "co_start:\n" + " call co_start_arch\n" + ".previous\n" + ""); + +static int co_passage_page_holding_count = 0; + +static void co_early_cpu_init(void) +{ + /* + * On the first switch to Linux we must set up a valid TR because + * the passage page code assumes such one exists. This is basically + * copied code from cpu_init(). + * + * P.S this is protected by CO_TRACE_STOP so that we don't + * have a monitor context switch. + */ + int cpu = smp_processor_id(); + struct tss_struct * t = &per_cpu(init_tss, cpu); + struct thread_struct *thread = &current->thread; + + /* + * Initialize the per-CPU GDT with the boot GDT, + * and set up the GDT descriptor: + */ + memcpy(&per_cpu(cpu_gdt_table, cpu), cpu_gdt_table, GDT_SIZE); + cpu_gdt_descr[cpu].size = GDT_SIZE - 1; + cpu_gdt_descr[cpu].address = (unsigned long)&per_cpu(cpu_gdt_table, cpu); + + /* + * Set up the per-thread TLS descriptor cache: + */ + memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu), GDT_ENTRY_TLS_ENTRIES * 8); + + __asm__ __volatile__("lgdt %0" : : "m" (cpu_gdt_descr[cpu])); + __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); + + /* + * Delete NT + */ + __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); + + /* + * Set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + enter_lazy_tlb(&init_mm, current); + + load_esp0(t, thread); + set_tss_desc(cpu,t); + per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TSS].b &= 0xfffffdff; + + load_TR_desc(); + + load_LDT(&init_mm.context); + + /* Set up doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); + per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_DOUBLEFAULT_TSS].b &= 0xfffffdff; + + /* Clear %fs and %gs.
*/ + asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); + + __asm__ __volatile__("movl %%cr4, %0" : "=r" (mmu_cr4_features)); +} + +asm( + "" + ".section .text\n" + ".globl co_arch_start_kernel\n" + "co_arch_start_kernel:\n" + " call co_startup_entry\n" + ".previous\n" + ""); + +void co_start_arch(void) +{ + co_early_cpu_init(); + co_start_kernel(); +} + +extern void ctrl_alt_del(void); + +void co_handle_jiffies(long count) +{ + unsigned long flags; + struct pt_regs regs = { 0 }; /* synthetic frame: only orig_eax is filled in before do_IRQ() */ + + if (count > HZ) { + xtime.tv_sec += count / HZ; + count -= ((count / HZ) * HZ); + } + + while (count > 0) { + local_irq_save(flags); + regs.orig_eax = TIMER_IRQ; + do_IRQ(&regs); + local_irq_restore(flags); + + count--; + } +} + +void co_handle_incoming_message(co_message_node_t *node_message) +{ + unsigned long flags; + struct pt_regs regs = { 0 }; /* synthetic frame: only orig_eax is filled in before do_IRQ() */ + co_linux_message_t *message; + + message = (co_linux_message_t *)&node_message->msg.data; + + switch (message->device) { + case CO_DEVICE_POWER: { + co_linux_message_power_t *type = (co_linux_message_power_t *)message->data; + switch (type->type) { + case CO_LINUX_MESSAGE_POWER_ALT_CTRL_DEL: { + ctrl_alt_del(); + break; + } + } + co_free_message(node_message); + break; + } + + case CO_DEVICE_KEYBOARD: { + co_queue_incoming_message(node_message); + + local_irq_save(flags); + regs.orig_eax = KEYBOARD_IRQ; + do_IRQ(&regs); + local_irq_restore(flags); + break; + } + + case CO_DEVICE_NETWORK: { + co_queue_incoming_message(node_message); + + local_irq_save(flags); + regs.orig_eax = NETWORK_IRQ; + do_IRQ(&regs); + local_irq_restore(flags); + break; + } + + case CO_DEVICE_SERIAL: { + co_queue_incoming_message(node_message); + + local_irq_save(flags); + cocd_interrupt(); + local_irq_restore(flags); + break; + } + + default: + co_free_message(node_message); + break; + } +} + +void co_switch_wrapper_protected(void) +{ + kernel_fpu_begin(); + + /* + * We don't trust the passage page code to safely restore %gs and %fs.
+ * + * This wrapper ensures that if %fs or %gs are invalid, the processes + * exits with a segmentation fault rather than bringing down the + * machine. + **/ + unsigned long fs = 0; + unsigned long gs = 0; + + asm volatile("movl %%fs,%0": "=m" (fs)); + asm volatile("movl %%gs,%0": "=m" (gs)); + + /* + * Nullify the registers so the passage page code restores to + * null segment values on return. + */ + asm volatile("movl %0, %%fs; movl %0, %%gs" : : "r" (0)); + + /* And switch... */ + co_switch(); + + /* + * Safely restore the registers. + */ + loadsegment(fs, fs); + loadsegment(gs, gs); + + kernel_fpu_end(); +} + +void co_switch_wrapper(void) +{ + /* taken from irq.c: debugging check for stack overflow */ + long esp; + + __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (THREAD_SIZE - 1)); + if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { + printk("co_switch_wrapper: stack overflow: %ld\n", esp - sizeof(struct thread_info)); + co_terminate(CO_TERMINATE_STACK_OVERFLOW); + } + + co_switch_wrapper_protected(); +} + +void co_passage_page_acquire(unsigned long *flags) +{ + local_irq_save(*flags); + co_passage_page_holding_count++; +} + +int co_passage_page_held(void) +{ + return co_passage_page_holding_count; +} + +void co_passage_page_release(unsigned long flags) +{ + co_passage_page_holding_count--; + local_irq_restore(flags); +} + +void co_debug(const char *fmt, ...) 
+{ +} + +#define MAX_TRACE_POINTS 1024 + +typedef struct { + unsigned char *code; + unsigned char original_byte; + int off; +} co_tracepoint_t; + +co_tracepoint_t tracepoints[MAX_TRACE_POINTS]; +static int active_tracepoints = 0; + +void co_kernel_breakpoint(struct pt_regs * regs) +{ + int i = 0; + unsigned char *code = (unsigned char *)regs->eip; + if (!code) + return; + + for (i=0; i < active_tracepoints; i++) { + if (tracepoints[i].code == code - 1) { + co_debug("TRACEPOINT: %x\n", code - 1); + break; + } + } + + if (i == active_tracepoints) { + /* Bad, we don't know this tracepoint */ + co_terminate(CO_TERMINATE_INVALID_OPERATION); + return; + } + + *tracepoints[i].code = tracepoints[i].original_byte; + regs->eflags |= (1 << 8); /* Enable TF */ + regs->eip = (unsigned long)(code - 1); + tracepoints[i].off = 1; +} + +void co_kernel_set_breakpoints(void) +{ + int i; + + for (i=0; i < active_tracepoints; i++) + if (tracepoints[i].code && tracepoints[i].off) { + *tracepoints[i].code = 0xcc; + tracepoints[i].off = 0; + } +} + +int co_kernel_debug(struct pt_regs *regs, long error_code, unsigned int condition) +{ + /* if not a single step trap */ + if (!(condition & DR_STEP)) + return 0; + + /* if userspace */ + if (regs->xcs & 3) + return 0; + + regs->eflags &= ~(1 << 8); /* Disable TF */ + + co_kernel_set_breakpoints(); + + return 1; +} + +void co_kernel_tracepoint_add(unsigned char *code) +{ + if (active_tracepoints >= MAX_TRACE_POINTS) + return; + + tracepoints[active_tracepoints].code = code; + tracepoints[active_tracepoints].original_byte = *code; + tracepoints[active_tracepoints].off = 0; + active_tracepoints++; + *code = 0xcc; +} + +co_arch_info_t co_arch_info = { + .kernel_cs = __KERNEL_CS, + .kernel_ds = __KERNEL_DS, +}; + +CO_TRACE_CONTINUE; diff -urN a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -4,6 +4,7 @@ #include <linux/smp.h> #include <linux/module.h> #include 
<linux/percpu.h> +#include <linux/cooperative_internal.h> #include <asm/semaphore.h> #include <asm/processor.h> #include <asm/i387.h> @@ -570,11 +571,13 @@ /* Clear all 6 debug registers: */ + if (!cooperative_mode_enabled()) { #define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) ); - CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); + CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); #undef CD + } /* * Force FPU initialization: diff -urN a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -158,7 +158,7 @@ ALIGN ret_from_exception: preempt_stop -ret_from_intr: +ENTRY(ret_from_intr) GET_THREAD_INFO(%ebp) movl EFLAGS(%esp), %eax # mix EFLAGS and CS movb CS(%esp), %al diff -urN a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -238,6 +238,7 @@ rep movsl 1: +ENTRY(co_startup_entry) checkCPUtype: movl $-1,X86_CPUID # -1 for no CPUID initially @@ -425,7 +426,7 @@ .data ENTRY(stack_start) - .long init_thread_union+THREAD_SIZE + .long init_thread_union+THREAD_SIZE-100 .long __BOOT_DS ready: .byte 0 diff -urN a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c --- a/arch/i386/kernel/i387.c +++ b/arch/i386/kernel/i387.c @@ -17,6 +17,7 @@ #include <asm/user.h> #include <asm/ptrace.h> #include <asm/uaccess.h> +#include <linux/cooperative_internal.h> #ifdef CONFIG_MATH_EMULATION #define HAVE_HWFP (boot_cpu_data.hard_math) @@ -37,6 +38,10 @@ if (mask == 0) mask = 0x0000ffbf; } mxcsr_feature_mask &= mask; + + if (cooperative_mode_enabled()) + return; + stts(); } @@ -386,6 +391,7 @@ return err; } + /* * ptrace request handlers. 
*/ diff -urN a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -26,9 +26,89 @@ #include <asm/i8259.h> #include <linux/irq.h> +#include <linux/cooperative_internal.h> #include <io_ports.h> +#ifdef CONFIG_COOPERATIVE + +CO_TRACE_STOP; + +void proxy_interrupt_handler(unsigned long interrupt, struct pt_regs regs) +{ + unsigned long flags; + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_FORWARD_INTERRUPT; + co_passage_page->params[0] = interrupt + 0x20; + co_passage_page->params[1] = regs.eip; + co_passage_page->params[2] = (unsigned long)(&((&interrupt)[10])); + co_passage_page->host_state.flags &= ~(1 << 9); /* Turn IF off */ + co_switch_wrapper(); + co_callback(flags); +} + +CO_TRACE_CONTINUE; + +#define IRQLIST_16(x) \ + IRQ(x,0) IRQ(x,1) IRQ(x,2) IRQ(x,3) \ + IRQ(x,4) IRQ(x,5) IRQ(x,6) IRQ(x,7) \ + IRQ(x,8) IRQ(x,9) IRQ(x,a) IRQ(x,b) \ + IRQ(x,c) IRQ(x,d) IRQ(x,e) IRQ(x,f) + +#define IRQLIST_224 \ + IRQLIST_16(0x0) IRQLIST_16(0x1) IRQLIST_16(0x2) IRQLIST_16(0x3) \ + IRQLIST_16(0x4) IRQLIST_16(0x5) IRQLIST_16(0x6) IRQLIST_16(0x7) \ + IRQLIST_16(0x8) IRQLIST_16(0x9) IRQLIST_16(0xa) IRQLIST_16(0xb) \ + IRQLIST_16(0xc) IRQLIST_16(0xd) + +#define IRQ(x,y) \ + extern asmlinkage void IRQ_proxy_##x##y##_interrupt(void); +IRQLIST_224; +#undef IRQ + +#define BIRQ(id) \ +asm( \ + "\n"__ALIGN_STR"\n" \ + ".section .text\n" \ + ".globl IRQ_proxy_" #id "_interrupt\n" \ + "IRQ_proxy_" #id "_interrupt:\n" \ + "push %eax\n\t" \ + "cld;\n\t" \ + "pushl %es;\n\t" \ + "pushl %ds;\n\t" \ + "pushl %eax;\n\t" \ + "pushl %ebp;\n\t" \ + "pushl %edi;\n\t" \ + "pushl %esi;\n\t" \ + "pushl %edx;\n\t" \ + "pushl %ecx;\n\t" \ + "pushl %ebx;\n\t" \ + "movl $123, %edx;\n\t" \ + "movl %edx, %ds;\n\t" \ + "movl %edx, %es;\n\t" \ + "pushl $" #id "\n\t" \ + "call proxy_interrupt_handler\n\t" \ + "popl %ebx\n\t" \ + "jmp ret_from_intr\n" \ + ".previous\n" \ + ); \ + +#define IRQ(x,y) BIRQ(x##y) 
+IRQLIST_224; +#undef IRQ + +#define IRQ(x,y) &IRQ_proxy_##x##y##_interrupt, +void (*proxy_interrupt[NR_IRQS])(void) = { + IRQLIST_224 +}; +#undef IRQ + +#undef IRQLIST_16 +#undef IRQLIST_224 + +#endif + /* * This is the 'legacy' 8259A Programmable Interrupt Controller, * present in the majority of PC/AT boxes. @@ -364,6 +444,9 @@ { int i; + if (cooperative_mode_enabled()) + return; + #ifdef CONFIG_X86_LOCAL_APIC init_bsp_APIC(); #endif @@ -388,6 +471,65 @@ } } +#ifdef CONFIG_X86_UP_COPIC + +/* + * Not like you have any other choice other than using + * COPIC in Cooperative mode. + */ + +static void end_COPIC_irq(unsigned int irq) +{ +} + +#define shutdown_COPIC_irq disable_COPIC_irq + +static void mask_and_ack_COPIC(unsigned int irq) +{ +} + +static unsigned int startup_COPIC_irq(unsigned int irq) +{ + return 0; +} + +void disable_COPIC_irq(unsigned int irq) +{ +} + +void enable_COPIC_irq(unsigned int irq) +{ +} + +static struct hw_interrupt_type co_pic_irq_type = { + "CO-PIC", + startup_COPIC_irq, + shutdown_COPIC_irq, + enable_COPIC_irq, + disable_COPIC_irq, + mask_and_ack_COPIC, + end_COPIC_irq, + NULL +}; + +void __init init_COPIC_irqs(void) +{ + int i; + + for (i = 0; i < NR_IRQS; i++) { + irq_desc[i].status = IRQ_DISABLED; + irq_desc[i].action = NULL; + irq_desc[i].depth = 1; + + irq_desc[i].handler = &co_pic_irq_type; + } + +} + +#else +#define init_COPIC_irqs() do {} while (0) +#endif + void __init init_IRQ(void) { int i; @@ -395,6 +537,22 @@ /* all the set up before the call gates are initialised */ pre_intr_init_hook(); + if (cooperative_mode_enabled()) { + printk("Setting proxy interrupt vectors\n"); + + init_COPIC_irqs(); + + for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + if (i >= NR_IRQS) + break; + if (vector != SYSCALL_VECTOR) + set_intr_gate(vector, proxy_interrupt[i]); + } + + return; + } + /* * Cover the whole vector space, no vector can escape * us.
(some of these will be overridden and become diff -urN a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c --- a/arch/i386/kernel/ioport.c +++ b/arch/i386/kernel/ioport.c @@ -15,6 +15,7 @@ #include <linux/stddef.h> #include <linux/slab.h> #include <linux/thread_info.h> +#include <linux/cooperative_internal.h> /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) @@ -61,6 +62,9 @@ struct tss_struct * tss; unsigned long *bitmap; + if (cooperative_mode_enabled()) + return -EPERM; + if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; if (turn_on && !capable(CAP_SYS_RAWIO)) @@ -133,6 +137,9 @@ unsigned int level = regs->ebx; unsigned int old = (regs->eflags >> 12) & 3; + if (cooperative_mode_enabled()) + return -EPERM; + if (level > 3) return -EINVAL; /* Trying to gain more privileges? */ diff -urN a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -52,6 +52,7 @@ #include <linux/irq.h> #include <linux/err.h> +#include <linux/cooperative_internal.h> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -147,21 +148,24 @@ /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { - void (*idle)(void); - /* - * Mark this as an RCU critical section so that - * synchronize_kernel() in the unload path waits - * for our completion. - */ - rcu_read_lock(); - idle = pm_idle; + void (*idle)(void); + + /* + * Mark this as an RCU critical section so that + * synchronize_kernel() in the unload path waits + * for our completion.
+ */ + rcu_read_lock(); + idle = pm_idle; /* sample pm_idle only inside the RCU read-side section */ + if (cooperative_mode_enabled()) + idle = co_idle_processor; if (!idle) idle = default_idle; irq_stat[smp_processor_id()].idle_timestamp = jiffies; idle(); - rcu_read_unlock(); + rcu_read_unlock(); } schedule(); } diff -urN a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -13,6 +13,7 @@ #include <asm/uaccess.h> #include <asm/apic.h> #include "mach_reboot.h" +#include <linux/cooperative_internal.h> /* * Power off function, if any @@ -217,6 +218,11 @@ { unsigned long flags; + if (cooperative_mode_enabled()) { + co_terminate(CO_TERMINATE_REBOOT); + return; + } + local_irq_disable(); /* Write zero to CMOS register number 0x0f, which the BIOS POST @@ -332,8 +338,13 @@ */ smp_send_stop(); #endif /* CONFIG_SMP */ - + lapic_shutdown(); + + if (cooperative_mode_enabled()) { + co_terminate(CO_TERMINATE_REBOOT); + return; + } #ifdef CONFIG_X86_IO_APIC disable_IO_APIC(); @@ -364,12 +375,19 @@ void machine_halt(void) { + if (cooperative_mode_enabled()) + co_terminate(CO_TERMINATE_HALT); } EXPORT_SYMBOL(machine_halt); void machine_power_off(void) { + if (cooperative_mode_enabled()) { + co_terminate(CO_TERMINATE_POWEROFF); + return; + } + lapic_shutdown(); if (efi_enabled) diff -urN a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -39,6 +39,7 @@ #include <linux/efi.h> #include <linux/init.h> #include <linux/edd.h> +#include <linux/cooperative_internal.h> #include <video/edid.h> #include <asm/e820.h> #include <asm/mpspec.h> @@ -668,8 +669,17 @@ int len = 0; int userdef = 0; - /* Save unparsed command line copy for /proc/cmdline */ - saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; + if (cooperative_mode_enabled()) { + /* + * Better to have 'root=/dev/cobd0' here.
+ */ + from = co_boot_parameters; + snprintf(saved_command_line, COMMAND_LINE_SIZE, "%s", + co_boot_parameters); + } else { + /* Save unparsed command line copy for /proc/cmdline */ + saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; + } for (;;) { /* @@ -1019,6 +1029,8 @@ static unsigned long __init setup_memory(void) { unsigned long bootmap_size, start_pfn, max_low_pfn; + extern char _end; + unsigned long start_va = 0; /* * partially used pages are not usable - thus @@ -1026,9 +1038,16 @@ */ start_pfn = PFN_UP(init_pg_tables_end); - find_max_pfn(); - - max_low_pfn = find_max_low_pfn(); + if (cooperative_mode_enabled()) { + max_low_pfn = max_pfn = co_memory_size / PAGE_SIZE; + start_pfn = PFN_UP(__pa((unsigned long)&_end)) + 0x10; + start_va = (unsigned long)__va(start_pfn << PAGE_SHIFT); + co_alloc_pages(start_va, 0x20); + } else { + find_max_pfn(); + + max_low_pfn = find_max_low_pfn(); + } #ifdef CONFIG_HIGHMEM highstart_pfn = highend_pfn = max_pfn; @@ -1040,37 +1059,47 @@ #endif printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(max_low_pfn)); - /* - * Initialize the boot-time allocator (with low memory only): - */ - bootmap_size = init_bootmem(start_pfn, max_low_pfn); - register_bootmem_low_pages(max_low_pfn); /* - * Reserve the bootmem bitmap itself as well. We do this in two - * steps (first step was init_bootmem()) because this catches - * the (very unlikely) case of us accidentally initializing the - * bootmem allocator with an invalid RAM area. + * Initialize the boot-time allocator (with low memory only): */ - reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) + - bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY)); + bootmap_size = init_bootmem(start_pfn, max_low_pfn); - /* - * reserve physical page 0 - it's a special BIOS page on many boxes, - * enabling clean reboots, SMP operation, laptop functions. 
- */ - reserve_bootmem(0, PAGE_SIZE); - /* reserve EBDA region, it's a 4K region */ - reserve_ebda_region(); + if (cooperative_mode_enabled()) { + unsigned long bootmem_end = start_va + bootmap_size + (0x10 << PAGE_SHIFT); + unsigned long physical_end = __PAGE_OFFSET + (max_low_pfn << PAGE_SHIFT); + + free_bootmem(__pa(bootmem_end), physical_end - bootmem_end); + } else { + register_bootmem_low_pages(max_low_pfn); + + /* + * Reserve the bootmem bitmap itself as well. We do this in two + * steps (first step was init_bootmem()) because this catches + * the (very unlikely) case of us accidentally initializing the + * bootmem allocator with an invalid RAM area. + */ + reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) + + bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY)); + + /* + * reserve physical page 0 - it's a special BIOS page on many boxes, + * enabling clean reboots, SMP operation, laptop functions. + */ + reserve_bootmem(0, PAGE_SIZE); - /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent - PCI prefetch into it (errata #56). Usually the page is reserved anyways, - unless you have no PS/2 mouse plugged in. */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 == 6) - reserve_bootmem(0xa0000 - 4096, 4096); + /* reserve EBDA region, it's a 4K region */ + reserve_ebda_region(); + + /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent + PCI prefetch into it (errata #56). Usually the page is reserved anyways, + unless you have no PS/2 mouse plugged in. 
*/ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 == 6) + reserve_bootmem(0xa0000 - 4096, 4096); + } #ifdef CONFIG_SMP /* @@ -1094,6 +1123,7 @@ #endif #ifdef CONFIG_BLK_DEV_INITRD +#ifndef CONFIG_COOPERATIVE if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { reserve_bootmem(INITRD_START, INITRD_SIZE); @@ -1109,6 +1139,17 @@ initrd_start = 0; } } +#else + if (co_initrd != NULL) { + printk(KERN_INFO "initrd enabled: start: 0x%x size: 0x%08lx\n", + (unsigned int)co_initrd, (long unsigned int)co_initrd_size); + + initrd_start = (unsigned long)co_initrd; + initrd_end = (unsigned long)co_initrd + co_initrd_size; + + reserve_bootmem(virt_to_phys(co_initrd), co_initrd_size); + } +#endif #endif return max_low_pfn; } @@ -1315,6 +1356,7 @@ efi_enabled = 1; #endif + boot_cpu_data.hard_math = 1; ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); drive_info = DRIVE_INFO; screen_info = SCREEN_INFO; @@ -1338,7 +1380,7 @@ ARCH_SETUP if (efi_enabled) efi_init(); - else { + else if (!cooperative_mode_enabled()) { printk(KERN_INFO "BIOS-provided physical RAM map:\n"); print_memory_map(machine_specific_memory_setup()); } @@ -1392,8 +1434,9 @@ } #endif - - dmi_scan_machine(); + if (!cooperative_mode_enabled()) { + dmi_scan_machine(); + } #ifdef CONFIG_X86_GENERICARCH generic_apic_probe(*cmdline_p); @@ -1411,9 +1454,14 @@ get_smp_config(); #endif - register_memory(max_low_pfn); + if (!cooperative_mode_enabled()) { + register_memory(max_low_pfn); + } #ifdef CONFIG_VT +#ifdef CONFIG_COOPERATIVE_CONSOLE + conswitchp = &colinux_con; +#else #if defined(CONFIG_VGA_CONSOLE) if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) conswitchp = &vga_con; @@ -1421,6 +1469,7 @@ conswitchp = &dummy_con; #endif #endif +#endif } #include "setup_arch_post.h" diff -urN a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -13,6 +13,7 @@ #include
<linux/gfp.h> #include <linux/string.h> #include <linux/elf.h> +#include <linux/cooperative_internal.h> #include <asm/cpufeature.h> #include <asm/msr.h> @@ -43,11 +44,11 @@ static int __init sysenter_setup(void) { void *page = (void *)get_zeroed_page(GFP_ATOMIC); __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC); - if (!boot_cpu_has(X86_FEATURE_SEP)) { + if (cooperative_mode_enabled() || !boot_cpu_has(X86_FEATURE_SEP)) { memcpy(page, &vsyscall_int80_start, &vsyscall_int80_end - &vsyscall_int80_start); @@ -59,6 +60,7 @@ &vsyscall_sysenter_end - &vsyscall_sysenter_start); on_each_cpu(enable_sep_cpu, NULL, 1, 1); + return 0; } diff -urN a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -45,6 +45,7 @@ #include <linux/sysdev.h> #include <linux/bcd.h> #include <linux/efi.h> +#include <linux/cooperative_internal.h> #include <asm/io.h> #include <asm/smp.h> @@ -94,8 +95,9 @@ void do_gettimeofday(struct timeval *tv) { unsigned long seq; - unsigned long usec, sec; - unsigned long max_ntp_tick; + unsigned long sec; + long usec; + long max_ntp_tick; do { unsigned long lost; @@ -129,6 +131,13 @@ sec++; } + if (cooperative_mode_enabled()) { + while (usec < 0) { + usec += 1000000; + sec--; + } + } + tv->tv_sec = sec; tv->tv_usec = usec; } @@ -174,6 +183,9 @@ { int retval; + if (cooperative_mode_enabled()) + return -1; + /* gets recalled with irq locally disabled */ spin_lock(&rtc_lock); if (efi_enabled) @@ -243,7 +255,8 @@ * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be * called as close as possible to 500 ms before the new second starts.
*/ - if ((time_status & STA_UNSYNC) == 0 && + if (!cooperative_mode_enabled() && + (time_status & STA_UNSYNC) == 0 && xtime.tv_sec > last_rtc_update + 660 && (xtime.tv_nsec / 1000) >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 && @@ -307,6 +320,9 @@ { unsigned long retval; + if (cooperative_mode_enabled()) + return co_get_host_time(); + spin_lock(&rtc_lock); if (efi_enabled) diff -urN a/arch/i386/kernel/timers/Makefile b/arch/i386/kernel/timers/Makefile --- a/arch/i386/kernel/timers/Makefile +++ b/arch/i386/kernel/timers/Makefile @@ -7,3 +7,4 @@ obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o obj-$(CONFIG_HPET_TIMER) += timer_hpet.o obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o +obj-$(CONFIG_COOPERATIVE) += timer_cooperative.o diff -urN a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c --- a/arch/i386/kernel/timers/timer.c +++ b/arch/i386/kernel/timers/timer.c @@ -13,6 +13,9 @@ #endif /* list of timers, ordered by preference, NULL terminated */ static struct init_timer_opts* __initdata timers[] = { +#ifdef CONFIG_COOPERATIVE + &timer_cooperative_init, +#endif #ifdef CONFIG_X86_CYCLONE_TIMER &timer_cyclone_init, #endif diff -urN a/arch/i386/kernel/timers/timer_cooperative.c b/arch/i386/kernel/timers/timer_cooperative.c --- a/arch/i386/kernel/timers/timer_cooperative.c +++ b/arch/i386/kernel/timers/timer_cooperative.c @@ -0,0 +1,140 @@ +/* + * Cooperative mode timer. + * + * Dan Aloni <da-x@colinux.org>, 2003-2004 (C). 
+ */ + +#include <linux/init.h> +#include <linux/errno.h> + +#include <asm/timer.h> +#include <asm/cooperative.h> +#include <asm/div64.h> +#include <asm/param.h> + +#include <linux/cooperative.h> +#include <linux/cooperative_internal.h> + +static unsigned long long first_time; +static unsigned long frequencey; +static unsigned long long last_mark, last_mark_quotient; +static unsigned long long last_time; + +static unsigned long long query_host_highprec_time(void) +{ + unsigned long flags; + unsigned long long this_time; + unsigned long long diff; + + co_passage_page_assert_valid(); + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_GET_HIGH_PREC_TIME; + co_switch_wrapper(); + + this_time = *(unsigned long long *)(&co_passage_page->params[0]); + frequencey = *(unsigned long *)(&co_passage_page->params[2]); + diff = ((long long)this_time - (long long)last_time); + + /* + * There shouldn't be any particularly large difference between + * the current and last host timestamps. For sanity, reset the + * global reference variables if we encounter any difference + * larger than one second. 
+ */ + + if ((long long)diff < 0 || diff > frequencey) { + first_time = this_time; + last_mark_quotient = last_mark = 0; + } + + last_time = this_time; + co_passage_page_release(flags); + + return this_time; +} + +static unsigned long long monotonic_clock_cooperative(void) +{ + return 0; +} + +static long get_offset_cooperative(void) +{ + unsigned long flags; + unsigned long long this_time; + unsigned long reminder = 0, result; + long long diff, lldiff; + long signed_result; + + local_irq_save(flags); + + this_time = query_host_highprec_time(); /* may reset first_time, so read first_time only afterwards */ + diff = ((long long)(this_time - first_time) - (long long)(last_mark)); + if (diff < 0) + lldiff = -diff; + else + lldiff = diff; + + lldiff *= 1000000; + result = div_ll_X_l_rem(lldiff, frequencey, &reminder); + + signed_result = result; + if (diff < 0) + signed_result = -signed_result; + + local_irq_restore(flags); + return signed_result; +} + +static void mark_offset_cooperative(void) +{ + unsigned long flags; + local_irq_save(flags); + + last_mark += frequencey / HZ; + last_mark_quotient += frequencey % HZ; + if (last_mark_quotient >= HZ) { /* carry the accumulated remainder into last_mark */ + last_mark += 1; + last_mark_quotient -= HZ; + } + + local_irq_restore(flags); +} + +static void delay_cooperative(unsigned long loops) +{ + /* + * A bogos delay loop for creating BogoMIPS...
+ */ + + loops = loops / 10000; + while (loops) { + query_host_highprec_time(); + loops -= 1; + } +} + +static int __init init_cooperative_timer(char* override) +{ + first_time = query_host_highprec_time(); + + /* Always pick this timer */ + return 0; +} + +/************************************************************/ + +/* tsc timer_opts struct */ +struct timer_opts timer_cooperative = { + .name = "cooperative", + .mark_offset = mark_offset_cooperative, + .get_offset = get_offset_cooperative, + .monotonic_clock = monotonic_clock_cooperative, + .delay = delay_cooperative, +}; + +struct init_timer_opts __initdata timer_cooperative_init = { + .init = init_cooperative_timer, + .opts = &timer_cooperative, +}; diff -urN a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c --- a/arch/i386/kernel/timers/timer_cyclone.c +++ b/arch/i386/kernel/timers/timer_cyclone.c @@ -103,7 +103,7 @@ jiffies_64++; } -static unsigned long get_offset_cyclone(void) +static long get_offset_cyclone(void) { u32 offset; diff -urN a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -73,7 +73,7 @@ return base + cycles_2_ns(this_offset - last_offset); } -static unsigned long get_offset_hpet(void) +static long get_offset_hpet(void) { register unsigned long eax, edx; diff -urN a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c --- a/arch/i386/kernel/timers/timer_none.c +++ b/arch/i386/kernel/timers/timer_none.c @@ -6,7 +6,7 @@ /* nothing needed */ } -static unsigned long get_offset_none(void) +static long get_offset_none(void) { return 0; } diff -urN a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c --- a/arch/i386/kernel/timers/timer_pit.c +++ b/arch/i386/kernel/timers/timer_pit.c @@ -89,7 +89,7 @@ * comp.protocols.time.ntp! 
*/ -static unsigned long get_offset_pit(void) +static long get_offset_pit(void) { int count; unsigned long flags; diff -urN a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c --- a/arch/i386/kernel/timers/timer_pm.c +++ b/arch/i386/kernel/timers/timer_pm.c @@ -227,7 +227,7 @@ * get the offset (in microseconds) from the last call to mark_offset() * - Called holding a reader xtime_lock */ -static unsigned long get_offset_pmtmr(void) +static long get_offset_pmtmr(void) { u32 now, offset, delta = 0; diff -urN a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -83,7 +83,7 @@ */ static unsigned long fast_gettimeoffset_quotient; -static unsigned long get_offset_tsc(void) +static long get_offset_tsc(void) { register unsigned long eax, edx; diff -urN a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -51,6 +51,9 @@ #include <asm/arch_hooks.h> #include <asm/kdebug.h> +#include <linux/cooperative_internal.h> +#include <asm/cooperative_internal.h> + #include <linux/irq.h> #include <linux/module.h> @@ -382,6 +385,12 @@ } kernel_trap: { + if (cooperative_mode_enabled()) { + if (trapnr == 3) { + co_kernel_breakpoint(regs); + return; + } + } if (!fixup_exception(regs)) die(str, regs, error_code); return; @@ -683,9 +692,15 @@ unsigned int condition; struct task_struct *tsk = current; siginfo_t info; - + __asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); + if (cooperative_mode_enabled() && + co_kernel_debug(regs, error_code, condition)) + { + return; + } + if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, SIGTRAP) == NOTIFY_STOP) return; diff -urN a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -14,11 +14,12 @@ { . 
= __PAGE_OFFSET + 0x100000; /* read-only */ + _kernel_start = . ; _text = .; /* Text and read-only data */ .text : { *(.text) SCHED_TEXT - LOCK_TEXT + LOCK_TEXT *(.fixup) *(.gnu.warning) } = 0x9090 diff -urN a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -449,7 +449,8 @@ printk(KERN_ALERT " printing eip:\n"); printk("%08lx\n", regs->eip); asm("movl %%cr3,%0":"=r" (page)); - page = ((unsigned long *) __va(page))[address >> 22]; + page = ((unsigned long *) __va(CO_P_TO_PP(page)))[address >> 22]; + page = CO_P_TO_PP(page); printk(KERN_ALERT "*pde = %08lx\n", page); /* * We must not directly access the pte in the highpte @@ -462,6 +463,7 @@ page &= PAGE_MASK; address &= 0x003ff000; page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; + page = CO_P_TO_PP(page); printk(KERN_ALERT "*pte = %08lx\n", page); } #endif @@ -522,7 +524,7 @@ pte_t *pte_k; asm("movl %%cr3,%0":"=r" (pgd_paddr)); - pgd = index + (pgd_t *)__va(pgd_paddr); + pgd = index + (pgd_t *)__va(CO_P_TO_PP((unsigned long)pgd_paddr)); pgd_k = init_mm.pgd + index; if (!pgd_present(*pgd_k)) diff -urN a/arch/i386/mm/init.c b/arch/i386/mm/init.c --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -27,6 +27,7 @@ #include <linux/slab.h> #include <linux/proc_fs.h> #include <linux/efi.h> +#include <linux/cooperative_internal.h> #include <asm/processor.h> #include <asm/system.h> @@ -76,7 +77,7 @@ { if (pmd_none(*pmd)) { pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); + set_pmd(pmd, __pmd(CO_PP_TO_P(__pa(page_table)) | _PAGE_TABLE)); if (page_table != pte_offset_kernel(pmd, 0)) BUG(); @@ -313,21 +314,23 @@ set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); #endif - /* Enable PSE if available */ - if (cpu_has_pse) { - set_in_cr4(X86_CR4_PSE); - } - - /* Enable PGE if available */ - if (cpu_has_pge) { - set_in_cr4(X86_CR4_PGE); - __PAGE_KERNEL |= _PAGE_GLOBAL; - 
__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; + if (!cooperative_mode_enabled()) { + /* Enable PSE if available */ + if (cpu_has_pse) { + set_in_cr4(X86_CR4_PSE); + } + + /* Enable PGE if available */ + if (cpu_has_pge) { + set_in_cr4(X86_CR4_PGE); + __PAGE_KERNEL |= _PAGE_GLOBAL; + __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; + } + + kernel_physical_mapping_init(pgd_base); + remap_numa_kva(); } - kernel_physical_mapping_init(pgd_base); - remap_numa_kva(); - /* * Fixed mappings, only the page table structure has to be * created - mappings will be set by set_fixmap(): @@ -394,19 +397,26 @@ unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; unsigned int max_dma, high, low; - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - low = max_low_pfn; - high = highend_pfn; - - if (low < max_dma) - zones_size[ZONE_DMA] = low; - else { - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = low - max_dma; + if (!cooperative_mode_enabled()) { + max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + low = max_low_pfn; + high = highend_pfn; + + if (low < max_dma) + zones_size[ZONE_DMA] = low; + else { + zones_size[ZONE_DMA] = max_dma; + zones_size[ZONE_NORMAL] = low - max_dma; #ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = high - low; + zones_size[ZONE_HIGHMEM] = high - low; #endif + } + } else { + zones_size[ZONE_DMA] = 0; + zones_size[ZONE_NORMAL] = max_low_pfn; + zones_size[ZONE_HIGHMEM] = 0; } + free_area_init(zones_size); } #else @@ -574,7 +584,6 @@ if (!mem_map) BUG(); #endif - bad_ppro = ppro_with_ram_bug(); #ifdef CONFIG_HIGHMEM @@ -630,8 +639,10 @@ if (!cpu_has_pae) panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); #endif - if (boot_cpu_data.wp_works_ok < 0) - test_wp_bit(); + if (!cooperative_mode_enabled()) { + if (boot_cpu_data.wp_works_ok < 0) + test_wp_bit(); + } /* * Subtle. 
SMP is doing it's boot stuff late (because it has to diff -urN a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -11,6 +11,7 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/cooperative_internal.h> #include <asm/io.h> #include <asm/fixmap.h> #include <asm/cacheflush.h> @@ -190,7 +191,14 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) { unsigned long last_addr; - void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD); + void __iomem *p; + + if (cooperative_mode_enabled()) { + panic("ioremap_nocache %ld:%ld\n", phys_addr, size); + return NULL; + } + + p = __ioremap(phys_addr, size, _PAGE_PCD); if (!p) return p; diff -urN a/drivers/block/Kconfig b/drivers/block/Kconfig --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -358,6 +358,15 @@ "real" root file system, etc. See <file:Documentation/initrd.txt> for details. +config BLK_DEV_COBD + tristate 'Cooperative block device support' + default y + depends on COOPERATIVE=y + help + Virtual block device support for cooperative kernels. + + If unsure, say Y. 
+ config INITRAMFS_SOURCE string "Source directory of cpio_list" default "" diff -urN a/drivers/block/Makefile b/drivers/block/Makefile --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_ATARI_SLM) += acsi_slm.o obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o obj-$(CONFIG_BLK_DEV_RAM) += rd.o +obj-$(CONFIG_BLK_DEV_COBD) += cobd.o obj-$(CONFIG_BLK_DEV_LOOP) += loop.o obj-$(CONFIG_BLK_DEV_PS2) += ps2esdi.o obj-$(CONFIG_BLK_DEV_XD) += xd.o diff -urN a/drivers/block/cobd.c b/drivers/block/cobd.c --- a/drivers/block/cobd.c +++ b/drivers/block/cobd.c @@ -0,0 +1,540 @@ +/* + * Copyright (C) 2003 Dan Aloni <da-x@colinux.org> + * + * Cooperative Linux Block Device implementation + */ + +#include <linux/major.h> +#include <linux/config.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/major.h> +#include <linux/stat.h> +#include <linux/slab.h> +#include <linux/bio.h> +#include <linux/blkdev.h> +#include <linux/cooperative_internal.h> +#include <linux/file.h> +#include <linux/ioctl.h> +#include <linux/ctype.h> + +#include <asm/uaccess.h> +#include <asm/types.h> + +#include <linux/devfs_fs_kernel.h> + +#define PBD_BLOCK_SIZE 512 + +static int hardsect_size = 512; +static int hardsect_size_shift = 9; +static spinlock_t cobd_lock = SPIN_LOCK_UNLOCKED; +static int cobd_max; + +struct cobd_device { + int unit; + int refcount; + struct block_device *device; +}; + +static int cobd_request(struct cobd_device *cobd, co_block_request_type_t type, co_block_request_t *out_request) +{ + co_block_request_t *request; + unsigned long flags; + long rc = 0; + + co_passage_page_assert_valid(); + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_DEVICE; + co_passage_page->params[0] = CO_DEVICE_BLOCK; + co_passage_page->params[1] = cobd->unit; + request = (co_block_request_t *)&co_passage_page->params[2]; + request->type = type; + request->rc = -1; + co_switch_wrapper(); + 
rc = request->rc; + *out_request = *request; + co_passage_page_release(flags); + + return rc; +} + +static int cobd_stat(struct cobd_device *cobd, co_block_request_t *out_request) +{ + return cobd_request(cobd, CO_BLOCK_STAT, out_request); +} + +static int cobd_get_alias(struct cobd_device *cobd, co_block_request_t *out_request) +{ + return cobd_request(cobd, CO_BLOCK_GET_ALIAS, out_request); +} + +static int cobd_ioctl(struct inode * inode, struct file * file, + unsigned int cmd, unsigned long arg) +{ + return -ENOTTY; /* unknown command */ +} + +static int cobd_open(struct inode *inode, struct file *file) +{ + struct cobd_device *cobd = (struct cobd_device *)(inode->i_bdev->bd_disk->private_data); + co_block_request_t *co_request; + co_block_request_t stat_request; + unsigned long flags; + int result; + + if (cobd->device && cobd->device != inode->i_bdev) + return -EBUSY; + + if (cobd_stat(cobd, &stat_request)) + return -ENODEV; + + result = 0; + + co_passage_page_assert_valid(); + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_DEVICE; + co_passage_page->params[0] = CO_DEVICE_BLOCK; + co_passage_page->params[1] = cobd->unit; + co_request = (co_block_request_t *)&co_passage_page->params[2]; + co_request->type = CO_BLOCK_OPEN; + co_switch_wrapper(); + if (co_request->rc) + result = -EIO; + else + cobd->refcount++; + co_passage_page_release(flags); + + if (result) + return result; + + if (cobd->refcount == 1) { + set_capacity(inode->i_bdev->bd_disk, stat_request.disk_size >> 9); + cobd->device = inode->i_bdev; + } + + return 0; +} + +static int cobd_release(struct inode *inode, struct file *file) +{ + struct cobd_device *cobd = (struct cobd_device *)(inode->i_bdev->bd_disk->private_data); + co_block_request_t *co_request; + unsigned long flags; + int ret = 0; + + co_passage_page_assert_valid(); + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_DEVICE; + co_passage_page->params[0] = CO_DEVICE_BLOCK; + 
co_passage_page->params[1] = cobd->unit; + co_request = (co_block_request_t *)&co_passage_page->params[2]; + co_request->type = CO_BLOCK_CLOSE; + co_switch_wrapper(); + if (co_request->rc) + ret = -EIO; + cobd->refcount--; + co_passage_page_release(flags); + + if (cobd->refcount == 0) + cobd->device = NULL; + + return ret; +} + +/* + * Handle an I/O request. + */ +static int cobd_transfer(struct cobd_device *cobd, unsigned long sector, + unsigned long nsect, char *buffer, int write) +{ + co_block_request_t *co_request; + unsigned long flags; + int ret = 0; + + co_passage_page_assert_valid(); + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_DEVICE; + co_passage_page->params[0] = CO_DEVICE_BLOCK; + co_passage_page->params[1] = cobd->unit; + co_request = (co_block_request_t *)&co_passage_page->params[2]; + if (!write) + co_request->type = CO_BLOCK_READ; + else + co_request->type = CO_BLOCK_WRITE; + co_request->offset = ((unsigned long long)sector) << hardsect_size_shift; + co_request->size = nsect << hardsect_size_shift; + co_request->address = buffer; + co_request->rc = 0; + co_switch_wrapper(); + + if (!co_request->rc) + ret = 1; + + co_passage_page_release(flags); + return ret; +} + +static void do_cobd_request(request_queue_t *q) +{ + struct request *req; + struct cobd_device *cobd; + + while ((req = elv_next_request(q)) != NULL) { + int ret; + + if (!blk_fs_request(req)) { + end_request(req, 0); + continue; + } + cobd = (struct cobd_device *)(req->rq_disk->private_data); + + ret = cobd_transfer(cobd, req->sector, req->current_nr_sectors, + req->buffer, rq_data_dir(req)); + end_request(req, ret); + } +} + +static struct block_device_operations cobd_fops = { + .owner = THIS_MODULE, + .open = cobd_open, + .release = cobd_release, + .ioctl = cobd_ioctl, +}; + +static struct gendisk **cobd_disks; + +static struct cobd_device cobd_devs[CO_MODULE_MAX_COBD]; + +static int __init cobd_drives_init(void) +{ + int result, i; + + if 
(register_blkdev(COLINUX_MAJOR, "cobd")) { + printk(KERN_WARNING "Unable to get major number %d for cobd device\n", COLINUX_MAJOR); + return -EIO; + } + + cobd_max = CO_MODULE_MAX_COBD; + + result = -ENOMEM; /* for the possible errors */ + + cobd_disks = kmalloc(cobd_max * sizeof(struct gendisk *), GFP_KERNEL); + if (!cobd_disks) + goto fail_malloc; + + for (i=0; i < cobd_max; i++) { + cobd_disks[i] = alloc_disk(1); + if (!cobd_disks[i]) + goto fail_malloc3; + } + + for (i=0; i < cobd_max; i++) { + struct cobd_device *cobd = &cobd_devs[i]; + struct gendisk *disk = cobd_disks[i]; + + disk->queue = blk_init_queue(do_cobd_request, &cobd_lock); + if (!disk->queue) + goto fail_malloc4; + + blk_queue_hardsect_size(disk->queue, hardsect_size); + + cobd->unit = i; + disk->major = COLINUX_MAJOR; + disk->first_minor = i; + disk->fops = &cobd_fops; + sprintf(disk->disk_name, "cobd%d", i); + sprintf(disk->devfs_name, "cobd/%d", i); + disk->private_data = cobd; + } + + devfs_mk_dir("cobd"); + + for (i=0; i < cobd_max; i++) + add_disk(cobd_disks[i]); + + printk(KERN_INFO "cobd: loaded (max %d devices)\n", cobd_max); + return 0; + +/* error path */ +fail_malloc4: + while (i--) + blk_cleanup_queue(cobd_disks[i]->queue); + devfs_remove("cobd"); + i = cobd_max; + +fail_malloc3: + while (i--) + if (cobd_disks[i] != NULL) + put_disk(cobd_disks[i]); + + kfree(cobd_disks); + +fail_malloc: + if (unregister_blkdev(COLINUX_MAJOR, "cobd")) + printk(KERN_WARNING "cobd: cannot unregister blkdev\n"); + + return result; +} + +struct cobd_alias_major { + const char *name; + int registered; + int number; +}; + +struct cobd_alias { + const char *name; + struct cobd_alias_major *major; + int minor_start; + int minor_count; + struct gendisk **gendisk; +}; + +struct cobd_alias_major cobd_aliases_major_ide0 = { + .name = "ide0", + .number = IDE0_MAJOR, +}; + +struct cobd_alias_major cobd_aliases_major_ide1 = { + .name = "ide1", + .number = IDE1_MAJOR, +}; + +struct cobd_alias_major 
cobd_aliases_major_ide2 = {
+	.name = "ide2",
+	.number = IDE2_MAJOR,
+};
+
+struct cobd_alias_major cobd_aliases_major_ide3 = {
+	.name = "ide3",
+	.number = IDE3_MAJOR,
+};
+
+struct cobd_alias_major cobd_aliases_major_sd = {
+	.name = "sd",
+	.number = SCSI_DISK0_MAJOR,
+};
+
+struct cobd_alias cobd_aliases[] = {
+	{"hda", &cobd_aliases_major_ide0, 0x00, 21, },
+	{"hdb", &cobd_aliases_major_ide0, 0x40, 21, },
+	{"hdc", &cobd_aliases_major_ide1, 0x00, 21, },
+	{"hdd", &cobd_aliases_major_ide1, 0x40, 21, },
+	{"hde", &cobd_aliases_major_ide2, 0x00, 21, },
+	{"hdf", &cobd_aliases_major_ide2, 0x40, 21, },
+	{"hdg", &cobd_aliases_major_ide3, 0x00, 21, },
+	{"hdh", &cobd_aliases_major_ide3, 0x40, 21, },
+	{"sda", &cobd_aliases_major_sd, 0x00, 0x10, },
+	{"sdb", &cobd_aliases_major_sd, 0x10, 0x10, },
+	{"sdc", &cobd_aliases_major_sd, 0x20, 0x10, },
+	{"sdd", &cobd_aliases_major_sd, 0x30, 0x10, },
+	{"sde", &cobd_aliases_major_sd, 0x40, 0x10, },
+	{"sdf", &cobd_aliases_major_sd, 0x50, 0x10, },
+	{"sdg", &cobd_aliases_major_sd, 0x60, 0x10, },
+	{"sdh", &cobd_aliases_major_sd, 0x70, 0x10, },
+	{"sdi", &cobd_aliases_major_sd, 0x80, 0x10, },
+	{"sdj", &cobd_aliases_major_sd, 0x90, 0x10, },
+	{"sdk", &cobd_aliases_major_sd, 0xa0, 0x10, },
+	{"sdl", &cobd_aliases_major_sd, 0xb0, 0x10, },
+	{"sdm", &cobd_aliases_major_sd, 0xc0, 0x10, },
+	{"sdn", &cobd_aliases_major_sd, 0xd0, 0x10, },
+	{"sdo", &cobd_aliases_major_sd, 0xe0, 0x10, },
+	{"sdp", &cobd_aliases_major_sd, 0xf0, 0x10, },
+	{NULL, },
+};
+
+static int __init skip_atoi(const char **s)
+{
+	/* borrowed from lib/vsprintf.c */
+
+	int i=0;
+
+	while (isdigit(**s))
+		i = i*10 + *((*s)++) - '0';
+
+	return i;
+}
+/* Create the gendisk for alias_name_requested (e.g. "hda1") backed by cobd unit cobd_unit; 0 on success, negative on failure. */
+static int __init cobd_spawn_alias(struct cobd_alias *alias,
+				   const char *alias_name_requested,
+				   int cobd_unit)
+{
+	const char *index_str_start = &alias_name_requested[strlen(alias->name)];
+	const char *index_str_end = index_str_start;
+	struct cobd_device *cobd;
+	struct gendisk *disk;
+
+	int index = skip_atoi(&index_str_end);	/* 0 when the name has no numeric suffix */
+
+	if (!((index >= 0) && (index < alias->minor_count))) {	/* gendisk[] holds minor_count slots */
+		printk(KERN_WARNING "index out of bounds for alias %s (0 - %d)\n",
+		       alias_name_requested, alias->minor_count - 1);
+		return -1;
+	}
+
+	if (alias->gendisk == NULL) {
+		struct gendisk **gendisks;
+		gendisks = kmalloc(alias->minor_count * sizeof(struct gendisk *), GFP_KERNEL);
+		if (!gendisks) {
+			printk(KERN_WARNING "cannot allocate gendisk array for %s\n", alias->name);
+			return -ENOMEM;
+		}
+
+		memset(gendisks, 0, alias->minor_count * sizeof(struct gendisk *));	/* only after the NULL check */
+
+		if (!alias->major->registered) {
+			if (register_blkdev(alias->major->number, alias->major->name)) {
+				printk(KERN_WARNING "unable to get major number %d for cobd alias device %s\n",
+				       alias->major->number, alias_name_requested);
+				kfree(gendisks);
+				return -EIO;
+			}
+
+			alias->major->registered = 1;
+		}
+
+		alias->gendisk = gendisks;
+		devfs_mk_dir(alias->name);
+	}
+
+	if (alias->gendisk[index] != NULL) {
+		printk(KERN_WARNING "alias %s already used\n", alias_name_requested);
+		return -1;
+	}
+
+	disk = alloc_disk(1);
+	if (!disk) {
+		printk(KERN_WARNING "cannot allocate disk for alias %s\n", alias_name_requested);
+		return -1;
+	}
+
+	disk->queue = blk_init_queue(do_cobd_request, &cobd_lock);
+	if (!disk->queue) {
+		printk(KERN_WARNING "cannot allocate init queue for alias %s\n", alias_name_requested);
+		put_disk(disk);
+		return -1;
+	}
+
+	cobd = &cobd_devs[cobd_unit];
+	blk_queue_hardsect_size(disk->queue, hardsect_size);
+	disk->major = alias->major->number;
+	disk->first_minor = alias->minor_start + index;
+	disk->fops = &cobd_fops;
+	if (index)
+		sprintf(disk->disk_name, "%s%d", alias->name, index);
+	else
+		sprintf(disk->disk_name, "%s", alias->name);
+	sprintf(disk->devfs_name, "%s/%d", alias->name, index);
+	disk->private_data = cobd;	/* requests on the alias go to the cobd unit */
+	add_disk(disk);
+	alias->gendisk[index] = disk;
+
+	printk("cobd alias cobd%d -> %s created\n", cobd_unit, alias_name_requested);
+
+	return 0;
+}
+
+static int __init
cobd_aliases_init(void)
+{
+	int unit;
+	co_block_request_t request;
+
+	for (unit=0; unit < cobd_max; unit++) {
+		int result = cobd_get_alias(&cobd_devs[unit], &request);
+		if (result)
+			continue;
+
+		printk("alias for cobd%d is %s\n", unit, request.alias);
+
+		struct cobd_alias *alias = &cobd_aliases[0];
+		while (alias->name) {
+			const char *match = (strstr(request.alias, alias->name));
+			if (match == request.alias) {
+				cobd_spawn_alias(alias, request.alias, unit);
+				break;
+			}
+			alias++;
+		}
+
+		if (alias->name == NULL)
+			printk("alias %s is unknown (see cobd_aliases in cobd.c)\n", request.alias);
+	}
+
+	return 0;
+}
+
+static void cobd_drives_exit(void)
+{
+	int i;
+
+	for (i = 0; i < cobd_max; i++) {
+		blk_cleanup_queue(cobd_disks[i]->queue);
+		del_gendisk(cobd_disks[i]);
+		put_disk(cobd_disks[i]);
+	}
+
+	devfs_remove("cobd");
+	if (unregister_blkdev(COLINUX_MAJOR, "cobd"))
+		printk(KERN_WARNING "cobd: cannot unregister blkdev\n");
+
+	kfree(cobd_disks);
+}
+/* Undo cobd_spawn_alias(): release every alias disk, its devfs dir, its major and the gendisk array. */
+static void cobd_aliases_exit(void)
+{
+	struct cobd_alias *alias = &cobd_aliases[0];
+	while (alias->name != NULL) {
+		if (alias->gendisk == NULL) {	/* no disk was ever spawned for this alias */
+			alias++;
+			continue;
+		}
+
+		int index;
+		for (index=0; index < alias->minor_count; index++) {
+			struct gendisk *disk = alias->gendisk[index];
+			if (!disk)
+				continue;	/* array is sparse: skip unused slots, keep cleaning */
+
+			blk_cleanup_queue(disk->queue);
+			del_gendisk(disk);
+			put_disk(disk);
+		}
+
+		devfs_remove(alias->name);
+		if (alias->major->registered) {	/* majors are shared between aliases: unregister once */
+			unregister_blkdev(alias->major->number, alias->major->name);
+			alias->major->registered = 0;
+		}
+		kfree(alias->gendisk);
+
+		alias++;
+	}
+}
+
+static int __init cobd_init(void)
+{
+	int result = cobd_drives_init();
+	if (result)
+		return result;
+
+	cobd_aliases_init();
+
+	return result;
+}
+
+static void cobd_exit(void)
+{
+	cobd_aliases_exit();
+	cobd_drives_exit();
+}
+
+module_init(cobd_init);
+module_exit(cobd_exit);
+
+
diff -urN a/drivers/char/Makefile b/drivers/char/Makefile
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@
-27,6 +27,7 @@ obj-$(CONFIG_STALLION) += stallion.o obj-$(CONFIG_ISTALLION) += istallion.o obj-$(CONFIG_DIGI) += pcxx.o +obj-$(CONFIG_COOPERATIVE) += cocd.o obj-$(CONFIG_DIGIEPCA) += epca.o obj-$(CONFIG_SPECIALIX) += specialix.o obj-$(CONFIG_MOXA_INTELLIO) += moxa.o diff -urN a/drivers/char/cocd.c b/drivers/char/cocd.c --- a/drivers/char/cocd.c +++ b/drivers/char/cocd.c @@ -0,0 +1,368 @@ +/* + * Copyright (C) 2004 Dan Aloni <da-x@colinux.org> + * + * Cooperative Linux Serial Line implementation + * + * Compatible with UML, also based on some code from there. + * Also based on The tiny_tty.c example driver by Greg Kroah-Hartman (greg@kroah.com). + */ + +/* + * 20040908: Ballard, Jonathan H. <jhballard@hotmail.com> + * : Implemented cocd_task() & throttle. + * 20041224: Used schedule() instead of schedule_work(). + * 20050101: Uses interruptible_sleep_on() and wake_up() instead of schedule(). + * : Uses list_*() for dispatched data flow to each unit. + * : Handles multiple units in separate tasks. 
+ * +*/ + + +#include <linux/major.h> +#include <linux/config.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/major.h> +#include <linux/stat.h> +#include <linux/file.h> +#include <linux/ioctl.h> +#include <linux/device.h> +#include <linux/console.h> +#include <linux/wait.h> + +#include <linux/workqueue.h> +#include <linux/devfs_fs_kernel.h> +#include <linux/tty.h> +#include <linux/tty_flip.h> + +#include <linux/cooperative_internal.h> + +#include <asm/uaccess.h> + +struct cocd_tty { + struct semaphore sem; /* locks this structure */ + struct tty_struct *tty; /* tty for this device */ + unsigned open_count; /* open()/close() tally */ + struct work_struct work; /* individual unit task */ + struct list_head inq; /* input queue */ + wait_queue_head_t waitq; + int throttled; /* data flow throttle bit */ +}; + +static struct tty_driver *cocd_driver = NULL; +DECLARE_MUTEX(cocd_sem); + +static void cocd_unit_task(void *data) +{ + co_message_node_t *input; + struct cocd_tty *cocd = data; + co_linux_message_t *message; + char *p, *e, *m; + struct tty_struct *tty; + + tty = cocd->tty; + + while(cocd->open_count) { + down(&cocd->sem); + if(list_empty(&cocd->inq)) { + up(&cocd->sem); + interruptible_sleep_on(&cocd->waitq); + continue; + } + input = list_entry(cocd->inq.prev, co_message_node_t, node); + up(&cocd->sem); + + message = (co_linux_message_t *)&input->msg.data; + e = (m = p = message->data) + message->size; + while(p < e && cocd->open_count) { + if(cocd->throttled) { + interruptible_sleep_on(&cocd->waitq); + continue; + } + if(e < (m += (TTY_FLIPBUF_SIZE - tty->flip.count))) + m = e; + while(p < m) + tty_insert_flip_char(tty, *(p++), 0); + if(tty->flip.count >= TTY_FLIPBUF_SIZE) { + tty_flip_buffer_push(tty); + } + } + down(&cocd->sem); + list_del(&input->node); + up(&cocd->sem); + co_free_message(input); + if(tty->flip.count && cocd->open_count) { + if(cocd->throttled) { + 
interruptible_sleep_on(&cocd->waitq); + } + tty_flip_buffer_push(tty); + continue; + } + } + down(&cocd->sem); + while(!list_empty(&cocd->inq)) { + input = list_entry(cocd->inq.prev, co_message_node_t, node); + list_del(&input->node); + co_free_message(input); + } + up(&cocd->sem); + kfree(cocd); +} + +int cocd_open(struct tty_struct *tty, struct file * filp) +{ + struct cocd_tty *cocd = NULL; + + down(&cocd_sem); + + /* MOD_INC_USE_COUNT; - Removed in 2.6, reference count is handled + * outside the module in 2.6 + */ + + if ((cocd = (struct cocd_tty *)tty->driver_data)) { + down (&cocd->sem); + } else { + if(!(cocd = kmalloc(sizeof(*cocd), GFP_KERNEL))) { + /* MOD_DEC_USE_COUNT; - Removed in 2.6, reference count + * is handled outside the module in 2.6 + */ + + up(&cocd_sem); + return -ENOMEM; + } + + init_MUTEX_LOCKED(&cocd->sem); + cocd->open_count = 0; + cocd->tty = tty; + cocd->throttled = 0; + INIT_WORK(&cocd->work, cocd_unit_task, cocd); + INIT_LIST_HEAD(&cocd->inq); + init_waitqueue_head(&cocd->waitq); + tty->driver_data = cocd; + tty->low_latency = 1; + schedule_work(&cocd->work); + } + + cocd->open_count++; + + up(&cocd->sem); + up(&cocd_sem); + + return 0; +} + +void cocd_close(struct tty_struct *tty, struct file * filp) +{ + struct cocd_tty *cocd = NULL; + + down(&cocd_sem); + + cocd = (struct cocd_tty *)tty->driver_data; + if (!cocd) { + printk("cocd: no attached struct\n"); + goto out; + } + + down(&cocd->sem); + if (cocd->open_count == 1) { /* last close */ + tty->driver_data = NULL; + wake_up(&cocd->waitq); + } + cocd->open_count--; + up(&cocd->sem); + +out: + /* MOD_DEC_USE_COUNT; - Removed in 2.6, reference count is handled + * outside the module in 2.6 + */ + + up(&cocd_sem); +} + +void cocd_interrupt(void) +{ + if (!cocd_driver) + return; + + co_message_node_t *input; + if(!co_get_message(&input, CO_DEVICE_SERIAL)) + return; + if(!input) + return; + + co_linux_message_t *message; + struct tty_struct *tty; + struct cocd_tty *cocd; + message = 
(co_linux_message_t *)&input->msg.data; + down(&cocd_sem); + if (message->unit < CO_MODULE_MAX_SERIAL + && (tty = cocd_driver->ttys[message->unit]) + && (cocd = (struct cocd_tty *)tty->driver_data)) { + up(&cocd_sem); + down(&cocd->sem); + list_add_tail(&input->node,&cocd->inq); + up(&cocd->sem); + wake_up(&cocd->waitq); + return; + } + up(&cocd_sem); + co_free_message(input); +} + +int cocd_write(struct tty_struct * tty, + const unsigned char *buf, int count) +{ + const char *kbuf_scan = NULL; + int count_left; + + kbuf_scan = buf; + count_left = count; + + while (count_left > 0) { + int count_partial = count_left; + if (count_partial > 1000) + count_partial = 1000; + + co_send_message(CO_MODULE_LINUX, + CO_MODULE_SERIAL0 + tty->index, + CO_PRIORITY_DISCARDABLE, + CO_MESSAGE_TYPE_STRING, + count_partial, + kbuf_scan); + + count_left -= count_partial; + kbuf_scan += count_partial; + } + + return count; +} + +int cocd_write_room(struct tty_struct *tty) +{ + struct cocd_tty *cocd = NULL; + + cocd = (struct cocd_tty *)tty->driver_data; + if (!cocd) + return 0; + + down(&cocd->sem); + if (cocd->open_count == 0) { + /* port was not opened */ + up(&cocd->sem); + return 0; + } + + up(&cocd->sem); + return 255; +} + +void cocd_hangup(struct tty_struct *tty) +{ +} + +void cocd_throttle(struct tty_struct * tty) +{ + struct cocd_tty *cocd; + cocd = (struct cocd_tty *)tty->driver_data; + if (!cocd) + return; + down(&cocd->sem); + cocd->throttled = 1; + up(&cocd->sem); +} + +void cocd_unthrottle(struct tty_struct * tty) +{ + struct cocd_tty *cocd; + cocd = (struct cocd_tty *)tty->driver_data; + if (!cocd) + return; + down(&cocd->sem); + cocd->throttled = 0; + up(&cocd->sem); + wake_up(&cocd->waitq); +} + +void cocd_flush_buffer(struct tty_struct *tty) +{ +} + +void cocd_set_termios(struct tty_struct *tty, struct termios *old_termios) +{ +} + +int cocd_chars_in_buffer(struct tty_struct *tty) +{ + return 0; +} + +static struct tty_operations cocd_ops = { + .open = cocd_open, + 
.close = cocd_close, + .write = cocd_write, + .write_room = cocd_write_room, + .flush_buffer = cocd_flush_buffer, + .throttle = cocd_throttle, + .unthrottle = cocd_unthrottle, + .hangup = cocd_hangup, + .chars_in_buffer = cocd_chars_in_buffer, + .set_termios = cocd_set_termios, +}; + +static struct tty_driver *cocd_driver; + +static void cocd_console_write(struct console *c, const char *string, unsigned len) +{ +} + +static struct tty_driver *cocd_console_device(struct console *c, int *index) +{ + *index = c->index; + return cocd_driver; +} + +static int cocd_console_setup(struct console *co, char *options) +{ + return(0); +} + +static struct console cocd_cons = { + name: "ttyS", + write: cocd_console_write, + device: cocd_console_device, + setup: cocd_console_setup, + flags: CON_PRINTBUFFER, + index: -1, +}; + +static int __init cocd_init(void) +{ + cocd_driver = alloc_tty_driver(CO_MODULE_MAX_SERIAL); + + if (!cocd_driver) + panic("Couldn't allocate cocd driver"); + + cocd_driver->owner = THIS_MODULE; + cocd_driver->driver_name = "Cooperative serial lines"; + cocd_driver->name = "ttS"; + cocd_driver->devfs_name = "tts/"; + cocd_driver->major = TTY_MAJOR; + cocd_driver->minor_start = 64; + cocd_driver->type = TTY_DRIVER_TYPE_SERIAL; + cocd_driver->subtype = 0; + cocd_driver->init_termios = tty_std_termios; + cocd_driver->flags = 0; + + tty_set_operations(cocd_driver, &cocd_ops); + + if (tty_register_driver(cocd_driver)) + panic("Couldn't register cocd driver"); + + register_console(&cocd_cons); + + return 0; +} + +module_init(cocd_init); diff -urN a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -16,7 +16,7 @@ default y depends on INPUT && INPUT_KEYBOARD select SERIO - select SERIO_I8042 if PC + select SERIO_I8042 if PC && !COOPERATIVE select SERIO_GSCPS2 if GSC help Say Y here if you want to use a standard AT or PS/2 keyboard. 
Usually diff -urN a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -26,6 +26,7 @@ #include <linux/input.h> #include <linux/serio.h> #include <linux/workqueue.h> +#include <linux/cooperative_internal.h> #define DRIVER_DESC "AT and PS/2 keyboard driver" @@ -640,6 +641,9 @@ { unsigned char param[2]; + if (cooperative_mode_enabled()) + return 0; + /* * Some systems, where the bit-twiddling when testing the io-lines of the * controller may confuse the keyboard need a full reset of the keyboard. On diff -urN a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig --- a/drivers/input/mouse/Kconfig +++ b/drivers/input/mouse/Kconfig @@ -14,7 +14,7 @@ config MOUSE_PS2 tristate "PS/2 mouse" default y - depends on INPUT && INPUT_MOUSE + depends on INPUT && INPUT_MOUSE && !COOPERATIVE select SERIO select SERIO_I8042 if PC select SERIO_GSCPS2 if GSC @@ -36,9 +36,21 @@ To compile this driver as a module, choose M here: the module will be called psmouse. +config MOUSE_COOPERATIVE + tristate "Cooperative Mouse driver" + default y + depends on INPUT && INPUT_MOUSE && COOPERATIVE + ---help--- + Virtual mouse driver for cooperative kernels. + + If unsure, say Y. + + To compile this driver as a module, choose M here: the + module will be called comouse. + config MOUSE_SERIAL tristate "Serial mouse" - depends on INPUT && INPUT_MOUSE + depends on INPUT && INPUT_MOUSE && !COOPERATIVE select SERIO ---help--- Say Y here if you have a serial (RS-232, COM port) mouse connected @@ -52,7 +64,7 @@ config MOUSE_INPORT tristate "InPort/MS/ATIXL busmouse" - depends on INPUT && INPUT_MOUSE && ISA + depends on INPUT && INPUT_MOUSE && ISA && !COOPERATIVE help Say Y here if you have an InPort, Microsoft or ATI XL busmouse. They are rather rare these days. 
@@ -62,13 +74,13 @@ config MOUSE_ATIXL bool "ATI XL variant" - depends on MOUSE_INPORT + depends on MOUSE_INPORT && !COOPERATIVE help Say Y here if your mouse is of the ATI XL variety. config MOUSE_LOGIBM tristate "Logitech busmouse" - depends on INPUT && INPUT_MOUSE && ISA + depends on INPUT && INPUT_MOUSE && ISA && !COOPERATIVE help Say Y here if you have a Logitech busmouse. They are rather rare these days. @@ -78,7 +90,7 @@ config MOUSE_PC110PAD tristate "IBM PC110 touchpad" - depends on INPUT && INPUT_MOUSE && ISA + depends on INPUT && INPUT_MOUSE && ISA && !COOPERATIVE help Say Y if you have the IBM PC-110 micro-notebook and want its touchpad supported. @@ -88,7 +100,7 @@ config MOUSE_MAPLE tristate "Maple bus mouse" - depends on SH_DREAMCAST && INPUT && INPUT_MOUSE && MAPLE + depends on SH_DREAMCAST && INPUT && INPUT_MOUSE && MAPLE && !COOPERATIVE help Say Y if you have a DreamCast console and a mouse attached to its Maple bus. @@ -98,7 +110,7 @@ config MOUSE_AMIGA tristate "Amiga mouse" - depends on AMIGA && INPUT && INPUT_MOUSE + depends on AMIGA && INPUT && INPUT_MOUSE && !COOPERATIVE help Say Y here if you have an Amiga and want its native mouse supported by the kernel. @@ -108,7 +120,7 @@ config MOUSE_RISCPC tristate "Acorn RiscPC mouse" - depends on ARCH_ACORN && INPUT && INPUT_MOUSE + depends on ARCH_ACORN && INPUT && INPUT_MOUSE && !COOPERATIVE help Say Y here if you have the Acorn RiscPC computer and want its native mouse supported. 
@@ -118,7 +130,7 @@ config MOUSE_VSXXXAA tristate "DEC VSXXX-AA/GA mouse and VSXXX-AB tablet" - depends on INPUT && INPUT_MOUSE + depends on INPUT && INPUT_MOUSE && !COOPERATIVE select SERIO help Say Y (or M) if you want to use a DEC VSXXX-AA (hockey diff -urN a/drivers/input/mouse/Makefile b/drivers/input/mouse/Makefile --- a/drivers/input/mouse/Makefile +++ b/drivers/input/mouse/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_MOUSE_MAPLE) += maplemouse.o obj-$(CONFIG_MOUSE_PC110PAD) += pc110pad.o obj-$(CONFIG_MOUSE_PS2) += psmouse.o +obj-$(CONFIG_MOUSE_COOPERATIVE) += comouse.o obj-$(CONFIG_MOUSE_SERIAL) += sermouse.o obj-$(CONFIG_MOUSE_VSXXXAA) += vsxxxaa.o diff -urN a/drivers/input/mouse/comouse.c b/drivers/input/mouse/comouse.c --- a/drivers/input/mouse/comouse.c +++ b/drivers/input/mouse/comouse.c @@ -0,0 +1,74 @@ +/* + * Virtual mouse driver for Linux + * + * Skeleton based on: + * $Id: sermouse.c,v 1.17 2002/03/13 10:03:43 vojtech Exp $ + * + * Copyright (c) 1999-2001 Vojtech Pavlik + * + * Copyright (c) 2004 Dan Aloni + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/input.h> +#include <linux/config.h> +#include <linux/serio.h> +#include <linux/init.h> + +MODULE_AUTHOR("Dan Aloni <da-x@colinux.org>"); +MODULE_DESCRIPTION("Virtual mouse driver"); +MODULE_LICENSE("GPL"); + +/* + * comouse_interrupt() is currently a stub: it ignores incoming characters + * and only reports the interrupt as handled. + */ + +static irqreturn_t comouse_interrupt(struct serio *serio, + unsigned char data, unsigned int flags, struct pt_regs *regs) +{ + return IRQ_HANDLED; +} + +/* + * comouse_disconnect() cleans up after we don't want to talk + * to the mouse anymore. + */ + +static void comouse_disconnect(struct serio *serio) +{ +} + +/* + * comouse_connect() is a callback from the serio module when + * an unhandled serio port is found. 
+ */ + +static void comouse_connect(struct serio *serio, struct serio_driver *dev) +{ +} + +static struct serio_driver comouse_dev = { + .interrupt = comouse_interrupt, + .connect = comouse_connect, + .disconnect = comouse_disconnect, + .driver = { + .name = "comouse", + }, +}; + +int __init comouse_init(void) +{ + serio_register_driver(&comouse_dev); + return 0; +} + +void __exit comouse_exit(void) +{ + serio_unregister_driver(&comouse_dev); +} + +module_init(comouse_init); +module_exit(comouse_exit); diff -urN a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig --- a/drivers/input/serio/Kconfig +++ b/drivers/input/serio/Kconfig @@ -20,7 +20,7 @@ tristate "i8042 PC Keyboard controller" if EMBEDDED || !X86 default y select SERIO - depends on !PARISC && (!ARM || ARCH_SHARK || FOOTBRIDGE_HOST) && !M68K + depends on !PARISC && (!ARM || ARCH_SHARK || FOOTBRIDGE_HOST) && !M68K && !COOPERATIVE ---help--- i8042 is the chip over which the standard AT keyboard and PS/2 mouse are connected to the computer. If you use these devices, @@ -132,17 +132,22 @@ module will be called maceps2. config SERIO_RAW - tristate "Raw access to serio ports" - depends on SERIO - help - Say Y here if you want to have raw access to serio ports, such as - AUX ports on i8042 keyboard controller. Each serio port that is - bound to this driver will be accessible via a char device with - major 10 and dynamically allocated minor. The driver will try - allocating minor 1 (that historically corresponds to /dev/psaux) - first. To bind this driver to a serio port use sysfs interface: - - echo -n "serio_raw" > /sys/bus/serio/devices/serioX/driver - - To compile this driver as a module, choose M here: the - module will be called serio_raw. + tristate "Raw access to serio ports" + depends on SERIO + help + Say Y here if you want to have raw access to serio ports, such as + AUX ports on i8042 keyboard controller. 
Each serio port that is + bound to this driver will be accessible via a char device with + major 10 and dynamically allocated minor. The driver will try + allocating minor 1 (that historically corresponds to /dev/psaux) + first. To bind this driver to a serio port use sysfs interface: + + echo -n "serio_raw" > /sys/bus/serio/devices/serioX/driver + + To compile this driver as a module, choose M here: the + module will be called serio_raw. + +config SERIO_COKBD + tristate "Cooperative Linux virtual keyboard controller driver" + depends on COOPERATIVE + default y diff -urN a/drivers/input/serio/Makefile b/drivers/input/serio/Makefile --- a/drivers/input/serio/Makefile +++ b/drivers/input/serio/Makefile @@ -18,3 +18,4 @@ obj-$(CONFIG_SERIO_PCIPS2) += pcips2.o obj-$(CONFIG_SERIO_MACEPS2) += maceps2.o obj-$(CONFIG_SERIO_RAW) += serio_raw.o +obj-$(CONFIG_SERIO_COKBD) += cokbd.o \ No newline at end of file diff -urN a/drivers/input/serio/cokbd.c b/drivers/input/serio/cokbd.c --- a/drivers/input/serio/cokbd.c +++ b/drivers/input/serio/cokbd.c @@ -0,0 +1,155 @@ +/* + * Cooperative Linux virtual keyboard controller driver + * + * Copyright (c) 1999-2002 Dan Aloni <da-x@colinux.org> + * Based on 98kbd-io.c written by Osamu Tomita + */ + +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <linux/config.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/init.h> +#include <linux/serio.h> +#include <linux/sched.h> +#include <linux/cooperative_internal.h> + +#include <asm/io.h> + +MODULE_AUTHOR("Dan Aloni <da-x@colinux.org>"); +MODULE_DESCRIPTION("Cooperative Linux virtual keyboard controller driver"); +MODULE_LICENSE("GPL"); + +/* + * Names. 
+ */ + +#define COKBD_PHYS_DESC "cokbd" + +static struct serio cokbd_port; + +static irqreturn_t cokbdio_interrupt(int irq, void *dev_id, struct pt_regs *regs); + +/* + * cokbd_flush() flushes all data that may be in the keyboard buffers + */ + +static int cokbd_flush(void) +{ +#if (0) + co_linux_message_t *message; + + while (co_get_message(&message, CO_DEVICE_KEYBOARD)) { + co_free_message(message); + } +#endif + return 0; +} + +/* + * cokbd_write() sends a byte out through the keyboard interface. + */ + +#define ATKBD_CMD_GETID 0x02f2 + +static void cokbd_receive(struct serio *port, unsigned char c) +{ + struct pt_regs regs= {0, }; + + serio_interrupt(port, c, 0, &regs); +} + +static int cokbd_write(struct serio *port, unsigned char c) +{ + return 0; +} + +/* + * cokbd_open() is called when a port is opened by the higher layer. + * It allocates the interrupt and enables it in the chip. + */ + +static int cokbd_open(struct serio *port) +{ + cokbd_flush(); + + if (request_irq(KEYBOARD_IRQ, cokbdio_interrupt, 0, "cokbd", NULL)) { + printk(KERN_ERR "cobkd.c: Can't get irq %d for %s, unregistering the port.\n", KEYBOARD_IRQ, "KBD"); + serio_unregister_port(port); + return -1; + } + + return 0; +} + +static void cokbd_close(struct serio *port) +{ + printk(KERN_INFO "cokbd closed\n"); + + free_irq(KEYBOARD_IRQ, NULL); + + cokbd_flush(); +} + +/* + * Structures for registering the devices in the serio.c module. + */ + +static struct serio cokbd_port = +{ + .type = SERIO_8042_XL, + .write = cokbd_write, + .open = cokbd_open, + .close = cokbd_close, + .name = "cokbd port", + .phys = COKBD_PHYS_DESC, +}; + +/* + * cokbdio_interrupt() is the most important function in this driver - + * it handles the interrupts from keyboard, and sends incoming bytes + * to the upper layers. 
+ */ + +static irqreturn_t cokbdio_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + co_message_node_t *node_message; + while (co_get_message(&node_message, CO_DEVICE_KEYBOARD)) { + co_linux_message_t *message = (co_linux_message_t *)&node_message->msg.data; + co_scan_code_t *sc = (co_scan_code_t *)message->data; + unsigned long scancode = sc->code; + + if (!sc->down) + scancode |= 0x80; + + cokbd_receive(&cokbd_port, scancode); + + co_free_message(node_message); + } + + return IRQ_HANDLED; +} + +int __init cokbdio_init(void) +{ + serio_register_port(&cokbd_port); + + printk(KERN_INFO "serio: cokbd at irq %d\n", KEYBOARD_IRQ); + + return 0; +} + +void __exit cokbdio_exit(void) +{ + serio_unregister_port(&cokbd_port); +} + +module_init(cokbdio_init); +module_exit(cokbdio_exit); diff -urN a/drivers/net/Kconfig b/drivers/net/Kconfig --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -127,6 +127,10 @@ If you don't know what to use this for, you don't need it. +config COOPERATIVE_CONET + tristate 'Cooperative Virtual Ethernet driver support' + depends on COOPERATIVE + config NET_SB1000 tristate "General Instruments Surfboard 1000" depends on NETDEVICES && PNP diff -urN a/drivers/net/Makefile b/drivers/net/Makefile --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -152,6 +152,7 @@ # This is also a 82596 and should probably be merged obj-$(CONFIG_LP486E) += lp486e.o +obj-$(CONFIG_COOPERATIVE_CONET) += conet.o obj-$(CONFIG_ETH16I) += eth16i.o obj-$(CONFIG_ZORRO8390) += zorro8390.o 8390.o diff -urN a/drivers/net/conet.c b/drivers/net/conet.c --- a/drivers/net/conet.c +++ b/drivers/net/conet.c @@ -0,0 +1,305 @@ +/* + * Copyright (C) 2003-2004 Dan Aloni <da-x@gmx.net> + * Copyright (C) 2004 Pat Erley + * Copyright (C) 2004 George Boutwell + * + * Cooperative Linux Network Device implementation + */ + +#include <linux/config.h> +#include <linux/version.h> +#include <linux/module.h> + +#include <linux/kernel.h> + +#include <linux/netdevice.h> +#include 
<linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/ethtool.h> + +#include <linux/cooperative_internal.h> +#include <asm/irq.h> + +struct conet_priv { + struct net_device_stats stats; + int status; + int unit; + int enabled; + int handling; +}; + +struct net_device *conet_dev[CO_MODULE_MAX_CONET]; + +irqreturn_t conet_interrupt(int irq, void *dev_id, struct pt_regs *reg_ptr); + +static int conet_get_mac(int unit, char *address) +{ + unsigned long flags = 0; + co_network_request_t *net_request; + int result = 0; + + co_passage_page_assert_valid(); + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_DEVICE; + co_passage_page->params[0] = CO_DEVICE_NETWORK; + net_request = (typeof(net_request))&co_passage_page->params[1]; + net_request->unit = unit; + net_request->type = CO_NETWORK_GET_MAC; + co_switch_wrapper(); + memcpy(address, net_request->mac_address, ETH_ALEN); + result = net_request->result; + co_passage_page_release(flags); + + return result; +} + +int conet_open(struct net_device *dev) +{ + struct conet_priv *priv = (struct conet_priv *)dev->priv; + + if (priv->enabled) + return 0; + + conet_get_mac(priv->unit, dev->dev_addr); + + priv->enabled = 1; + + netif_start_queue(dev); + + return 0; +} + +int conet_stop(struct net_device *dev) +{ + struct conet_priv *priv = (struct conet_priv *)dev->priv; + + priv->enabled = 0; + + netif_stop_queue(dev); /* can't transmit any more */ + + return 0; +} + +int conet_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + int len; + char *data; + struct conet_priv *priv = (struct conet_priv *)dev->priv; + + len = skb->len < ETH_ZLEN ? 
ETH_ZLEN : skb->len; + data = skb->data; + + dev->trans_start = jiffies; /* save the timestamp */ + + co_send_message(CO_MODULE_LINUX, + CO_MODULE_CONET0 + priv->unit, + CO_PRIORITY_DISCARDABLE, + CO_MESSAGE_TYPE_OTHER, + len, + data); + + priv->stats.tx_bytes+=skb->len; + priv->stats.tx_packets++; + + dev_kfree_skb(skb); + + return 0; +} + +static void conet_rx(struct net_device *dev, co_linux_message_t *message) +{ + struct sk_buff *skb; + struct conet_priv *priv = (struct conet_priv *)dev->priv; + int len; + unsigned char *buf; + + len = message->size; + buf = message->data; + + /* + * The packet has been retrieved from the transmission + * medium. Build an skb around it, so upper layers can handle it + */ + skb = dev_alloc_skb(len+2); + if (!skb) { + printk("conet rx: low on mem - packet dropped\n"); + priv->stats.rx_dropped++; + return; + } + + memcpy(skb_put(skb, len), buf, len); + + /* Write metadata, and then pass to the receive level */ + skb->dev = dev; + skb->protocol = eth_type_trans(skb, dev); + skb->ip_summed = CHECKSUM_NONE; /* make the kernel calculate and verify + the checksum */ + + priv->stats.rx_bytes += len; + priv->stats.rx_packets++; + + netif_rx(skb); + return; +} + +irqreturn_t conet_interrupt(int irq, void *dev_id, struct pt_regs *reg_ptr) +{ + co_message_node_t *node_message; + while (co_get_message(&node_message, CO_DEVICE_NETWORK)) { + struct net_device *dev; + struct conet_priv *priv; + co_linux_message_t *message; + + message = (co_linux_message_t *)&node_message->msg.data; + if (message->unit < 0 || message->unit >= CO_MODULE_MAX_CONET) { + printk("conet intrrupt: buggy network reception\n"); + return IRQ_HANDLED; + } + + dev = conet_dev[message->unit]; + if (!dev) { + co_free_message(node_message); + continue; + } + + if (!netif_running(dev)) { + co_free_message(node_message); + continue; + } + + priv = (struct conet_priv *)dev->priv; + if (priv->handling) { + co_free_message(node_message); + continue; + } + + priv->handling = 1; + 
conet_rx(dev, message); + co_free_message(node_message); + priv->handling = 0; + } + + return IRQ_HANDLED; +} + +struct net_device_stats* conet_get_stats(struct net_device *dev) +{ + return (struct net_device_stats *)dev->priv; +} + +int conet_init(struct net_device *dev) +{ + struct conet_priv *priv = (struct conet_priv *)dev->priv; + + memset(&priv->stats, 0, sizeof(priv->stats)); + + ether_setup(dev); + + dev->open = conet_open; + dev->stop = conet_stop; + dev->hard_start_xmit = conet_hard_start_xmit; + dev->get_stats = conet_get_stats; + dev->irq = NETWORK_IRQ; + + SET_MODULE_OWNER(dev); + + return 0; +} + +void conet_uninit(struct net_device *dev) +{ +} + +static struct net_device *conet_create(int unit) +{ + struct net_device *dev; + struct conet_priv *priv; + int result = 0; + + dev = kmalloc(sizeof(struct net_device), GFP_KERNEL); + if (!dev) { + return ERR_PTR(-ENOMEM); + } + + memset(dev, 0, sizeof(struct net_device)); + + priv = kmalloc(sizeof(struct conet_priv), GFP_KERNEL); + if (priv == NULL) { + kfree(dev); + return ERR_PTR(-ENOMEM); + } + + memset(priv, 0, sizeof(struct conet_priv)); + priv->unit = unit; + + dev->priv = priv; + dev->init = conet_init; + dev->uninit = conet_uninit; + strcpy(dev->name, "eth%d"); + + result = register_netdev(dev); + if (result) { + printk("conet: error %d registering device \"%s\"\n", result, dev->name); + kfree(dev->priv); + kfree(dev); + return ERR_PTR(-ENODEV); + } + + printk("conet%d: initialized\n", priv->unit); + + return dev; +} + +static void conet_destroy(struct net_device *dev) +{ + struct conet_priv *priv = (struct conet_priv *) dev->priv; + + printk("conet%d: freed\n", priv->unit); + + unregister_netdev(dev); + kfree(dev->priv); + kfree(dev); +} + +static int __init conet_init_module(void) +{ + int unit = 0, result; + struct net_device *dev; + char mac_address[6]; + + result = request_irq(NETWORK_IRQ, &conet_interrupt, 0, "conet", NULL); + + printk("conet: loaded (max %d devices)\n", CO_MODULE_MAX_CONET); + 
+ for (unit=0; unit < CO_MODULE_MAX_CONET; unit++) { + conet_dev[unit] = NULL; + + result = conet_get_mac(unit, mac_address); + if (!result) + continue; + + dev = conet_create(unit); + if (!IS_ERR(dev)) + conet_dev[unit] = dev; + } + + return result; +} + +static void __exit conet_cleanup_module(void) +{ + int unit = 0; + + free_irq(NETWORK_IRQ, NULL); + + for (unit=0; unit < CO_MODULE_MAX_CONET; unit++) { + if (!conet_dev[unit]) + continue; + + conet_destroy(conet_dev[unit]); + } +} + +module_init(conet_init_module); +module_exit(conet_cleanup_module); diff -urN a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig --- a/drivers/video/console/Kconfig +++ b/drivers/video/console/Kconfig @@ -6,7 +6,7 @@ config VGA_CONSOLE bool "VGA text console" if EMBEDDED || !X86 - depends on !ARCH_ACORN && !ARCH_EBSA110 && !4xx && !8xx && !SPARC32 && !SPARC64 && !M68K && !PARISC + depends on !COOPERATIVE && !ARCH_ACORN && !ARCH_EBSA110 && !4xx && !8xx && !SPARC32 && !SPARC64 && !M68K && !PARISC default y help Saying Y here will allow you to use Linux in text mode through a @@ -26,6 +26,14 @@ # fi # fi +config COOPERATIVE_CONSOLE + bool 'coLinux Pseudo-VGA text console' if COOPERATIVE + depends on !VGA_CONSOLE && COOPERATIVE + default y + help + You need to say Y here if you compile a Linux kernel in cooperative + mode. 
+ config VIDEO_SELECT bool "Video mode selection support" depends on (X86 || X86_64) && VGA_CONSOLE @@ -99,7 +107,7 @@ config DUMMY_CONSOLE bool - depends on PROM_CONSOLE!=y || VGA_CONSOLE!=y || SGI_NEWPORT_CONSOLE!=y + depends on PROM_CONSOLE!=y || (COOPERATIVE_CONSOLE!=y && VGA_CONSOLE!=y) || SGI_NEWPORT_CONSOLE!=y default y config FRAMEBUFFER_CONSOLE diff -urN a/drivers/video/console/Makefile b/drivers/video/console/Makefile --- a/drivers/video/console/Makefile +++ b/drivers/video/console/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_PROM_CONSOLE) += promcon.o promcon_tbl.o obj-$(CONFIG_STI_CONSOLE) += sticon.o sticore.o obj-$(CONFIG_VGA_CONSOLE) += vgacon.o +obj-$(CONFIG_COOPERATIVE_CONSOLE) += cocon.o obj-$(CONFIG_MDA_CONSOLE) += mdacon.o obj-$(CONFIG_FRAMEBUFFER_CONSOLE) += fbcon.o bitblit.o font.o obj-$(CONFIG_FB_TILEBLITTING) += tileblit.o diff -urN a/drivers/video/console/cocon.c b/drivers/video/console/cocon.c --- a/drivers/video/console/cocon.c +++ b/drivers/video/console/cocon.c @@ -0,0 +1,464 @@ +/* + * linux/drivers/video/cocon.c -- Cooperative Linux console VGA driver + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive for + * more details. + * + * Based on code copied from vgacon.c. 
+ * + * Dan Aloni <da-x@gmx.net>, 2003-2004 (c) + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/kernel.h> +#include <linux/tty.h> +#include <linux/console.h> +#include <linux/string.h> +#include <linux/kd.h> +#include <linux/slab.h> +#include <linux/vt_kern.h> +#include <linux/selection.h> +#include <linux/init.h> + +#include <linux/cooperative_internal.h> + +/* + * Interface used by the world + */ + +static const char *cocon_startup(void); +static void cocon_init(struct vc_data *c, int init); +static void cocon_deinit(struct vc_data *c); +static void cocon_clear(struct vc_data *c, int, int, int, int); +static void cocon_cursor(struct vc_data *c, int mode); +static int cocon_switch(struct vc_data *c); +static int cocon_blank(struct vc_data *c, int blank, int mode_switch); +/* static int cocon_font_op(struct vc_data *c, struct console_font_op *op); */ +static int cocon_set_palette(struct vc_data *c, unsigned char *table); +static int cocon_scrolldelta(struct vc_data *c, int lines); +static int cocon_set_origin(struct vc_data *c); +static void cocon_save_screen(struct vc_data *c); +static int cocon_scroll(struct vc_data *c, int t, int b, int dir, int lines); +static u8 cocon_build_attr(struct vc_data *c, u8 color, u8 intensity, u8 blink, u8 underline, u8 reverse); +static void cocon_invert_region(struct vc_data *c, u16 *p, int count); + +static const char __init *cocon_startup(void) +{ + unsigned long flags; + co_console_message_t *message; + co_message_t *co_message; + + co_message = co_send_message_save(&flags); + message = (co_console_message_t *)co_message->data; + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_CONSOLE; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + co_message->size = ((char *)(&message->type + 1)) - ((char *)message); + message->type = CO_OPERATION_CONSOLE_STARTUP; + 
co_send_message_restore(flags); + + return "CoCON"; +} + +static void cocon_init(struct vc_data *c, int init) +{ + unsigned long flags; + co_console_message_t *message; + co_message_t *co_message; + + /* We cannot be loaded as a module, therefore init is always 1 */ + c->vc_can_do_color = 1; + c->vc_cols = 80; + c->vc_rows = 25; + c->vc_complement_mask = 0x7700; + c->vc_visible_origin = 0; + c->vc_origin = 0; + + co_message = co_send_message_save(&flags); + message = (co_console_message_t *)co_message->data; + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_CONSOLE; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + co_message->size = ((char *)(&message->type + 1)) - ((char *)message); + message->type = CO_OPERATION_CONSOLE_INIT; + co_send_message_restore(flags); +} + +static void cocon_deinit(struct vc_data *c) +{ + unsigned long flags; + co_console_message_t *message; + co_message_t *co_message; + + co_message = co_send_message_save(&flags); + message = (co_console_message_t *)co_message->data; + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_CONSOLE; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + co_message->size = ((char *)(&message->type + 1)) - ((char *)message); + message->type = CO_OPERATION_CONSOLE_DEINIT; + co_send_message_restore(flags); + +} + +static void cocon_clear(struct vc_data *c, int top, int left, int rows, int cols) +{ + unsigned long flags; + co_console_message_t *message; + co_message_t *co_message; + + co_message = co_send_message_save(&flags); + message = (co_console_message_t *)co_message->data; + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_CONSOLE; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + co_message->size = ((char *)(&message->clear + 1)) - ((char *)message); + message->type = CO_OPERATION_CONSOLE_CLEAR; + message->clear.top = top; + 
message->clear.left = left; + message->clear.bottom = top + rows - 1; + message->clear.right = left + cols - 1; + message->clear.charattr = c->vc_video_erase_char; + co_send_message_restore(flags); +} + +static void cocon_putc(struct vc_data *c, int charattr, int y, int x) +{ + unsigned long flags; + co_message_t *co_message; + co_console_message_t *message; + + co_message = co_send_message_save(&flags); + message = (co_console_message_t *)co_message->data; + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_CONSOLE; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + co_message->size = ((char *)(&message->putc + 1)) - ((char *)message); + message->type = CO_OPERATION_CONSOLE_PUTC; + message->putc.x = x; + message->putc.y = y; + message->putc.charattr = charattr; + co_send_message_restore(flags); +} + + +static void cocon_putcs(struct vc_data *conp, + const unsigned short *s, int count, int yy, int xx) +{ + unsigned long flags; + co_console_message_t *message; + co_message_t *co_message; + + if (count > CO_MAX_PARAM_SIZE/2 - 16) + return; + + co_message = co_send_message_save(&flags); + message = (co_console_message_t *)co_message->data; + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_CONSOLE; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + co_message->size = ((char *)(&message->putcs + 1)) - ((char *)message) + + count * sizeof(unsigned short); + message->type = CO_OPERATION_CONSOLE_PUTCS; + message->putcs.x = xx; + message->putcs.y = yy; + message->putcs.count = count; + memcpy(&message->putcs.data, s, count * sizeof(unsigned short)); + co_send_message_restore(flags); +} + +static u8 cocon_build_attr(struct vc_data *c, u8 color, u8 intensity, u8 blink, u8 underline, u8 reverse) +{ + u8 attr = color; + + if (underline) + attr = (attr & 0xf0) | c->vc_ulcolor; + else if (intensity == 0) + attr = (attr & 0xf0) | c->vc_halfcolor; + if 
(reverse) + attr = ((attr) & 0x88) | ((((attr) >> 4) | ((attr) << 4)) & 0x77); + if (blink) + attr ^= 0x80; + if (intensity == 2) + attr ^= 0x08; + + return attr; +} + +static void cocon_invert_region(struct vc_data *c, u16 *p, int count) +{ + unsigned long flags; + co_message_t *co_message; + co_console_message_t *message; + unsigned long x = (unsigned long)(p - c->vc_origin); // UPDATE: vc_origin = 0; but not yet + + co_message = co_send_message_save(&flags); + message = (co_console_message_t *)co_message->data; + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_CONSOLE; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + co_message->size = ((char *)(&message->invert + 1)) - ((char *)message); + message->type = CO_OPERATION_CONSOLE_INVERT_REGION; + message->invert.y = ((unsigned)x)/c->vc_cols; + message->invert.x = ((unsigned)x)-(message->invert.y); + message->invert.count = count; + co_send_message_restore(flags); + + while (count--) { + u16 a = scr_readw(p); + a = ((a) & 0x88ff) | (((a) & 0x7000) >> 4) | (((a) & 0x0700) << 4); + scr_writew(a, p++); + } + +} + +static void cocon_cursor(struct vc_data *c, int mode) +{ + unsigned long flags; + co_console_message_t *message; + co_message_t *co_message; + + co_message = co_send_message_save(&flags); + message = (co_console_message_t *)co_message->data; + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_CONSOLE; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + co_message->size = ((char *)(&message->cursor + 1)) - ((char *)message);; + if (mode==CM_ERASE) { + message->type = CO_OPERATION_CONSOLE_CURSOR_ERASE; + message->cursor.height = 0; + co_send_message_restore(flags); + return; + } + + if(mode==CM_MOVE) { + message->type = CO_OPERATION_CONSOLE_CURSOR_MOVE; + } else /*(mode==CM_DRAW)*/ { + message->type = CO_OPERATION_CONSOLE_CURSOR_DRAW; + } + message->cursor.x = c->vc_x; + message->cursor.y 
= c->vc_y; + + switch (c->vc_cursor_type & CUR_HWMASK) { + case CUR_UNDERLINE: + message->cursor.height = 5; + break; + case CUR_TWO_THIRDS: + message->cursor.height = 66; + break; + case CUR_LOWER_THIRD: + message->cursor.height = 33; + break; + case CUR_LOWER_HALF: + message->cursor.height = 50; + break; + case CUR_NONE: + message->cursor.height = 0; + break; + default: + message->cursor.height = 5; + break; + } + + co_send_message_restore(flags); +} + +static int cocon_switch(struct vc_data *c) +{ + unsigned long flags; + co_console_message_t *message; + co_message_t *co_message; + + co_message = co_send_message_save(&flags); + message = (co_console_message_t *)co_message->data; + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_CONSOLE; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + co_message->size = ((char *)(&message->type + 1)) - ((char *)message); + message->type = CO_OPERATION_CONSOLE_SWITCH; + co_send_message_restore(flags); + + return 1; /* Redrawing not needed */ +} + +static int cocon_set_palette(struct vc_data *c, unsigned char *table) +{ + unsigned long flags; + co_console_message_t *message; + co_message_t *co_message; + + co_message = co_send_message_save(&flags); + message = (co_console_message_t *)co_message->data; + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_CONSOLE; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + co_message->size = ((char *)(&message->type + 1)) - ((char *)message); + message->type = CO_OPERATION_CONSOLE_SET_PALETTE; + co_send_message_restore(flags); + + return 1; +} + +static int cocon_blank(struct vc_data *c, int blank, int mode_switchg) +{ + unsigned long flags; + co_console_message_t *message; + co_message_t *co_message; + + co_message = co_send_message_save(&flags); + message = (co_console_message_t *)co_message->data; + co_message->from = CO_MODULE_LINUX; + co_message->to = 
CO_MODULE_CONSOLE; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + co_message->size = ((char *)(&message->type + 1)) - ((char *)message); + message->type = CO_OPERATION_CONSOLE_BLANK; + co_send_message_restore(flags); + + return 1; +} + + +static int cocon_scrolldelta(struct vc_data *c, int lines) +{ + unsigned long flags; + co_console_message_t *message; + co_message_t *co_message; + + co_message = co_send_message_save(&flags); + message = (co_console_message_t *)co_message->data; + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_CONSOLE; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + co_message->size = ((char *)(&message->type + 1)) - ((char *)message); + message->type = CO_OPERATION_CONSOLE_SCROLLDELTA; + co_send_message_restore(flags); + + return 1; +} + +static int cocon_set_origin(struct vc_data *c) +{ + unsigned long flags; + co_console_message_t *message; + co_message_t *co_message; + + co_message = co_send_message_save(&flags); + message = (co_console_message_t *)co_message->data; + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_CONSOLE; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + co_message->size = ((char *)(&message->type + 1)) - ((char *)message); + message->type = CO_OPERATION_CONSOLE_SET_ORIGIN; + co_send_message_restore(flags); + + return 1; +} + +static void cocon_save_screen(struct vc_data *c) +{ + unsigned long flags; + co_console_message_t *message; + co_message_t *co_message; + + co_message = co_send_message_save(&flags); + message = (co_console_message_t *)co_message->data; + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_CONSOLE; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + co_message->size = ((char *)(&message->type + 1)) - ((char *)message); + message->type = 
CO_OPERATION_CONSOLE_SAVE_SCREEN; + co_send_message_restore(flags); +} + +static int cocon_scroll(struct vc_data *c, int t, int b, int dir, int lines) +{ + unsigned long flags; + co_console_message_t *message; + co_message_t *co_message; + + co_message = co_send_message_save(&flags); + message = (co_console_message_t *)co_message->data; + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_CONSOLE; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + co_message->size = ((char *)(&message->scroll + 1)) - ((char *)message); + if (dir == SM_UP) + message->type = CO_OPERATION_CONSOLE_SCROLL_UP; + else + message->type = CO_OPERATION_CONSOLE_SCROLL_DOWN; + message->scroll.top = t; + message->scroll.bottom = b-1; + message->scroll.lines = lines; + co_send_message_restore(flags); + + return 0; +} + +static void cocon_bmove(struct vc_data *c, int sy, int sx, int dy, int dx, int h, int w) +{ + unsigned long flags; + co_console_message_t *message; + co_message_t *co_message; + + co_message = co_send_message_save(&flags); + message = (co_console_message_t *)co_message->data; + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_CONSOLE; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + co_message->size = ((char *)(&message->bmove + 1)) - ((char *)message); + message->type = CO_OPERATION_CONSOLE_BMOVE; + message->bmove.row = dy; + message->bmove.column = dx; + message->bmove.top = sy; + message->bmove.left = sx; + message->bmove.bottom = sy + h - 1; + message->bmove.right = sx + w - 1; + co_send_message_restore(flags); +} + +/* + * The console `switch' structure for the VGA based console + */ + +const struct consw colinux_con = { + con_startup: cocon_startup, + con_init: cocon_init, + con_deinit: cocon_deinit, + con_clear: cocon_clear, + con_putc: cocon_putc, + con_putcs: cocon_putcs, + con_cursor: cocon_cursor, + con_scroll: cocon_scroll, + con_bmove: 
cocon_bmove, + con_switch: cocon_switch, + con_blank: cocon_blank, + con_set_palette: cocon_set_palette, + con_scrolldelta: cocon_scrolldelta, + con_set_origin: cocon_set_origin, + con_save_screen: cocon_save_screen, + con_build_attr: cocon_build_attr, + con_invert_region: cocon_invert_region, +}; + +MODULE_LICENSE("GPL"); diff -urN a/fs/Kconfig b/fs/Kconfig --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1098,6 +1098,19 @@ containing the directory /) cannot be compiled as a module. +config COFUSE_FS + tristate "Cooperative Host file system support (COFUSE)" + depends on COOPERATIVE + default y + help + In Cooperative mode, this file system allows you to mount an host + directory structure to a local mountpoint. + COFUSE (Cooperative FUSE) is based on the original FUSE + (File System in User Space). + + To compile the cofuse support as a module, choose M here: the + module will be called cofusefs. + config EFS_FS tristate "EFS file system support (read only) (EXPERIMENTAL)" diff -urN a/fs/Makefile b/fs/Makefile --- a/fs/Makefile +++ b/fs/Makefile @@ -94,3 +94,5 @@ obj-$(CONFIG_BEFS_FS) += befs/ obj-$(CONFIG_HOSTFS) += hostfs/ obj-$(CONFIG_HPPFS) += hppfs/ +obj-$(CONFIG_COFUSE_FS) += cofusefs/ + diff -urN a/fs/cofusefs/Makefile b/fs/cofusefs/Makefile --- a/fs/cofusefs/Makefile +++ b/fs/cofusefs/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for the Linux cofuse filesystem routines. +# + +obj-$(CONFIG_COFUSE_FS) += cofusefs.o + +cofusefs-objs := inode.o dir.o file.o util.o dev.o + diff -urN a/fs/cofusefs/dev.c b/fs/cofusefs/dev.c --- a/fs/cofusefs/dev.c +++ b/fs/cofusefs/dev.c @@ -0,0 +1,889 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2001-2004 Miklos Szeredi <miklos@szeredi.hu> + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. 
+*/ + +#include "fuse_i.h" + +#include <linux/poll.h> +#include <linux/proc_fs.h> +#include <linux/file.h> + +#ifndef CONFIG_COOPERATIVE + +/* If more requests are outstanding, then the operation will block */ +#define MAX_OUTSTANDING 10 + +static struct proc_dir_entry *proc_fs_fuse; +struct proc_dir_entry *proc_fuse_dev; +static kmem_cache_t *fuse_req_cachep; + +static struct fuse_req *request_new(void) +{ + struct fuse_req *req; + + req = (struct fuse_req *) kmem_cache_alloc(fuse_req_cachep, SLAB_NOFS); + if(req) { + INIT_LIST_HEAD(&req->list); + req->issync = 0; + req->locked = 0; + req->interrupted = 0; + req->sent = 0; + req->finished = 0; + req->in = NULL; + req->out = NULL; + init_waitqueue_head(&req->waitq); + } + + return req; +} + +static void request_free(struct fuse_req *req) +{ + kmem_cache_free(fuse_req_cachep, req); +} + +static int request_restartable(enum fuse_opcode opcode) +{ + switch(opcode) { + case FUSE_LOOKUP: + case FUSE_GETATTR: + case FUSE_READLINK: + case FUSE_GETDIR: + case FUSE_OPEN: + case FUSE_READ: + case FUSE_WRITE: + return 1; + + default: + return 0; + } +} + +/* Called with fuse_lock held. Releases, and then reacquires it. */ +static void request_wait_answer(struct fuse_req *req) +{ + int intr; + + spin_unlock(&fuse_lock); + intr = wait_event_interruptible(req->waitq, req->finished); + spin_lock(&fuse_lock); + if(!intr) + return; + + /* Request interrupted... Wait for it to be unlocked */ + if(req->locked) { + req->interrupted = 1; + spin_unlock(&fuse_lock); + wait_event(req->waitq, !req->locked); + spin_lock(&fuse_lock); + } + + /* Operations which modify the filesystem cannot safely be + restarted, because it is uncertain whether the operation has + completed or not...
*/ + if(req->sent && !request_restartable(req->in->h.opcode)) + req->out->h.error = -EINTR; + else + req->out->h.error = -ERESTARTSYS; +} + +static int get_unique(struct fuse_conn *fc) +{ + do fc->reqctr++; + while(!fc->reqctr); + return fc->reqctr; +} + +/* Must be called with fuse_lock held, and unlocks it */ +static void request_end(struct fuse_conn *fc, struct fuse_req *req) +{ + fuse_reqend_t endfunc = req->end; + + if(!endfunc) { + wake_up(&req->waitq); + spin_unlock(&fuse_lock); + } else { + spin_unlock(&fuse_lock); + endfunc(fc, req->in, req->out, req->data); + request_free(req); + up(&fc->outstanding); + } +} + +void request_send(struct fuse_conn *fc, struct fuse_in *in, + struct fuse_out *out) +{ + struct fuse_req *req; + + out->h.error = -ERESTARTSYS; + if(down_interruptible(&fc->outstanding)) + return; + + out->h.error = -ENOMEM; + req = request_new(); + if(req) { + req->in = in; + req->out = out; + req->issync = 1; + req->end = NULL; + + spin_lock(&fuse_lock); + out->h.error = -ENOTCONN; + if(fc->file) { + in->h.unique = get_unique(fc); + list_add_tail(&req->list, &fc->pending); + wake_up(&fc->waitq); + request_wait_answer(req); + list_del(&req->list); + } + spin_unlock(&fuse_lock); + request_free(req); + } + + up(&fc->outstanding); +} + + +static inline void destroy_request(struct fuse_req *req) +{ + if(req) { + kfree(req->in); + request_free(req); + } +} + +/* This one is currently only used for sending FORGET and RELEASE, + which are kernel-initiated requests. So the outstanding semaphore + is not used.
*/ +int request_send_noreply(struct fuse_conn *fc, struct fuse_in *in) +{ + struct fuse_req *req; + + req = request_new(); + if(!req) + return -ENOMEM; + + req->in = in; + req->issync = 0; + + spin_lock(&fuse_lock); + if(!fc->file) { + spin_unlock(&fuse_lock); + request_free(req); + return -ENOTCONN; + } + + list_add_tail(&req->list, &fc->pending); + wake_up(&fc->waitq); + spin_unlock(&fuse_lock); + return 0; +} + +int request_send_nonblock(struct fuse_conn *fc, struct fuse_in *in, + struct fuse_out *out, fuse_reqend_t end, void *data) +{ + int err; + struct fuse_req *req; + + BUG_ON(!end); + + if(down_trylock(&fc->outstanding)) + return -EWOULDBLOCK; + + err = -ENOMEM; + req = request_new(); + if(req) { + req->in = in; + req->out = out; + req->issync = 1; + req->end = end; + req->data = data; + + spin_lock(&fuse_lock); + err = -ENOTCONN; + if(fc->file) { + in->h.unique = get_unique(fc); + list_add_tail(&req->list, &fc->pending); + wake_up(&fc->waitq); + spin_unlock(&fuse_lock); + return 0; + } + spin_unlock(&fuse_lock); + request_free(req); + } + up(&fc->outstanding); + return err; +} + +static void request_wait(struct fuse_conn *fc) +{ + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue_exclusive(&fc->waitq, &wait); + while(fc->sb != NULL && list_empty(&fc->pending)) { + set_current_state(TASK_INTERRUPTIBLE); + if(signal_pending(current)) + break; + + spin_unlock(&fuse_lock); + schedule(); + spin_lock(&fuse_lock); + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&fc->waitq, &wait); +} + +static inline int copy_in_one(const void *src, size_t srclen, char **dstp, + size_t *dstlenp) +{ + if(*dstlenp < srclen) { + printk("fuse_dev_read: buffer too small\n"); + return -EINVAL; + } + + if(copy_to_user(*dstp, src, srclen)) + return -EFAULT; + + *dstp += srclen; + *dstlenp -= srclen; + + return 0; +} + +static inline int copy_in_args(struct fuse_in *in, char *buf, size_t nbytes) +{ + int err; + int i; + size_t orignbytes = nbytes; + + err = 
copy_in_one(&in->h, sizeof(in->h), &buf, &nbytes); + if(err) + return err; + + for(i = 0; i < in->numargs; i++) { + struct fuse_in_arg *arg = &in->args[i]; + err = copy_in_one(arg->value, arg->size, &buf, &nbytes); + if(err) + return err; + } + + return orignbytes - nbytes; +} + +static ssize_t fuse_dev_read(struct file *file, char *buf, size_t nbytes, + loff_t *off) +{ + ssize_t ret; + struct fuse_conn *fc = DEV_FC(file); + struct fuse_req *req = NULL; + + spin_lock(&fuse_lock); + request_wait(fc); + if(fc->sb != NULL && !list_empty(&fc->pending)) { + req = list_entry(fc->pending.next, struct fuse_req, list); + list_del_init(&req->list); + req->locked = 1; + } + spin_unlock(&fuse_lock); + if(fc->sb == NULL) + return -ENODEV; + if(req == NULL) + return -EINTR; + + ret = copy_in_args(req->in, buf, nbytes); + spin_lock(&fuse_lock); + if(req->issync) { + if(ret < 0) { + req->out->h.error = -EPROTO; + req->finished = 1; + } else { + list_add_tail(&req->list, &fc->processing); + req->sent = 1; + } + req->locked = 0; + if(ret < 0 || req->interrupted) + /* Unlocks fuse_lock: */ + request_end(fc, req); + else + spin_unlock(&fuse_lock); + } else { + spin_unlock(&fuse_lock); + destroy_request(req); + } + return ret; +} + +static struct fuse_req *request_find(struct fuse_conn *fc, unsigned int unique) +{ + struct list_head *entry; + struct fuse_req *req = NULL; + + list_for_each(entry, &fc->processing) { + struct fuse_req *tmp; + tmp = list_entry(entry, struct fuse_req, list); + if(tmp->in->h.unique == unique) { + req = tmp; + break; + } + } + + return req; +} + +static void process_getdir(struct fuse_req *req) +{ + struct fuse_getdir_out *arg; + arg = (struct fuse_getdir_out *) req->out->args[0].value; + arg->file = fget(arg->fd); +} + +static inline int copy_out_one(struct fuse_out_arg *arg, const char **srcp, + size_t *srclenp, int allowvar) +{ + size_t dstlen = arg->size; + if(*srclenp < dstlen) { + if(!allowvar) { + printk("fuse_dev_write: write is short\n"); + return 
-EINVAL; + } + dstlen = *srclenp; + } + + if(dstlen) { + if(copy_from_user(arg->value, *srcp, dstlen)) + return -EFAULT; + } + + *srcp += dstlen; + *srclenp -= dstlen; + arg->size = dstlen; + + return 0; +} + +static inline int copy_out_args(struct fuse_out *out, const char *buf, + size_t nbytes) +{ + int err; + int i; + + buf += sizeof(struct fuse_out_header); + nbytes -= sizeof(struct fuse_out_header); + + if(!out->h.error) { + for(i = 0; i < out->numargs; i++) { + struct fuse_out_arg *arg = &out->args[i]; + int allowvar; + + if(out->argvar && i == out->numargs - 1) + allowvar = 1; + else + allowvar = 0; + + err = copy_out_one(arg, &buf, &nbytes, allowvar); + if(err) + return err; + } + } + + if(nbytes != 0) { + printk("fuse_dev_write: write is long\n"); + return -EINVAL; + } + + return 0; +} + +static inline int copy_out_header(struct fuse_out_header *oh, const char *buf, + size_t nbytes) +{ + if(nbytes < sizeof(struct fuse_out_header)) { + printk("fuse_dev_write: write is short\n"); + return -EINVAL; + } + + if(copy_from_user(oh, buf, sizeof(struct fuse_out_header))) + return -EFAULT; + + return 0; +} + +#ifdef KERNEL_2_6 +static int fuse_invalidate(struct fuse_conn *fc, struct fuse_user_header *uh) +{ + struct inode *inode = ilookup(fc->sb, uh->ino); + if (!inode) + return -ENOENT; + invalidate_inode_pages(inode->i_mapping); + iput(inode); + return 0; +} +#else +static int fuse_invalidate(struct fuse_conn *fc, struct fuse_user_header *uh) +{ + struct inode *inode = iget(fc->sb, uh->ino); + int err = -ENOENT; + if(inode) { + if(inode->u.generic_ip) { + invalidate_inode_pages(inode); + err = 0; + } + iput(inode); + } + return err; +} +#endif + +static int fuse_user_request(struct fuse_conn *fc, const char *buf, + size_t nbytes) +{ + struct fuse_user_header uh; + int err; + + if (nbytes < sizeof(struct fuse_user_header)) { + printk("fuse_dev_write: write is short\n"); + return -EINVAL; + } + + if(copy_from_user(&uh, buf, sizeof(struct fuse_out_header))) + return 
-EFAULT; + + switch(uh.opcode) { + case FUSE_INVALIDATE: + err = fuse_invalidate(fc, &uh); + break; + + default: + err = -ENOSYS; + } + return err; +} + + +static ssize_t fuse_dev_write(struct file *file, const char *buf, + size_t nbytes, loff_t *off) +{ + int err; + struct fuse_conn *fc = DEV_FC(file); + struct fuse_req *req; + struct fuse_out_header oh; + + if(!fc->sb) + return -EPERM; + + err = copy_out_header(&oh, buf, nbytes); + if(err) + return err; + + if (!oh.unique) { + err = fuse_user_request(fc, buf, nbytes); + goto out; + } + + if (oh.error <= -512 || oh.error > 0) { + printk("fuse_dev_write: bad error value\n"); + return -EINVAL; + } + + spin_lock(&fuse_lock); + req = request_find(fc, oh.unique); + if(req != NULL) { + list_del_init(&req->list); + req->locked = 1; + } + spin_unlock(&fuse_lock); + if(!req) + return -ENOENT; + + req->out->h = oh; + err = copy_out_args(req->out, buf, nbytes); + + spin_lock(&fuse_lock); + if(err) + req->out->h.error = -EPROTO; + else { + /* fget() needs to be done in this context */ + if(req->in->h.opcode == FUSE_GETDIR && !oh.error) + process_getdir(req); + } + req->finished = 1; + req->locked = 0; + /* Unlocks fuse_lock: */ + request_end(fc, req); + + out: + if(!err) + return nbytes; + else + return err; +} + + +static unsigned int fuse_dev_poll(struct file *file, poll_table *wait) +{ + struct fuse_conn *fc = DEV_FC(file); + unsigned int mask = POLLOUT | POLLWRNORM; + + if(!fc->sb) + return -EPERM; + + poll_wait(file, &fc->waitq, wait); + + spin_lock(&fuse_lock); + if (!list_empty(&fc->pending)) + mask |= POLLIN | POLLRDNORM; + spin_unlock(&fuse_lock); + + return mask; +} + +static struct fuse_conn *new_conn(void) +{ + struct fuse_conn *fc; + + fc = kmalloc(sizeof(*fc), GFP_KERNEL); + if(fc != NULL) { + fc->sb = NULL; + fc->file = NULL; + fc->flags = 0; + fc->uid = 0; + fc->oldrelease = 0; + init_waitqueue_head(&fc->waitq); + INIT_LIST_HEAD(&fc->pending); + INIT_LIST_HEAD(&fc->processing); + sema_init(&fc->outstanding, 
MAX_OUTSTANDING); + fc->reqctr = 1; + } + return fc; +} + +static int fuse_dev_open(struct inode *inode, struct file *file) +{ + struct fuse_conn *fc; + + fc = new_conn(); + if(!fc) + return -ENOMEM; + + fc->file = file; + file->private_data = fc; + + return 0; +} + +static void end_requests(struct fuse_conn *fc, struct list_head *head) +{ + while(!list_empty(head)) { + struct fuse_req *req; + req = list_entry(head->next, struct fuse_req, list); + list_del_init(&req->list); + if(req->issync) { + req->out->h.error = -ECONNABORTED; + req->finished = 1; + /* Unlocks fuse_lock: */ + request_end(fc, req); + spin_lock(&fuse_lock); + } else + destroy_request(req); + } +} + +static int fuse_dev_release(struct inode *inode, struct file *file) +{ + struct fuse_conn *fc = DEV_FC(file); + + spin_lock(&fuse_lock); + fc->file = NULL; + end_requests(fc, &fc->pending); + end_requests(fc, &fc->processing); + release_conn(fc); + spin_unlock(&fuse_lock); + return 0; +} + +static struct file_operations fuse_dev_operations = { + .owner = THIS_MODULE, + .read = fuse_dev_read, + .write = fuse_dev_write, + .poll = fuse_dev_poll, + .open = fuse_dev_open, + .release = fuse_dev_release, +}; + +int fuse_dev_init() +{ + int ret; + + proc_fs_fuse = NULL; + proc_fuse_dev = NULL; + + fuse_req_cachep = kmem_cache_create("cofuser_request", + sizeof(struct fuse_req), + 0, 0, NULL, NULL); + if(!fuse_req_cachep) + return -ENOMEM; + + ret = -ENOMEM; + proc_fs_fuse = proc_mkdir("fuse", proc_root_fs); + if(!proc_fs_fuse) { + printk("fuse: failed to create directory in /proc/fs\n"); + goto err; + } + + proc_fs_fuse->owner = THIS_MODULE; + proc_fuse_dev = create_proc_entry("dev", S_IFSOCK | 0600, proc_fs_fuse); + if(!proc_fuse_dev) { + printk("fuse: failed to create entry in /proc/fs/fuse\n"); + goto err; + } + + proc_fuse_dev->proc_fops = &fuse_dev_operations; + + return 0; + + err: + fuse_dev_cleanup(); + return ret; +} + +void fuse_dev_cleanup() +{ + if (cooperative_mode_enabled()) { + 
kmem_cache_destroy(fuse_req_cachep); + return; + } + + if(proc_fs_fuse) { + remove_proc_entry("dev", proc_fs_fuse); + remove_proc_entry("fuse", proc_root_fs); + } + + kmem_cache_destroy(fuse_req_cachep); +} + +#else + +struct fuse_conn *cofs_volumes[CO_MODULE_MAX_COFS] = {NULL, }; + +static void cofuse_request_start(unsigned long *flags, struct fuse_conn *fc, struct fuse_in *in) +{ + co_passage_page_assert_valid(); + + co_passage_page_acquire(flags); + co_passage_page->operation = CO_OPERATION_DEVICE; + co_passage_page->params[0] = CO_DEVICE_FILESYSTEM; + co_passage_page->params[1] = fc->cofs_unit; + co_passage_page->params[2] = in->h.opcode; + co_passage_page->params[3] = in->h.ino; + co_passage_page->params[4] = 0; +} + +static void cofuse_request_end(unsigned long flags, struct fuse_out *out) +{ + unsigned long ret; + ret = co_passage_page->params[4]; + co_passage_page_release(flags); + out->h.error = ret; +} + +void request_send(struct fuse_conn *fc, struct fuse_in *in, + struct fuse_out *out) +{ + unsigned long flags; + char *str; + + switch ((unsigned long)in->h.opcode) { + case FUSE_STATFS: { + struct fuse_statfs_out *arg; + + arg = (struct fuse_statfs_out *)out->args[0].value; + + cofuse_request_start(&flags, fc, in); + co_switch_wrapper(); + *arg = *(struct fuse_statfs_out *)&co_passage_page->params[5]; + cofuse_request_end(flags, out); + return; + } + + case FUSE_OPEN: { + struct fuse_open_in *opin = (struct fuse_open_in *)in->args[0].value; + + cofuse_request_start(&flags, fc, in); + co_passage_page->params[5] = opin->flags; + co_switch_wrapper(); + cofuse_request_end(flags, out); + return; + } + + case FUSE_WRITE: { + struct fuse_write_in *write_in = (struct fuse_write_in *)in->args[0].value; + unsigned long long *offset_passage = (unsigned long long *)&co_passage_page->params[5]; + + cofuse_request_start(&flags, fc, in); + *offset_passage = write_in->offset; + co_passage_page->params[7] = write_in->size; + co_passage_page->params[8] = (unsigned 
long)in->args[1].value; + co_switch_wrapper(); + cofuse_request_end(flags, out); + return; + } + + case FUSE_READ: { + struct fuse_read_in *read_in = (struct fuse_read_in *)in->args[0].value; + unsigned long long *offset_passage = (unsigned long long *)&co_passage_page->params[5]; + + cofuse_request_start(&flags, fc, in); + *offset_passage = read_in->offset; + co_passage_page->params[7] = read_in->size; + co_passage_page->params[8] = (unsigned long)out->args[0].value; + co_switch_wrapper(); + cofuse_request_end(flags, out); + return; + } + + case FUSE_LOOKUP: { + struct fuse_lookup_out *arg; + + arg = (struct fuse_lookup_out *)out->args[0].value; + str = (char *)&co_passage_page->params[30]; + + cofuse_request_start(&flags, fc, in); + memcpy(str, (char *)in->args[0].value, in->args[0].size); + co_switch_wrapper(); + *arg = *(struct fuse_lookup_out *)&co_passage_page->params[5]; + cofuse_request_end(flags, out); + return; + } + + case FUSE_RENAME: { + struct fuse_rename_in *arg; + char *str2; + + arg = (struct fuse_rename_in *)in->args[0].value; + str = (char *)(&co_passage_page->params[30]); + str2 = str + in->args[1].size; + + cofuse_request_start(&flags, fc, in); + co_passage_page->params[5] = arg->newdir; + memcpy(str, (char *)in->args[1].value, in->args[1].size); + memcpy(str2, (char *)in->args[2].value, in->args[2].size); + co_switch_wrapper(); + cofuse_request_end(flags, out); + return; + } + + case FUSE_MKNOD: { + struct fuse_mknod_in *inarg; + struct fuse_mknod_out *outarg; + char *str; + + inarg = (struct fuse_mknod_in *)(in->args[0].value); + outarg = (struct fuse_mknod_out *)(out->args[0].value); + + cofuse_request_start(&flags, fc, in); + co_passage_page->params[5] = inarg->mode; + co_passage_page->params[6] = inarg->rdev; + str = (char *)&co_passage_page->params[30]; + memcpy(str, (char *)in->args[1].value, in->args[1].size); + co_switch_wrapper(); + outarg->ino = co_passage_page->params[7]; + outarg->attr = *(struct fuse_attr 
*)(&co_passage_page->params[8]); + cofuse_request_end(flags, out); + return; + } + + case FUSE_SETATTR: { + struct fuse_setattr_in *inarg; + struct fuse_setattr_out *outarg; + struct fuse_attr *attr; + + inarg = (struct fuse_setattr_in *)(in->args[0].value); + outarg = (struct fuse_setattr_out *)(out->args[0].value); + attr = (struct fuse_attr *)(&co_passage_page->params[6]); + + cofuse_request_start(&flags, fc, in); + co_passage_page->params[5] = inarg->valid; + *attr = inarg->attr; + co_switch_wrapper(); + outarg->attr = *attr; + cofuse_request_end(flags, out); + return; + } + + case FUSE_MKDIR: { + struct fuse_mkdir_in *arg; + + arg = (struct fuse_mkdir_in *)(in->args[0].value); + str = (char *)&co_passage_page->params[30]; + + cofuse_request_start(&flags, fc, in); + co_passage_page->params[5] = arg->mode; + memcpy(str, (char *)in->args[1].value, in->args[1].size); + co_switch_wrapper(); + cofuse_request_end(flags, out); + return; + } + + case FUSE_UNLINK: + case FUSE_RMDIR: { + str = (char *)&co_passage_page->params[30]; + + cofuse_request_start(&flags, fc, in); + memcpy(str, (char *)in->args[0].value, in->args[0].size); + co_switch_wrapper(); + cofuse_request_end(flags, out); + return; + } + + case FUSE_GETATTR: { + struct fuse_getattr_out *arg; + arg = (struct fuse_getattr_out *)out->args[0].value; + + co_passage_page_assert_valid(); + cofuse_request_start(&flags, fc, in); + co_switch_wrapper(); + *arg = *(struct fuse_getattr_out *)&co_passage_page->params[5]; + cofuse_request_end(flags, out); + return; + } + } + + /* printk("cofuse: request_send %d\n", in->h.opcode); */ + out->h.error = -EIO; +} + +int request_send_noreply(struct fuse_conn *fc, struct fuse_in *in) +{ + return -EIO; +} + +int request_send_nonblock(struct fuse_conn *fc, struct fuse_in *in, + struct fuse_out *out, fuse_reqend_t end, void *data) +{ + /* printk("cofuse: request_send_nonblock %d\n", in->h.opcode); */ + request_send(fc, in, out); + end(fc, in, out, data); + return 0; +} + +int 
fuse_dev_init() +{ + return 0; +} + +void fuse_dev_cleanup() +{ +} + +#endif + +/* + * Local Variables: + * indent-tabs-mode: t + * c-basic-offset: 8 + * End: + */ diff -urN a/fs/cofusefs/dir.c b/fs/cofusefs/dir.c --- a/fs/cofusefs/dir.c +++ b/fs/cofusefs/dir.c @@ -0,0 +1,961 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2001-2004 Miklos Szeredi <miklos@szeredi.hu> + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. +*/ + +#include "fuse_i.h" + +#include <linux/pagemap.h> +#include <linux/slab.h> +#include <linux/file.h> + +static struct inode_operations fuse_dir_inode_operations; +static struct inode_operations fuse_file_inode_operations; +static struct inode_operations fuse_symlink_inode_operations; + +static struct file_operations fuse_dir_operations; + +static struct dentry_operations fuse_dentry_operations; + +/* FIXME: This should be user configurable */ +#define FUSE_REVALIDATE_TIME (1 * HZ) + +#ifndef KERNEL_2_6 +#define new_decode_dev(x) (x) +#define new_encode_dev(x) (x) +#endif + +static void change_attributes(struct inode *inode, struct fuse_attr *attr) +{ + if(S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size) { +#ifdef KERNEL_2_6 + invalidate_inode_pages(inode->i_mapping); +#else + invalidate_inode_pages(inode); +#endif + } + + inode->i_mode = (inode->i_mode & S_IFMT) + (attr->mode & 07777); + inode->i_nlink = attr->nlink; + inode->i_uid = attr->uid; + inode->i_gid = attr->gid; + i_size_write(inode, attr->size); + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = attr->blocks; +#ifdef KERNEL_2_6 + inode->i_atime.tv_sec = attr->atime; + inode->i_atime.tv_nsec = 0; + inode->i_mtime.tv_sec = attr->mtime; + inode->i_mtime.tv_nsec = 0; + inode->i_ctime.tv_sec = attr->ctime; + inode->i_ctime.tv_nsec = 0; +#else + inode->i_atime = attr->atime; + inode->i_mtime = attr->mtime; + inode->i_ctime = attr->ctime; +#endif +} + +static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) +{ 
+ inode->i_mode = attr->mode & S_IFMT; + i_size_write(inode, attr->size); + if(S_ISREG(inode->i_mode)) { + inode->i_op = &fuse_file_inode_operations; + fuse_init_file_inode(inode); + } + else if(S_ISDIR(inode->i_mode)) { + inode->i_op = &fuse_dir_inode_operations; + inode->i_fop = &fuse_dir_operations; + } + else if(S_ISLNK(inode->i_mode)) { + inode->i_op = &fuse_symlink_inode_operations; + } + else { + inode->i_op = &fuse_file_inode_operations; + init_special_inode(inode, inode->i_mode, + new_decode_dev(attr->rdev)); + } + inode->u.generic_ip = inode; +} + +struct inode *fuse_iget(struct super_block *sb, ino_t ino, + struct fuse_attr *attr, int version) +{ + struct inode *inode; + + inode = iget(sb, ino); + if(inode) { + if(!inode->u.generic_ip) + fuse_init_inode(inode, attr); + + change_attributes(inode, attr); + inode->i_version = version; + } + + return inode; +} + +static int fuse_do_lookup(struct inode *dir, struct dentry *entry, + struct fuse_lookup_out *outarg, int *version) +{ + struct fuse_conn *fc = INO_FC(dir); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + + if (entry->d_name.len > FUSE_NAME_MAX) + return -ENAMETOOLONG; + + in.h.opcode = FUSE_LOOKUP; + in.h.ino = dir->i_ino; + in.numargs = 1; + in.args[0].size = entry->d_name.len + 1; + in.args[0].value = entry->d_name.name; + out.numargs = 1; + out.args[0].size = sizeof(struct fuse_lookup_out); + out.args[0].value = outarg; + request_send(fc, &in, &out); + + *version = out.h.unique; + return out.h.error; +} + +static int fuse_lookup_iget(struct inode *dir, struct dentry *entry, + struct inode **inodep) +{ + int err; + struct fuse_lookup_out outarg; + int version; + struct inode *inode = NULL; + + err = fuse_do_lookup(dir, entry, &outarg, &version); + if(!err) { + inode = fuse_iget(dir->i_sb, outarg.ino, &outarg.attr, version); + if(!inode) + return -ENOMEM; + } else if(err != -ENOENT) + return err; + + entry->d_time = jiffies; + entry->d_op = &fuse_dentry_operations; + 
*inodep = inode; + return 0; +} + +static void uncache_dir(struct inode *dir) +{ + struct dentry *entry = d_find_alias(dir); + if (!entry) + dir->i_nlink = 0; + else { + entry->d_time = jiffies - FUSE_REVALIDATE_TIME - 1; + dput(entry); + } +} + +/* create needs to return a positive entry, so this is actually an + mknod+lookup */ +static int _fuse_mknod(struct inode *dir, struct dentry *entry, int mode, + dev_t rdev) +{ + struct fuse_conn *fc = INO_FC(dir); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + struct fuse_mknod_in inarg; + struct fuse_mknod_out outarg; + struct inode *inode; + + memset(&inarg, 0, sizeof(inarg)); + inarg.mode = mode; + inarg.rdev = new_encode_dev(rdev); + + in.h.opcode = FUSE_MKNOD; + in.h.ino = dir->i_ino; + in.numargs = 2; + in.args[0].size = sizeof(inarg); + in.args[0].value = &inarg; + in.args[1].size = entry->d_name.len + 1; + in.args[1].value = entry->d_name.name; + out.numargs = 1; + out.args[0].size = sizeof(outarg); + out.args[0].value = &outarg; + request_send(fc, &in, &out); + + if(out.h.error) + return out.h.error; + + inode = fuse_iget(dir->i_sb, outarg.ino, &outarg.attr, out.h.unique); + if(!inode) + return -ENOMEM; + + /* Don't allow userspace to do really stupid things... */ + if((inode->i_mode ^ mode) & S_IFMT) { + iput(inode); + printk("fuse_mknod: inode has wrong type\n"); + return -EPROTO; + } + + d_instantiate(entry, inode); + uncache_dir(dir); + return 0; +} + +static int _fuse_create(struct inode *dir, struct dentry *entry, int mode) +{ + return _fuse_mknod(dir, entry, mode, 0); +} + +/* knfsd needs the new entry instantiated in mkdir/symlink/link. this + should rather be done like mknod: attributes returned in out arg to + save a call to userspace */ +static int lookup_new_entry(struct inode *dir, struct dentry *entry) +{ + struct inode *inode; + int err = fuse_lookup_iget(dir, entry, &inode); + if(err || !inode) { + printk("fuse_mkdir: failed to look up new entry\n"); + return err ? 
err : -ENOENT; + } + d_instantiate(entry, inode); + uncache_dir(dir); + return 0; +} + +static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode) +{ + struct fuse_conn *fc = INO_FC(dir); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + struct fuse_mkdir_in inarg; + + memset(&inarg, 0, sizeof(inarg)); + inarg.mode = mode; + + in.h.opcode = FUSE_MKDIR; + in.h.ino = dir->i_ino; + in.numargs = 2; + in.args[0].size = sizeof(inarg); + in.args[0].value = &inarg; + in.args[1].size = entry->d_name.len + 1; + in.args[1].value = entry->d_name.name; + request_send(fc, &in, &out); + if(out.h.error) + return out.h.error; + + return lookup_new_entry(dir, entry); +} + +static int fuse_symlink(struct inode *dir, struct dentry *entry, + const char *link) +{ + struct fuse_conn *fc = INO_FC(dir); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + unsigned int len = strlen(link) + 1; + + if (len > FUSE_SYMLINK_MAX) + return -ENAMETOOLONG; + + in.h.opcode = FUSE_SYMLINK; + in.h.ino = dir->i_ino; + in.numargs = 2; + in.args[0].size = entry->d_name.len + 1; + in.args[0].value = entry->d_name.name; + in.args[1].size = len; + in.args[1].value = link; + request_send(fc, &in, &out); + if(out.h.error) + return out.h.error; + + return lookup_new_entry(dir, entry); +} + +static int fuse_remove(struct inode *dir, struct dentry *entry, + enum fuse_opcode op) +{ + struct fuse_conn *fc = INO_FC(dir); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + + in.h.opcode = op; + in.h.ino = dir->i_ino; + in.numargs = 1; + in.args[0].size = entry->d_name.len + 1; + in.args[0].value = entry->d_name.name; + request_send(fc, &in, &out); + + return out.h.error; +} + +static int fuse_unlink(struct inode *dir, struct dentry *entry) +{ + int err = fuse_remove(dir, entry, FUSE_UNLINK); + if(!err) { + /* FIXME: the new i_nlink could be returned by the + unlink operation */ + err = fuse_do_getattr(entry->d_inode); + if(err 
== -ENOENT) + entry->d_inode->i_nlink = 0; + + uncache_dir(dir); + return 0; + } + return err; +} + +static int fuse_rmdir(struct inode *dir, struct dentry *entry) +{ + int err = fuse_remove(dir, entry, FUSE_RMDIR); + if(!err) { + entry->d_inode->i_nlink = 0; + uncache_dir(dir); + } + return err; +} + +static int fuse_rename(struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent) +{ + struct fuse_conn *fc = INO_FC(olddir); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + struct fuse_rename_in inarg; + + memset(&inarg, 0, sizeof(inarg)); + inarg.newdir = newdir->i_ino; + + in.h.opcode = FUSE_RENAME; + in.h.ino = olddir->i_ino; + in.numargs = 3; + in.args[0].size = sizeof(inarg); + in.args[0].value = &inarg; + in.args[1].size = oldent->d_name.len + 1; + in.args[1].value = oldent->d_name.name; + in.args[2].size = newent->d_name.len + 1; + in.args[2].value = newent->d_name.name; + request_send(fc, &in, &out); + + if (!out.h.error) { + uncache_dir(olddir); + if (olddir != newdir) + uncache_dir(newdir); + } + + return out.h.error; +} + +static int fuse_link(struct dentry *entry, struct inode *newdir, + struct dentry *newent) +{ + struct inode *inode = entry->d_inode; + struct fuse_conn *fc = INO_FC(inode); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + struct fuse_link_in inarg; + + memset(&inarg, 0, sizeof(inarg)); + inarg.newdir = newdir->i_ino; + + in.h.opcode = FUSE_LINK; + in.h.ino = inode->i_ino; + in.numargs = 2; + in.args[0].size = sizeof(inarg); + in.args[0].value = &inarg; + in.args[1].size = newent->d_name.len + 1; + in.args[1].value = newent->d_name.name; + request_send(fc, &in, &out); + if(out.h.error) + return out.h.error; + + /* Invalidate old entry, so attributes are refreshed */ + d_invalidate(entry); + return lookup_new_entry(newdir, newent); +} + +int fuse_do_getattr(struct inode *inode) +{ + struct fuse_conn *fc = INO_FC(inode); + struct fuse_in in = 
FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + struct fuse_getattr_out arg; + + in.h.opcode = FUSE_GETATTR; + in.h.ino = inode->i_ino; + out.numargs = 1; + out.args[0].size = sizeof(arg); + out.args[0].value = &arg; + request_send(fc, &in, &out); + + if(!out.h.error) + change_attributes(inode, &arg.attr); + + return out.h.error; +} + +static int fuse_revalidate(struct dentry *entry) +{ + struct inode *inode = entry->d_inode; + struct fuse_conn *fc = INO_FC(inode); + + if(inode->i_ino == FUSE_ROOT_INO) { + if(!(fc->flags & FUSE_ALLOW_OTHER) && + current->fsuid != fc->uid) + return -EACCES; + } else if(time_before_eq(jiffies, entry->d_time + FUSE_REVALIDATE_TIME)) + return 0; + + return fuse_do_getattr(inode); +} + +static int _fuse_permission(struct inode *inode, int mask) +{ + struct fuse_conn *fc = INO_FC(inode); + + if(!(fc->flags & FUSE_ALLOW_OTHER) && current->fsuid != fc->uid) + return -EACCES; + else if(fc->flags & FUSE_DEFAULT_PERMISSIONS) { + int err = generic_permission(inode, mask, NULL); + + /* If permission is denied, try to refresh file + attributes. This is also needed, because the root + node will at first have no permissions */ + + if(err == -EACCES) { + err = fuse_do_getattr(inode); + if(!err) + err = generic_permission(inode, mask, NULL); + } + + /* FIXME: Need some mechanism to revoke permissions: + currently if the filesystem suddenly changes the + file mode, we will not be informed about that, and + continue to allow access to the file/directory. + + This is actually not so grave, since the user can + simply keep access to the file/directory anyway by + keeping it open...
*/ + + return err; + } + else + return 0; +} + +static int parse_dirfile(char *buf, size_t nbytes, struct file *file, + void *dstbuf, filldir_t filldir) +{ + while(nbytes >= FUSE_NAME_OFFSET) { + struct fuse_dirent *dirent = (struct fuse_dirent *) buf; + size_t reclen = FUSE_DIRENT_SIZE(dirent); + int over; + + if(dirent->namelen > NAME_MAX) { + printk("fuse_readdir: name too long\n"); + return -EPROTO; + } + if(reclen > nbytes) + break; + + over = filldir(dstbuf, dirent->name, dirent->namelen, + file->f_pos, dirent->ino, dirent->type); + if(over) + break; + + buf += reclen; + file->f_pos += reclen; + nbytes -= reclen; + } + + return 0; +} + +#ifndef CONFIG_COOPERATIVE + +#define DIR_BUFSIZE 2048 +static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) +{ + struct file *cfile = file->private_data; + char *buf; + int ret; + + if(!cfile) + return -EISDIR; + + buf = kmalloc(DIR_BUFSIZE, GFP_KERNEL); + if(!buf) + return -ENOMEM; + + ret = kernel_read(cfile, file->f_pos, buf, DIR_BUFSIZE); + if(ret < 0) + printk("fuse_readdir: failed to read container file\n"); + else + ret = parse_dirfile(buf, ret, file, dstbuf, filldir); + + kfree(buf); + return ret; +} + +#else + +#define DIR_BUFSIZE 4096 + +typedef struct { + struct fuse_conn *fc; + int inode; +} readdir_data_t; + +static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) +{ + readdir_data_t *rd = file->private_data; + unsigned long flags; + int ret, size; + char *buf; + + buf = kmalloc(DIR_BUFSIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + co_passage_page_assert_valid(); + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_DEVICE; + co_passage_page->params[0] = CO_DEVICE_FILESYSTEM; + co_passage_page->params[1] = rd->fc->cofs_unit; + co_passage_page->params[2] = FUSE_DIR_READ; + co_passage_page->params[3] = rd->inode; + co_passage_page->params[5] = DIR_BUFSIZE; + co_passage_page->params[6] = (unsigned long)buf; + co_passage_page->params[8] = 
file->f_pos; + + co_switch_wrapper(); + + ret = co_passage_page->params[4]; + size = co_passage_page->params[7]; + + co_passage_page_release(flags); + + if (ret) { + printk("fuse_readdir: host returned error: %x\n", ret); + kfree(buf); + return ret; + } + + parse_dirfile(buf, size, file, dstbuf, filldir); + + ret = 0; + kfree(buf); + return ret; +} + +#endif + +static char *read_link(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + struct fuse_conn *fc = INO_FC(inode); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + char *link; + + link = (char *) __get_free_page(GFP_KERNEL); + if(!link) + return ERR_PTR(-ENOMEM); + + in.h.opcode = FUSE_READLINK; + in.h.ino = inode->i_ino; + out.argvar = 1; + out.numargs = 1; + out.args[0].size = PAGE_SIZE - 1; + out.args[0].value = link; + request_send(fc, &in, &out); + if(out.h.error) { + free_page((unsigned long) link); + return ERR_PTR(out.h.error); + } + + link[out.args[0].size] = '\0'; + return link; +} + +static void free_link(char *link) +{ + if(!IS_ERR(link)) + free_page((unsigned long) link); +} + +static int fuse_readlink(struct dentry *dentry, char *buffer, int buflen) +{ + int ret; + char *link; + + link = read_link(dentry); + ret = vfs_readlink(dentry, buffer, buflen, link); + free_link(link); + return ret; +} + +static int fuse_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + int ret; + char *link; + + link = read_link(dentry); + ret = vfs_follow_link(nd, link); + free_link(link); + return ret; +} + +#ifndef CONFIG_COOPERATIVE + +static int fuse_dir_open(struct inode *inode, struct file *file) +{ + struct fuse_conn *fc = INO_FC(inode); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + struct fuse_getdir_out outarg; + + in.h.opcode = FUSE_GETDIR; + in.h.ino = inode->i_ino; + out.numargs = 1; + out.args[0].size = sizeof(outarg); + out.args[0].value = &outarg; + request_send(fc, &in, &out); + if(!out.h.error) { + struct file *cfile 
= outarg.file; + struct inode *inode; + if(!cfile) { + printk("fuse_getdir: invalid file\n"); + return -EPROTO; + } + inode = cfile->f_dentry->d_inode; + if(!S_ISREG(inode->i_mode)) { + printk("fuse_getdir: not a regular file\n"); + fput(cfile); + return -EPROTO; + } + + file->private_data = cfile; + } + + return out.h.error; +} + +static int fuse_dir_release(struct inode *inode, struct file *file) +{ + struct file *cfile = file->private_data; + + if(cfile) + fput(cfile); + + return 0; +} + +#else + +static int fuse_dir_open(struct inode *inode, struct file *file) +{ + struct fuse_conn *fc = INO_FC(inode); + unsigned long flags; + readdir_data_t *rd; + int ret; + + rd = kmalloc(sizeof(*rd), GFP_KERNEL); + if (!rd) + return -ENOMEM; + + rd->fc = fc; + rd->inode = inode->i_ino; + + co_passage_page_assert_valid(); + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_DEVICE; + co_passage_page->params[0] = CO_DEVICE_FILESYSTEM; + co_passage_page->params[1] = fc->cofs_unit; + co_passage_page->params[2] = FUSE_DIR_OPEN; + co_passage_page->params[3] = inode->i_ino; + co_passage_page->params[4] = 0; + + co_switch_wrapper(); + + ret = co_passage_page->params[4]; + + co_passage_page_release(flags); + + if (ret) { + printk("fuse_readdir: host returned error: %x\n", ret); + kfree(rd); + } else { + file->private_data = (void *)rd; + } + + return ret; +} + +static int fuse_dir_release(struct inode *inode, struct file *file) +{ + readdir_data_t *rd = file->private_data; + unsigned long flags; + int ret; + + co_passage_page_assert_valid(); + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_DEVICE; + co_passage_page->params[0] = CO_DEVICE_FILESYSTEM; + co_passage_page->params[1] = rd->fc->cofs_unit; + co_passage_page->params[2] = FUSE_DIR_RELEASE; + co_passage_page->params[3] = rd->inode; + co_passage_page->params[4] = 0; + + co_switch_wrapper(); + + ret = co_passage_page->params[4]; + + co_passage_page_release(flags); + + if 
(ret) { + printk("fuse_readdir: host returned error: %x\n", ret); + } + + kfree(rd); + + return ret; +} + +#endif + +static unsigned int iattr_to_fattr(struct iattr *iattr, + struct fuse_attr *fattr) +{ + unsigned int ivalid = iattr->ia_valid; + unsigned int fvalid = 0; + + memset(fattr, 0, sizeof(*fattr)); + + if(ivalid & ATTR_MODE) + fvalid |= FATTR_MODE, fattr->mode = iattr->ia_mode; + if(ivalid & ATTR_UID) + fvalid |= FATTR_UID, fattr->uid = iattr->ia_uid; + if(ivalid & ATTR_GID) + fvalid |= FATTR_GID, fattr->gid = iattr->ia_gid; + if(ivalid & ATTR_SIZE) + fvalid |= FATTR_SIZE, fattr->size = iattr->ia_size; + /* You can only _set_ these together (they may change by themselves) */ + if((ivalid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) { + fvalid |= FATTR_UTIME; +#ifdef KERNEL_2_6 + fattr->atime = iattr->ia_atime.tv_sec; + fattr->mtime = iattr->ia_mtime.tv_sec; +#else + fattr->atime = iattr->ia_atime; + fattr->mtime = iattr->ia_mtime; +#endif + } + + return fvalid; +} + +static int fuse_setattr(struct dentry *entry, struct iattr *attr) +{ + struct inode *inode = entry->d_inode; + struct fuse_conn *fc = INO_FC(inode); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + struct fuse_setattr_in inarg; + struct fuse_setattr_out outarg; + + /* FIXME: need to fix race between truncate and writepage */ + if (attr->ia_valid & ATTR_SIZE) + fuse_sync_inode(inode); + + memset(&inarg, 0, sizeof(inarg)); + inarg.valid = iattr_to_fattr(attr, &inarg.attr); + + in.h.opcode = FUSE_SETATTR; + in.h.ino = inode->i_ino; + in.numargs = 1; + in.args[0].size = sizeof(inarg); + in.args[0].value = &inarg; + out.numargs = 1; + out.args[0].size = sizeof(outarg); + out.args[0].value = &outarg; + request_send(fc, &in, &out); + + if(!out.h.error) { + if(attr->ia_valid & ATTR_SIZE && + outarg.attr.size < i_size_read(inode)) + vmtruncate(inode, outarg.attr.size); + + change_attributes(inode, &outarg.attr); + } + return out.h.error; +} + +static int 
_fuse_dentry_revalidate(struct dentry *entry) +{ + if(!entry->d_inode) + return 0; + else if(time_after(jiffies, entry->d_time + FUSE_REVALIDATE_TIME)) { + struct inode *inode = entry->d_inode; + struct fuse_lookup_out outarg; + int version; + int ret; + + ret = fuse_do_lookup(entry->d_parent->d_inode, entry, &outarg, + &version); + if(ret) + return 0; + /* The name now resolves to a different inode number: treat the dentry as invalid (return 0). */ + if(outarg.ino != inode->i_ino) + return 0; + + change_attributes(inode, &outarg.attr); + inode->i_version = version; + entry->d_time = jiffies; + } + return 1; +} + +#ifdef KERNEL_2_6 + /* 2.6 VFS entry points: these adapters ignore the extra vfsmount/nameidata argument and forward to the shared helpers. */ +#define fuse_mknod _fuse_mknod + +static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, + struct kstat *stat) +{ + struct inode *inode = entry->d_inode; + int err = fuse_revalidate(entry); + if(!err) + generic_fillattr(inode, stat); + + return err; +} + +static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, + struct nameidata *nd) +{ + struct inode *inode; + int err = fuse_lookup_iget(dir, entry, &inode); + if (err) + return ERR_PTR(err); + return d_splice_alias(inode, entry); +} + +static int fuse_create(struct inode *dir, struct dentry *entry, int mode, + struct nameidata *nd) +{ + return _fuse_create(dir, entry, mode); +} + +static int fuse_permission(struct inode *inode, int mask, + struct nameidata *nd) +{ + return _fuse_permission(inode, mask); +} + +static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) +{ + return _fuse_dentry_revalidate(entry); +} +#else /* KERNEL_2_6 */ + /* Pre-2.6 VFS entry points. */ +#define fuse_create _fuse_create +#define fuse_permission _fuse_permission + +static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry) +{ + struct inode *inode; + struct dentry *alias; + + int err = fuse_lookup_iget(dir, entry, &inode); + if(err) + return ERR_PTR(err); + + if(inode && S_ISDIR(inode->i_mode) && + (alias = d_find_alias(inode)) != NULL) { + dput(alias); + iput(inode); + printk("fuse: cannot assign an existing directory\n"); + return ERR_PTR(-EPROTO); + } + + 
d_add(entry, inode); + return NULL; +} + /* Pre-2.6 mknod signature: forwards to _fuse_mknod(). */ +static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode, + int rdev) +{ + return _fuse_mknod(dir, entry, mode, rdev); +} + +static int fuse_dentry_revalidate(struct dentry *entry, int flags) +{ + return _fuse_dentry_revalidate(entry); +} +#endif /* KERNEL_2_6 */ + + /* Operation tables. The attribute-refresh hook is .getattr on 2.6 and .revalidate on older kernels. */ +static struct inode_operations fuse_dir_inode_operations = +{ + .lookup = fuse_lookup, + .create = fuse_create, + .mknod = fuse_mknod, + .mkdir = fuse_mkdir, + .symlink = fuse_symlink, + .unlink = fuse_unlink, + .rmdir = fuse_rmdir, + .rename = fuse_rename, + .link = fuse_link, + .setattr = fuse_setattr, + .permission = fuse_permission, +#ifdef KERNEL_2_6 + .getattr = fuse_getattr, +#else + .revalidate = fuse_revalidate, +#endif +}; + +static struct file_operations fuse_dir_operations = { + .read = generic_read_dir, + .readdir = fuse_readdir, + .open = fuse_dir_open, + .release = fuse_dir_release, +}; + +static struct inode_operations fuse_file_inode_operations = { + .setattr = fuse_setattr, + .permission = fuse_permission, +#ifdef KERNEL_2_6 + .getattr = fuse_getattr, +#else + .revalidate = fuse_revalidate, +#endif +}; + /* Note: the symlink table sets no .permission hook, unlike the two tables above. */ +static struct inode_operations fuse_symlink_inode_operations = +{ + .setattr = fuse_setattr, + .readlink = fuse_readlink, + .follow_link = fuse_follow_link, +#ifdef KERNEL_2_6 + .getattr = fuse_getattr, +#else + .revalidate = fuse_revalidate, +#endif +}; + +static struct dentry_operations fuse_dentry_operations = { + .d_revalidate = fuse_dentry_revalidate, +}; + +/* + * Local Variables: + * indent-tabs-mode: t + * c-basic-offset: 8 + * End: + */ diff -urN a/fs/cofusefs/file.c b/fs/cofusefs/file.c --- a/fs/cofusefs/file.c +++ b/fs/cofusefs/file.c @@ -0,0 +1,542 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2001-2004 Miklos Szeredi <miklos@szeredi.hu> + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. 
+*/ +#include "fuse_i.h" + +#include <linux/pagemap.h> +#include <linux/slab.h> +#ifdef KERNEL_2_6 +#include <linux/backing-dev.h> +#include <linux/writeback.h> +#endif + +#ifndef KERNEL_2_6 +#define PageUptodate(page) Page_Uptodate(page) +#endif + +static int fuse_open(struct inode *inode, struct file *file) +{ + struct fuse_conn *fc = INO_FC(inode); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + struct fuse_open_in inarg; + int err; + + err = generic_file_open(inode, file); + if(err) + return err; + + /* If opening the root node, no lookup has been performed on + it, so the attributes must be refreshed */ + if(inode->i_ino == FUSE_ROOT_INO) { + int err = fuse_do_getattr(inode); + if(err) + return err; + } + + memset(&inarg, 0, sizeof(inarg)); + inarg.flags = file->f_flags & ~O_EXCL; + + in.h.opcode = FUSE_OPEN; + in.h.ino = inode->i_ino; + in.numargs = 1; + in.args[0].size = sizeof(inarg); + in.args[0].value = &inarg; + request_send(fc, &in, &out); + if(!out.h.error && !(fc->flags & FUSE_KERNEL_CACHE)) { +#ifdef KERNEL_2_6 + invalidate_inode_pages(inode->i_mapping); +#else + invalidate_inode_pages(inode); +#endif + } + + return out.h.error; +} + +void fuse_sync_inode(struct inode *inode) +{ +#ifdef KERNEL_2_6 + filemap_fdatawrite(inode->i_mapping); + filemap_fdatawait(inode->i_mapping); +#else +#ifndef NO_MM + filemap_fdatasync(inode->i_mapping); + filemap_fdatawait(inode->i_mapping); +#endif +#endif +} + +static int fuse_release_old(struct inode *inode, struct file *file) +{ + struct fuse_conn *fc = INO_FC(inode); + struct fuse_in *in = NULL; + struct fuse_open_in *inarg = NULL; + unsigned int s = sizeof(struct fuse_in) + sizeof(struct fuse_open_in); + + in = kmalloc(s, GFP_NOFS); + if(!in) + return -ENOMEM; + memset(in, 0, s); + inarg = (struct fuse_open_in *) (in + 1); + inarg->flags = file->f_flags & ~O_EXCL; + + in->h.opcode = FUSE_RELEASE; + in->h.ino = inode->i_ino; + in->numargs = 1; + in->args[0].size = sizeof(struct 
fuse_open_in); + in->args[0].value = inarg; + if(!request_send_noreply(fc, in)) + return 0; + /* Reached only when the send failed; presumably the request layer takes over (and frees) in on success - TODO confirm. */ + kfree(in); + return 0; +} + /* Release a file: flush dirty pages for writers, then send FUSE_RELEASE2 and wait for the reply. An -ENOSYS reply is remembered in fc->oldrelease and the reply-less FUSE_RELEASE is used from then on. */ +static int fuse_release(struct inode *inode, struct file *file) +{ + struct fuse_conn *fc = INO_FC(inode); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + struct fuse_open_in inarg; + + if(file->f_mode & FMODE_WRITE) + fuse_sync_inode(inode); + + if (fc->oldrelease) + return fuse_release_old(inode, file); + + memset(&inarg, 0, sizeof(inarg)); + inarg.flags = file->f_flags & ~O_EXCL; + + in.h.opcode = FUSE_RELEASE2; + in.h.ino = inode->i_ino; + in.numargs = 1; + in.args[0].size = sizeof(inarg); + in.args[0].value = &inarg; + request_send(fc, &in, &out); + if (out.h.error == -ENOSYS) { + fc->oldrelease = 1; + return fuse_release_old(inode, file); + } + return 0; +} + /* Forward fsync to the other side as FUSE_FSYNC, passing the datasync flag; returns the request's error code. */ +static int fuse_fsync(struct file *file, struct dentry *de, int datasync) +{ + struct inode *inode = de->d_inode; + struct fuse_conn *fc = INO_FC(inode); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + struct fuse_fsync_in inarg; + + memset(&inarg, 0, sizeof(inarg)); + inarg.datasync = datasync; + + in.h.opcode = FUSE_FSYNC; + in.h.ino = inode->i_ino; + in.numargs = 1; + in.args[0].size = sizeof(inarg); + in.args[0].value = &inarg; + request_send(fc, &in, &out); + return out.h.error; + + /* FIXME: need to ensure, that all write requests issued + before this request are completed. Should userspace take + care of this? 
*/ +} + /* Fill one page-cache page with a FUSE_READ request; a short reply is zero-padded. The page is unlocked on every path and marked up to date only on success. */ +static int fuse_readpage(struct file *file, struct page *page) +{ + struct inode *inode = page->mapping->host; + struct fuse_conn *fc = INO_FC(inode); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + struct fuse_read_in inarg; + char *buffer; + + buffer = kmap(page); + + memset(&inarg, 0, sizeof(inarg)); + inarg.offset = (unsigned long long) page->index << PAGE_CACHE_SHIFT; + inarg.size = PAGE_CACHE_SIZE; + + in.h.opcode = FUSE_READ; + in.h.ino = inode->i_ino; + in.numargs = 1; + in.args[0].size = sizeof(inarg); + in.args[0].value = &inarg; + out.argvar = 1; + out.numargs = 1; + out.args[0].size = PAGE_CACHE_SIZE; + out.args[0].value = buffer; + + request_send(fc, &in, &out); + if(!out.h.error) { + size_t outsize = out.args[0].size; + if(outsize < PAGE_CACHE_SIZE) + memset(buffer + outsize, 0, PAGE_CACHE_SIZE - outsize); + flush_dcache_page(page); + SetPageUptodate(page); + } + + kunmap(page); + unlock_page(page); + + return out.h.error; +} + /* Returns 1 when every page of block bl_index (clipped to the page holding the end of file) is in the page cache and up to date, 0 otherwise. */ +static int fuse_is_block_uptodate(struct address_space *mapping, + struct inode *inode, size_t bl_index) +{ + size_t index = bl_index << FUSE_BLOCK_PAGE_SHIFT; + size_t end_index = ((bl_index + 1) << FUSE_BLOCK_PAGE_SHIFT) - 1; + size_t file_end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT; + + if (end_index > file_end_index) + end_index = file_end_index; + + for (; index <= end_index; index++) { + struct page *page = find_get_page(mapping, index); + + if (!page) + return 0; + + if (!PageUptodate(page)) { + page_cache_release(page); + return 0; + } + + page_cache_release(page); + } + + return 1; +} + + /* Copy a block buffer into the page cache one page at a time; pages that are already up to date are left untouched. Returns -1 if a page cannot be grabbed, 0 otherwise. */ +static int fuse_cache_block(struct address_space *mapping, + struct inode *inode, char *bl_buf, + size_t bl_index) +{ + size_t start_index = bl_index << FUSE_BLOCK_PAGE_SHIFT; + size_t end_index = ((bl_index + 1) << FUSE_BLOCK_PAGE_SHIFT) - 1; + size_t file_end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT; + + int i; + + if (end_index > file_end_index) + end_index = file_end_index; + + 
for (i = 0; start_index + i <= end_index; i++) { + size_t index = start_index + i; + struct page *page; + char *buffer; + + page = grab_cache_page(mapping, index); + if (!page) + return -1; + + if (!PageUptodate(page)) { + buffer = kmap(page); + memcpy(buffer, bl_buf + i * PAGE_CACHE_SIZE, + PAGE_CACHE_SIZE); + flush_dcache_page(page); + SetPageUptodate(page); + kunmap(page); + } + + unlock_page(page); + page_cache_release(page); + } + + return 0; +} + /* Read one FUSE_BLOCK_SIZE block into bl_buf with a FUSE_READ request; a short reply is zero-padded. Returns the request's error code. */ +static int fuse_file_read_block(struct inode *inode, char *bl_buf, + size_t bl_index) +{ + struct fuse_conn *fc = INO_FC(inode); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + struct fuse_read_in inarg; + + memset(&inarg, 0, sizeof(inarg)); + inarg.offset = (unsigned long long) bl_index << FUSE_BLOCK_SHIFT; + inarg.size = FUSE_BLOCK_SIZE; + + in.h.opcode = FUSE_READ; + in.h.ino = inode->i_ino; + in.numargs = 1; + in.args[0].size = sizeof(inarg); + in.args[0].value = &inarg; + out.argvar = 1; + out.numargs = 1; + out.args[0].size = FUSE_BLOCK_SIZE; + out.args[0].value = bl_buf; + + request_send(fc, &in, &out); + + if (!out.h.error) { + size_t outsize = out.args[0].size; + if (outsize < FUSE_BLOCK_SIZE) + memset(bl_buf + outsize, 0, FUSE_BLOCK_SIZE - outsize); + } + + return out.h.error; +} + /* Pre-populate the page cache block by block for the range starting at pos and spanning count bytes, clipped to the file size. Blocks already up to date are skipped; errors are not reported (void). */ +static void fuse_file_bigread(struct address_space *mapping, + struct inode *inode, loff_t pos, size_t count) +{ + size_t bl_index = pos >> FUSE_BLOCK_SHIFT; + size_t bl_end_index = (pos + count) >> FUSE_BLOCK_SHIFT; + size_t bl_file_end_index = i_size_read(inode) >> FUSE_BLOCK_SHIFT; + + if (bl_end_index > bl_file_end_index) + bl_end_index = bl_file_end_index; + + while (bl_index <= bl_end_index) { + int res; + char *bl_buf = kmalloc(FUSE_BLOCK_SIZE, GFP_NOFS); + if (!bl_buf) + break; + res = fuse_is_block_uptodate(mapping, inode, bl_index); + if (!res) + res = fuse_file_read_block(inode, bl_buf, bl_index); + if (!res) + fuse_cache_block(mapping, inode, bl_buf, bl_index); + kfree(bl_buf); + 
bl_index++; + } +} + /* With FUSE_LARGE_READ, first pull the requested range into the page cache block-wise (under i_sem), then let generic_file_read() serve the read. */ +static ssize_t fuse_file_read(struct file *filp, char *buf, + size_t count, loff_t * ppos) +{ + struct address_space *mapping = filp->f_dentry->d_inode->i_mapping; + struct inode *inode = mapping->host; + struct fuse_conn *fc = INO_FC(inode); + + if(fc->flags & FUSE_LARGE_READ) { + /* Don't allow this to get mixed up with writes */ + down(&inode->i_sem); + fuse_file_bigread(mapping, inode, *ppos, count); + up(&inode->i_sem); + } + + return generic_file_read(filp, buf, count, ppos); +} + /* Write count bytes of the page, starting at offset, with a FUSE_WRITE request and wait for the reply; the page is flagged with SetPageError() on failure. */ +static int write_buffer(struct inode *inode, struct page *page, + unsigned offset, size_t count) +{ + struct fuse_conn *fc = INO_FC(inode); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + struct fuse_write_in inarg; + char *buffer; + + buffer = kmap(page); + + memset(&inarg, 0, sizeof(inarg)); + inarg.offset = ((unsigned long long) page->index << PAGE_CACHE_SHIFT) + + offset; + inarg.size = count; + + in.h.opcode = FUSE_WRITE; + in.h.ino = inode->i_ino; + in.numargs = 2; + in.args[0].size = sizeof(inarg); + in.args[0].value = &inarg; + in.args[1].size = count; + in.args[1].value = buffer + offset; + request_send(fc, &in, &out); + kunmap(page); + if(out.h.error) + SetPageError(page); + + return out.h.error; +} + /* Number of bytes of this page that lie inside i_size: a full page, the partial tail page, or 0 for a page at or beyond the end of file. */ +static int get_write_count(struct inode *inode, struct page *page) +{ + unsigned long end_index; + loff_t size = i_size_read(inode); + int count; + + end_index = size >> PAGE_CACHE_SHIFT; + if(page->index < end_index) + count = PAGE_CACHE_SIZE; + else { + count = size & (PAGE_CACHE_SIZE - 1); + if(page->index > end_index || count == 0) + return 0; + } + return count; +} + +#ifdef KERNEL_2_6 + /* Completion callback handed to request_send_nonblock() by write_buffer_nonblock(): records a write error on the page and its mapping, ends writeback, and releases the kmap and the request memory. */ +static void write_buffer_end(struct fuse_conn *fc, struct fuse_in *in, + struct fuse_out *out, void *_page) +{ + struct page *page = (struct page *) _page; + + if(out->h.error) { + SetPageError(page); + if(out->h.error == -ENOSPC) + set_bit(AS_ENOSPC, &page->mapping->flags); + else + set_bit(AS_EIO, &page->mapping->flags); + } + 
end_page_writeback(page); + kunmap(page); + kfree(in); +} + /* Submit a FUSE_WRITE through request_send_nonblock(). in, out and the write header share one allocation, released together with the kmap by write_buffer_end(), or right here when the submission fails. */ +static int write_buffer_nonblock(struct inode *inode, struct page *page, + unsigned offset, size_t count) +{ + int err; + struct fuse_conn *fc = INO_FC(inode); + struct fuse_in *in = NULL; + struct fuse_out *out = NULL; + struct fuse_write_in *inarg = NULL; + char *buffer; + unsigned int s = sizeof(struct fuse_in) + sizeof(struct fuse_out) + + sizeof(struct fuse_write_in); + + in = kmalloc(s, GFP_NOFS); + if(!in) + return -ENOMEM; + memset(in, 0, s); + out = (struct fuse_out *)(in + 1); + inarg = (struct fuse_write_in *)(out + 1); + + buffer = kmap(page); + + inarg->offset = ((unsigned long long) page->index << PAGE_CACHE_SHIFT) + offset; + inarg->size = count; + + in->h.opcode = FUSE_WRITE; + in->h.ino = inode->i_ino; + in->numargs = 2; + in->args[0].size = sizeof(struct fuse_write_in); + in->args[0].value = inarg; + in->args[1].size = count; + in->args[1].value = buffer + offset; + err = request_send_nonblock(fc, in, out, write_buffer_end, page); + if(err) { + if(err != -EWOULDBLOCK) + SetPageError(page); + kunmap(page); + kfree(in); + } + return err; +} + /* 2.6 writepage: pages with nothing inside i_size yield -EINVAL. For nonblocking writeback an -EWOULDBLOCK result re-dirties the page and is reported as success. The page is unlocked on every path. */ +static int fuse_writepage(struct page *page, struct writeback_control *wbc) +{ + int err; + struct inode *inode = page->mapping->host; + unsigned count = get_write_count(inode, page); + + err = -EINVAL; + if(count) { + /* FIXME: check sync_mode, and wait for previous writes (or + signal userspace to do this) */ + if(wbc->nonblocking) { + SetPageWriteback(page); + err = write_buffer_nonblock(inode, page, 0, count); + if (err) + ClearPageWriteback(page); + if(err == -EWOULDBLOCK) { + __set_page_dirty_nobuffers(page); + err = 0; + } + } else + err = write_buffer(inode, page, 0, count); + } + + unlock_page(page); + return err; +} +#else +static int fuse_writepage(struct page *page) +{ + int err; + struct inode *inode = page->mapping->host; + int count = get_write_count(inode, page); + err = -EINVAL; + if(count) + err = write_buffer(inode, page, 0, 
count); + + unlock_page(page); + return err; +} +#endif + +static int fuse_prepare_write(struct file *file, struct page *page, + unsigned offset, unsigned to) +{ + /* No op */ + return 0; +} + /* Write the range [offset, to) of the page synchronously and grow i_size when the write ends beyond it. The end position is computed in loff_t so that it is not truncated where unsigned long is 32 bits wide. */ +static int fuse_commit_write(struct file *file, struct page *page, + unsigned offset, unsigned to) +{ + int err; + struct inode *inode = page->mapping->host; + + err = write_buffer(inode, page, offset, to - offset); + if(!err) { + loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to; + if(pos > i_size_read(inode)) + i_size_write(inode, pos); + } + return err; +} + +static struct file_operations fuse_file_operations = { + .read = fuse_file_read, + .write = generic_file_write, + .mmap = generic_file_mmap, + .open = fuse_open, + .release = fuse_release, + .fsync = fuse_fsync, +#ifdef KERNEL_2_6 + .sendfile = generic_file_sendfile, +#endif +}; + +static struct address_space_operations fuse_file_aops = { + .readpage = fuse_readpage, + .writepage = fuse_writepage, + .prepare_write = fuse_prepare_write, + .commit_write = fuse_commit_write, +}; + /* Install the file and address-space operation tables on a regular-file inode. */ +void fuse_init_file_inode(struct inode *inode) +{ +#ifdef KERNEL_2_6 + struct fuse_conn *fc = INO_FC(inode); + /* Readahead somehow defeats big reads on 2.6 (says Michael + Grigoriev) */ + if(fc->flags & FUSE_LARGE_READ) + inode->i_mapping->backing_dev_info->ra_pages = 0; +#endif + inode->i_fop = &fuse_file_operations; + inode->i_data.a_ops = &fuse_file_aops; +} + +/* + * Local Variables: + * indent-tabs-mode: t + * c-basic-offset: 8 + * End: + */ diff -urN a/fs/cofusefs/fuse_i.h b/fs/cofusefs/fuse_i.h --- a/fs/cofusefs/fuse_i.h +++ b/fs/cofusefs/fuse_i.h @@ -0,0 +1,297 @@ +/* + COFUSE: Filesystem in a host of Cooperative Linux + Copyright (C) 2004 Dan Aloni <da-x@colinux.org> + + based on FUSE: Filesystem in Userspace + Copyright (C) 2001-2004 Miklos Szeredi <miklos@szeredi.hu> + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. 
+*/ + + +#include <linux/version.h> +#include <linux/config.h> + +#ifndef CONFIG_COOPERATIVE +#include <linux/cofuse.h> +#else +#include <linux/cooperative_internal.h> +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) && LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) +#error Kernel version 2.5.* not supported +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +#define KERNEL_2_6 +#endif + +#ifndef KERNEL_2_6 +#include <linux/config.h> +#ifdef CONFIG_MODVERSIONS +#define MODVERSIONS +#include <linux/modversions.h> +#endif +#include <config.h> +#ifndef HAVE_I_SIZE_FUNC +#define i_size_read(inode) ((inode)->i_size) +#define i_size_write(inode, size) do { (inode)->i_size = size; } while(0) +#endif +#endif +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/list.h> +#include <linux/spinlock.h> + +/** Read combining parameters */ +#define FUSE_BLOCK_SHIFT 16 +#define FUSE_BLOCK_SIZE 65536 +#define FUSE_BLOCK_MASK 0xffff0000 + /** Shift that converts a block index into the index of its first page */ +#define FUSE_BLOCK_PAGE_SHIFT (FUSE_BLOCK_SHIFT - PAGE_CACHE_SHIFT) + +/** + * A Fuse connection. + * + * This structure is created, when the client device is opened, and is + * destroyed, when the client device is closed _and_ the filesystem is + * unmounted. + */ +struct fuse_conn { + /** The superblock of the mounted filesystem */ + struct super_block *sb; + +#ifndef CONFIG_COOPERATIVE + /** The opened client device */ + struct file *file; +#else + int cofs_unit; /* cofs unit number: the index parsed from the mount name, written to passage page params[1] with every request */ +#endif + /** The user id for this mount */ + uid_t uid; + + /** The fuse mount flags for this mount */ + unsigned int flags; + + /** Is the new (synchronous) release not supported by + userspace? 
*/ + unsigned int oldrelease; + /** Text after ':' in the mount name (empty when absent); copied into the passage page for FUSE_MOUNT */ + char opt_pathname[0x80]; + +#ifndef CONFIG_COOPERATIVE + /** Readers of the connection are waiting on this */ + wait_queue_head_t waitq; + + /** The list of pending requests */ + struct list_head pending; + + /** The list of requests being processed */ + struct list_head processing; + + /** Controls the maximum number of outstanding requests */ + struct semaphore outstanding; + + /** The next unique request id */ + int reqctr; +#endif +}; + +/** One input argument of a request */ +struct fuse_in_arg { + unsigned int size; + const void *value; +}; + +/** The request input */ +struct fuse_in { + struct fuse_in_header h; + unsigned int numargs; + struct fuse_in_arg args[3]; +}; + +/** One output argument of a request */ +struct fuse_out_arg { + unsigned int size; + void *value; +}; + +/** The request output */ +struct fuse_out { + struct fuse_out_header h; + unsigned int argvar; + unsigned int numargs; + struct fuse_out_arg args[3]; +}; + /** Initializers for on-stack requests: FUSE_IN_INIT sets the 4th and 5th header fields to the caller's fsuid/fsgid, every other field is zero */ +#define FUSE_IN_INIT { {0, 0, 0, current->fsuid, current->fsgid}, 0} +#define FUSE_OUT_INIT { {0, 0}, 0, 0} + +struct fuse_req; +typedef void (*fuse_reqend_t)(struct fuse_conn *, struct fuse_in *, + struct fuse_out *, void *data); + +/** + * A request to the client + */ +struct fuse_req { + /** The request list */ + struct list_head list; + + /** True if the request is synchronous */ + unsigned int issync:1; + + /** The request is locked */ + unsigned int locked:1; + + /** The request has been interrupted while it was locked */ + unsigned int interrupted:1; + + /* The request has been sent to the client */ + unsigned int sent:1; + + /* The request is finished */ + unsigned int finished:1; + + /** The request input */ + struct fuse_in *in; + + /** The request output */ + struct fuse_out *out; + + /** Used to wake up the task waiting for completion of request*/ + wait_queue_head_t waitq; + + /** Request completion callback */ + fuse_reqend_t end; + + /** User data */ + void *data; +}; + +#ifdef 
KERNEL_2_6 +#define SB_FC(sb) ((sb)->s_fs_info) +#else +#define SB_FC(sb) ((sb)->u.generic_sbp) +#endif +#define INO_FC(inode) SB_FC((inode)->i_sb) +#define DEV_FC(file) ((struct fuse_conn *) (file)->private_data) + + +/** + * The proc entry for the client device ("/proc/fs/fuse/dev") + */ +extern struct proc_dir_entry *proc_fuse_dev; + +/** + * The lock to protect fuse structures + */ +extern spinlock_t cofuse_lock; + + +/** + * Get a filled in inode + */ +struct inode *cofuse_iget(struct super_block *sb, ino_t ino, + struct fuse_attr *attr, int version); + + +/** + * Initialise operations on regular file + */ +void cofuse_init_file_inode(struct inode *inode); + +/** + * Check if the connection can be released, and if yes, then free the + * connection structure + */ +void cofuse_release_conn(struct fuse_conn *fc); + +/** + * Initialize the client device + */ +int cofuse_dev_init(void); + +/** + * Cleanup the client device + */ +void cofuse_dev_cleanup(void); + +/** + * Initialize the fuse filesystem + */ +int cofuse_fs_init(void); + +/** + * Cleanup the fuse filesystem + */ +void cofuse_fs_cleanup(void); + +/** + * Send a request + * + */ +void cofuse_request_send(struct fuse_conn *fc, struct fuse_in *in, + struct fuse_out *out); + +/** + * Send a request for which a reply is not expected + */ +int cofuse_request_send_noreply(struct fuse_conn *fc, struct fuse_in *in); + + +/** + * Send a synchronous request without blocking + */ +int cofuse_request_send_nonblock(struct fuse_conn *fc, struct fuse_in *in, + struct fuse_out *out, fuse_reqend_t end, void *data); + +/** + * Get the attributes of a file + */ +int cofuse_do_getattr(struct inode *inode); + +/** + * Write dirty pages + */ +void cofuse_sync_inode(struct inode *inode); + +/* + * Local Variables: + * indent-tabs-mode: t + * c-basic-offset: 8 + * End: + */ + +#define COFUSE_VERSION "0.1" +#define FUSE_VERSION COFUSE_VERSION + /* The fuse_* and request_* names used by the .c files are aliases for the cofuse_* symbols declared above. */ +#define fuse_init_file_inode cofuse_init_file_inode +#define fuse_do_getattr 
cofuse_do_getattr +#define fuse_sync_inode cofuse_sync_inode +#define fuse_lock cofuse_lock + +#define request_send cofuse_request_send +#define request_send_noreply cofuse_request_send_noreply +#define request_send_nonblock cofuse_request_send_nonblock +#define release_conn cofuse_release_conn +#define fuse_iget cofuse_iget +#define fuse_dev_init cofuse_dev_init +#define fuse_dev_cleanup cofuse_dev_cleanup +#define fuse_fs_init cofuse_fs_init +#define fuse_fs_cleanup cofuse_fs_cleanup + /** Connections indexed by cofs unit number (looked up by co_get_conn()) */ +extern struct fuse_conn *cofs_volumes[CO_MODULE_MAX_COFS]; + +/** Data passed to mount */ +struct cofuse_mount_data { + struct fuse_mount_data *fuse; + int uid; + int gid; + unsigned long file_mode; + unsigned long dir_mode; + unsigned long flags; + char name[0x80]; /* unit name, parsed by co_get_conn() as an optional "cofs" prefix, a decimal index and an optional ":path" */ +}; diff -urN a/fs/cofusefs/inode.c b/fs/cofusefs/inode.c --- a/fs/cofusefs/inode.c +++ b/fs/cofusefs/inode.c @@ -0,0 +1,545 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2001 Miklos Szeredi (miklos@szeredi.hu) + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. 
+*/ + +#include "fuse_i.h" + +#include <linux/pagemap.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/file.h> +#include <linux/ctype.h> +#include <linux/proc_fs.h> +#ifdef KERNEL_2_6 +#include <linux/statfs.h> +#endif + +#define FUSE_SUPER_MAGIC 0x65735546 + +#ifndef KERNEL_2_6 +#define kstatfs statfs +#endif + +#ifndef FS_BINARY_MOUNTDATA +#define FS_BINARY_MOUNTDATA 0 +#endif + +static void fuse_read_inode(struct inode *inode) +{ + /* No op */ +} + /* Send FUSE_FORGET for an inode that is being dropped. Nothing is sent when the superblock has no connection; the cooperative build also skips the root inode. */ +static void fuse_clear_inode(struct inode *inode) +{ + unsigned long flags; + struct fuse_conn *fc = INO_FC(inode); + +#ifndef CONFIG_COOPERATIVE + struct fuse_in *in = NULL; + struct fuse_forget_in *inarg = NULL; + unsigned int s = sizeof(struct fuse_in) + sizeof(struct fuse_forget_in); + + if(fc == NULL) + return; + + in = kmalloc(s, GFP_NOFS); + if(!in) + return; + memset(in, 0, s); + inarg = (struct fuse_forget_in *) (in + 1); + inarg->version = inode->i_version; + + in->h.opcode = FUSE_FORGET; + in->h.ino = inode->i_ino; + in->numargs = 1; + in->args[0].size = sizeof(struct fuse_forget_in); + in->args[0].value = inarg; + + if(!request_send_noreply(fc, in)) + return; + + kfree(in); +#else + if (fc == NULL || FUSE_ROOT_INO == inode->i_ino) + return; + + co_passage_page_assert_valid(); + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_DEVICE; + co_passage_page->params[0] = CO_DEVICE_FILESYSTEM; + co_passage_page->params[1] = fc->cofs_unit; + co_passage_page->params[2] = FUSE_FORGET; + co_passage_page->params[3] = inode->i_ino; + co_switch_wrapper(); + co_passage_page_release(flags); +#endif +} + /* Detach the connection from the superblock under fuse_lock and hand it to release_conn(). */ +static void fuse_put_super(struct super_block *sb) +{ + struct fuse_conn *fc = SB_FC(sb); + + spin_lock(&fuse_lock); + fc->sb = NULL; + fc->uid = 0; + fc->flags = 0; + /* Flush all readers on this fs */ +#ifndef CONFIG_COOPERATIVE + wake_up_all(&fc->waitq); +#endif + release_conn(fc); + SB_FC(sb) = NULL; + spin_unlock(&fuse_lock); +} + +static void convert_fuse_statfs(struct kstatfs 
*stbuf, struct fuse_kstatfs *attr) +{ + stbuf->f_type = FUSE_SUPER_MAGIC; + stbuf->f_bsize = attr->block_size; + stbuf->f_blocks = attr->blocks; + stbuf->f_bfree = stbuf->f_bavail = attr->blocks_free; + stbuf->f_files = attr->files; + stbuf->f_ffree = attr->files_free; + /* Is this field necessary? Most filesystems ignore it... + stbuf->f_fsid.val[0] = (FUSE_SUPER_MAGIC>>16)&0xffff; + stbuf->f_fsid.val[1] = FUSE_SUPER_MAGIC &0xffff; */ + stbuf->f_namelen = attr->namelen; +} + +static int fuse_statfs(struct super_block *sb, struct kstatfs *buf) +{ + struct fuse_conn *fc = SB_FC(sb); + struct fuse_in in = FUSE_IN_INIT; + struct fuse_out out = FUSE_OUT_INIT; + struct fuse_statfs_out outarg; + + in.numargs = 0; + in.h.opcode = FUSE_STATFS; + out.numargs = 1; + out.args[0].size = sizeof(outarg); + out.args[0].value = &outarg; + request_send(fc, &in, &out); + if(!out.h.error) + convert_fuse_statfs(buf, &outarg.st); + + return out.h.error; +} + +#ifndef CONFIG_COOPERATIVE + +static struct fuse_conn *get_conn(struct fuse_mount_data *d) +{ + struct fuse_conn *fc = NULL; + struct file *file; + struct inode *ino; + + if(d == NULL) { + printk("fuse_read_super: Bad mount data\n"); + return NULL; + } + + if(d->version != FUSE_KERNEL_VERSION) { + printk("fuse_read_super: Bad version: %i\n", d->version); + return NULL; + } + + file = fget(d->fd); + ino = NULL; + if(file) + ino = file->f_dentry->d_inode; + + if(!ino || !proc_fuse_dev || proc_fuse_dev->low_ino != ino->i_ino) { + printk("fuse_read_super: Bad file: %i\n", d->fd); + goto out; + } + + fc = file->private_data; + out: + if(file) /* file is NULL when fget() failed; only drop a reference we hold */ + fput(file); + return fc; +} + +#else + +static int _atoi(const char *s, const char **out) +{ + /* lib/spprintf.h */ + + int i=0; + + while (isdigit(*s)) + i = i*10 + *(s++) - '0'; + + *out = s; + + return i; +} + +static struct fuse_conn *co_get_conn(struct cofuse_mount_data *d) +{ + int index; + int ret; + unsigned long flags; + struct fuse_conn *conn = NULL; + const char *name, *next; + + if (d == 
NULL) { + printk("cofuse_read_super: Bad mount data\n"); + return NULL; + } + + name = d->name; + + if (strncmp("cofs", name, 4) == 0) + name += 4; + + index = _atoi(name, &next); + if (index < 0 || index >= CO_MODULE_MAX_COFS) { + printk("cofuse_read_super: Invalid index %d\n", index); + return NULL; + } + + if (cofs_volumes[index]) + return cofs_volumes[index]; + + conn = kmalloc(sizeof(struct fuse_conn), GFP_KERNEL); + if (!conn) + return NULL; + + memset(conn, 0, sizeof(*conn)); + + if (*next == ':') { + snprintf(conn->opt_pathname, sizeof(conn->opt_pathname), "%s", next+1); + } + + conn->cofs_unit = index; + + co_passage_page_assert_valid(); + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_DEVICE; + co_passage_page->params[0] = CO_DEVICE_FILESYSTEM; + co_passage_page->params[1] = conn->cofs_unit; + co_passage_page->params[2] = FUSE_MOUNT; + co_passage_page->params[5] = d->uid; + co_passage_page->params[6] = d->gid; + co_passage_page->params[7] = d->dir_mode; + co_passage_page->params[8] = d->file_mode; + memcpy(&co_passage_page->params[30], conn->opt_pathname, strlen(conn->opt_pathname) + 1); + co_switch_wrapper(); + ret = co_passage_page->params[4]; + co_passage_page_release(flags); + + if (ret) { + kfree(conn); + conn = NULL; + } + + return conn; +} + +#endif + +static struct inode *get_root_inode(struct super_block *sb, unsigned int mode) +{ + struct fuse_attr attr; + memset(&attr, 0, sizeof(attr)); + + attr.mode = mode; + return fuse_iget(sb, 1, &attr, 0); +} + + +#ifdef KERNEL_2_6 + +static struct dentry *fuse_get_dentry(struct super_block *sb, void *vobjp) +{ + __u32 *objp = vobjp; + unsigned long ino = objp[0]; + /* __u32 generation = objp[1]; */ + struct inode *inode; + struct dentry *entry; + + if(ino == 0) + return ERR_PTR(-ESTALE); + + inode = ilookup(sb, ino); + if(!inode) + return ERR_PTR(-ESTALE); + + entry = d_alloc_anon(inode); + if(!entry) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + + return entry; +} + 
+static struct export_operations fuse_export_operations = { + .get_dentry = fuse_get_dentry, +}; +#endif + +static struct super_operations fuse_super_operations = { + .read_inode = fuse_read_inode, + .clear_inode = fuse_clear_inode, + .put_super = fuse_put_super, + .statfs = fuse_statfs, +}; + +static int fuse_read_super(struct super_block *sb, void *data, int silent) +{ + struct fuse_conn *fc; + struct inode *root; +#ifndef CONFIG_COOPERATIVE + struct fuse_mount_data *d = data; +#else + struct cofuse_mount_data *co_d = data; + struct fuse_mount_data *d = co_d->fuse; +#endif + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = FUSE_SUPER_MAGIC; + sb->s_op = &fuse_super_operations; + sb->s_maxbytes = MAX_LFS_FILESIZE; +#ifdef KERNEL_2_6 + sb->s_export_op = &fuse_export_operations; +#endif + +#ifndef CONFIG_COOPERATIVE + fc = get_conn(d); +#else + fc = co_get_conn(co_d); +#endif + if(fc == NULL) + return -EINVAL; + spin_lock(&fuse_lock); + if(fc->sb != NULL) { + printk("fuse_read_super: connection already mounted\n"); + spin_unlock(&fuse_lock); + return -EINVAL; + } + fc->sb = sb; + fc->flags = d->flags; + fc->uid = d->uid; + spin_unlock(&fuse_lock); + + /* fc is needed in fuse_init_file_inode which could be called + from get_root_inode */ + SB_FC(sb) = fc; + + root = get_root_inode(sb, d->rootmode); + if(root == NULL) { + printk("fuse_read_super: failed to get root inode\n"); + return -EINVAL; + } + + sb->s_root = d_alloc_root(root); + if(!sb->s_root) { + printk("fuse_read_super: failed to allocate root\n"); + iput(root); /* root was never attached to a dentry; release it here */ + return -EINVAL; + } + return 0; +} + +#ifdef CONFIG_COOPERATIVE +/* + * cofuse_getopt and parse_options were + * adopted from smbfs + */ + +struct option { + const char *name; + unsigned long flag; + int val; +}; + +/** + * cofuse_getopt - option parser + * based on smb_getopt from fs/smbfs + * + * @caller: name of the caller, for error messages + * @options: the options string + * @opts: an array of &struct 
option entries controlling parser operations + * @optopt: output; will contain the current option + * @optarg: output; will contain the value (if one exists) + * @flag: output; may be NULL; should point to a long for or'ing flags + * @value: output; may be NULL; will be overwritten with the integer value + * of the current argument. + * + * Helper to parse options on the format used by mount ("a=b,c=d,e,f"). + * Returns opts->val if a matching entry in the 'opts' array is found, + * 0 when no more tokens are found, -1 if an error is encountered. + */ +static int cofuse_getopt(char *caller, char **options, struct option *opts, + char **optopt, char **optarg, unsigned long *flag, + unsigned long *value) +{ + char *token; + char *val; + int i; + + do { + if ((token = strsep(options, ",")) == NULL) + return 0; + } while (*token == '\0'); + *optopt = token; + + *optarg = NULL; + if ((val = strchr (token, '=')) != NULL) { + *val++ = 0; + if (value) + *value = simple_strtoul(val, NULL, 0); + *optarg = val; + } + + for (i = 0; opts[i].name != NULL; i++) { + if (!strcmp(opts[i].name, token)) { + if (!opts[i].flag && (!val || !*val)) { + printk("%s: the %s option requires an argument\n", + caller, token); + return -1; + } + + if (flag && opts[i].flag) + *flag |= opts[i].flag; + + return opts[i].val; + } + } + printk("%s: Unrecognized mount option %s\n", caller, token); + return -1; +} + +static struct option opts[] = { + { "uid", 0, 'u' }, + { "gid", 0, 'g' }, + { "fmask", 0, 'f' }, + { "dmask", 0, 'd' }, + { NULL, 0, 0} +}; + +/* + * parse_options - based on parse_options from fs/smbfs + */ +static int parse_options(struct cofuse_mount_data *mnt, char *options) +{ + int c; + unsigned long flags; + unsigned long value; + char *optarg; + char *optopt; + + flags = 0; + while ((c = cofuse_getopt("cofuse", &options, opts, + &optopt, &optarg, &flags, &value)) > 0) + { + switch (c) { + case 1: + /* got a "flag" option */ + break; + case 'u': + mnt->uid = value; + break; + case 
'g': + mnt->gid = value; + break; + case 'f': + mnt->file_mode = (value & S_IRWXUGO) | S_IFREG; + break; + case 'd': + mnt->dir_mode = (value & S_IRWXUGO) | S_IFDIR; + break; + default: + printk("cofs: Unrecognized mount option %s\n", optopt); + return -1; + } + } + + mnt->flags = flags; + return c; +} +#endif + +#ifdef KERNEL_2_6 +static struct super_block *fuse_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *raw_data) +{ +#ifdef CONFIG_COOPERATIVE + struct cofuse_mount_data co_md = {0, }; + struct fuse_mount_data md = {0, }; + int ret; + + co_md.uid = current->uid; + co_md.gid = current->gid; + co_md.dir_mode = FUSE_S_IRWXU | FUSE_S_IRGRP | FUSE_S_IXGRP | + FUSE_S_IROTH | FUSE_S_IXOTH | S_IFDIR; + co_md.file_mode = FUSE_S_IRWXU | FUSE_S_IRGRP | FUSE_S_IXGRP | + FUSE_S_IROTH | FUSE_S_IXOTH | S_IFREG; + + ret = parse_options(&co_md, raw_data); + if (ret == -1) + return ERR_PTR(-EINVAL); + + md.rootmode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + md.flags = FUSE_ALLOW_OTHER | FUSE_DEFAULT_PERMISSIONS; + + co_md.fuse = &md; + snprintf(co_md.name, sizeof(co_md.name), "%s", dev_name); + + return get_sb_nodev(fs_type, flags, &co_md, fuse_read_super); +#else + return get_sb_nodev(fs_type, flags, raw_data, fuse_read_super); +#endif +} + +static struct file_system_type fuse_fs_type = { + .owner = THIS_MODULE, + .name = "cofs", + .get_sb = fuse_get_sb, + .kill_sb = kill_anon_super, + .fs_flags = FS_BINARY_MOUNTDATA, +}; +#else +static struct super_block *fuse_read_super_compat(struct super_block *sb, + void *data, int silent) +{ + int err = fuse_read_super(sb, data, silent); + if(err) + return NULL; + else + return sb; +} + +static DECLARE_FSTYPE(fuse_fs_type, "cofs", fuse_read_super_compat, 0); +#endif + +int fuse_fs_init() +{ + int res; + + res = register_filesystem(&fuse_fs_type); + if(res) + printk("fuse: failed to register filesystem\n"); + + return res; +} + +void fuse_fs_cleanup() +{ + unregister_filesystem(&fuse_fs_type); +} + +/* 
+ * Local Variables: + * indent-tabs-mode: t + * c-basic-offset: 8 + * End: + */ diff -urN a/fs/cofusefs/util.c b/fs/cofusefs/util.c --- a/fs/cofusefs/util.c +++ b/fs/cofusefs/util.c @@ -0,0 +1,78 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2001-2004 Miklos Szeredi <miklos@szeredi.hu> + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. +*/ + +#include "fuse_i.h" + +#include <linux/init.h> +#include <linux/slab.h> + +MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); +MODULE_DESCRIPTION("Filesystem in Userspace"); +#ifdef MODULE_LICENSE +MODULE_LICENSE("GPL"); +#endif + +spinlock_t fuse_lock = SPIN_LOCK_UNLOCKED; + +/* Must be called with the fuse lock held */ +void release_conn(struct fuse_conn *fc) +{ +#ifdef CONFIG_COOPERATIVE + if (cooperative_mode_enabled()) { + cofs_volumes[fc->cofs_unit] = NULL; + kfree(fc); + return; + } +#else + if(fc->sb == NULL && fc->file == NULL) { + kfree(fc); + } +#endif +} + +int __init cofuse_init(void) +{ + int res; + + printk(KERN_DEBUG "cofuse init %s (API version %i.%i)\n", + FUSE_VERSION, + FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); + + res = fuse_fs_init(); + if(res) + goto err; + + res = fuse_dev_init(); + if(res) + goto err_fs_cleanup; + + return 0; + + err_fs_cleanup: + fuse_fs_cleanup(); + err: + return res; +} + +void __exit cofuse_exit(void) +{ + printk(KERN_DEBUG "cofuse exit\n"); + + fuse_fs_cleanup(); + fuse_dev_cleanup(); +} + +module_init(cofuse_init); +module_exit(cofuse_exit); + +/* + * Local Variables: + * indent-tabs-mode: t + * c-basic-offset: 8 + * End: + */ diff -urN a/fs/namespace.c b/fs/namespace.c --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1015,7 +1015,7 @@ /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) flags &= ~MS_MGC_MSK; - + /* Basic sanity checks */ if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) diff -urN a/include/asm-i386/bug.h b/include/asm-i386/bug.h --- a/include/asm-i386/bug.h +++ b/include/asm-i386/bug.h 
@@ -10,11 +10,17 @@ */ #if 1 /* Set to zero for a slightly smaller kernel */ +#ifdef CONFIG_COOPERATIVE +#include <linux/cooperative.h> +extern void co_terminate(co_termination_reason_t reason); +#define BUG() do { co_terminate(CO_TERMINATE_BUG); } while(0) +#else #define BUG() \ __asm__ __volatile__( "ud2\n" \ "\t.word %c0\n" \ "\t.long %c1\n" \ : : "i" (__LINE__), "i" (__FILE__)) +#endif #else #define BUG() __asm__ __volatile__("ud2\n") #endif diff -urN a/include/asm-i386/cooperative.h b/include/asm-i386/cooperative.h --- a/include/asm-i386/cooperative.h +++ b/include/asm-i386/cooperative.h @@ -0,0 +1,184 @@ +/* + * linux/include/asm/cooperative.h + * + * Copyright (C) 2004 Dan Aloni + * + * This file defines the lower level interfaces between the Cooperative Linux + * kernel and the host OS driver. It's for both external inclusion from the + * and internal inclusion in the kernel sources. + */ + +#ifndef __LINUX_ASM_COOPERATIVE_H__ +#define __LINUX_ASM_COOPERATIVE_H__ + +typedef struct { + unsigned short size; + struct x86_idt_entry *table; +} __attribute__((packed)) x86_idt_t; + +typedef struct { + unsigned short limit; + struct x86_dt_entry *base; +} __attribute__((packed)) x86_gdt_t; + +typedef struct { + unsigned char border2[0x4]; + + unsigned long cs; + #define CO_ARCH_STATE_STACK_CS "0x04" + + unsigned long ds; + #define CO_ARCH_STATE_STACK_DS "0x08" + + unsigned long es; + #define CO_ARCH_STATE_STACK_ES "0x0C" + + unsigned long cr3; + #define CO_ARCH_STATE_STACK_CR3 "0x10" + + unsigned long cr4; + #define CO_ARCH_STATE_STACK_CR4 "0x14" + + unsigned long cr2; + #define CO_ARCH_STATE_STACK_CR2 "0x18" + + unsigned long cr0; + #define CO_ARCH_STATE_STACK_CR0 "0x1C" + + x86_gdt_t gdt; + #define CO_ARCH_STATE_STACK_GDT "0x20" + + unsigned long fs; + #define CO_ARCH_STATE_STACK_FS "0x26" + + unsigned long gs; + #define CO_ARCH_STATE_STACK_GS "0x2A" + + unsigned short ldt; + #define CO_ARCH_STATE_STACK_LDT "0x2E" + + x86_idt_t idt; + #define 
CO_ARCH_STATE_STACK_IDT "0x30" + + unsigned short tr; + #define CO_ARCH_STATE_STACK_TR "0x36" + + unsigned long return_eip; + #define CO_ARCH_STATE_STACK_RETURN_EIP "0x38" + + unsigned long flags; + #define CO_ARCH_STATE_STACK_FLAGS "0x3C" + + unsigned long esp; + #define CO_ARCH_STATE_STACK_ESP "0x40" + + unsigned long ss; + #define CO_ARCH_STATE_STACK_SS "0x44" + + unsigned long dr0; + #define CO_ARCH_STATE_STACK_DR0 "0x48" + + unsigned long dr1; + #define CO_ARCH_STATE_STACK_DR1 "0x4C" + + unsigned long dr2; + #define CO_ARCH_STATE_STACK_DR2 "0x50" + + unsigned long dr3; + #define CO_ARCH_STATE_STACK_DR3 "0x54" + + unsigned long dr6; + #define CO_ARCH_STATE_STACK_DR6 "0x58" + + unsigned long dr7; + #define CO_ARCH_STATE_STACK_DR7 "0x5C" + + unsigned long temp_cr3; + #define CO_ARCH_STATE_STACK_TEMP_CR3 "0x60" + + unsigned long relocate_eip; + #define CO_ARCH_STATE_STACK_RELOCATE_EIP "0x64" + + unsigned long pad1; + #define CO_ARCH_STATE_STACK_RELOCATE_EIP_AFTER "0x68" + + unsigned long va; + #define CO_ARCH_STATE_STACK_VA "0x6C" + + unsigned char fxstate[0x200]; + #define CO_ARCH_STATE_STACK_FXSTATE "0x70" +} __attribute__((packed)) co_arch_state_stack_t; + +#define CO_MAX_PARAM_SIZE 0x400 + +typedef struct co_arch_passage_page_normal_address_space { + unsigned long pgd[0x400]; + unsigned long pte[2][0x400]; +} co_arch_passage_page_normal_address_space_t; + +typedef struct co_arch_passage_page_pae_address_space { + unsigned long long main[0x200]; + unsigned long long pgd[2][0x200]; + unsigned long long pte[2][0x200]; +} co_arch_passage_page_pae_address_space_t; + +typedef struct co_arch_passage_page { + union { + struct { + union { + struct { + unsigned long self_physical_address; + unsigned long dr0; + unsigned long dr1; + unsigned long dr2; + unsigned long dr3; + unsigned long dr6; + unsigned long dr7; + unsigned char code[0x230]; + } __attribute__((packed)); + unsigned char pad[0x250]; /* Be careful! 
see NOTE below */ + } __attribute__((packed)); + + /* Machine states */ + + /* + * NOTE: *_state fields must be aligned on a 16-byte boundary since + * the fxsave/fxrstor instructions expect an aligned argument. + */ + + co_arch_state_stack_t host_state; + co_arch_state_stack_t linuxvm_state; + + /* Control parameters */ + unsigned long operation; + unsigned long params[]; + } __attribute__((packed)); + unsigned char first_page[0x1000]; + }; + + /* page tables for passage address spaces */ + co_arch_passage_page_normal_address_space_t guest_normal; + union { + co_arch_passage_page_normal_address_space_t host_normal; + co_arch_passage_page_pae_address_space_t host_pae; + } __attribute__((packed)); +} co_arch_passage_page_t; + +/* + * Address space layout: + */ + +#define CO_VPTR_BASE (0xffc00000) +#define CO_VPTR_PHYSICAL_TO_PSEUDO_PFN_MAP (CO_VPTR_BASE - 0x1000000) +#define CO_VPTR_PSEUDO_RAM_PAGE_TABLES (CO_VPTR_BASE - 0x1100000) +#define CO_VPTR_PASSAGE_PAGE (CO_VPTR_BASE - 0x1101000) +#define CO_VPTR_IO_AREA_SIZE (0x10000) +#define CO_VPTR_IO_AREA_START (CO_VPTR_BASE - 0x1200000) +#define CO_VPTR_SELF_MAP (CO_VPTR_BASE - 0x1400000) + +typedef struct { + unsigned long kernel_cs; + unsigned long kernel_ds; +} __attribute__((packed)) co_arch_info_t; + +#endif diff -urN a/include/asm-i386/cooperative_internal.h b/include/asm-i386/cooperative_internal.h --- a/include/asm-i386/cooperative_internal.h +++ b/include/asm-i386/cooperative_internal.h @@ -0,0 +1,33 @@ +/* + * linux/include/asm/cooperative_internal.h + * + * Copyright (C) 2004 Dan Aloni + */ + +#ifndef __LINUX_ASM_COOPERATIVE_INTERNAL_H__ +#define __LINUX_ASM_COOPERATIVE_INTERNAL_H__ + +#include <linux/config.h> +#include <asm/ptrace.h> + +#ifdef CONFIG_COOPERATIVE + +extern void co_kernel_breakpoint(struct pt_regs * regs); +extern int co_kernel_debug(struct pt_regs * regs, long error_code, unsigned int condition); + +fastcall unsigned int do_IRQ(struct pt_regs *regs); + +#else + +static inline void 
co_kernel_breakpoint(struct pt_regs * regs) +{ +} + +static inline int co_kernel_debug(struct pt_regs * regs, long error_code, unsigned int condition) +{ + return 0; +} + +#endif + +#endif diff -urN a/include/asm-i386/dma.h b/include/asm-i386/dma.h --- a/include/asm-i386/dma.h +++ b/include/asm-i386/dma.h @@ -268,6 +268,7 @@ * * Assumes DMA flip-flop is clear. */ +#ifndef CONFIG_COOPERATIVE static __inline__ int get_dma_residue(unsigned int dmanr) { unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE @@ -281,6 +282,7 @@ return (dmanr<=3)? count : (count<<1); } +#endif /* These are in kernel/dma.c: */ diff -urN a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h --- a/include/asm-i386/fixmap.h +++ b/include/asm-i386/fixmap.h @@ -31,6 +31,7 @@ #include <linux/threads.h> #include <asm/kmap_types.h> #endif +#include <asm/cooperative.h> /* * Here we define all the compile-time 'special' virtual diff -urN a/include/asm-i386/io.h b/include/asm-i386/io.h --- a/include/asm-i386/io.h +++ b/include/asm-i386/io.h @@ -104,12 +104,15 @@ * address. 
*/ -static inline void __iomem * ioremap(unsigned long offset, unsigned long size) +static inline void * __iomem ioremap (unsigned long offset, unsigned long size) { +#ifdef CONFIG_COOPERATIVE + panic("ioremap %lu:%lu\n", offset, size); /* both arguments are unsigned long */ +#endif return __ioremap(offset, size, 0); } -extern void __iomem * ioremap_nocache(unsigned long offset, unsigned long size); +extern void * __iomem ioremap_nocache (unsigned long offset, unsigned long size); extern void iounmap(volatile void __iomem *addr); /* @@ -280,7 +283,7 @@ #endif /* __KERNEL__ */ -#ifdef SLOW_IO_BY_JUMPING +#if defined(SLOW_IO_BY_JUMPING) || defined(CONFIG_COOPERATIVE) /* defined(): still valid when the macro is defined empty */ #define __SLOW_DOWN_IO "jmp 1f; 1: jmp 1f; 1:" #else #define __SLOW_DOWN_IO "outb %%al,$0x80;" diff -urN a/include/asm-i386/mach-default/irq_vectors.h b/include/asm-i386/mach-default/irq_vectors.h --- a/include/asm-i386/mach-default/irq_vectors.h +++ b/include/asm-i386/mach-default/irq_vectors.h @@ -67,6 +67,11 @@ #define TIMER_IRQ 0 +#ifdef CONFIG_COOPERATIVE +#define KEYBOARD_IRQ 1 +#define NETWORK_IRQ 2 +#endif + /* * 16 8259A IRQ's, 208 potential APIC interrupt sources. * Right now the APIC is mostly only used for SMP. 
diff -urN a/include/asm-i386/mach-default/irq_vectors_limits.h b/include/asm-i386/mach-default/irq_vectors_limits.h --- a/include/asm-i386/mach-default/irq_vectors_limits.h +++ b/include/asm-i386/mach-default/irq_vectors_limits.h @@ -5,7 +5,7 @@ #define NR_IRQS FIRST_SYSTEM_VECTOR #define NR_IRQ_VECTORS NR_IRQS #else -#ifdef CONFIG_X86_IO_APIC +#if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_X86_UP_COPIC) #define NR_IRQS 224 # if (224 >= 32 * NR_CPUS) # define NR_IRQ_VECTORS NR_IRQS diff -urN a/include/asm-i386/mc146818rtc.h b/include/asm-i386/mc146818rtc.h --- a/include/asm-i386/mc146818rtc.h +++ b/include/asm-i386/mc146818rtc.h @@ -4,6 +4,7 @@ #ifndef _ASM_MC146818RTC_H #define _ASM_MC146818RTC_H +#include <linux/config.h> #include <asm/io.h> #ifndef RTC_PORT @@ -11,6 +12,8 @@ #define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ #endif +#ifndef CONFIG_COOPERATIVE + /* * The yet supported machines all access the RTC index register via * an ISA port access but the way to access the date register differs ... 
@@ -24,6 +27,11 @@ outb_p((val),RTC_PORT(1)); \ }) +#else +#define CMOS_READ(addr) (0) +#define CMOS_WRITE(val, addr) do {} while(0) +#endif + #define RTC_IRQ 8 #endif /* _ASM_MC146818RTC_H */ diff -urN a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h --- a/include/asm-i386/mmzone.h +++ b/include/asm-i386/mmzone.h @@ -6,7 +6,9 @@ #ifndef _ASM_MMZONE_H_ #define _ASM_MMZONE_H_ +#include <linux/config.h> #include <asm/smp.h> +#include <asm/cooperative.h> #ifdef CONFIG_DISCONTIGMEM @@ -116,7 +118,8 @@ (unsigned long)(__page - __zone->zone_mem_map) \ + __zone->zone_start_pfn; \ }) -#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) + +#define pmd_page(pmd) (pfn_to_page(CO_P_TO_PP(pmd_val(pmd)) >> PAGE_SHIFT)) #ifdef CONFIG_X86_NUMAQ /* we have contiguous memory on NUMA-Q */ #define pfn_valid(pfn) ((pfn) < num_physpages) diff -urN a/include/asm-i386/page.h b/include/asm-i386/page.h --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -13,6 +13,7 @@ #ifndef __ASSEMBLY__ #include <linux/config.h> +#include <asm/cooperative.h> #ifdef CONFIG_X86_USE_3DNOW @@ -126,6 +127,19 @@ #define __PAGE_OFFSET (0xC0000000UL) #endif +#ifdef CONFIG_COOPERATIVE /* macro arguments are parenthesised so expression arguments expand safely */ +#define CO_PA(pfn) (((unsigned long *)CO_VPTR_PSEUDO_RAM_PAGE_TABLES)[pfn]) +#define CO_VA_PFN(pa) (((unsigned long *)CO_VPTR_PHYSICAL_TO_PSEUDO_PFN_MAP)[((pa) >> PAGE_SHIFT)]) +#define CO_PFN_PP_TO_P(pfn) (CO_PA(pfn) >> PAGE_SHIFT) +#define CO_PFN_P_TO_PP(pfn) (CO_VA_PFN((pfn) << PAGE_SHIFT)) +#define CO_PP_TO_P(pa) ((CO_PFN_PP_TO_P((pa) >> PAGE_SHIFT) << PAGE_SHIFT) | ((pa) & ~PAGE_MASK)) +#define CO_P_TO_PP(pa) ((CO_PFN_P_TO_PP((pa) >> PAGE_SHIFT) << PAGE_SHIFT) | ((pa) & ~PAGE_MASK)) +#else +#define CO_PFN_P_TO_PP(pfn) (pfn) +#define CO_PFN_PP_TO_P(pfn) (pfn) +#define CO_PP_TO_P(pa) (pa) +#define CO_P_TO_PP(pa) (pa) +#endif #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) diff -urN a/include/asm-i386/param.h b/include/asm-i386/param.h --- 
a/include/asm-i386/param.h +++ b/include/asm-i386/param.h @@ -2,8 +2,11 @@ #define _ASMi386_PARAM_H #ifdef __KERNEL__ +# include <linux/config.h> +# ifndef CONFIG_COOPERATIVE # define HZ 1000 /* Internal kernel timer frequency */ +# endif /* NOTE(review): cooperative builds do not set HZ here - presumably a later default applies; confirm */ # define USER_HZ 100 /* .. some user interfaces are in "ticks" */ # define CLOCKS_PER_SEC (USER_HZ) /* like times() */ #endif diff -urN a/include/asm-i386/pgalloc.h b/include/asm-i386/pgalloc.h --- a/include/asm-i386/pgalloc.h +++ b/include/asm-i386/pgalloc.h @@ -6,15 +6,16 @@ #include <asm/fixmap.h> #include <linux/threads.h> #include <linux/mm.h> /* for struct page */ +#include <asm/cooperative.h> #define pmd_populate_kernel(mm, pmd, pte) \ - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) + set_pmd(pmd, __pmd(_PAGE_TABLE + CO_PP_TO_P(__pa(pte)))) static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) { set_pmd(pmd, __pmd(_PAGE_TABLE + - ((unsigned long long)page_to_pfn(pte) << - (unsigned long long) PAGE_SHIFT))); + ((CO_PFN_PP_TO_P((unsigned long long)page_to_pfn(pte))) << + (unsigned long long) PAGE_SHIFT))); } /* * Allocate and free page tables. 
diff -urN a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -8,6 +8,9 @@ #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) +#include <linux/config.h> +#include <asm/cooperative.h> + /* * The "pgd_xxx()" functions here are trivial for a folded two-level * setup: the pgd is never bad, and a pmd always exists (as it's folded @@ -33,19 +36,21 @@ #define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) #define pgd_page(pgd) \ -((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) + ((unsigned long) __va(CO_P_TO_PP(pgd_val(pgd)) & PAGE_MASK)) static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) { return (pmd_t *) dir; } + #define ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte_low, 0)) #define pte_same(a, b) ((a).pte_low == (b).pte_low) + #define pte_page(x) pfn_to_page(pte_pfn(x)) +#define pte_pfn(x) CO_PFN_P_TO_PP((unsigned long)(((x).pte_low >> PAGE_SHIFT))) +#define pfn_pte(pfn, prot) __pte((CO_PFN_PP_TO_P(pfn) << PAGE_SHIFT) | pgprot_val(prot)) +#define pfn_pmd(pfn, prot) __pmd((CO_PFN_PP_TO_P(pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define pte_none(x) (!(x).pte_low) -#define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT))) -#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) -#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) /* * All present user pages are user-executable: diff -urN a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -25,6 +25,8 @@ #include <linux/list.h> #include <linux/spinlock.h> +#include <asm/cooperative.h> + /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. 
@@ -294,10 +296,10 @@
 #define page_pte(page) page_pte_prot(page, __pgprot(0))
 
 #define pmd_page_kernel(pmd) \
-((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+((unsigned long) __va(CO_P_TO_PP(pmd_val(pmd)) & PAGE_MASK))
 
 #ifndef CONFIG_DISCONTIGMEM
-#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
+#define pmd_page(pmd) (pfn_to_page(CO_PFN_P_TO_PP(pmd_val(pmd) >> PAGE_SHIFT)))
 #endif /* !CONFIG_DISCONTIGMEM */
 
 #define pmd_large(pmd) \
# processor.h: load_cr3() now loads CO_PP_TO_P(__pa(pgdir)) into %cr3.
diff -urN a/include/asm-i386/processor.h b/include/asm-i386/processor.h
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -182,8 +182,7 @@
 }
 
 #define load_cr3(pgdir) \
-	asm volatile("movl %0,%%cr3": :"r" (__pa(pgdir)))
-
+	asm volatile("movl %0,%%cr3": :"r" (CO_PP_TO_P(__pa(pgdir))))
 
 /*
  * Intel CPU features in CR4
# timer.h: timer_opts.get_offset() changes its return type to long, and
# timer_cooperative_init is declared when CONFIG_COOPERATIVE is set.
diff -urN a/include/asm-i386/timer.h b/include/asm-i386/timer.h
--- a/include/asm-i386/timer.h
+++ b/include/asm-i386/timer.h
@@ -19,7 +19,7 @@
 struct timer_opts {
 	char* name;
 	void (*mark_offset)(void);
-	unsigned long (*get_offset)(void);
+	long (*get_offset)(void);
 	unsigned long long (*monotonic_clock)(void);
 	void (*delay)(unsigned long);
 };
@@ -50,6 +50,9 @@
 #ifdef CONFIG_X86_CYCLONE_TIMER
 extern struct init_timer_opts timer_cyclone_init;
 #endif
+#ifdef CONFIG_COOPERATIVE
+extern struct init_timer_opts timer_cooperative_init;
+#endif
 
 extern unsigned long calibrate_tsc(void);
 extern void init_cpu_khz(void);
# console.h: declares the colinux_con console switch.
diff -urN a/include/linux/console.h b/include/linux/console.h
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -61,6 +61,7 @@
 extern const struct consw dummy_con;	/* dummy console buffer */
 extern const struct consw fb_con;	/* frame buffer based console */
 extern const struct consw vga_con;	/* VGA text console */
+extern const struct consw colinux_con;	/* coLinux Mode text console */
 extern const struct consw newport_con;	/* SGI Newport console */
 extern const struct consw prom_con;	/* SPARC PROM console */
 
# cooperative.h (new file): operation codes, module/device ids, message layouts
# and the passage-page switch macros shared between the guest kernel and the
# host OS driver.  co_passage_page_func_low() casts its state arguments to
# unsigned char * so they match the co_switcher_t prototype.
diff -urN a/include/linux/cooperative.h b/include/linux/cooperative.h
--- a/include/linux/cooperative.h
+++ b/include/linux/cooperative.h
@@ -0,0 +1,322 @@
+/*
+ *  linux/include/linux/cooperative.h
+ *
+ *  Copyright (C) 2004 Dan Aloni
+ *
+ *  This file defines the interfaces between the Cooperative Linux kernel
+ *  and the host OS driver. It's for both external inclusion from the
+ *  host OS driver and internal inclusion in the kernel sources.
+ */
+
+#ifndef __LINUX_COOPERATIVE_H__
+#define __LINUX_COOPERATIVE_H__
+
+#ifdef __KERNEL__
+#ifndef CO_KERNEL
+#define CO_COLINUX_KERNEL
+#define CO_KERNEL
+#endif
+#endif
+
+#include <asm/cooperative.h>
+
+#define CO_LINUX_API_VERSION    8
+
+#pragma pack(0)
+
+#define CO_BOOTPARAM_STRING_LENGTH 0x100
+
+typedef enum {
+	CO_OPERATION_EMPTY=0,
+	CO_OPERATION_START,
+	CO_OPERATION_IDLE,
+	CO_OPERATION_TERMINATE,
+	CO_OPERATION_MESSAGE_TO_MONITOR,
+	CO_OPERATION_MESSAGE_FROM_MONITOR,
+	CO_OPERATION_FORWARD_INTERRUPT,
+	CO_OPERATION_DEVICE,
+	CO_OPERATION_GET_TIME,
+	CO_OPERATION_DEBUG_LINE,
+	CO_OPERATION_GET_HIGH_PREC_TIME,
+	CO_OPERATION_TRACE_POINT,
+	CO_OPERATION_FREE_PAGES,
+	CO_OPERATION_ALLOC_PAGES,
+} co_operation_t;
+
+#define CO_MODULE_MAX_CONET  16
+#define CO_MODULE_MAX_COBD   32
+#define CO_MODULE_MAX_COFS   32
+#define CO_MODULE_MAX_SERIAL 64
+
+typedef enum {
+	CO_MODULE_LINUX,
+	CO_MODULE_MONITOR,
+	CO_MODULE_DAEMON,
+	CO_MODULE_IDLE,
+	CO_MODULE_KERNEL_SWITCH,
+	CO_MODULE_USER_SWITCH,
+	CO_MODULE_CONSOLE,
+	CO_MODULE_PRINTK,
+
+	CO_MODULE_CONET0,
+	CO_MODULE_CONET_END=CO_MODULE_CONET0+CO_MODULE_MAX_CONET-1,
+
+	CO_MODULE_COBD0,
+	CO_MODULE_COBD_END=CO_MODULE_COBD0+CO_MODULE_MAX_COBD-1,
+
+	CO_MODULE_COFS0,
+	CO_MODULE_COFS_END=CO_MODULE_COFS0+CO_MODULE_MAX_COFS-1,
+
+	CO_MODULE_SERIAL0,
+	CO_MODULE_SERIAL_END=CO_MODULE_SERIAL0+CO_MODULE_MAX_SERIAL-1,
+} co_module_t;
+
+typedef enum {
+	CO_PRIORITY_DISCARDABLE=0,
+	CO_PRIORITY_IMPORTANT,
+} co_priority_t;
+
+typedef enum {
+	CO_MESSAGE_TYPE_STRING=0,
+	CO_MESSAGE_TYPE_OTHER=1,
+} co_message_type_t;
+
+typedef struct {
+	co_module_t from;
+	co_module_t to;
+	co_priority_t priority;
+	co_message_type_t type;
+	unsigned long size;
+	char data[0];
+} __attribute__((packed)) co_message_t;
+
+typedef enum {
+	CO_DEVICE_BLOCK=0,
+	CO_DEVICE_CONSOLE,
+	CO_DEVICE_KEYBOARD,
+	CO_DEVICE_NETWORK,
+	CO_DEVICE_TIMER,
+	CO_DEVICE_POWER,
+	CO_DEVICE_SERIAL,
+	CO_DEVICE_FILESYSTEM,
+
+	CO_DEVICES_TOTAL,
+} co_device_t;
+
+typedef struct {
+	unsigned char code;
+	int down;
+} co_scan_code_t;
+
+typedef enum {
+	CO_LINUX_MESSAGE_POWER_ALT_CTRL_DEL=0,
+} co_linux_message_power_type_t;
+
+typedef struct {
+	co_linux_message_power_type_t type;
+} __attribute__((packed)) co_linux_message_power_t;
+
+typedef struct {
+	unsigned long tick_count;
+} __attribute__((packed)) co_linux_message_idle_t;
+
+typedef struct {
+	co_device_t device;
+	unsigned long unit;
+	unsigned long size;
+	char data[];
+} __attribute__((packed)) co_linux_message_t;
+
+typedef enum {
+	CO_TERMINATE_END=0,
+	CO_TERMINATE_REBOOT,
+	CO_TERMINATE_POWEROFF,
+	CO_TERMINATE_PANIC,
+	CO_TERMINATE_HALT,
+	CO_TERMINATE_FORCED_OFF,
+	CO_TERMINATE_FORCED_END,
+	CO_TERMINATE_INVALID_OPERATION,
+	CO_TERMINATE_STACK_OVERFLOW,
+	CO_TERMINATE_BUG,
+} co_termination_reason_t;
+
+typedef void (*co_switcher_t)(co_arch_passage_page_t *page,
+			      unsigned char *from,
+			      unsigned char *to);
+
+#define co_passage_page_func_low(_from_,_to_)	\
+	(((co_switcher_t)(co_passage_page->code))	\
+	 (co_passage_page,				\
+	  (unsigned char *)&_from_.border2,		\
+	  (unsigned char *)&_to_.border2))
+
+#define co_passage_page_func(_from_,_to_)	\
+	co_passage_page_func_low(co_passage_page->_from_, co_passage_page->_to_)
+
+#ifdef CO_KERNEL
+# ifdef CO_COLINUX_KERNEL
+#  define co_passage_page  ((co_arch_passage_page_t *)(CO_VPTR_PASSAGE_PAGE))
+#  define co_current       (co_passage_page->linuxvm_state)
+#  define co_other         (co_passage_page->host_state)
+# else
+#  define co_passage_page  (cmon->passage_page)
+#  define co_other         (co_passage_page->linuxvm_state)
+#  define co_current       (co_passage_page->host_state)
+# endif
+
+# define co_switch() co_passage_page_func_low(co_current, co_other)
+#endif
+
+/*
+ * Defines operations on various virtual devices.
+ */
+
+typedef enum {
+	CO_OPERATION_CONSOLE_STARTUP=0,
+	CO_OPERATION_CONSOLE_INIT=1,
+	CO_OPERATION_CONSOLE_DEINIT,
+	CO_OPERATION_CONSOLE_CLEAR,
+	CO_OPERATION_CONSOLE_PUTC,
+	CO_OPERATION_CONSOLE_PUTCS,
+	CO_OPERATION_CONSOLE_CURSOR_DRAW,
+	CO_OPERATION_CONSOLE_CURSOR_ERASE,
+	CO_OPERATION_CONSOLE_CURSOR_MOVE,
+	CO_OPERATION_CONSOLE_SCROLL_UP,
+	CO_OPERATION_CONSOLE_SCROLL_DOWN,
+	CO_OPERATION_CONSOLE_BMOVE,
+	CO_OPERATION_CONSOLE_SWITCH,
+	CO_OPERATION_CONSOLE_BLANK,
+	CO_OPERATION_CONSOLE_FONT_OP,
+	CO_OPERATION_CONSOLE_SET_PALETTE,
+	CO_OPERATION_CONSOLE_SCROLLDELTA,
+	CO_OPERATION_CONSOLE_SET_ORIGIN,
+	CO_OPERATION_CONSOLE_SAVE_SCREEN,
+	CO_OPERATION_CONSOLE_INVERT_REGION,
+} co_operation_console_t;
+
+
+typedef char co_console_code;
+typedef unsigned short co_console_character;
+typedef unsigned short co_console_unit;
+
+typedef struct {
+	co_console_unit x;
+	co_console_unit y;
+	co_console_unit height;
+} __attribute__((packed)) co_cursor_pos_t;
+
+typedef struct {
+	co_operation_console_t type;
+	union {
+		struct {
+			co_console_unit top;
+			co_console_unit bottom;
+			co_console_unit lines;
+		} scroll;
+		struct {
+			co_console_unit y;
+			co_console_unit x;
+			co_console_unit count;
+			co_console_character data[];
+		} putcs;
+		struct {
+			co_console_unit x;
+			co_console_unit y;
+			co_console_character charattr;
+		} putc;
+		struct {
+			co_console_unit top;
+			co_console_unit left;
+			co_console_unit bottom;
+			co_console_unit right;
+			co_console_character charattr;
+		} clear;
+		struct {
+			co_console_unit y;
+			co_console_unit x;
+			co_console_unit count;
+		} invert;
+		struct {
+			co_console_unit row;
+			co_console_unit column;
+			co_console_unit top;
+			co_console_unit left;
+			co_console_unit bottom;
+			co_console_unit right;
+		} bmove;
+		co_cursor_pos_t cursor;
+	};
+} __attribute__((packed)) co_console_message_t;
+
+typedef struct {
+	unsigned long index;
+	unsigned long flags;
+	unsigned long func;
+	unsigned long pid;
+} __attribute__((packed)) co_trace_point_info_t;
+
+typedef enum {
+	CO_BLOCK_OPEN=0,
+	CO_BLOCK_STAT,
+	CO_BLOCK_READ,
+	CO_BLOCK_WRITE,
+	CO_BLOCK_CLOSE,
+	CO_BLOCK_GET_ALIAS,
+} co_block_request_type_t;
+
+typedef enum {
+	CO_NETWORK_GET_MAC=0,
+} co_network_request_type_t;
+
+#ifdef CO_KERNEL
+/* If we are compiling kernel code (Linux or Host Driver) */
+# ifdef CO_COLINUX_KERNEL
+/* Inside Linux, vm_ptr_t considered a valid pointer in its virtual address space */
+typedef void *vm_ptr_t;
+# else
+/* But inside the host, the type is considered not to be a pointer in its own address space */
+typedef unsigned long vm_ptr_t;
+# endif
+
+typedef struct {
+	co_block_request_type_t type;
+	long rc;
+	union {
+		struct {
+			unsigned long long offset;
+			unsigned long long size;
+			unsigned long long disk_size;
+			vm_ptr_t address;
+		};
+		struct {
+			char alias[20];
+		};
+	};
+} __attribute__((packed)) co_block_request_t;
+
+typedef struct {
+	co_network_request_type_t type;
+	unsigned long unit;
+	char mac_address[6];
+	char _pad[2];
+	int result;
+} __attribute__((packed)) co_network_request_t;
+
+#endif
+
+typedef struct {
+	unsigned long api_version;
+	unsigned long compiler_major;
+	unsigned long compiler_minor;
+} __attribute__((packed)) co_info_t;
+
+#ifndef COLINUX_TRACE
+#define CO_TRACE_STOP
+#define CO_TRACE_CONTINUE
+#endif
+
+#pragma pack()
+
+#include "cooperative_fs.h"
+
+#endif
# cooperative_fs.h (new file): FUSE-derived request/reply structures; it is
# included from the end of cooperative.h above.
diff -urN a/include/linux/cooperative_fs.h b/include/linux/cooperative_fs.h
--- a/include/linux/cooperative_fs.h
+++ b/include/linux/cooperative_fs.h
@@ -0,0 +1,267 @@
+/*
+    FUSE: Filesystem in Userspace
+    Copyright (C) 2001-2004  Miklos Szeredi <miklos@szeredi.hu>
+
+    This program can be distributed under the terms of the GNU GPL.
+    See the file COPYING.
+*/
+
+/* This file defines the kernel interface of FUSE */
+
+#pragma pack(0)
+
+/** Version number of this interface */
+#define FUSE_KERNEL_VERSION 2
+
+/** Minor version number of this interface */
+#define FUSE_KERNEL_MINOR_VERSION 2
+
+/** The inode number of the root inode */
+#define FUSE_ROOT_INO 1
+
+/** Data passed to mount */
+struct fuse_mount_data {
+	/** The file type of the root inode */
+	unsigned int rootmode;
+
+	/** The user ID of the user initiating this mount */
+	unsigned int uid;
+
+	/** FUSE specific mount flags */
+	unsigned int flags;
+};
+
+/* FUSE mount flags: */
+
+/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
+module will check permissions based on the file mode.  Otherwise no
+permission checking is done in the kernel */
+#define FUSE_DEFAULT_PERMISSIONS (1 << 0)
+
+/** If the FUSE_ALLOW_OTHER flag is given, then not only the user
+    doing the mount will be allowed to access the filesystem */
+#define FUSE_ALLOW_OTHER         (1 << 1)
+
+/** If the FUSE_KERNEL_CACHE flag is given, then files will be cached
+    until the INVALIDATE operation is invoked */
+#define FUSE_KERNEL_CACHE        (1 << 2)
+
+/** Allow FUSE to combine reads into 64k chunks.  This is useful if
+    the filesystem is better at handling large chunks.  NOTE: in
+    current implementation the raw throughput is worse for large reads
+    than for small. */
+#define FUSE_LARGE_READ          (1 << 3)
+
+struct fuse_attr {
+	unsigned long long  size;
+	unsigned int        mode;
+	unsigned int        nlink;
+	unsigned int        uid;
+	unsigned int        gid;
+	unsigned int        rdev;
+	unsigned long       _dummy;
+	unsigned long       blocks;
+	unsigned long       atime;
+	unsigned long       mtime;
+	unsigned long       ctime;
+};
+
+struct fuse_kstatfs {
+	long block_size;
+	long blocks;
+	long blocks_free;
+	long files;
+	long files_free;
+	long namelen;
+};
+
+#define FATTR_MODE	(1 << 0)
+#define FATTR_UID	(1 << 1)
+#define FATTR_GID	(1 << 2)
+#define FATTR_SIZE	(1 << 3)
+#define FATTR_UTIME	(1 << 4)
+
+enum fuse_opcode {
+	FUSE_LOOKUP	= 1,
+	FUSE_FORGET	= 2,  /* no reply */
+	FUSE_GETATTR	= 3,
+	FUSE_SETATTR	= 4,
+	FUSE_READLINK	= 5,
+	FUSE_SYMLINK	= 6,
+	FUSE_GETDIR	= 7,
+	FUSE_MKNOD	= 8,
+	FUSE_MKDIR	= 9,
+	FUSE_UNLINK	= 10,
+	FUSE_RMDIR	= 11,
+	FUSE_RENAME	= 12,
+	FUSE_LINK	= 13,
+	FUSE_OPEN	= 14,
+	FUSE_READ	= 15,
+	FUSE_WRITE	= 16,
+	FUSE_STATFS	= 17,
+	FUSE_RELEASE	= 18, /* no reply */
+	FUSE_INVALIDATE	= 19, /* user initiated */
+	FUSE_FSYNC	= 20,
+	FUSE_RELEASE2	= 21,  /* reply needed after all */
+
+	/* Cooperative Linux does things a little differently: */
+	FUSE_DIR_OPEN	= 22,
+	FUSE_DIR_READ	= 23,
+	FUSE_DIR_RELEASE = 24,
+
+	FUSE_MOUNT	= 25,
+};
+
+/* Conservative buffer size for the client */
+#define FUSE_MAX_IN 8192
+
+#define FUSE_NAME_MAX 1024
+#define FUSE_SYMLINK_MAX 4096
+
+struct fuse_lookup_out {
+	struct fuse_attr attr;
+	unsigned long ino;
+};
+
+struct fuse_forget_in {
+	int version;
+};
+
+struct fuse_getattr_out {
+	struct fuse_attr attr;
+};
+
+struct fuse_getdir_out {
+	int fd;
+	void *file; /* Used by kernel only */
+};
+
+/* FIXME: 2.6 needs 32 bit rdev */
+struct fuse_mknod_in {
+	unsigned short mode;
+	unsigned short rdev;
+};
+
+struct fuse_mknod_out {
+	struct fuse_attr attr;
+	unsigned long ino;
+};
+
+struct fuse_mkdir_in {
+	unsigned short mode;
+};
+
+struct fuse_rename_in {
+	unsigned long newdir;
+};
+
+struct fuse_link_in {
+	unsigned long newdir;
+};
+
+struct fuse_setattr_in {
+	struct fuse_attr attr;
+	unsigned int valid;
+};
+
+struct fuse_setattr_out {
+	struct fuse_attr attr;
+};
+
+struct fuse_open_in {
+	unsigned int flags;
+};
+
+struct fuse_read_in {
+	unsigned long long offset;
+	unsigned int size;
+};
+
+struct fuse_write_in {
+	unsigned long long offset;
+	unsigned int size;
+};
+
+struct fuse_statfs_out {
+	struct fuse_kstatfs st;
+};
+
+struct fuse_fsync_in {
+	int datasync;
+};
+
+struct fuse_in_header {
+	int unique;
+	enum fuse_opcode opcode;
+	unsigned long ino;
+	unsigned int uid;
+	unsigned int gid;
+};
+
+struct fuse_out_header {
+	int unique;
+	int error;
+};
+
+struct fuse_user_header {
+	int unique; /* zero */
+	enum fuse_opcode opcode;
+	unsigned long ino;
+};
+
+struct fuse_dirent {
+	unsigned long ino;
+	unsigned short namelen;
+	unsigned char type;
+	char name[256];
+};
+
+#define FUSE_S_IFMT   00170000
+#define FUSE_S_IFSOCK 0140000
+#define FUSE_S_IFLNK  0120000
+#define FUSE_S_IFREG  0100000
+#define FUSE_S_IFBLK  0060000
+#define FUSE_S_IFDIR  0040000
+#define FUSE_S_IFCHR  0020000
+#define FUSE_S_IFIFO  0010000
+#define FUSE_S_ISUID  0004000
+#define FUSE_S_ISGID  0002000
+#define FUSE_S_ISVTX  0001000
+
+#define FUSE_S_IRWXU 00700
+#define FUSE_S_IRUSR 00400
+#define FUSE_S_IWUSR 00200
+#define FUSE_S_IXUSR 00100
+
+#define FUSE_S_IRWXG 00070
+#define FUSE_S_IRGRP 00040
+#define FUSE_S_IWGRP 00020
+#define FUSE_S_IXGRP 00010
+
+#define FUSE_S_IRWXO 00007
+#define FUSE_S_IROTH 00004
+#define FUSE_S_IWOTH 00002
+#define FUSE_S_IXOTH 00001
+
+#define FUSE_DT_UNKNOWN	0
+#define FUSE_DT_FIFO	1
+#define FUSE_DT_CHR	2
+#define FUSE_DT_DIR	4
+#define FUSE_DT_BLK	6
+#define FUSE_DT_REG	8
+#define FUSE_DT_LNK	10
+#define FUSE_DT_SOCK	12
+#define FUSE_DT_WHT	14
+
+#define FUSE_NAME_OFFSET ((unsigned int) ((struct fuse_dirent *) 0)->name) /* byte offset of name[] in struct fuse_dirent */
+#define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(long) - 1) & ~(sizeof(long) - 1)) /* round x up to a multiple of sizeof(long) */
+#define FUSE_DIRENT_SIZE(d) \
+	FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+#pragma pack()
+
+/*
+ * Local Variables:
+ * indent-tabs-mode: t
+ * c-basic-offset: 8
+ * End:
+ */
# cooperative_internal.h (new file): guest-kernel-only declarations; when
# CONFIG_COOPERATIVE is not set it supplies no-op co_printk()/co_terminate()
# macros and cooperative_mode_enabled() evaluates to 0.
diff -urN a/include/linux/cooperative_internal.h b/include/linux/cooperative_internal.h
--- a/include/linux/cooperative_internal.h
+++ b/include/linux/cooperative_internal.h
@@ -0,0 +1,80 @@
+/*
+ *  linux/include/linux/cooperative_internal.h
+ *
+ *  Copyright (C) 2004 Dan Aloni
+ *
+ *  This header gathers the functions and variables in Cooperative Mode
+ *  when CONFIG_COOPERATIVE is defined.
+ */
+#ifndef __LINUX_COOPERATIVE_LINUX_H__
+#define __LINUX_COOPERATIVE_LINUX_H__
+
+#include <linux/config.h>
+#include <linux/cooperative.h>
+#include <linux/list.h>
+
+#ifdef CONFIG_COOPERATIVE
+
+typedef struct {
+	struct list_head node;
+	co_message_t msg;
+} co_message_node_t;
+
+extern void co_debug(const char *fmt, ...);
+extern void co_printk(const char *line);
+
+extern void co_callback(unsigned long flags);
+extern void co_switch_wrapper(void);
+extern void co_idle_processor(void);
+extern void co_terminate(co_termination_reason_t reason);
+extern void co_free_pages(unsigned long vaddr, int order);
+extern int co_alloc_pages(unsigned long vaddr, int order);
+extern void co_start_kernel(void);
+extern void co_arch_start_kernel(void);
+extern void co_handle_jiffies(long count);
+
+extern void co_send_message(co_module_t from,
+			    co_module_t to,
+			    co_priority_t priority,
+			    co_message_type_t type,
+			    unsigned long size,
+			    const char *data);
+extern unsigned long co_get_host_time(void);
+extern co_message_t *co_send_message_save(unsigned long *flags);
+extern co_message_t *co_get_message_save(unsigned long *flags);
+extern void co_send_message_restore(unsigned long flags);
+
+extern void cocd_interrupt(void);
+
+extern void co_handle_incoming_messages(void);
+extern void co_handle_incoming_message(co_message_node_t *message);
+extern void co_queue_incoming_message(co_message_node_t *message);
+extern int co_get_message(co_message_node_t **message, co_device_t
device);
+extern void co_free_message(co_message_node_t *message);
+
+extern int co_passage_page_held(void);
+extern void co_passage_page_acquire(unsigned long *flags);
+extern void co_passage_page_release(unsigned long flags);
+
+#define co_passage_page_assert_valid() do { /* BUG() if the passage page is already held */ \
+	if (co_passage_page_held())	\
+		BUG();			\
+} while (0)
+
+extern char co_boot_parameters[CO_BOOTPARAM_STRING_LENGTH];
+extern unsigned long co_core_end;
+extern unsigned long co_memory_size;
+extern void *co_initrd;
+extern unsigned long co_initrd_size;
+
+#define cooperative_mode_enabled() 1
+
+#else
+
+#define co_printk(line) do {} while (0)
+#define co_terminate(reason) do {} while (0)
+#define cooperative_mode_enabled() 0
+
+#endif
+
+#endif
# major.h: defines COLINUX_MAJOR as 117.
diff -urN a/include/linux/major.h b/include/linux/major.h
--- a/include/linux/major.h
+++ b/include/linux/major.h
@@ -130,6 +130,7 @@
 #define VIOCD_MAJOR		113
 
 #define ATARAID_MAJOR		114
+#define COLINUX_MAJOR		117
 
 #define SCSI_DISK8_MAJOR	128
 #define SCSI_DISK9_MAJOR	129
# init/do_mounts.c: whitespace-only hunk (adds one blank line).
diff -urN a/init/do_mounts.c b/init/do_mounts.c
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -175,6 +175,7 @@
 	for (p = s; *p; p++)
 		if (*p == '/')
 			*p = '!';
+
 	res = try_name(s, 0);
 	if (res)
 		goto done;
# init/main.c: whitespace-only hunk (adds one blank line).
diff -urN a/init/main.c b/init/main.c
--- a/init/main.c
+++ b/init/main.c
@@ -537,6 +537,7 @@
 		panic(panic_later, panic_param);
 	profile_init();
 	local_irq_enable();
+
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (initrd_start && !initrd_below_start_ok &&
 			initrd_start < min_low_pfn << PAGE_SHIFT) {
# kernel/Makefile: builds cooperative.o when CONFIG_COOPERATIVE is set.
diff -urN a/kernel/Makefile b/kernel/Makefile
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -26,6 +26,7 @@
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_SYSFS) += ksysfs.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
+obj-$(CONFIG_COOPERATIVE) += cooperative.o
 
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
# kernel/cooperative.c (new file): passage-page operations and message queues.
diff -urN a/kernel/cooperative.c b/kernel/cooperative.c
--- a/kernel/cooperative.c
+++ b/kernel/cooperative.c
@@ -0,0 +1,354 @@
+/*
+ *
linux/kernel/cooperative.c
+ *
+ *  Cooperative mode (coLinux) support routines.
+ *
+ *  Dan Aloni <da-x@colinux.org>, 2003-2004 (C).
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/cooperative_internal.h>
+
+CO_TRACE_STOP;
+
+void start_kernel(void);
+extern char _kernel_start, _end;
+
+
+unsigned long co_core_end = 0;
+unsigned long co_memory_size = 0;
+void *co_initrd = NULL;
+unsigned long co_initrd_size = 0;
+char co_boot_parameters[CO_BOOTPARAM_STRING_LENGTH];
+
+
+typedef struct {
+	struct list_head list;
+	int num_messages;
+} co_message_queue_t;
+
+int co_messages_active = 0;
+co_message_queue_t co_outgoing_messages;
+co_message_queue_t co_incoming_messages;
+co_message_queue_t *co_incoming_queued_messages;
+
+void co_start_kernel(void)
+{
+	co_core_end = co_passage_page->params[0];
+	co_memory_size = co_passage_page->params[1];
+	co_initrd = (void *)co_passage_page->params[2];
+	co_initrd_size = co_passage_page->params[3];
+
+	memcpy(co_boot_parameters, &co_passage_page->params[10],
+	       sizeof(co_boot_parameters));
+
+	co_arch_start_kernel();
+
+	/* should never be reached */
+	co_terminate(CO_TERMINATE_END);
+}
+
+co_message_t *co_send_message_save(unsigned long *flags)
+{
+	co_passage_page_assert_valid();
+	co_passage_page_acquire(flags);
+	co_passage_page->operation = CO_OPERATION_MESSAGE_TO_MONITOR;
+	return ((co_message_t *)CO_VPTR_IO_AREA_START);
+}
+
+void co_send_message_restore(unsigned long flags)
+{
+	co_switch_wrapper();
+	co_passage_page_release(flags);
+}
+
+void co_send_message_s(co_message_t *message, const char *data)
+{
+	if (message->size > CO_VPTR_IO_AREA_SIZE - sizeof(co_message_t)) /* cannot wrap, unlike header + size */
+		return;
+
+	if (co_passage_page_held())
+		return;
+
+	unsigned long flags;
+	co_message_t *buffer = ((co_message_t *)CO_VPTR_IO_AREA_START);
+
+	co_passage_page_acquire(&flags);
+	co_passage_page->operation = CO_OPERATION_MESSAGE_TO_MONITOR;
+	*buffer = *message;
+	memcpy(buffer->data, data, message->size);
+	co_switch_wrapper();
+	co_passage_page_release(flags);
+}
+
+void co_send_message(co_module_t from,
+		     co_module_t to,
+		     co_priority_t priority,
+		     co_message_type_t type,
+		     unsigned long size,
+		     const char *data)
+{
+	co_message_t params;
+
+	params.from = from;
+	params.to = to;
+	params.priority = priority;
+	params.type = type;
+	params.size = size;
+
+	co_send_message_s(&params, data);
+}
+
+static void co_message_add_to_incoming(co_message_t *message, unsigned long size)
+{
+	co_message_node_t *message_copy;
+
+	message_copy = kmalloc(size + sizeof(co_message_node_t) - sizeof(co_message_t),
+			       GFP_ATOMIC);
+	if (!message_copy)
+		return;
+
+	memcpy(&message_copy->msg, message, size);
+	list_add_tail(&message_copy->node, &co_incoming_messages.list);
+}
+
+void co_callback(unsigned long flags)
+{
+	if (co_passage_page->operation != CO_OPERATION_MESSAGE_FROM_MONITOR) {
+		co_passage_page_release(flags);
+		return;
+	}
+
+	long io_size = co_passage_page->params[0];
+	unsigned long new_jiffies = co_passage_page->params[1];
+
+	if (co_messages_active && io_size > 0) {
+		unsigned char *io_buffer = (unsigned char *)CO_VPTR_IO_AREA_START;
+		unsigned char *io_buffer_end = io_buffer + io_size;
+		if (!(io_size > CO_VPTR_IO_AREA_SIZE)) {
+			while (io_buffer < io_buffer_end) {
+				co_message_t *message = (co_message_t *)io_buffer;
+				unsigned long size = message->size + sizeof(*message); /* NOTE(review): not checked against io_buffer_end */
+
+				co_message_add_to_incoming(message, size);
+				io_buffer += size;
+			}
+		}
+	}
+
+	co_passage_page_release(flags);
+
+	co_handle_jiffies(new_jiffies);
+	co_handle_incoming_messages();
+}
+
+void co_handle_incoming_messages(void)
+{
+	if (!co_messages_active)
+		return;
+
+	if (list_empty(&co_incoming_messages.list))
+		return;
+
+	for (;;) {
+		unsigned long flags;
+		co_message_node_t *message = NULL;
+
+		/*
+		 * Pop a message from the incoming queue.
+		 */
+		local_irq_save(flags);
+		if (!list_empty(&co_incoming_messages.list)) {
+			message = list_entry(co_incoming_messages.list.next,
+					     co_message_node_t, node);
+			list_del(&message->node);
+		}
+		local_irq_restore(flags);
+
+		if (!message)
+			break;
+
+		/*
+		 * Let the interrupt routine of the arch dependent code
+		 * handle the message, and be responsible to free it.
+		 */
+		co_handle_incoming_message(message);
+	}
+}
+
+void co_idle_processor(void)
+{
+	unsigned long flags;
+
+	co_passage_page_assert_valid();
+
+	co_passage_page_acquire(&flags);
+	co_passage_page->operation = CO_OPERATION_IDLE;
+	co_switch_wrapper();
+	co_callback(flags);
+}
+
+void co_printk(const char *line)
+{
+	co_send_message(CO_MODULE_LINUX,
+			CO_MODULE_PRINTK,
+			CO_PRIORITY_DISCARDABLE,
+			CO_MESSAGE_TYPE_STRING,
+			strlen(line)+1,
+			line);
+}
+
+void co_debug_line(char *line)
+{
+}
+
+void co_terminate(co_termination_reason_t reason)
+{
+	unsigned long flags;
+
+	co_passage_page_acquire(&flags);
+	co_passage_page->operation = CO_OPERATION_TERMINATE;
+	co_passage_page->params[0] = reason;
+	co_switch_wrapper();
+	/* This doesn't really return. This code shouldn't be running. */
+	co_passage_page_release(flags);
+}
+
+unsigned long co_get_host_time(void)
+{
+	unsigned long flags;
+	unsigned long time;
+
+	co_passage_page_assert_valid();
+
+	co_passage_page_acquire(&flags);
+	co_passage_page->operation = CO_OPERATION_GET_TIME;
+	co_switch_wrapper();
+	time = co_passage_page->params[0];
+	co_passage_page_release(flags);
+
+	return time;
+}
+
+void co_queue_incoming_message(co_message_node_t *node_message)
+{
+	if (!co_messages_active)
+		return;
+
+	co_linux_message_t *message = (co_linux_message_t *)&node_message->msg.data;
+	if (message->device < 0 || (message->device >= CO_DEVICES_TOTAL))
+		return;	/* NOTE(review): node_message is neither queued nor freed on this path */
+
+	co_message_queue_t *queue;
+	queue = &co_incoming_queued_messages[message->device];
+
+	/* Add at the head; co_get_message() removes from the tail (FIFO) */
+	unsigned long flags;
+	local_irq_save(flags);
+	list_add(&node_message->node, &queue->list);
+	queue->num_messages++;
+	local_irq_restore(flags);
+}
+
+int co_get_message(co_message_node_t **message, co_device_t device)
+{
+	co_message_queue_t *queue;
+	co_message_node_t *node;
+	unsigned long flags;
+
+	if (!co_messages_active)
+		return 0;
+
+	local_irq_save(flags);
+	queue = &co_incoming_queued_messages[device]; /* NOTE(review): device is not range-checked here */
+	if (list_empty(&queue->list)) {
+		local_irq_restore(flags);
+		return 0;
+	}
+
+	node = list_entry(queue->list.prev, co_message_node_t, node);
+	list_del(&node->node);
+	queue->num_messages--;
+	local_irq_restore(flags);
+
+	*message = node;
+	return 1;
+}
+
+void co_free_message(co_message_node_t *message)
+{
+	kfree(message);
+}
+
+co_info_t co_info = {
+	.api_version = CO_LINUX_API_VERSION,
+	.compiler_major = __GNUC__,
+	.compiler_minor = __GNUC_MINOR__,
+};
+
+static int __init initcall_message_queues(void)
+{
+	int queue_index;
+
+	INIT_LIST_HEAD(&co_outgoing_messages.list);
+	INIT_LIST_HEAD(&co_incoming_messages.list);
+
+	co_incoming_queued_messages = kmalloc(sizeof(co_message_queue_t) * CO_DEVICES_TOTAL,
+					      GFP_KERNEL);
+	if (!co_incoming_queued_messages)
+		panic("unable to allocate message queues\n");
+
+	for (queue_index=0; queue_index < CO_DEVICES_TOTAL; queue_index++) {
+		co_message_queue_t *queue = &co_incoming_queued_messages[queue_index];
+		queue->num_messages = 0;
+		INIT_LIST_HEAD(&queue->list);
+	}
+
+	co_messages_active = 1;
+
+	return 0;
+}
+
+
+void co_free_pages(unsigned long vaddr, int order)
+{
+	unsigned long flags;
+
+	co_passage_page_acquire(&flags);
+	co_passage_page->operation = CO_OPERATION_FREE_PAGES;
+	co_passage_page->params[0] = vaddr;
+	co_passage_page->params[1] = order;
+	co_switch_wrapper();
+	co_passage_page_release(flags);
+}
+
+int co_alloc_pages(unsigned long vaddr, int order)
+{
+	unsigned long flags;
+	long result;
+
+	co_passage_page_acquire(&flags);
+	co_passage_page->operation = CO_OPERATION_ALLOC_PAGES;
+	co_passage_page->params[0] = vaddr;
+	co_passage_page->params[1] = order;
+	co_switch_wrapper();
+	result = (long)co_passage_page->params[4];
+	co_passage_page_release(flags);
+
+	if (result < 0)
+		return -ENOMEM;
+
+	return 0;
+}
+
+__initcall(initcall_message_queues);
+
+EXPORT_SYMBOL(co_terminate);
+
+CO_TRACE_CONTINUE;
# kernel/panic.c: after the "Kernel panic" message is printed, calls
# co_terminate(CO_TERMINATE_PANIC) when cooperative_mode_enabled().
diff -urN a/kernel/panic.c b/kernel/panic.c
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -18,6 +18,7 @@
 #include <linux/sysrq.h>
 #include <linux/interrupt.h>
 #include <linux/nmi.h>
+#include <linux/cooperative_internal.h>
 
 int panic_timeout;
 int panic_on_oops;
@@ -71,6 +72,10 @@
 	printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
 	bust_spinlocks(0);
 
+	if (cooperative_mode_enabled()) {
+		co_terminate(CO_TERMINATE_PANIC);
+	}
+
 #ifdef CONFIG_SMP
 	smp_send_stop();
 #endif
# kernel/printk.c: pulls in <linux/cooperative_internal.h>.
diff -urN a/kernel/printk.c b/kernel/printk.c
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -34,6 +34,8 @@
 
 #include <asm/uaccess.h>
 
+#include <linux/cooperative_internal.h>
+
 #define __LOG_BUF_LEN	(1 << CONFIG_LOG_BUF_SHIFT)
 
 /* printk's without a loglevel use this..
*/
@@ -538,6 +540,8 @@
 	/* Emit the output into the temporary buffer */
 	printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
 
+	co_printk(printk_buf);
+
 	/*
 	 * Copy the output into log_buf.  If the caller didn't provide
 	 * appropriate log level tags, we insert them here
# localversion-cooperative: new file containing "-co-".
diff -urN a/localversion-cooperative b/localversion-cooperative
--- a/localversion-cooperative
+++ b/localversion-cooperative
@@ -0,0 +1 @@
+-co-
# mm/bootmem.c: in cooperative mode each boot-time allocation asks the host,
# via co_alloc_pages(), for the page-aligned range that covers it (second
# argument is a page count).  On failure the reservation is released and NULL
# is returned.
diff -urN a/mm/bootmem.c b/mm/bootmem.c
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -17,6 +17,7 @@
 #include <linux/bootmem.h>
 #include <linux/mmzone.h>
 #include <linux/module.h>
+#include <linux/cooperative_internal.h>
 #include <asm/dma.h>
 #include <asm/io.h>
 
@@ -248,6 +249,23 @@
 	for (i = start; i < start+areasize; i++)
 		if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
 			BUG();
+
+	if (cooperative_mode_enabled()) {
+		unsigned long alloc_address = (unsigned long)ret;
+		unsigned long alloc_size = size;
+		int result;
+
+		alloc_size += (alloc_address & (~PAGE_MASK));
+		alloc_address &= PAGE_MASK;
+		alloc_size = (alloc_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+		result = co_alloc_pages(alloc_address, alloc_size);
+		if (result) {
+			free_bootmem(virt_to_phys(ret), size);	/* free_bootmem() takes a physical address */
+			return NULL;
+		}
+	}
+
 	memset(ret, 0, size);
 	return ret;
 }
# mm/page_alloc.c: pages are handed back to the host with co_free_pages() when
# freed and requested with co_alloc_pages() when allocated (both receive
# 1 << order as their second argument).  A failed request shrinks the page
# cache and retries, up to 10 attempts, before the allocation fails.
diff -urN a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -32,7 +32,7 @@
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
 #include <linux/nodemask.h>
-
+#include <linux/cooperative_internal.h>
 #include <asm/tlbflush.h>
 
 nodemask_t node_online_map = NODE_MASK_NONE;
@@ -184,6 +184,9 @@
 {
 	unsigned long page_idx, index, mask;
 
+	if (cooperative_mode_enabled())
+		co_free_pages((unsigned long)page_address(page), 1 << order);
+
 	if (order)
 		destroy_compound_page(page, order);
 	mask = (~0UL) << order;
@@ -738,6 +741,37 @@
 got_pg:
 	zone_statistics(zonelist, z);
 	kernel_map_pages(page, 1 << order, 1);
+
+	if (cooperative_mode_enabled()) {
+		int result, retries_left;
+
+		retries_left = 10;
+
+		while (retries_left > 0) {
+			result = co_alloc_pages((unsigned long)page_address(page), 1 << order);
+			if (result) {
+				unsigned long cache_size;
+				/*
+				 * Whoops, we have allocated too much of the
+				 * host OS's memory; time to free some of the
+				 * page cache.
+				 */
+				cache_size = get_page_cache_size()-total_swapcache_pages;
+				cache_size /= 2;
+				if (cache_size < ((1 << order)*2))
+					cache_size = (1 << order)*2;
+				shrink_all_memory(cache_size);
+			} else
+				break;
+			retries_left--;
+		}
+
+		if (result) {
+			__free_pages(page, order);
+			return NULL;
+		}
+	}
+
 	return page;
 }
 
# mm/vmscan.c: the code previously guarded by CONFIG_PM is now also built when
# CONFIG_COOPERATIVE is set.
diff -urN a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1180,7 +1180,7 @@
 	wake_up_interruptible(&zone->zone_pgdat->kswapd_wait);
 }
 
-#ifdef CONFIG_PM
+#if defined(CONFIG_PM) || defined(CONFIG_COOPERATIVE)
 /*
  * Try to free `nr_pages' of memory, system-wide.  Returns the number of freed
  * pages.