diff -Nur c3000_pre/linux/arch/arm/config.in c3000_test/linux/arch/arm/config.in
--- c3000_pre/linux/arch/arm/config.in	2004-12-16 22:55:34.000000000 +0900
+++ c3000_test/linux/arch/arm/config.in	2004-12-20 23:23:28.000000000 +0900
@@ -574,6 +574,8 @@
    fi
 fi
 dep_bool 'Preemptible Kernel support' CONFIG_PREEMPT $CONFIG_CPU_32
+bool 'Low latency scheduling' CONFIG_LOLAT
+dep_bool 'Control low latency with sysctl' CONFIG_LOLAT_SYSCTL $CONFIG_LOLAT
 
 endmenu
 
diff -Nur c3000_pre/linux/arch/i386/config.in c3000_test/linux/arch/i386/config.in
--- c3000_pre/linux/arch/i386/config.in	2004-08-21 09:48:09.000000000 +0900
+++ c3000_test/linux/arch/i386/config.in	2004-12-20 22:56:21.000000000 +0900
@@ -25,6 +25,9 @@
 
 mainmenu_option next_comment
 comment 'Processor type and features'
+bool 'Low latency scheduling' CONFIG_LOLAT
+dep_bool 'Control low latency with sysctl' CONFIG_LOLAT_SYSCTL $CONFIG_LOLAT
+
 choice 'Processor family' \
 	"386					CONFIG_M386 \
 	 486					CONFIG_M486 \
diff -Nur c3000_pre/linux/drivers/block/ll_rw_blk.c c3000_test/linux/drivers/block/ll_rw_blk.c
--- c3000_pre/linux/drivers/block/ll_rw_blk.c	2004-08-21 09:48:24.000000000 +0900
+++ c3000_test/linux/drivers/block/ll_rw_blk.c	2004-12-20 22:56:21.000000000 +0900
@@ -1211,6 +1211,7 @@
 			kstat.pgpgin += count;
 			break;
 	}
+	conditional_schedule();
 }
 
 /**
diff -Nur c3000_pre/linux/drivers/char/mem.c c3000_test/linux/drivers/char/mem.c
--- c3000_pre/linux/drivers/char/mem.c	2004-08-21 09:48:25.000000000 +0900
+++ c3000_test/linux/drivers/char/mem.c	2004-12-20 22:56:21.000000000 +0900
@@ -422,7 +422,7 @@
 		if (count > size)
 			count = size;
 
-		zap_page_range(mm, addr, count);
+		zap_page_range(mm, addr, count, 0);
         	zeromap_page_range(addr, count, PAGE_COPY);
 
 		size -= count;
diff -Nur c3000_pre/linux/drivers/char/random.c c3000_test/linux/drivers/char/random.c
--- c3000_pre/linux/drivers/char/random.c	2004-08-21 09:48:25.000000000 +0900
+++ c3000_test/linux/drivers/char/random.c	2004-12-20 22:56:21.000000000 +0900
@@ -1374,6 +1374,11 @@
 		buf += i;
 		ret += i;
 		add_timer_randomness(&extract_timer_state, nbytes);
+#if LOWLATENCY_NEEDED
+		/* This can happen in softirq's, but that's what we want */
+		if (conditional_schedule_needed())
+			break;
+#endif
 	}
 
 	/* Wipe data just returned from memory */
diff -Nur c3000_pre/linux/drivers/i2c/i2c-core.c c3000_test/linux/drivers/i2c/i2c-core.c
--- c3000_pre/linux/drivers/i2c/i2c-core.c	2004-08-21 09:48:34.000000000 +0900
+++ c3000_test/linux/drivers/i2c/i2c-core.c	2004-12-20 22:56:21.000000000 +0900
@@ -761,6 +761,8 @@
 {
 	int ret;
 
+	conditional_schedule();
+
 	if (adap->algo->master_xfer) {
  	 	DEB2(printk("i2c-core.o: master_xfer: %s with %d msgs.\n",
 		            adap->name,num));
@@ -783,6 +785,8 @@
 	struct i2c_adapter *adap=client->adapter;
 	struct i2c_msg msg;
 
+	conditional_schedule();
+
 	if (client->adapter->algo->master_xfer) {
 		msg.addr   = client->addr;
 		msg.flags = client->flags & I2C_M_TEN;
@@ -812,6 +816,9 @@
 	struct i2c_adapter *adap=client->adapter;
 	struct i2c_msg msg;
 	int ret;
+
+	conditional_schedule();
+
 	if (client->adapter->algo->master_xfer) {
 		msg.addr   = client->addr;
 		msg.flags = client->flags & I2C_M_TEN;
diff -Nur c3000_pre/linux/fs/buffer.c c3000_test/linux/fs/buffer.c
--- c3000_pre/linux/fs/buffer.c	2004-08-21 09:48:58.000000000 +0900
+++ c3000_test/linux/fs/buffer.c	2004-12-20 22:56:21.000000000 +0900
@@ -216,8 +216,10 @@
 
 		if (dev != NODEV && bh->b_dev != dev)
 			continue;
-		if (test_and_set_bit(BH_Lock, &bh->b_state))
+		if (test_and_set_bit(BH_Lock, &bh->b_state)) {
+			__refile_buffer(bh);
 			continue;
+		}
 		if (atomic_set_buffer_clean(bh)) {
 			__refile_buffer(bh);
 			get_bh(bh);
@@ -227,6 +229,7 @@
 
 			spin_unlock(&lru_list_lock);
 			write_locked_buffers(array, count);
+			conditional_schedule();
 			return -EAGAIN;
 		}
 		unlock_buffer(bh);
@@ -260,12 +263,19 @@
 	struct buffer_head * next;
 	int nr;
 
-	next = lru_list[index];
 	nr = nr_buffers_type[index];
+repeat:
+	next = lru_list[index];
 	while (next && --nr >= 0) {
 		struct buffer_head *bh = next;
 		next = bh->b_next_free;
 
+		if (conditional_schedule_needed()) {
+			spin_unlock(&lru_list_lock);
+			unconditional_schedule();
+			spin_lock(&lru_list_lock);
+			goto repeat;
+		}
 		if (!buffer_locked(bh)) {
 			if (refile)
 				__refile_buffer(bh);
@@ -273,7 +283,6 @@
 		}
 		if (dev != NODEV && bh->b_dev != dev)
 			continue;
-
 		get_bh(bh);
 		spin_unlock(&lru_list_lock);
 		wait_on_buffer (bh);
@@ -306,6 +315,15 @@
 {
 	int err = 0;
 
+#if LOWLATENCY_NEEDED
+	/*
+	 * syncing devA when there are lots of buffers dirty against
+	 * devB is expensive.
+	 */
+	if (enable_lowlatency)
+		dev = NODEV;
+#endif
+
 	/* One pass for no-wait, three for wait:
 	 * 0) write out all dirty, unlocked buffers;
 	 * 1) wait for all dirty locked buffers;
@@ -697,6 +715,16 @@
 			/* Not hashed? */
 			if (!bh->b_pprev)
 				continue;
+
+			if (conditional_schedule_needed()) {
+				get_bh(bh);
+				spin_unlock(&lru_list_lock);
+				unconditional_schedule();
+				spin_lock(&lru_list_lock);
+				put_bh(bh);
+				slept = 1;
+			}
+
 			if (buffer_locked(bh)) {
 				get_bh(bh);
 				spin_unlock(&lru_list_lock);
@@ -848,12 +876,19 @@
 	struct buffer_head *bh;
 	struct inode tmp;
 	int err = 0, err2;
-	
+	DEFINE_RESCHED_COUNT;
+
 	INIT_LIST_HEAD(&tmp.i_dirty_buffers);
-	
+
+repeat:
 	spin_lock(&lru_list_lock);
 
 	while (!list_empty(list)) {
+		if (conditional_schedule_needed()) {
+			spin_unlock(&lru_list_lock);
+			unconditional_schedule();
+			goto repeat;
+		}
 		bh = BH_ENTRY(list->next);
 		list_del(&bh->b_inode_buffers);
 		if (!buffer_dirty(bh) && !buffer_locked(bh))
@@ -878,8 +913,18 @@
 				spin_lock(&lru_list_lock);
 			}
 		}
+		if (TEST_RESCHED_COUNT(32)) {
+			RESET_RESCHED_COUNT();
+			if (conditional_schedule_needed()) {
+				spin_unlock(&lru_list_lock);
+				unconditional_schedule();	/* Syncing many dirty buffers */
+				spin_lock(&lru_list_lock);
+			}
+		}
 	}
 
+	RESET_RESCHED_COUNT();
+
 	while (!list_empty(&tmp.i_dirty_buffers)) {
 		bh = BH_ENTRY(tmp.i_dirty_buffers.prev);
 		remove_inode_queue(bh);
@@ -889,6 +934,7 @@
 		if (!buffer_uptodate(bh))
 			err = -EIO;
 		brelse(bh);
+		conditional_schedule();
 		spin_lock(&lru_list_lock);
 	}
 	
@@ -916,11 +962,20 @@
 	struct buffer_head *bh;
 	struct list_head *p;
 	int err = 0;
+	DEFINE_RESCHED_COUNT;
 
+repeat:
+	conditional_schedule();
 	spin_lock(&lru_list_lock);
 	
- repeat:
 	list_for_each_prev(p, list) {
+		if (TEST_RESCHED_COUNT(32)) {
+			RESET_RESCHED_COUNT();
+			if (conditional_schedule_needed()) {
+				spin_unlock(&lru_list_lock);
+				goto repeat;
+			}
+		}
 		bh = BH_ENTRY(p);
 		if (buffer_locked(bh)) {
 			get_bh(bh);
@@ -929,7 +984,6 @@
 			if (!buffer_uptodate(bh))
 				err = -EIO;
 			brelse(bh);
-			spin_lock(&lru_list_lock);
 			goto repeat;
 		}
 	}
@@ -946,12 +1000,24 @@
 void invalidate_inode_buffers(struct inode *inode)
 {
 	struct list_head * entry;
-	
+
+repeat:
+	conditional_schedule();
 	spin_lock(&lru_list_lock);
-	while ((entry = inode->i_dirty_buffers.next) != &inode->i_dirty_buffers)
+	while ((entry = inode->i_dirty_buffers.next) != &inode->i_dirty_buffers) {
+		if (conditional_schedule_needed()) {
+			spin_unlock(&lru_list_lock);
+			goto repeat;
+		}
 		remove_inode_queue(BH_ENTRY(entry));
-	while ((entry = inode->i_dirty_data_buffers.next) != &inode->i_dirty_data_buffers)
+	}
+	while ((entry = inode->i_dirty_data_buffers.next) != &inode->i_dirty_data_buffers) {
+		if (conditional_schedule_needed()) {
+			spin_unlock(&lru_list_lock);
+			goto repeat;
+		}
 		remove_inode_queue(BH_ENTRY(entry));
+	}
 	spin_unlock(&lru_list_lock);
 }
 
@@ -974,6 +1040,7 @@
 		bh = get_hash_table(dev, block, size);
 		if (bh) {
 			touch_buffer(bh);
+			conditional_schedule();
 			return bh;
 		}
 
diff -Nur c3000_pre/linux/fs/dcache.c c3000_test/linux/fs/dcache.c
--- c3000_pre/linux/fs/dcache.c	2004-08-21 09:48:58.000000000 +0900
+++ c3000_test/linux/fs/dcache.c	2004-12-20 22:56:21.000000000 +0900
@@ -320,11 +320,23 @@
  
 void prune_dcache(int count)
 {
+	DEFINE_RESCHED_COUNT;
+
+redo:
 	spin_lock(&dcache_lock);
 	for (;;) {
 		struct dentry *dentry;
 		struct list_head *tmp;
 
+		if (TEST_RESCHED_COUNT(100)) {
+			RESET_RESCHED_COUNT();
+			if (conditional_schedule_needed()) {
+				spin_unlock(&dcache_lock);
+				unconditional_schedule();
+				goto redo;
+			}
+		}
+
 		tmp = dentry_unused.prev;
 
 		if (tmp == &dentry_unused)
@@ -479,6 +491,7 @@
 	struct dentry *this_parent = parent;
 	struct list_head *next;
 	int found = 0;
+	DEFINE_RESCHED_COUNT;
 
 	spin_lock(&dcache_lock);
 repeat:
@@ -493,6 +506,13 @@
 			list_add(&dentry->d_lru, dentry_unused.prev);
 			found++;
 		}
+
+		if (TEST_RESCHED_COUNT(500) && found > 10) {
+			if (conditional_schedule_needed())	/* Typically sys_rmdir() */
+				goto out;
+			RESET_RESCHED_COUNT();
+		}
+
 		/*
 		 * Descend a level if the d_subdirs list is non-empty.
 		 */
@@ -517,6 +537,7 @@
 #endif
 		goto resume;
 	}
+out:
 	spin_unlock(&dcache_lock);
 	return found;
 }
@@ -532,8 +553,10 @@
 {
 	int found;
 
-	while ((found = select_parent(parent)) != 0)
+	while ((found = select_parent(parent)) != 0) {
 		prune_dcache(found);
+		conditional_schedule();		/* Typically sys_rmdir() */
+	}
 }
 
 /*
diff -Nur c3000_pre/linux/fs/exec.c c3000_test/linux/fs/exec.c
--- c3000_pre/linux/fs/exec.c	2004-08-21 09:48:58.000000000 +0900
+++ c3000_test/linux/fs/exec.c	2004-12-20 22:56:21.000000000 +0900
@@ -249,7 +249,7 @@
 					memset(kaddr+offset+len, 0,
 						PAGE_SIZE-offset-len);
 			}
-			err = copy_from_user(kaddr+offset, str, bytes_to_copy);
+			err = ll_copy_from_user(kaddr+offset, str, bytes_to_copy);
 			if (err) {
 				ret = -EFAULT;
 				goto out;
diff -Nur c3000_pre/linux/fs/ext2/dir.c c3000_test/linux/fs/ext2/dir.c
--- c3000_pre/linux/fs/ext2/dir.c	2004-08-21 09:48:59.000000000 +0900
+++ c3000_test/linux/fs/ext2/dir.c	2004-12-20 22:56:21.000000000 +0900
@@ -153,6 +153,7 @@
 	struct address_space *mapping = dir->i_mapping;
 	struct page *page = read_cache_page(mapping, n,
 				(filler_t*)mapping->a_ops->readpage, NULL);
+	conditional_schedule();		/* Scanning large directories */
 	if (!IS_ERR(page)) {
 		wait_on_page(page);
 		kmap(page);
diff -Nur c3000_pre/linux/fs/ext2/inode.c c3000_test/linux/fs/ext2/inode.c
--- c3000_pre/linux/fs/ext2/inode.c	2004-08-21 09:48:59.000000000 +0900
+++ c3000_test/linux/fs/ext2/inode.c	2004-12-20 22:56:21.000000000 +0900
@@ -715,8 +715,13 @@
 {
 	unsigned long block_to_free = 0, count = 0;
 	unsigned long nr;
+	DEFINE_RESCHED_COUNT;
 
 	for ( ; p < q ; p++) {
+		if (TEST_RESCHED_COUNT(32)) {
+			RESET_RESCHED_COUNT();
+			conditional_schedule();
+		}
 		nr = le32_to_cpu(*p);
 		if (nr) {
 			*p = 0;
@@ -759,6 +764,7 @@
 	if (depth--) {
 		int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
 		for ( ; p < q ; p++) {
+			conditional_schedule();		/* Deleting large files */
 			nr = le32_to_cpu(*p);
 			if (!nr)
 				continue;
diff -Nur c3000_pre/linux/fs/ext3/balloc.c c3000_test/linux/fs/ext3/balloc.c
--- c3000_pre/linux/fs/ext3/balloc.c	2004-08-21 09:48:59.000000000 +0900
+++ c3000_test/linux/fs/ext3/balloc.c	2004-12-20 22:56:21.000000000 +0900
@@ -363,6 +363,9 @@
 			}
 		}
 #endif
+		/* superblock lock is held, so this is safe */
+		conditional_schedule();
+
 		BUFFER_TRACE(bitmap_bh, "clear bit");
 		if (!ext3_clear_bit (bit + i, bitmap_bh->b_data)) {
 			ext3_error (sb, __FUNCTION__,
diff -Nur c3000_pre/linux/fs/ext3/inode.c c3000_test/linux/fs/ext3/inode.c
--- c3000_pre/linux/fs/ext3/inode.c	2004-08-21 09:48:59.000000000 +0900
+++ c3000_test/linux/fs/ext3/inode.c	2004-12-20 22:56:21.000000000 +0900
@@ -902,6 +902,8 @@
 
 	prev_blocks = inode->i_blocks;
 
+	conditional_schedule();		/* Reading large directories */
+
 	bh = ext3_getblk (handle, inode, block, create, err);
 	if (!bh)
 		return bh;
@@ -1605,6 +1607,7 @@
 	 */
 	for (p = first; p < last; p++) {
 		u32 nr = le32_to_cpu(*p);
+		conditional_schedule();
 		if (nr) {
 			struct buffer_head *bh;
 
@@ -1659,6 +1662,7 @@
 	}
 
 	for (p = first; p < last; p++) {
+		conditional_schedule();
 		nr = le32_to_cpu(*p);
 		if (nr) {
 			/* accumulate blocks to free if they're contiguous */
diff -Nur c3000_pre/linux/fs/ext3/namei.c c3000_test/linux/fs/ext3/namei.c
--- c3000_pre/linux/fs/ext3/namei.c	2004-08-21 09:48:59.000000000 +0900
+++ c3000_test/linux/fs/ext3/namei.c	2004-12-20 22:56:21.000000000 +0900
@@ -157,6 +157,7 @@
 		if ((bh = bh_use[ra_ptr++]) == NULL)
 			goto next;
 		wait_on_buffer(bh);
+		conditional_schedule();
 		if (!buffer_uptodate(bh)) {
 			/* read error, skip block & hope for the best */
 			brelse(bh);
diff -Nur c3000_pre/linux/fs/inode.c c3000_test/linux/fs/inode.c
--- c3000_pre/linux/fs/inode.c	2004-08-21 09:48:58.000000000 +0900
+++ c3000_test/linux/fs/inode.c	2004-12-20 23:00:06.000000000 +0900
@@ -251,6 +251,8 @@
 
 	filemap_fdatawait(inode->i_mapping);
 
+	conditional_schedule();
+
 	spin_lock(&inode_lock);
 	inode->i_state &= ~I_LOCK;
 	if (!(inode->i_state & I_FREEING)) {
@@ -561,6 +563,7 @@
 
 	while ((inode_entry = head->next) != head)
 	{
+		conditional_schedule();
 		list_del(inode_entry);
 
 		inode = list_entry(inode_entry, struct inode, i_list);
@@ -589,9 +592,22 @@
 		if (tmp == head)
 			break;
 		inode = list_entry(tmp, struct inode, i_list);
+
+		if (conditional_schedule_needed()) {
+			atomic_inc(&inode->i_count);
+			spin_unlock(&inode_lock);
+			unconditional_schedule();
+			spin_lock(&inode_lock);
+			atomic_dec(&inode->i_count);
+		}
+
 		if (inode->i_sb != sb)
 			continue;
+		atomic_inc(&inode->i_count);
+		spin_unlock(&inode_lock);
 		invalidate_inode_buffers(inode);
+		spin_lock(&inode_lock);
+		atomic_dec(&inode->i_count);
 		if (!atomic_read(&inode->i_count)) {
 			list_del_init(&inode->i_hash);
 			list_del(&inode->i_list);
@@ -896,6 +912,8 @@
 	if (inode) {
 		struct inode * old;
 
+		conditional_schedule();			/* sync_old_buffers */
+
 		spin_lock(&inode_lock);
 		/* We released the lock, so.. */
 		old = find_inode(sb, ino, head, find_actor, opaque);
@@ -1313,18 +1331,32 @@
 	int request=goal;
 	int loop=0;
 #endif
+	int nr_to_scan = inodes_stat.nr_unused;
 
+resume:
 	spin_lock(&inode_lock);
 
 	count = 0;
 	entry = inode_unused.prev;
-	while (entry != &inode_unused)
-	{
+	while (entry != &inode_unused && nr_to_scan--) {
 		struct list_head *tmp = entry;
 
 #ifdef JFFS2_INODE_DEBUG
 		loop++;
 #endif
+		if (conditional_schedule_needed()) {
+			/*
+			 * Need to drop the lock.  Reposition
+			 * the list head so we start here next time.
+			 * This can corrupt the LRU nature of the
+			 * unused list, but this isn't very important.
+			 */
+			list_del(&inode_unused);
+			list_add(&inode_unused, entry);
+			spin_unlock(&inode_lock);
+			unconditional_schedule();
+			goto resume;
+		}
 		entry = entry->prev;
 		inode = INODE(tmp);
 		if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK))
diff -Nur c3000_pre/linux/fs/jbd/checkpoint.c c3000_test/linux/fs/jbd/checkpoint.c
--- c3000_pre/linux/fs/jbd/checkpoint.c	2004-08-21 09:48:59.000000000 +0900
+++ c3000_test/linux/fs/jbd/checkpoint.c	2004-12-20 22:56:21.000000000 +0900
@@ -431,7 +431,11 @@
 {
 	transaction_t *transaction, *last_transaction, *next_transaction;
 	int ret = 0;
+	int ll_retries = 4;		/* lowlatency addition */
 
+restart:
+	if (ll_retries-- == 0)
+		goto out;
 	transaction = journal->j_checkpoint_transactions;
 	if (transaction == 0)
 		goto out;
@@ -451,6 +455,12 @@
 				jh = next_jh;
 				next_jh = jh->b_cpnext;
 				ret += __try_to_free_cp_buf(jh);
+				if (conditional_schedule_needed()) {
+					spin_unlock(&journal_datalist_lock);
+					unconditional_schedule();
+					spin_lock(&journal_datalist_lock);
+					goto restart;
+				}
 			} while (jh != last_jh);
 		}
 	} while (transaction != last_transaction);
diff -Nur c3000_pre/linux/fs/jbd/commit.c c3000_test/linux/fs/jbd/commit.c
--- c3000_pre/linux/fs/jbd/commit.c	2004-08-21 09:48:59.000000000 +0900
+++ c3000_test/linux/fs/jbd/commit.c	2004-12-20 22:56:21.000000000 +0900
@@ -212,6 +212,16 @@
 				__journal_remove_journal_head(bh);
 				refile_buffer(bh);
 				__brelse(bh);
+				if (conditional_schedule_needed()) {
+					if (commit_transaction->t_sync_datalist)
+						commit_transaction->t_sync_datalist =
+							next_jh;
+					if (bufs)
+						break;
+					spin_unlock(&journal_datalist_lock);
+					unconditional_schedule();
+					goto write_out_data;
+				}
 			}
 		}
 		if (bufs == ARRAY_SIZE(wbuf)) {
@@ -235,8 +245,7 @@
 		journal_brelse_array(wbuf, bufs);
 		lock_journal(journal);
 		spin_lock(&journal_datalist_lock);
-		if (bufs)
-			goto write_out_data_locked;
+		goto write_out_data_locked;
 	}
 
 	/*
@@ -272,6 +281,14 @@
 	 */
 	while ((jh = commit_transaction->t_async_datalist)) {
 		struct buffer_head *bh = jh2bh(jh);
+		if (conditional_schedule_needed()) {
+			spin_unlock(&journal_datalist_lock);
+			unlock_journal(journal);
+			unconditional_schedule();
+			lock_journal(journal);
+			spin_lock(&journal_datalist_lock);
+			continue;	/* List may have changed */
+		}
 		if (buffer_locked(bh)) {
 			spin_unlock(&journal_datalist_lock);
 			unlock_journal(journal);
@@ -486,6 +503,8 @@
  wait_for_iobuf:
 	while (commit_transaction->t_iobuf_list != NULL) {
 		struct buffer_head *bh;
+
+		conditional_schedule();
 		jh = commit_transaction->t_iobuf_list->b_tprev;
 		bh = jh2bh(jh);
 		if (buffer_locked(bh)) {
@@ -644,6 +663,8 @@
 		transaction_t *cp_transaction;
 		struct buffer_head *bh;
 
+		conditional_schedule();		/* journal is locked */
+
 		jh = commit_transaction->t_forget;
 		J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction ||
 			jh->b_transaction == journal->j_running_transaction);
diff -Nur c3000_pre/linux/fs/proc/array.c c3000_test/linux/fs/proc/array.c
--- c3000_pre/linux/fs/proc/array.c	2004-08-21 09:49:01.000000000 +0900
+++ c3000_test/linux/fs/proc/array.c	2004-12-20 22:56:21.000000000 +0900
@@ -498,9 +498,11 @@
 	if (end > PMD_SIZE)
 		end = PMD_SIZE;
 	do {
-		pte_t page = *pte;
+		pte_t page;
 		struct page *ptpage;
 
+		conditional_schedule();		/* For `top' and `ps' */
+		page = *pte;
 		address += PAGE_SIZE;
 		pte++;
 		if (pte_none(page))
diff -Nur c3000_pre/linux/fs/proc/generic.c c3000_test/linux/fs/proc/generic.c
--- c3000_pre/linux/fs/proc/generic.c	2004-08-21 09:49:01.000000000 +0900
+++ c3000_test/linux/fs/proc/generic.c	2004-12-20 22:56:21.000000000 +0900
@@ -98,6 +98,8 @@
 				retval = n;
 			break;
 		}
+
+		conditional_schedule();		/* Some /proc files are large */
 		
 		/* This is a hack to allow mangling of file pos independent
  		 * of actual bytes read.  Simply place the data at page,
diff -Nur c3000_pre/linux/fs/reiserfs/buffer2.c c3000_test/linux/fs/reiserfs/buffer2.c
--- c3000_pre/linux/fs/reiserfs/buffer2.c	2004-08-21 09:49:01.000000000 +0900
+++ c3000_test/linux/fs/reiserfs/buffer2.c	2004-12-20 22:56:21.000000000 +0900
@@ -54,6 +54,7 @@
     PROC_EXP( unsigned int ctx_switches = kstat.context_swtch );
 
     result = bread (super -> s_dev, n_block, n_size);
+    conditional_schedule();
     PROC_INFO_INC( super, breads );
     PROC_EXP( if( kstat.context_swtch != ctx_switches ) 
 	      PROC_INFO_INC( super, bread_miss ) );
diff -Nur c3000_pre/linux/fs/reiserfs/journal.c c3000_test/linux/fs/reiserfs/journal.c
--- c3000_pre/linux/fs/reiserfs/journal.c	2004-08-21 09:49:01.000000000 +0900
+++ c3000_test/linux/fs/reiserfs/journal.c	2004-12-20 22:56:21.000000000 +0900
@@ -573,6 +573,7 @@
 /* lock the current transaction */
 inline static void lock_journal(struct super_block *p_s_sb) {
   PROC_INFO_INC( p_s_sb, journal.lock_journal );
+  conditional_schedule();
   while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_wlock)) > 0) {
     PROC_INFO_INC( p_s_sb, journal.lock_journal_wait );
     sleep_on(&(SB_JOURNAL(p_s_sb)->j_wait)) ;
@@ -703,6 +704,7 @@
 	mark_buffer_dirty(tbh) ;
       }
       ll_rw_block(WRITE, 1, &tbh) ;
+      conditional_schedule();
       count++ ;
       put_bh(tbh) ; /* once for our get_hash */
     } 
@@ -832,6 +834,7 @@
     set_bit(BH_Dirty, &(SB_JOURNAL(p_s_sb)->j_header_bh->b_state)) ;
     ll_rw_block(WRITE, 1, &(SB_JOURNAL(p_s_sb)->j_header_bh)) ;
     wait_on_buffer((SB_JOURNAL(p_s_sb)->j_header_bh)) ; 
+    conditional_schedule();
     if (!buffer_uptodate(SB_JOURNAL(p_s_sb)->j_header_bh)) {
       printk( "reiserfs: journal-837: IO error during journal replay\n" );
       return -EIO ;
@@ -2125,6 +2128,7 @@
 }
 
 int journal_begin(struct reiserfs_transaction_handle *th, struct super_block  * p_s_sb, unsigned long nblocks) {
+  conditional_schedule();
   return do_journal_begin_r(th, p_s_sb, nblocks, 0) ;
 }
 
@@ -2265,6 +2269,7 @@
 }
 
 int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) {
+  conditional_schedule();
   return do_journal_end(th, p_s_sb, nblocks, 0) ;
 }
 
@@ -2716,6 +2721,7 @@
       RFALSE( buffer_locked(bh) && cur_tb != NULL,
 	      "waiting while do_balance was running\n") ;
       wait_on_buffer(bh) ;
+      conditional_schedule();
     }
     PROC_INFO_INC( p_s_sb, journal.prepare_retry );
     retry_count++ ;
@@ -2888,6 +2894,7 @@
     /* copy all the real blocks into log area.  dirty log blocks */
     if (test_bit(BH_JDirty, &cn->bh->b_state)) {
       struct buffer_head *tmp_bh ;
+      conditional_schedule();
       tmp_bh = sb_getblk(p_s_sb, reiserfs_get_journal_block(p_s_sb) + 
 		     ((cur_write_start + jindex) % JOURNAL_BLOCK_COUNT)) ;
       mark_buffer_uptodate(tmp_bh, 1) ;
diff -Nur c3000_pre/linux/fs/reiserfs/stree.c c3000_test/linux/fs/reiserfs/stree.c
--- c3000_pre/linux/fs/reiserfs/stree.c	2004-08-21 09:49:01.000000000 +0900
+++ c3000_test/linux/fs/reiserfs/stree.c	2004-12-20 22:56:21.000000000 +0900
@@ -652,9 +652,8 @@
                                        stop at leaf level - set to
                                        DISK_LEAF_NODE_LEVEL */
     ) {
-    int  n_block_number = SB_ROOT_BLOCK (p_s_sb),
-      expected_level = SB_TREE_HEIGHT (p_s_sb),
-      n_block_size    = p_s_sb->s_blocksize;
+    int n_block_number, expected_level;
+    int n_block_size    = p_s_sb->s_blocksize;
     struct buffer_head  *       p_s_bh;
     struct path_element *       p_s_last_element;
     int				n_node_level, n_retval;
@@ -666,7 +665,8 @@
 #endif
     
     PROC_INFO_INC( p_s_sb, search_by_key );
-    
+    conditional_schedule();
+
     /* As we add each node to a path we increase its count.  This means that
        we must be careful to release all nodes in a path before we either
        discard the path struct or re-use the path struct, as we do here. */
@@ -678,6 +678,8 @@
     /* With each iteration of this loop we search through the items in the
        current node, and calculate the next current node(next path element)
        for the next iteration of this loop.. */
+    n_block_number = SB_ROOT_BLOCK (p_s_sb);
+    expected_level = SB_TREE_HEIGHT (p_s_sb);
     while ( 1 ) {
 
 #ifdef CONFIG_REISERFS_CHECK
@@ -1104,6 +1106,8 @@
 	    for (n_counter = *p_n_removed;
 		 n_counter < n_unfm_number; n_counter++, p_n_unfm_pointer-- ) {
 
+		conditional_schedule();
+
 		if (item_moved (&s_ih, p_s_path)) {
 		    need_research = 1 ;
 		    break;
diff -Nur c3000_pre/linux/include/linux/low-latency.h c3000_test/linux/include/linux/low-latency.h
--- c3000_pre/linux/include/linux/low-latency.h	1970-01-01 09:00:00.000000000 +0900
+++ c3000_test/linux/include/linux/low-latency.h	2004-12-20 22:56:21.000000000 +0900
@@ -0,0 +1,109 @@
+/*
+ * include/linux/low-latency.h
+ *
+ * Andrew Morton <akpm@zip.com.au>
+ */
+
+#ifndef LOW_LATENCY_H_INCLUDED
+#define LOW_LATENCY_H_INCLUDED
+
+#if defined(CONFIG_LOLAT)
+#define LOWLATENCY_NEEDED	1
+#else
+#define LOWLATENCY_NEEDED	0
+#endif
+
+#if LOWLATENCY_NEEDED
+
+#include <linux/cache.h>		/* For ____cacheline_aligned */
+
+#ifdef CONFIG_LOLAT_SYSCTL
+extern struct low_latency_enable_struct {
+	int yep;
+} ____cacheline_aligned __enable_lowlatency;
+#define enable_lowlatency __enable_lowlatency.yep
+
+#else
+#define enable_lowlatency 1
+#endif
+
+/*
+ * Set this non-zero to generate low-latency instrumentation
+ */
+#define LOWLATENCY_DEBUG		0
+
+/*
+ * Set this non-zero for robustness testing
+ */
+#define LOWLATENCY_ALWAYS_SCHEDULE	0
+
+#if LOWLATENCY_DEBUG
+
+#if LOWLATENCY_ALWAYS_SCHEDULE
+#define conditional_schedule_needed() ((enable_lowlatency == 2) || (enable_lowlatency && current->need_resched))
+#else
+#define conditional_schedule_needed() (enable_lowlatency && current->need_resched)
+#endif
+
+struct lolat_stats_t {
+	unsigned long count;
+	int visited;
+	const char *file;
+	int line;
+	struct lolat_stats_t *next;
+};
+
+void set_running_and_schedule(struct lolat_stats_t *stats);
+
+#define unconditional_schedule()					\
+	do {								\
+		static struct lolat_stats_t stats = {			\
+			file: __FILE__,					\
+			line: __LINE__,					\
+		};							\
+		set_running_and_schedule(&stats);			\
+	} while (0)
+
+extern void show_lolat_stats(void);
+
+#else	/* LOWLATENCY_DEBUG */
+
+#if LOWLATENCY_ALWAYS_SCHEDULE
+#define conditional_schedule_needed() 1
+#else
+#define conditional_schedule_needed() (current->need_resched)
+#endif
+
+void set_running_and_schedule(void);
+#define unconditional_schedule() set_running_and_schedule()
+
+#endif	/* LOWLATENCY_DEBUG */
+
+#define conditional_schedule()						\
+	do {								\
+		if (conditional_schedule_needed())			\
+			unconditional_schedule();			\
+	} while (0)
+
+#define DEFINE_RESCHED_COUNT	int resched_count = 0
+#define TEST_RESCHED_COUNT(n)	(enable_lowlatency && (++resched_count > (n)))
+#define RESET_RESCHED_COUNT()	resched_count = 0
+extern int ll_copy_to_user(void *to_user, const void *from, unsigned long len);
+extern int ll_copy_from_user(void *to, const void *from_user, unsigned long len);
+
+#else	/* LOWLATENCY_NEEDED */
+
+#define conditional_schedule_needed() 0
+#define conditional_schedule()
+#define unconditional_schedule()
+
+#define DEFINE_RESCHED_COUNT
+#define TEST_RESCHED_COUNT(n)	0
+#define RESET_RESCHED_COUNT()
+#define ll_copy_to_user(to_user, from, len) copy_to_user((to_user), (from), (len))
+#define ll_copy_from_user(to, from_user, len) copy_from_user((to), (from_user), (len))
+
+#endif	/* LOWLATENCY_NEEDED */
+
+#endif /* LOW_LATENCY_H_INCLUDED */
+
diff -Nur c3000_pre/linux/include/linux/mm.h c3000_test/linux/include/linux/mm.h
--- c3000_pre/linux/include/linux/mm.h	2004-08-21 09:49:13.000000000 +0900
+++ c3000_test/linux/include/linux/mm.h	2004-12-20 22:56:21.000000000 +0900
@@ -149,6 +149,8 @@
  */
 extern pgprot_t protection_map[16];
 
+/* Actions for zap_page_range() */
+#define ZPR_COND_RESCHED	1	/* Do a conditional_schedule() occasionally */
 
 /*
  * These are the virtual MM functions - opening of an area, closing and
@@ -500,7 +502,7 @@
 extern void shmem_lock(struct file * file, int lock);
 extern int shmem_zero_setup(struct vm_area_struct *);
 
-extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size);
+extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, int actions);
 extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma);
 extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
 extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
diff -Nur c3000_pre/linux/include/linux/reiserfs_fs.h c3000_test/linux/include/linux/reiserfs_fs.h
--- c3000_pre/linux/include/linux/reiserfs_fs.h	2004-08-21 09:49:13.000000000 +0900
+++ c3000_test/linux/include/linux/reiserfs_fs.h	2004-12-20 22:56:21.000000000 +0900
@@ -1197,8 +1197,8 @@
 #define fs_generation(s) ((s)->u.reiserfs_sb.s_generation_counter)
 #define get_generation(s) atomic_read (&fs_generation(s))
 #define FILESYSTEM_CHANGED_TB(tb)  (get_generation((tb)->tb_sb) != (tb)->fs_gen)
-#define fs_changed(gen,s) (gen != get_generation (s))
-
+#define __fs_changed(gen,s) (gen != get_generation (s))
+#define fs_changed(gen,s) ({conditional_schedule(); __fs_changed(gen,s);})
 
 /***************************************************************************/
 /*                  FIXATE NODES                                           */
diff -Nur c3000_pre/linux/include/linux/sched.h c3000_test/linux/include/linux/sched.h
--- c3000_pre/linux/include/linux/sched.h	2004-08-21 09:49:13.000000000 +0900
+++ c3000_test/linux/include/linux/sched.h	2004-12-20 22:56:21.000000000 +0900
@@ -1092,6 +1092,7 @@
 #include <linux/dcache.h>
 #include <linux/tqueue.h>
 #include <linux/fs_struct.h>
+#include <linux/low-latency.h>
 
 #endif /* __KERNEL__ */
 #endif
diff -Nur c3000_pre/linux/include/linux/sysctl.h c3000_test/linux/include/linux/sysctl.h
--- c3000_pre/linux/include/linux/sysctl.h	2004-08-21 09:49:13.000000000 +0900
+++ c3000_test/linux/include/linux/sysctl.h	2004-12-20 22:56:21.000000000 +0900
@@ -131,6 +131,7 @@
 	KERN_CORE_USES_PID=52,		/* int: use core or core.%pid */
 	KERN_TAINTED=53,	/* int: various kernel tainted flags */
 	KERN_CADPID=54,		/* int: PID of the process to notify on CAD */
+	KERN_LOWLATENCY=55,	/* int: enable low latency scheduling */
 };
 
 
diff -Nur c3000_pre/linux/kernel/exit.c c3000_test/linux/kernel/exit.c
--- c3000_pre/linux/kernel/exit.c	2004-08-21 09:49:14.000000000 +0900
+++ c3000_test/linux/kernel/exit.c	2004-12-20 22:56:21.000000000 +0900
@@ -196,6 +196,7 @@
 			}
 			i++;
 			set >>= 1;
+			conditional_schedule();		/* sys_exit, many files open */
 		}
 	}
 }
diff -Nur c3000_pre/linux/kernel/ksyms.c c3000_test/linux/kernel/ksyms.c
--- c3000_pre/linux/kernel/ksyms.c	2004-12-19 00:35:59.000000000 +0900
+++ c3000_test/linux/kernel/ksyms.c	2004-12-20 23:07:26.000000000 +0900
@@ -481,6 +481,13 @@
 EXPORT_SYMBOL(do_gettimeofday);
 EXPORT_SYMBOL(do_settimeofday);
 
+#if LOWLATENCY_NEEDED
+EXPORT_SYMBOL(set_running_and_schedule);
+#ifdef CONFIG_LOLAT_SYSCTL
+EXPORT_SYMBOL(__enable_lowlatency);
+#endif
+#endif
+
 #if !defined(__ia64__)
 EXPORT_SYMBOL(loops_per_jiffy);
 #endif
diff -Nur c3000_pre/linux/kernel/module.c c3000_test/linux/kernel/module.c
--- c3000_pre/linux/kernel/module.c	2004-08-21 09:49:14.000000000 +0900
+++ c3000_test/linux/kernel/module.c	2004-12-20 22:56:21.000000000 +0900
@@ -1174,6 +1174,11 @@
 		return ERR_PTR(-ENOMEM);
 	lock_kernel();
 	for (v = module_list, n = *pos; v; n -= v->nsyms, v = v->next) {
+#if 0
+		/* We can't actually do this, because we'd create a
+		 * race against module unload.  Need a semaphore. */
+		conditional_schedule();
+#endif
 		if (n < v->nsyms) {
 			p->mod = v;
 			p->index = n;
diff -Nur c3000_pre/linux/kernel/sched.c c3000_test/linux/kernel/sched.c
--- c3000_pre/linux/kernel/sched.c	2004-08-21 09:49:14.000000000 +0900
+++ c3000_test/linux/kernel/sched.c	2004-12-20 22:56:21.000000000 +0900
@@ -302,6 +302,17 @@
 		if (tsk->processor != this_cpu)
 			smp_send_reschedule(tsk->processor);
 	}
+#if LOWLATENCY_NEEDED
+	if (enable_lowlatency && (p->policy != SCHED_OTHER)) {
+		struct task_struct *t;
+		for (i = 0; i < smp_num_cpus; i++) {
+			cpu = cpu_logical_map(i);
+			t = cpu_curr(cpu);
+			if (t != tsk)
+				t->need_resched = 1;
+		}
+	}
+#endif
 	return;
 		
 
@@ -1429,3 +1440,93 @@
 	atomic_inc(&init_mm.mm_count);
 	enter_lazy_tlb(&init_mm, current, cpu);
 }
+
+#if LOWLATENCY_NEEDED
+#if LOWLATENCY_DEBUG
+
+static struct lolat_stats_t *lolat_stats_head;
+static spinlock_t lolat_stats_lock = SPIN_LOCK_UNLOCKED;
+
+void set_running_and_schedule(struct lolat_stats_t *stats)
+{
+	spin_lock(&lolat_stats_lock);
+	if (stats->visited == 0) {
+		stats->visited = 1;
+		stats->next = lolat_stats_head;
+		lolat_stats_head = stats;
+	}
+	stats->count++;
+	spin_unlock(&lolat_stats_lock);
+
+	if (current->state != TASK_RUNNING)
+		set_current_state(TASK_RUNNING);
+	schedule();
+}
+
+void show_lolat_stats(void)
+{
+	struct lolat_stats_t *stats = lolat_stats_head;
+
+	printk("Low latency scheduling stats:\n");
+	while (stats) {
+		printk("%s:%d: %lu\n", stats->file, stats->line, stats->count);
+		stats->count = 0;
+		stats = stats->next;
+	}
+}
+
+#else	/* LOWLATENCY_DEBUG */
+
+void set_running_and_schedule()
+{
+	if (current->state != TASK_RUNNING)
+		__set_current_state(TASK_RUNNING);
+	schedule();
+}
+
+#endif	/* LOWLATENCY_DEBUG */
+
+int ll_copy_to_user(void *to_user, const void *from, unsigned long len)
+{
+	while (len) {
+		unsigned long n_to_copy = len;
+		unsigned long remainder;
+
+		if (n_to_copy > 4096)
+			n_to_copy = 4096;
+		remainder = copy_to_user(to_user, from, n_to_copy);
+		if (remainder)
+			return remainder + len;
+		to_user = ((char *)to_user) + n_to_copy;
+		from = ((char *)from) + n_to_copy;
+		len -= n_to_copy;
+		conditional_schedule();
+	}
+	return 0;
+}
+
+int ll_copy_from_user(void *to, const void *from_user, unsigned long len)
+{
+	while (len) {
+		unsigned long n_to_copy = len;
+		unsigned long remainder;
+
+		if (n_to_copy > 4096)
+			n_to_copy = 4096;
+		remainder = copy_from_user(to, from_user, n_to_copy);
+		if (remainder)
+			return remainder + len;
+		to = ((char *)to) + n_to_copy;
+		from_user = ((char *)from_user) + n_to_copy;
+		len -= n_to_copy;
+		conditional_schedule();
+	}
+	return 0;
+}
+
+#ifdef CONFIG_LOLAT_SYSCTL
+struct low_latency_enable_struct __enable_lowlatency = { 0, };
+#endif
+
+#endif	/* LOWLATENCY_NEEDED */
+
diff -Nur c3000_pre/linux/kernel/sysctl.c c3000_test/linux/kernel/sysctl.c
--- c3000_pre/linux/kernel/sysctl.c	2004-08-21 09:49:14.000000000 +0900
+++ c3000_test/linux/kernel/sysctl.c	2004-12-20 22:56:21.000000000 +0900
@@ -271,6 +271,10 @@
 	{KERN_S390_USER_DEBUG_LOGGING,"userprocess_debug",
 	 &sysctl_userprocess_debug,sizeof(int),0644,NULL,&proc_dointvec},
 #endif
+#ifdef CONFIG_LOLAT_SYSCTL
+	{KERN_LOWLATENCY, "lowlatency", &enable_lowlatency, sizeof (int),
+	 0644, NULL, &proc_dointvec},
+#endif
 	{0}
 };
 
diff -Nur c3000_pre/linux/mm/filemap.c c3000_test/linux/mm/filemap.c
--- c3000_pre/linux/mm/filemap.c	2004-08-21 09:49:15.000000000 +0900
+++ c3000_test/linux/mm/filemap.c	2004-12-20 22:56:21.000000000 +0900
@@ -179,7 +179,9 @@
 {
 	struct list_head *head, *curr;
 	struct page * page;
+	int ll_count = 100;
 
+restart:
 	head = &inode->i_mapping->clean_pages;
 
 	spin_lock(&pagemap_lru_lock);
@@ -190,6 +192,14 @@
 		page = list_entry(curr, struct page, list);
 		curr = curr->next;
 
+		if (conditional_schedule_needed() && ll_count) {
+			spin_unlock(&pagecache_lock);
+			spin_unlock(&pagemap_lru_lock);
+			unconditional_schedule();
+			ll_count--;
+			goto restart;
+		}
+
 		/* We cannot invalidate something in dirty.. */
 		if (PageDirty(page))
 			continue;
@@ -253,8 +263,7 @@
 	page_cache_release(page);
 }
 
-static int FASTCALL(truncate_list_pages(struct list_head *, unsigned long, unsigned *));
-static int truncate_list_pages(struct list_head *head, unsigned long start, unsigned *partial)
+static int truncate_list_pages(struct list_head *head, unsigned long start, unsigned *partial, int *restart_count)
 {
 	struct list_head *curr;
 	struct page * page;
@@ -265,6 +274,17 @@
 	while (curr != head) {
 		unsigned long offset;
 
+		if (conditional_schedule_needed() && *restart_count) {
+			(*restart_count)--;
+			list_del(head);
+			list_add(head, curr);		/* Restart on this page */
+			spin_unlock(&pagecache_lock);
+			unconditional_schedule();
+			spin_lock(&pagecache_lock);
+			unlocked = 1;
+			goto restart;
+		}
+
 		page = list_entry(curr, struct page, list);
 		offset = page->index;
 
@@ -297,13 +317,11 @@
 			} else
  				wait_on_page(page);
 
-			page_cache_release(page);
-
-			if (current->need_resched) {
-				__set_current_state(TASK_RUNNING);
-				schedule();
+			if (LOWLATENCY_NEEDED) {
+				*restart_count = 4;	/* We made progress */
 			}
 
+			page_cache_release(page);
 			spin_lock(&pagecache_lock);
 			goto restart;
 		}
@@ -326,13 +344,14 @@
 {
 	unsigned long start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
+	int restart_count = 4;
 	int unlocked;
 
 	spin_lock(&pagecache_lock);
 	do {
-		unlocked = truncate_list_pages(&mapping->clean_pages, start, &partial);
-		unlocked |= truncate_list_pages(&mapping->dirty_pages, start, &partial);
-		unlocked |= truncate_list_pages(&mapping->locked_pages, start, &partial);
+		unlocked = truncate_list_pages(&mapping->clean_pages, start, &partial, &restart_count);
+		unlocked |= truncate_list_pages(&mapping->dirty_pages, start, &partial, &restart_count);
+		unlocked |= truncate_list_pages(&mapping->locked_pages, start, &partial, &restart_count);
 	} while (unlocked);
 	/* Traversed all three lists without dropping the lock */
 	spin_unlock(&pagecache_lock);
@@ -477,6 +496,7 @@
 
 		page_cache_get(page);
 		spin_unlock(&pagecache_lock);
+		conditional_schedule();		/* sys_msync() (only used by minixfs, udf) */
 		lock_page(page);
 
 		/* The buffers could have been free'd while we waited for the page lock */
@@ -563,12 +583,14 @@
 		list_del(&page->list);
 		list_add(&page->list, &mapping->locked_pages);
 
-		if (!PageDirty(page))
-			continue;
-
 		page_cache_get(page);
 		spin_unlock(&pagecache_lock);
 
+		conditional_schedule();		/* sys_msync() */
+
+		if (!PageDirty(page))
+			goto clean;
+
 		lock_page(page);
 
 		if (PageDirty(page)) {
@@ -579,7 +601,7 @@
 				ret = err;
 		} else
 			UnlockPage(page);
-
+clean:
 		page_cache_release(page);
 		spin_lock(&pagecache_lock);
 	}
@@ -597,7 +619,8 @@
 int filemap_fdatawait(struct address_space * mapping)
 {
 	int ret = 0;
-
+	DEFINE_RESCHED_COUNT;
+restart:
 	spin_lock(&pagecache_lock);
 
         while (!list_empty(&mapping->locked_pages)) {
@@ -606,6 +629,17 @@
 		list_del(&page->list);
 		list_add(&page->list, &mapping->clean_pages);
 
+		if (TEST_RESCHED_COUNT(32)) {
+			RESET_RESCHED_COUNT();
+			if (conditional_schedule_needed()) {
+				page_cache_get(page);
+				spin_unlock(&pagecache_lock);
+				unconditional_schedule();
+				page_cache_release(page);
+				goto restart;
+			}
+		}
+
 		if (!PageLocked(page))
 			continue;
 
@@ -706,8 +740,10 @@
 	spin_lock(&pagecache_lock);
 	page = __find_page_nolock(mapping, offset, *hash);
 	spin_unlock(&pagecache_lock);
-	if (page)
+	if (page) {
+		conditional_schedule();
 		return 0;
+	}
 
 	page = page_cache_alloc(mapping);
 	if (!page)
@@ -963,6 +999,11 @@
 	 * the hash-list needs a held write-lock.
 	 */
 repeat:
+	if (conditional_schedule_needed()) {
+		spin_unlock(&pagecache_lock);
+		unconditional_schedule();
+		spin_lock(&pagecache_lock);
+	}
 	page = __find_page_nolock(mapping, offset, hash);
 	if (page) {
 		page_cache_get(page);
@@ -1413,6 +1454,8 @@
 		page_cache_get(page);
 		spin_unlock(&pagecache_lock);
 
+		conditional_schedule();		/* sys_read() */
+
 		if (!Page_Uptodate(page))
 			goto page_not_up_to_date;
 		generic_file_readahead(reada_ok, filp, inode, page);
@@ -2114,6 +2157,12 @@
 		address += PAGE_SIZE;
 		pte++;
 	} while (address && (address < end));
+
+	if (conditional_schedule_needed()) {
+		spin_unlock(&vma->vm_mm->page_table_lock);
+		unconditional_schedule();		/* syncing large mapped files */
+		spin_lock(&vma->vm_mm->page_table_lock);
+	}
 	return error;
 }
 
@@ -2530,7 +2579,9 @@
 	if (vma->vm_flags & VM_LOCKED)
 		return -EINVAL;
 
-	zap_page_range(vma->vm_mm, start, end - start);
+	zap_page_range(vma->vm_mm, start, end - start,
+		       ZPR_COND_RESCHED);	/* sys_madvise(MADV_DONTNEED) */
+
 	return 0;
 }
 
@@ -3095,6 +3146,9 @@
 			goto sync_failure;
 		page_fault = __copy_from_user(kaddr+offset, buf, bytes);
 		flush_dcache_page(page);
+
+		conditional_schedule();
+
 		status = mapping->a_ops->commit_write(file, page, offset, offset+bytes);
 		if (page_fault)
 			goto fail_write;
diff -Nur c3000_pre/linux/mm/memory.c c3000_test/linux/mm/memory.c
--- c3000_pre/linux/mm/memory.c	2004-08-21 09:49:15.000000000 +0900
+++ c3000_test/linux/mm/memory.c	2004-12-20 22:56:21.000000000 +0900
@@ -370,7 +370,7 @@
 /*
  * remove user pages in a given range.
  */
-void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
+static void do_zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
 {
 	mmu_gather_t *tlb;
 	pgd_t * dir;
@@ -494,6 +494,10 @@
 			struct page *map;
 			while (!(map = follow_page(mm, start, write))) {
 				spin_unlock(&mm->page_table_lock);
+
+				/* Pinning down many physical pages (kiobufs, mlockall) */
+				conditional_schedule();
+
 				switch (handle_mm_fault(mm, vma, start, write)) {
 				case 1:
 					tsk->min_flt++;
@@ -655,6 +659,21 @@
 	iobuf->locked = 0;
 }
 
+#define MAX_ZAP_BYTES 256*PAGE_SIZE
+
+void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, int actions)
+{
+	while (size) {
+		unsigned long chunk = size;
+		if (actions & ZPR_COND_RESCHED && chunk > MAX_ZAP_BYTES)
+			chunk = MAX_ZAP_BYTES;
+		do_zap_page_range(mm, address, chunk);
+		if (actions & ZPR_COND_RESCHED)
+			conditional_schedule();
+		address += chunk;
+		size -= chunk;
+	}
+}
 
 /*
  * Lock down all of the pages of a kiovec for IO.
@@ -764,11 +783,18 @@
 	return 0;
 }
 
-static inline void zeromap_pte_range(pte_t * pte, unsigned long address,
-                                     unsigned long size, pgprot_t prot)
+static inline void zeromap_pte_range(struct mm_struct *mm, pte_t * pte,
+				unsigned long address, unsigned long size,
+				pgprot_t prot)
 {
 	unsigned long end;
 
+	if (conditional_schedule_needed()) {
+		spin_unlock(&mm->page_table_lock);
+		unconditional_schedule();		/* mmap(/dev/zero) */
+		spin_lock(&mm->page_table_lock);
+	}
+
 	address &= ~PMD_MASK;
 	end = address + size;
 	if (end > PMD_SIZE)
@@ -796,7 +822,7 @@
 		pte_t * pte = pte_alloc(mm, pmd, address);
 		if (!pte)
 			return -ENOMEM;
-		zeromap_pte_range(pte, address, end - address, prot);
+		zeromap_pte_range(mm, pte, address, end - address, prot);
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
@@ -1044,7 +1070,7 @@
 
 		/* mapping wholly truncated? */
 		if (mpnt->vm_pgoff >= pgoff) {
-			zap_page_range(mm, start, len);
+			zap_page_range(mm, start, len, 0);
 			continue;
 		}
 
@@ -1057,7 +1083,7 @@
 		/* Ok, partially affected.. */
 		start += diff << PAGE_SHIFT;
 		len = (len - diff) << PAGE_SHIFT;
-		zap_page_range(mm, start, len);
+		zap_page_range(mm, start, len, 0);
 	} while ((mpnt = mpnt->vm_next_share) != NULL);
 }
 
diff -Nur c3000_pre/linux/mm/mmap.c c3000_test/linux/mm/mmap.c
--- c3000_pre/linux/mm/mmap.c	2004-12-16 22:55:54.000000000 +0900
+++ c3000_test/linux/mm/mmap.c	2004-12-20 23:07:25.000000000 +0900
@@ -598,7 +598,7 @@
 	fput(file);
 
 	/* Undo any partial mapping done by a device driver. */
-	zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start);
+	zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start, 0);
 free_vma:
 	kmem_cache_free(vm_area_cachep, vma);
 	return error;
@@ -998,7 +998,7 @@
 		remove_shared_vm_struct(mpnt);
 		mm->map_count--;
 
-		zap_page_range(mm, st, size);
+		zap_page_range(mm, st, size, ZPR_COND_RESCHED);	/* sys_munmap() */
 
 		/*
 		 * Fix the mapping, and free the old area if it wasn't reused.
@@ -1158,7 +1158,7 @@
 		}
 		mm->map_count--;
 		remove_shared_vm_struct(mpnt);
-		zap_page_range(mm, start, size);
+		zap_page_range(mm, start, size, ZPR_COND_RESCHED);      /* sys_exit() */
 		if (mpnt->vm_file)
 			fput(mpnt->vm_file);
 		kmem_cache_free(vm_area_cachep, mpnt);
diff -Nur c3000_pre/linux/mm/mremap.c c3000_test/linux/mm/mremap.c
--- c3000_pre/linux/mm/mremap.c	2004-12-16 22:55:54.000000000 +0900
+++ c3000_test/linux/mm/mremap.c	2004-12-20 23:07:25.000000000 +0900
@@ -121,7 +121,7 @@
 	flush_cache_range(mm, new_addr, new_addr + len);
 	while ((offset += PAGE_SIZE) < len)
 		move_one_page(mm, new_addr + offset, old_addr + offset);
-	zap_page_range(mm, new_addr, len);
+	zap_page_range(mm, new_addr, len, 0);
 #ifdef __arm__
 	memc_update_mm(mm);
 #endif
diff -Nur c3000_pre/linux/mm/slab.c c3000_test/linux/mm/slab.c
--- c3000_pre/linux/mm/slab.c	2004-11-16 15:31:09.000000000 +0900
+++ c3000_test/linux/mm/slab.c	2004-12-20 22:56:21.000000000 +0900
@@ -940,6 +940,7 @@
 		list_del(&slabp->list);
 
 		spin_unlock_irq(&cachep->spinlock);
+		conditional_schedule();
 		kmem_slab_destroy(cachep, slabp);
 		ret++;
 		spin_lock_irq(&cachep->spinlock);
@@ -1853,6 +1854,7 @@
 		 */
 		spin_unlock_irq(&best_cachep->spinlock);
 		kmem_slab_destroy(best_cachep, slabp);
+		conditional_schedule();		/* try_to_free_pages() */
 		spin_lock_irq(&best_cachep->spinlock);
 	}
 	spin_unlock_irq(&best_cachep->spinlock);
diff -Nur c3000_pre/linux/mm/swapfile.c c3000_test/linux/mm/swapfile.c
--- c3000_pre/linux/mm/swapfile.c	2004-08-21 09:49:16.000000000 +0900
+++ c3000_test/linux/mm/swapfile.c	2004-12-20 22:56:21.000000000 +0900
@@ -819,7 +819,7 @@
 				len += sprintf(buf + len, "partition\t");
 
 			usedswap = 0;
-			for (j = 0; j < ptr->max; ++j)
+			for (j = 0; j < ptr->max; ++j) {
 				switch (ptr->swap_map[j]) {
 					case SWAP_MAP_BAD:
 					case 0:
@@ -827,6 +827,8 @@
 					default:
 						usedswap++;
 				}
+				conditional_schedule();
+			}
 			len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10), 
 				usedswap << (PAGE_SHIFT - 10), ptr->prio);
 		}
@@ -1120,6 +1122,11 @@
 		if (swap_info[i].flags != SWP_USED)
 			continue;
 		for (j = 0; j < swap_info[i].max; ++j) {
+			if (conditional_schedule_needed()) {
+				swap_list_unlock();
+				conditional_schedule();
+				swap_list_lock();
+			}
 			switch (swap_info[i].swap_map[j]) {
 				case 0:
 				case SWAP_MAP_BAD:
diff -Nur c3000_pre/linux/mm/vmscan.c c3000_test/linux/mm/vmscan.c
--- c3000_pre/linux/mm/vmscan.c	2004-08-21 09:49:16.000000000 +0900
+++ c3000_test/linux/mm/vmscan.c	2004-12-20 22:56:21.000000000 +0900
@@ -173,6 +173,7 @@
 {
 	pte_t * pte;
 	unsigned long pmd_end;
+	DEFINE_RESCHED_COUNT;
 
 	if (pmd_none(*dir))
 		return count;
@@ -198,11 +199,17 @@
 					address += PAGE_SIZE;
 					break;
 				}
+				if (TEST_RESCHED_COUNT(4)) {
+					if (conditional_schedule_needed())
+						goto out;
+					RESET_RESCHED_COUNT();
+				}
 			}
 		}
 		address += PAGE_SIZE;
 		pte++;
 	} while (address && (address < end));
+out:
 	mm->swap_address = address;
 	return count;
 }
@@ -231,6 +238,8 @@
 		count = swap_out_pmd(mm, vma, pmd, address, end, count, classzone);
 		if (!count)
 			break;
+		if (conditional_schedule_needed())
+			return count;
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
@@ -255,6 +264,8 @@
 		count = swap_out_pgd(mm, vma, pgdir, address, end, count, classzone);
 		if (!count)
 			break;
+		if (conditional_schedule_needed())
+			return count;
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		pgdir++;
 	} while (address && (address < end));
@@ -276,6 +287,7 @@
 	 * Find the proper vm-area after freezing the vma chain 
 	 * and ptes.
 	 */
+continue_scan:
 	spin_lock(&mm->page_table_lock);
 	address = mm->swap_address;
 	if (address == TASK_SIZE || swap_mm != mm) {
@@ -293,6 +305,12 @@
 			vma = vma->vm_next;
 			if (!vma)
 				break;
+			if (conditional_schedule_needed()) {	/* Scanning a large vma */
+				spin_unlock(&mm->page_table_lock);
+				unconditional_schedule();
+				/* Continue from where we left off */
+				goto continue_scan;
+			}
 			if (!count)
 				goto out_unlock;
 			address = vma->vm_start;
diff -Nur c3000_pre/linux/net/core/iovec.c c3000_test/linux/net/core/iovec.c
--- c3000_pre/linux/net/core/iovec.c	2004-08-21 11:23:13.000000000 +0900
+++ c3000_test/linux/net/core/iovec.c	2004-12-20 22:56:21.000000000 +0900
@@ -88,7 +88,7 @@
 		if(iov->iov_len)
 		{
 			int copy = min_t(unsigned int, iov->iov_len, len);
-			if (copy_to_user(iov->iov_base, kdata, copy))
+			if (ll_copy_to_user(iov->iov_base, kdata, copy))
 				goto out;
 			kdata+=copy;
 			len-=copy;
diff -Nur c3000_pre/linux/net/ipv4/tcp_minisocks.c c3000_test/linux/net/ipv4/tcp_minisocks.c
--- c3000_pre/linux/net/ipv4/tcp_minisocks.c	2004-08-21 09:49:22.000000000 +0900
+++ c3000_test/linux/net/ipv4/tcp_minisocks.c	2004-12-20 22:56:21.000000000 +0900
@@ -440,6 +440,9 @@
 {
 	struct tcp_tw_bucket *tw;
 	int killed = 0;
+#if LOWLATENCY_NEEDED
+	int max_killed = 0;
+#endif
 
 	/* NOTE: compare this to previous version where lock
 	 * was released after detaching chain. It was racy,
@@ -453,6 +456,13 @@
 		goto out;
 
 	while((tw = tcp_tw_death_row[tcp_tw_death_row_slot]) != NULL) {
+#if LOWLATENCY_NEEDED
+		/* This loop takes ~6 usecs per iteration. */
+		if (killed > 100) {
+			max_killed = 1;
+			break;
+		}
+#endif
 		tcp_tw_death_row[tcp_tw_death_row_slot] = tw->next_death;
 		tw->pprev_death = NULL;
 		spin_unlock(&tw_death_lock);
@@ -463,12 +473,24 @@
 		killed++;
 
 		spin_lock(&tw_death_lock);
+
+	}
+
+#if LOWLATENCY_NEEDED
+	if (max_killed) {	/* More to do: do it soon */
+		mod_timer(&tcp_tw_timer, jiffies+2);
+		tcp_tw_count -= killed;
+	}
+	else
+#endif
+	{
+		tcp_tw_death_row_slot =
+			((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
+
+		if ((tcp_tw_count -= killed) != 0)
+			mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
 	}
-	tcp_tw_death_row_slot =
-		((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
 
-	if ((tcp_tw_count -= killed) != 0)
-		mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
 	net_statistics[smp_processor_id()*2].TimeWaited += killed;
 out:
 	spin_unlock(&tw_death_lock);