Diffstat (limited to 'packages/linux/openzaurus-pxa27x-2.4.20-rmk2-embedix20050602/P14-lowlatency_041221.patch')
-rw-r--r-- packages/linux/openzaurus-pxa27x-2.4.20-rmk2-embedix20050602/P14-lowlatency_041221.patch | 1677
1 file changed, 1677 insertions, 0 deletions
diff --git a/packages/linux/openzaurus-pxa27x-2.4.20-rmk2-embedix20050602/P14-lowlatency_041221.patch b/packages/linux/openzaurus-pxa27x-2.4.20-rmk2-embedix20050602/P14-lowlatency_041221.patch
new file mode 100644
index 0000000000..edd63fb13b
--- /dev/null
+++ b/packages/linux/openzaurus-pxa27x-2.4.20-rmk2-embedix20050602/P14-lowlatency_041221.patch
@@ -0,0 +1,1677 @@
+diff -Nur c3000_pre/linux/arch/arm/config.in c3000_test/linux/arch/arm/config.in
+--- c3000_pre/linux/arch/arm/config.in 2004-12-16 22:55:34.000000000 +0900
++++ c3000_test/linux/arch/arm/config.in 2004-12-20 23:23:28.000000000 +0900
+@@ -574,6 +574,8 @@
+ fi
+ fi
+ dep_bool 'Preemptible Kernel support' CONFIG_PREEMPT $CONFIG_CPU_32
++bool 'Low latency scheduling' CONFIG_LOLAT
++dep_bool 'Control low latency with sysctl' CONFIG_LOLAT_SYSCTL $CONFIG_LOLAT
+
+ endmenu
+
+diff -Nur c3000_pre/linux/arch/i386/config.in c3000_test/linux/arch/i386/config.in
+--- c3000_pre/linux/arch/i386/config.in 2004-08-21 09:48:09.000000000 +0900
++++ c3000_test/linux/arch/i386/config.in 2004-12-20 22:56:21.000000000 +0900
+@@ -25,6 +25,9 @@
+
+ mainmenu_option next_comment
+ comment 'Processor type and features'
++bool 'Low latency scheduling' CONFIG_LOLAT
++dep_bool 'Control low latency with sysctl' CONFIG_LOLAT_SYSCTL $CONFIG_LOLAT
++
+ choice 'Processor family' \
+ "386 CONFIG_M386 \
+ 486 CONFIG_M486 \
+diff -Nur c3000_pre/linux/drivers/block/ll_rw_blk.c c3000_test/linux/drivers/block/ll_rw_blk.c
+--- c3000_pre/linux/drivers/block/ll_rw_blk.c 2004-08-21 09:48:24.000000000 +0900
++++ c3000_test/linux/drivers/block/ll_rw_blk.c 2004-12-20 22:56:21.000000000 +0900
+@@ -1211,6 +1211,7 @@
+ kstat.pgpgin += count;
+ break;
+ }
++ conditional_schedule();
+ }
+
+ /**
+diff -Nur c3000_pre/linux/drivers/char/mem.c c3000_test/linux/drivers/char/mem.c
+--- c3000_pre/linux/drivers/char/mem.c 2004-08-21 09:48:25.000000000 +0900
++++ c3000_test/linux/drivers/char/mem.c 2004-12-20 22:56:21.000000000 +0900
+@@ -422,7 +422,7 @@
+ if (count > size)
+ count = size;
+
+- zap_page_range(mm, addr, count);
++ zap_page_range(mm, addr, count, 0);
+ zeromap_page_range(addr, count, PAGE_COPY);
+
+ size -= count;
+diff -Nur c3000_pre/linux/drivers/char/random.c c3000_test/linux/drivers/char/random.c
+--- c3000_pre/linux/drivers/char/random.c 2004-08-21 09:48:25.000000000 +0900
++++ c3000_test/linux/drivers/char/random.c 2004-12-20 22:56:21.000000000 +0900
+@@ -1374,6 +1374,11 @@
+ buf += i;
+ ret += i;
+ add_timer_randomness(&extract_timer_state, nbytes);
++#if LOWLATENCY_NEEDED
++ /* This can happen in softirq's, but that's what we want */
++ if (conditional_schedule_needed())
++ break;
++#endif
+ }
+
+ /* Wipe data just returned from memory */
+diff -Nur c3000_pre/linux/drivers/i2c/i2c-core.c c3000_test/linux/drivers/i2c/i2c-core.c
+--- c3000_pre/linux/drivers/i2c/i2c-core.c 2004-08-21 09:48:34.000000000 +0900
++++ c3000_test/linux/drivers/i2c/i2c-core.c 2004-12-20 22:56:21.000000000 +0900
+@@ -761,6 +761,8 @@
+ {
+ int ret;
+
++ conditional_schedule();
++
+ if (adap->algo->master_xfer) {
+ DEB2(printk("i2c-core.o: master_xfer: %s with %d msgs.\n",
+ adap->name,num));
+@@ -783,6 +785,8 @@
+ struct i2c_adapter *adap=client->adapter;
+ struct i2c_msg msg;
+
++ conditional_schedule();
++
+ if (client->adapter->algo->master_xfer) {
+ msg.addr = client->addr;
+ msg.flags = client->flags & I2C_M_TEN;
+@@ -812,6 +816,9 @@
+ struct i2c_adapter *adap=client->adapter;
+ struct i2c_msg msg;
+ int ret;
++
++ conditional_schedule();
++
+ if (client->adapter->algo->master_xfer) {
+ msg.addr = client->addr;
+ msg.flags = client->flags & I2C_M_TEN;
+diff -Nur c3000_pre/linux/fs/buffer.c c3000_test/linux/fs/buffer.c
+--- c3000_pre/linux/fs/buffer.c 2004-08-21 09:48:58.000000000 +0900
++++ c3000_test/linux/fs/buffer.c 2004-12-20 22:56:21.000000000 +0900
+@@ -216,8 +216,10 @@
+
+ if (dev != NODEV && bh->b_dev != dev)
+ continue;
+- if (test_and_set_bit(BH_Lock, &bh->b_state))
++ if (test_and_set_bit(BH_Lock, &bh->b_state)) {
++ __refile_buffer(bh);
+ continue;
++ }
+ if (atomic_set_buffer_clean(bh)) {
+ __refile_buffer(bh);
+ get_bh(bh);
+@@ -227,6 +229,7 @@
+
+ spin_unlock(&lru_list_lock);
+ write_locked_buffers(array, count);
++ conditional_schedule();
+ return -EAGAIN;
+ }
+ unlock_buffer(bh);
+@@ -260,12 +263,19 @@
+ struct buffer_head * next;
+ int nr;
+
+- next = lru_list[index];
+ nr = nr_buffers_type[index];
++repeat:
++ next = lru_list[index];
+ while (next && --nr >= 0) {
+ struct buffer_head *bh = next;
+ next = bh->b_next_free;
+
++ if (conditional_schedule_needed()) {
++ spin_unlock(&lru_list_lock);
++ unconditional_schedule();
++ spin_lock(&lru_list_lock);
++ goto repeat;
++ }
+ if (!buffer_locked(bh)) {
+ if (refile)
+ __refile_buffer(bh);
+@@ -273,7 +283,6 @@
+ }
+ if (dev != NODEV && bh->b_dev != dev)
+ continue;
+-
+ get_bh(bh);
+ spin_unlock(&lru_list_lock);
+ wait_on_buffer (bh);
+@@ -306,6 +315,15 @@
+ {
+ int err = 0;
+
++#if LOWLATENCY_NEEDED
++ /*
++ * syncing devA when there are lots of buffers dirty against
++ * devB is expensive.
++ */
++ if (enable_lowlatency)
++ dev = NODEV;
++#endif
++
+ /* One pass for no-wait, three for wait:
+ * 0) write out all dirty, unlocked buffers;
+ * 1) wait for all dirty locked buffers;
+@@ -697,6 +715,16 @@
+ /* Not hashed? */
+ if (!bh->b_pprev)
+ continue;
++
++ if (conditional_schedule_needed()) {
++ get_bh(bh);
++ spin_unlock(&lru_list_lock);
++ unconditional_schedule();
++ spin_lock(&lru_list_lock);
++ put_bh(bh);
++ slept = 1;
++ }
++
+ if (buffer_locked(bh)) {
+ get_bh(bh);
+ spin_unlock(&lru_list_lock);
+@@ -848,12 +876,19 @@
+ struct buffer_head *bh;
+ struct inode tmp;
+ int err = 0, err2;
+-
++ DEFINE_RESCHED_COUNT;
++
+ INIT_LIST_HEAD(&tmp.i_dirty_buffers);
+-
++
++repeat:
+ spin_lock(&lru_list_lock);
+
+ while (!list_empty(list)) {
++ if (conditional_schedule_needed()) {
++ spin_unlock(&lru_list_lock);
++ unconditional_schedule();
++ goto repeat;
++ }
+ bh = BH_ENTRY(list->next);
+ list_del(&bh->b_inode_buffers);
+ if (!buffer_dirty(bh) && !buffer_locked(bh))
+@@ -878,8 +913,18 @@
+ spin_lock(&lru_list_lock);
+ }
+ }
++ if (TEST_RESCHED_COUNT(32)) {
++ RESET_RESCHED_COUNT();
++ if (conditional_schedule_needed()) {
++ spin_unlock(&lru_list_lock);
++ unconditional_schedule(); /* Syncing many dirty buffers */
++ spin_lock(&lru_list_lock);
++ }
++ }
+ }
+
++ RESET_RESCHED_COUNT();
++
+ while (!list_empty(&tmp.i_dirty_buffers)) {
+ bh = BH_ENTRY(tmp.i_dirty_buffers.prev);
+ remove_inode_queue(bh);
+@@ -889,6 +934,7 @@
+ if (!buffer_uptodate(bh))
+ err = -EIO;
+ brelse(bh);
++ conditional_schedule();
+ spin_lock(&lru_list_lock);
+ }
+
+@@ -916,11 +962,20 @@
+ struct buffer_head *bh;
+ struct list_head *p;
+ int err = 0;
++ DEFINE_RESCHED_COUNT;
+
++repeat:
++ conditional_schedule();
+ spin_lock(&lru_list_lock);
+
+- repeat:
+ list_for_each_prev(p, list) {
++ if (TEST_RESCHED_COUNT(32)) {
++ RESET_RESCHED_COUNT();
++ if (conditional_schedule_needed()) {
++ spin_unlock(&lru_list_lock);
++ goto repeat;
++ }
++ }
+ bh = BH_ENTRY(p);
+ if (buffer_locked(bh)) {
+ get_bh(bh);
+@@ -929,7 +984,6 @@
+ if (!buffer_uptodate(bh))
+ err = -EIO;
+ brelse(bh);
+- spin_lock(&lru_list_lock);
+ goto repeat;
+ }
+ }
+@@ -946,12 +1000,24 @@
+ void invalidate_inode_buffers(struct inode *inode)
+ {
+ struct list_head * entry;
+-
++
++repeat:
++ conditional_schedule();
+ spin_lock(&lru_list_lock);
+- while ((entry = inode->i_dirty_buffers.next) != &inode->i_dirty_buffers)
++ while ((entry = inode->i_dirty_buffers.next) != &inode->i_dirty_buffers) {
++ if (conditional_schedule_needed()) {
++ spin_unlock(&lru_list_lock);
++ goto repeat;
++ }
+ remove_inode_queue(BH_ENTRY(entry));
+- while ((entry = inode->i_dirty_data_buffers.next) != &inode->i_dirty_data_buffers)
++ }
++ while ((entry = inode->i_dirty_data_buffers.next) != &inode->i_dirty_data_buffers) {
++ if (conditional_schedule_needed()) {
++ spin_unlock(&lru_list_lock);
++ goto repeat;
++ }
+ remove_inode_queue(BH_ENTRY(entry));
++ }
+ spin_unlock(&lru_list_lock);
+ }
+
+@@ -974,6 +1040,7 @@
+ bh = get_hash_table(dev, block, size);
+ if (bh) {
+ touch_buffer(bh);
++ conditional_schedule();
+ return bh;
+ }
+
+@@ -2831,7 +2898,7 @@
+
+ DECLARE_WAIT_QUEUE_HEAD(bdflush_wait);
+
+-void wakeup_bdflush(void)
++void wakeup_bdflush(void)
+ {
+ wake_up_interruptible(&bdflush_wait);
+ }
+diff -Nur c3000_pre/linux/fs/dcache.c c3000_test/linux/fs/dcache.c
+--- c3000_pre/linux/fs/dcache.c 2004-08-21 09:48:58.000000000 +0900
++++ c3000_test/linux/fs/dcache.c 2004-12-20 22:56:21.000000000 +0900
+@@ -320,11 +320,23 @@
+
+ void prune_dcache(int count)
+ {
++ DEFINE_RESCHED_COUNT;
++
++redo:
+ spin_lock(&dcache_lock);
+ for (;;) {
+ struct dentry *dentry;
+ struct list_head *tmp;
+
++ if (TEST_RESCHED_COUNT(100)) {
++ RESET_RESCHED_COUNT();
++ if (conditional_schedule_needed()) {
++ spin_unlock(&dcache_lock);
++ unconditional_schedule();
++ goto redo;
++ }
++ }
++
+ tmp = dentry_unused.prev;
+
+ if (tmp == &dentry_unused)
+@@ -479,6 +491,7 @@
+ struct dentry *this_parent = parent;
+ struct list_head *next;
+ int found = 0;
++ DEFINE_RESCHED_COUNT;
+
+ spin_lock(&dcache_lock);
+ repeat:
+@@ -493,6 +506,13 @@
+ list_add(&dentry->d_lru, dentry_unused.prev);
+ found++;
+ }
++
++ if (TEST_RESCHED_COUNT(500) && found > 10) {
++ if (conditional_schedule_needed()) /* Typically sys_rmdir() */
++ goto out;
++ RESET_RESCHED_COUNT();
++ }
++
+ /*
+ * Descend a level if the d_subdirs list is non-empty.
+ */
+@@ -517,6 +537,7 @@
+ #endif
+ goto resume;
+ }
++out:
+ spin_unlock(&dcache_lock);
+ return found;
+ }
+@@ -532,8 +553,10 @@
+ {
+ int found;
+
+- while ((found = select_parent(parent)) != 0)
++ while ((found = select_parent(parent)) != 0) {
+ prune_dcache(found);
++ conditional_schedule(); /* Typically sys_rmdir() */
++ }
+ }
+
+ /*
+diff -Nur c3000_pre/linux/fs/exec.c c3000_test/linux/fs/exec.c
+--- c3000_pre/linux/fs/exec.c 2004-08-21 09:48:58.000000000 +0900
++++ c3000_test/linux/fs/exec.c 2004-12-20 22:56:21.000000000 +0900
+@@ -249,7 +249,7 @@
+ memset(kaddr+offset+len, 0,
+ PAGE_SIZE-offset-len);
+ }
+- err = copy_from_user(kaddr+offset, str, bytes_to_copy);
++ err = ll_copy_from_user(kaddr+offset, str, bytes_to_copy);
+ if (err) {
+ ret = -EFAULT;
+ goto out;
+diff -Nur c3000_pre/linux/fs/ext2/dir.c c3000_test/linux/fs/ext2/dir.c
+--- c3000_pre/linux/fs/ext2/dir.c 2004-08-21 09:48:59.000000000 +0900
++++ c3000_test/linux/fs/ext2/dir.c 2004-12-20 22:56:21.000000000 +0900
+@@ -153,6 +153,7 @@
+ struct address_space *mapping = dir->i_mapping;
+ struct page *page = read_cache_page(mapping, n,
+ (filler_t*)mapping->a_ops->readpage, NULL);
++ conditional_schedule(); /* Scanning large directories */
+ if (!IS_ERR(page)) {
+ wait_on_page(page);
+ kmap(page);
+diff -Nur c3000_pre/linux/fs/ext2/inode.c c3000_test/linux/fs/ext2/inode.c
+--- c3000_pre/linux/fs/ext2/inode.c 2004-08-21 09:48:59.000000000 +0900
++++ c3000_test/linux/fs/ext2/inode.c 2004-12-20 22:56:21.000000000 +0900
+@@ -715,8 +715,13 @@
+ {
+ unsigned long block_to_free = 0, count = 0;
+ unsigned long nr;
++ DEFINE_RESCHED_COUNT;
+
+ for ( ; p < q ; p++) {
++ if (TEST_RESCHED_COUNT(32)) {
++ RESET_RESCHED_COUNT();
++ conditional_schedule();
++ }
+ nr = le32_to_cpu(*p);
+ if (nr) {
+ *p = 0;
+@@ -759,6 +764,7 @@
+ if (depth--) {
+ int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+ for ( ; p < q ; p++) {
++ conditional_schedule(); /* Deleting large files */
+ nr = le32_to_cpu(*p);
+ if (!nr)
+ continue;
+diff -Nur c3000_pre/linux/fs/ext3/balloc.c c3000_test/linux/fs/ext3/balloc.c
+--- c3000_pre/linux/fs/ext3/balloc.c 2004-08-21 09:48:59.000000000 +0900
++++ c3000_test/linux/fs/ext3/balloc.c 2004-12-20 22:56:21.000000000 +0900
+@@ -363,6 +363,9 @@
+ }
+ }
+ #endif
++ /* superblock lock is held, so this is safe */
++ conditional_schedule();
++
+ BUFFER_TRACE(bitmap_bh, "clear bit");
+ if (!ext3_clear_bit (bit + i, bitmap_bh->b_data)) {
+ ext3_error (sb, __FUNCTION__,
+diff -Nur c3000_pre/linux/fs/ext3/inode.c c3000_test/linux/fs/ext3/inode.c
+--- c3000_pre/linux/fs/ext3/inode.c 2004-08-21 09:48:59.000000000 +0900
++++ c3000_test/linux/fs/ext3/inode.c 2004-12-20 22:56:21.000000000 +0900
+@@ -902,6 +902,8 @@
+
+ prev_blocks = inode->i_blocks;
+
++ conditional_schedule(); /* Reading large directories */
++
+ bh = ext3_getblk (handle, inode, block, create, err);
+ if (!bh)
+ return bh;
+@@ -1605,6 +1607,7 @@
+ */
+ for (p = first; p < last; p++) {
+ u32 nr = le32_to_cpu(*p);
++ conditional_schedule();
+ if (nr) {
+ struct buffer_head *bh;
+
+@@ -1659,6 +1662,7 @@
+ }
+
+ for (p = first; p < last; p++) {
++ conditional_schedule();
+ nr = le32_to_cpu(*p);
+ if (nr) {
+ /* accumulate blocks to free if they're contiguous */
+diff -Nur c3000_pre/linux/fs/ext3/namei.c c3000_test/linux/fs/ext3/namei.c
+--- c3000_pre/linux/fs/ext3/namei.c 2004-08-21 09:48:59.000000000 +0900
++++ c3000_test/linux/fs/ext3/namei.c 2004-12-20 22:56:21.000000000 +0900
+@@ -157,6 +157,7 @@
+ if ((bh = bh_use[ra_ptr++]) == NULL)
+ goto next;
+ wait_on_buffer(bh);
++ conditional_schedule();
+ if (!buffer_uptodate(bh)) {
+ /* read error, skip block & hope for the best */
+ brelse(bh);
+diff -Nur c3000_pre/linux/fs/inode.c c3000_test/linux/fs/inode.c
+--- c3000_pre/linux/fs/inode.c 2004-08-21 09:48:58.000000000 +0900
++++ c3000_test/linux/fs/inode.c 2004-12-20 23:00:06.000000000 +0900
+@@ -251,6 +251,8 @@
+
+ filemap_fdatawait(inode->i_mapping);
+
++ conditional_schedule();
++
+ spin_lock(&inode_lock);
+ inode->i_state &= ~I_LOCK;
+ if (!(inode->i_state & I_FREEING)) {
+@@ -561,6 +563,7 @@
+
+ while ((inode_entry = head->next) != head)
+ {
++ conditional_schedule();
+ list_del(inode_entry);
+
+ inode = list_entry(inode_entry, struct inode, i_list);
+@@ -589,9 +592,22 @@
+ if (tmp == head)
+ break;
+ inode = list_entry(tmp, struct inode, i_list);
++
++ if (conditional_schedule_needed()) {
++ atomic_inc(&inode->i_count);
++ spin_unlock(&inode_lock);
++ unconditional_schedule();
++ spin_lock(&inode_lock);
++ atomic_dec(&inode->i_count);
++ }
++
+ if (inode->i_sb != sb)
+ continue;
++ atomic_inc(&inode->i_count);
++ spin_unlock(&inode_lock);
+ invalidate_inode_buffers(inode);
++ spin_lock(&inode_lock);
++ atomic_dec(&inode->i_count);
+ if (!atomic_read(&inode->i_count)) {
+ list_del_init(&inode->i_hash);
+ list_del(&inode->i_list);
+@@ -896,6 +912,8 @@
+ if (inode) {
+ struct inode * old;
+
++ conditional_schedule(); /* sync_old_buffers */
++
+ spin_lock(&inode_lock);
+ /* We released the lock, so.. */
+ old = find_inode(sb, ino, head, find_actor, opaque);
+@@ -1313,18 +1331,32 @@
+ int request=goal;
+ int loop=0;
+ #endif
++ int nr_to_scan = inodes_stat.nr_unused;
+
++resume:
+ spin_lock(&inode_lock);
+
+ count = 0;
+ entry = inode_unused.prev;
+- while (entry != &inode_unused)
+- {
++ while (entry != &inode_unused && nr_to_scan--) {
+ struct list_head *tmp = entry;
+
+ #ifdef JFFS2_INODE_DEBUG
+ loop++;
+ #endif
++ if (conditional_schedule_needed()) {
++ /*
++ * Need to drop the lock. Reposition
++ * the list head so we start here next time.
++ * This can corrupt the LRU nature of the
++ * unused list, but this isn't very important.
++ */
++ list_del(&inode_unused);
++ list_add(&inode_unused, entry);
++ spin_unlock(&inode_lock);
++ unconditional_schedule();
++ goto resume;
++ }
+ entry = entry->prev;
+ inode = INODE(tmp);
+ if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK))
+diff -Nur c3000_pre/linux/fs/jbd/checkpoint.c c3000_test/linux/fs/jbd/checkpoint.c
+--- c3000_pre/linux/fs/jbd/checkpoint.c 2004-08-21 09:48:59.000000000 +0900
++++ c3000_test/linux/fs/jbd/checkpoint.c 2004-12-20 22:56:21.000000000 +0900
+@@ -431,7 +431,11 @@
+ {
+ transaction_t *transaction, *last_transaction, *next_transaction;
+ int ret = 0;
++ int ll_retries = 4; /* lowlatency addition */
+
++restart:
++ if (ll_retries-- == 0)
++ goto out;
+ transaction = journal->j_checkpoint_transactions;
+ if (transaction == 0)
+ goto out;
+@@ -451,6 +455,12 @@
+ jh = next_jh;
+ next_jh = jh->b_cpnext;
+ ret += __try_to_free_cp_buf(jh);
++ if (conditional_schedule_needed()) {
++ spin_unlock(&journal_datalist_lock);
++ unconditional_schedule();
++ spin_lock(&journal_datalist_lock);
++ goto restart;
++ }
+ } while (jh != last_jh);
+ }
+ } while (transaction != last_transaction);
+diff -Nur c3000_pre/linux/fs/jbd/commit.c c3000_test/linux/fs/jbd/commit.c
+--- c3000_pre/linux/fs/jbd/commit.c 2004-08-21 09:48:59.000000000 +0900
++++ c3000_test/linux/fs/jbd/commit.c 2004-12-20 22:56:21.000000000 +0900
+@@ -212,6 +212,16 @@
+ __journal_remove_journal_head(bh);
+ refile_buffer(bh);
+ __brelse(bh);
++ if (conditional_schedule_needed()) {
++ if (commit_transaction->t_sync_datalist)
++ commit_transaction->t_sync_datalist =
++ next_jh;
++ if (bufs)
++ break;
++ spin_unlock(&journal_datalist_lock);
++ unconditional_schedule();
++ goto write_out_data;
++ }
+ }
+ }
+ if (bufs == ARRAY_SIZE(wbuf)) {
+@@ -235,8 +245,7 @@
+ journal_brelse_array(wbuf, bufs);
+ lock_journal(journal);
+ spin_lock(&journal_datalist_lock);
+- if (bufs)
+- goto write_out_data_locked;
++ goto write_out_data_locked;
+ }
+
+ /*
+@@ -272,6 +281,14 @@
+ */
+ while ((jh = commit_transaction->t_async_datalist)) {
+ struct buffer_head *bh = jh2bh(jh);
++ if (conditional_schedule_needed()) {
++ spin_unlock(&journal_datalist_lock);
++ unlock_journal(journal);
++ unconditional_schedule();
++ lock_journal(journal);
++ spin_lock(&journal_datalist_lock);
++ continue; /* List may have changed */
++ }
+ if (buffer_locked(bh)) {
+ spin_unlock(&journal_datalist_lock);
+ unlock_journal(journal);
+@@ -486,6 +503,8 @@
+ wait_for_iobuf:
+ while (commit_transaction->t_iobuf_list != NULL) {
+ struct buffer_head *bh;
++
++ conditional_schedule();
+ jh = commit_transaction->t_iobuf_list->b_tprev;
+ bh = jh2bh(jh);
+ if (buffer_locked(bh)) {
+@@ -644,6 +663,8 @@
+ transaction_t *cp_transaction;
+ struct buffer_head *bh;
+
++ conditional_schedule(); /* journal is locked */
++
+ jh = commit_transaction->t_forget;
+ J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
+ jh->b_transaction == journal->j_running_transaction);
+diff -Nur c3000_pre/linux/fs/proc/array.c c3000_test/linux/fs/proc/array.c
+--- c3000_pre/linux/fs/proc/array.c 2004-08-21 09:49:01.000000000 +0900
++++ c3000_test/linux/fs/proc/array.c 2004-12-20 22:56:21.000000000 +0900
+@@ -498,9 +498,11 @@
+ if (end > PMD_SIZE)
+ end = PMD_SIZE;
+ do {
+- pte_t page = *pte;
++ pte_t page;
+ struct page *ptpage;
+
++ conditional_schedule(); /* For `top' and `ps' */
++ page = *pte;
+ address += PAGE_SIZE;
+ pte++;
+ if (pte_none(page))
+diff -Nur c3000_pre/linux/fs/proc/generic.c c3000_test/linux/fs/proc/generic.c
+--- c3000_pre/linux/fs/proc/generic.c 2004-08-21 09:49:01.000000000 +0900
++++ c3000_test/linux/fs/proc/generic.c 2004-12-20 22:56:21.000000000 +0900
+@@ -98,6 +98,8 @@
+ retval = n;
+ break;
+ }
++
++ conditional_schedule(); /* Some /proc files are large */
+
+ /* This is a hack to allow mangling of file pos independent
+ * of actual bytes read. Simply place the data at page,
+diff -Nur c3000_pre/linux/fs/reiserfs/buffer2.c c3000_test/linux/fs/reiserfs/buffer2.c
+--- c3000_pre/linux/fs/reiserfs/buffer2.c 2004-08-21 09:49:01.000000000 +0900
++++ c3000_test/linux/fs/reiserfs/buffer2.c 2004-12-20 22:56:21.000000000 +0900
+@@ -54,6 +54,7 @@
+ PROC_EXP( unsigned int ctx_switches = kstat.context_swtch );
+
+ result = bread (super -> s_dev, n_block, n_size);
++ conditional_schedule();
+ PROC_INFO_INC( super, breads );
+ PROC_EXP( if( kstat.context_swtch != ctx_switches )
+ PROC_INFO_INC( super, bread_miss ) );
+diff -Nur c3000_pre/linux/fs/reiserfs/journal.c c3000_test/linux/fs/reiserfs/journal.c
+--- c3000_pre/linux/fs/reiserfs/journal.c 2004-08-21 09:49:01.000000000 +0900
++++ c3000_test/linux/fs/reiserfs/journal.c 2004-12-20 22:56:21.000000000 +0900
+@@ -573,6 +573,7 @@
+ /* lock the current transaction */
+ inline static void lock_journal(struct super_block *p_s_sb) {
+ PROC_INFO_INC( p_s_sb, journal.lock_journal );
++ conditional_schedule();
+ while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_wlock)) > 0) {
+ PROC_INFO_INC( p_s_sb, journal.lock_journal_wait );
+ sleep_on(&(SB_JOURNAL(p_s_sb)->j_wait)) ;
+@@ -703,6 +704,7 @@
+ mark_buffer_dirty(tbh) ;
+ }
+ ll_rw_block(WRITE, 1, &tbh) ;
++ conditional_schedule();
+ count++ ;
+ put_bh(tbh) ; /* once for our get_hash */
+ }
+@@ -832,6 +834,7 @@
+ set_bit(BH_Dirty, &(SB_JOURNAL(p_s_sb)->j_header_bh->b_state)) ;
+ ll_rw_block(WRITE, 1, &(SB_JOURNAL(p_s_sb)->j_header_bh)) ;
+ wait_on_buffer((SB_JOURNAL(p_s_sb)->j_header_bh)) ;
++ conditional_schedule();
+ if (!buffer_uptodate(SB_JOURNAL(p_s_sb)->j_header_bh)) {
+ printk( "reiserfs: journal-837: IO error during journal replay\n" );
+ return -EIO ;
+@@ -2125,6 +2128,7 @@
+ }
+
+ int journal_begin(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, unsigned long nblocks) {
++ conditional_schedule();
+ return do_journal_begin_r(th, p_s_sb, nblocks, 0) ;
+ }
+
+@@ -2265,6 +2269,7 @@
+ }
+
+ int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) {
++ conditional_schedule();
+ return do_journal_end(th, p_s_sb, nblocks, 0) ;
+ }
+
+@@ -2716,6 +2721,7 @@
+ RFALSE( buffer_locked(bh) && cur_tb != NULL,
+ "waiting while do_balance was running\n") ;
+ wait_on_buffer(bh) ;
++ conditional_schedule();
+ }
+ PROC_INFO_INC( p_s_sb, journal.prepare_retry );
+ retry_count++ ;
+@@ -2888,6 +2894,7 @@
+ /* copy all the real blocks into log area. dirty log blocks */
+ if (test_bit(BH_JDirty, &cn->bh->b_state)) {
+ struct buffer_head *tmp_bh ;
++ conditional_schedule();
+ tmp_bh = sb_getblk(p_s_sb, reiserfs_get_journal_block(p_s_sb) +
+ ((cur_write_start + jindex) % JOURNAL_BLOCK_COUNT)) ;
+ mark_buffer_uptodate(tmp_bh, 1) ;
+diff -Nur c3000_pre/linux/fs/reiserfs/stree.c c3000_test/linux/fs/reiserfs/stree.c
+--- c3000_pre/linux/fs/reiserfs/stree.c 2004-08-21 09:49:01.000000000 +0900
++++ c3000_test/linux/fs/reiserfs/stree.c 2004-12-20 22:56:21.000000000 +0900
+@@ -652,9 +652,8 @@
+ stop at leaf level - set to
+ DISK_LEAF_NODE_LEVEL */
+ ) {
+- int n_block_number = SB_ROOT_BLOCK (p_s_sb),
+- expected_level = SB_TREE_HEIGHT (p_s_sb),
+- n_block_size = p_s_sb->s_blocksize;
++ int n_block_number, expected_level;
++ int n_block_size = p_s_sb->s_blocksize;
+ struct buffer_head * p_s_bh;
+ struct path_element * p_s_last_element;
+ int n_node_level, n_retval;
+@@ -666,7 +665,8 @@
+ #endif
+
+ PROC_INFO_INC( p_s_sb, search_by_key );
+-
++ conditional_schedule();
++
+ /* As we add each node to a path we increase its count. This means that
+ we must be careful to release all nodes in a path before we either
+ discard the path struct or re-use the path struct, as we do here. */
+@@ -678,6 +678,8 @@
+ /* With each iteration of this loop we search through the items in the
+ current node, and calculate the next current node(next path element)
+ for the next iteration of this loop.. */
++ n_block_number = SB_ROOT_BLOCK (p_s_sb);
++ expected_level = SB_TREE_HEIGHT (p_s_sb);
+ while ( 1 ) {
+
+ #ifdef CONFIG_REISERFS_CHECK
+@@ -1104,6 +1106,8 @@
+ for (n_counter = *p_n_removed;
+ n_counter < n_unfm_number; n_counter++, p_n_unfm_pointer-- ) {
+
++ conditional_schedule();
++
+ if (item_moved (&s_ih, p_s_path)) {
+ need_research = 1 ;
+ break;
+diff -Nur c3000_pre/linux/include/linux/low-latency.h c3000_test/linux/include/linux/low-latency.h
+--- c3000_pre/linux/include/linux/low-latency.h 1970-01-01 09:00:00.000000000 +0900
++++ c3000_test/linux/include/linux/low-latency.h 2004-12-20 22:56:21.000000000 +0900
+@@ -0,0 +1,109 @@
++/*
++ * include/linux/low-latency.h
++ *
++ * Andrew Morton <akpm@zip.com.au>
++ */
++
++#ifndef LOW_LATENCY_H_INCLUDED
++#define LOW_LATENCY_H_INCLUDED
++
++#if defined(CONFIG_LOLAT)
++#define LOWLATENCY_NEEDED 1
++#else
++#define LOWLATENCY_NEEDED 0
++#endif
++
++#if LOWLATENCY_NEEDED
++
++#include <linux/cache.h> /* For ____cacheline_aligned */
++
++#ifdef CONFIG_LOLAT_SYSCTL
++extern struct low_latency_enable_struct {
++ int yep;
++} ____cacheline_aligned __enable_lowlatency;
++#define enable_lowlatency __enable_lowlatency.yep
++
++#else
++#define enable_lowlatency 1
++#endif
++
++/*
++ * Set this non-zero to generate low-latency instrumentation
++ */
++#define LOWLATENCY_DEBUG 0
++
++/*
++ * Set this non-zero for robustness testing
++ */
++#define LOWLATENCY_ALWAYS_SCHEDULE 0
++
++#if LOWLATENCY_DEBUG
++
++#if LOWLATENCY_ALWAYS_SCHEDULE
++#define conditional_schedule_needed() ((enable_lowlatency == 2) || (enable_lowlatency && current->need_resched))
++#else
++#define conditional_schedule_needed() (enable_lowlatency && current->need_resched)
++#endif
++
++struct lolat_stats_t {
++ unsigned long count;
++ int visited;
++ const char *file;
++ int line;
++ struct lolat_stats_t *next;
++};
++
++void set_running_and_schedule(struct lolat_stats_t *stats);
++
++#define unconditional_schedule() \
++ do { \
++ static struct lolat_stats_t stats = { \
++ file: __FILE__, \
++ line: __LINE__, \
++ }; \
++ set_running_and_schedule(&stats); \
++ } while (0)
++
++extern void show_lolat_stats(void);
++
++#else /* LOWLATENCY_DEBUG */
++
++#if LOWLATENCY_ALWAYS_SCHEDULE
++#define conditional_schedule_needed() 1
++#else
++#define conditional_schedule_needed() (current->need_resched)
++#endif
++
++void set_running_and_schedule(void);
++#define unconditional_schedule() set_running_and_schedule()
++
++#endif /* LOWLATENCY_DEBUG */
++
++#define conditional_schedule() \
++ do { \
++ if (conditional_schedule_needed()) \
++ unconditional_schedule(); \
++ } while (0)
++
++#define DEFINE_RESCHED_COUNT int resched_count = 0
++#define TEST_RESCHED_COUNT(n) (enable_lowlatency && (++resched_count > (n)))
++#define RESET_RESCHED_COUNT() resched_count = 0
++extern int ll_copy_to_user(void *to_user, const void *from, unsigned long len);
++extern int ll_copy_from_user(void *to, const void *from_user, unsigned long len);
++
++#else /* LOWLATENCY_NEEDED */
++
++#define conditional_schedule_needed() 0
++#define conditional_schedule()
++#define unconditional_schedule()
++
++#define DEFINE_RESCHED_COUNT
++#define TEST_RESCHED_COUNT(n) 0
++#define RESET_RESCHED_COUNT()
++#define ll_copy_to_user(to_user, from, len) copy_to_user((to_user), (from), (len))
++#define ll_copy_from_user(to, from_user, len) copy_from_user((to), (from_user), (len))
++
++#endif /* LOWLATENCY_NEEDED */
++
++#endif /* LOW_LATENCY_H_INCLUDED */
++
+diff -Nur c3000_pre/linux/include/linux/mm.h c3000_test/linux/include/linux/mm.h
+--- c3000_pre/linux/include/linux/mm.h 2004-08-21 09:49:13.000000000 +0900
++++ c3000_test/linux/include/linux/mm.h 2004-12-20 22:56:21.000000000 +0900
+@@ -149,6 +149,8 @@
+ */
+ extern pgprot_t protection_map[16];
+
++/* Actions for zap_page_range() */
++#define ZPR_COND_RESCHED 1 /* Do a conditional_schedule() occasionally */
+
+ /*
+ * These are the virtual MM functions - opening of an area, closing and
+@@ -500,7 +502,7 @@
+ extern void shmem_lock(struct file * file, int lock);
+ extern int shmem_zero_setup(struct vm_area_struct *);
+
+-extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size);
++extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, int actions);
+ extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma);
+ extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
+ extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
+diff -Nur c3000_pre/linux/include/linux/reiserfs_fs.h c3000_test/linux/include/linux/reiserfs_fs.h
+--- c3000_pre/linux/include/linux/reiserfs_fs.h 2004-08-21 09:49:13.000000000 +0900
++++ c3000_test/linux/include/linux/reiserfs_fs.h 2004-12-20 22:56:21.000000000 +0900
+@@ -1197,8 +1197,8 @@
+ #define fs_generation(s) ((s)->u.reiserfs_sb.s_generation_counter)
+ #define get_generation(s) atomic_read (&fs_generation(s))
+ #define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen)
+-#define fs_changed(gen,s) (gen != get_generation (s))
+-
++#define __fs_changed(gen,s) (gen != get_generation (s))
++#define fs_changed(gen,s) ({conditional_schedule(); __fs_changed(gen,s);})
+
+ /***************************************************************************/
+ /* FIXATE NODES */
+diff -Nur c3000_pre/linux/include/linux/sched.h c3000_test/linux/include/linux/sched.h
+--- c3000_pre/linux/include/linux/sched.h 2004-08-21 09:49:13.000000000 +0900
++++ c3000_test/linux/include/linux/sched.h 2004-12-20 22:56:21.000000000 +0900
+@@ -1092,6 +1092,7 @@
+ #include <linux/dcache.h>
+ #include <linux/tqueue.h>
+ #include <linux/fs_struct.h>
++#include <linux/low-latency.h>
+
+ #endif /* __KERNEL__ */
+ #endif
+diff -Nur c3000_pre/linux/include/linux/sysctl.h c3000_test/linux/include/linux/sysctl.h
+--- c3000_pre/linux/include/linux/sysctl.h 2004-08-21 09:49:13.000000000 +0900
++++ c3000_test/linux/include/linux/sysctl.h 2004-12-20 22:56:21.000000000 +0900
+@@ -131,6 +131,7 @@
+ KERN_CORE_USES_PID=52, /* int: use core or core.%pid */
+ KERN_TAINTED=53, /* int: various kernel tainted flags */
+ KERN_CADPID=54, /* int: PID of the process to notify on CAD */
++ KERN_LOWLATENCY=55, /* int: enable low latency scheduling */
+ };
+
+
+diff -Nur c3000_pre/linux/kernel/exit.c c3000_test/linux/kernel/exit.c
+--- c3000_pre/linux/kernel/exit.c 2004-08-21 09:49:14.000000000 +0900
++++ c3000_test/linux/kernel/exit.c 2004-12-20 22:56:21.000000000 +0900
+@@ -196,6 +196,7 @@
+ }
+ i++;
+ set >>= 1;
++ conditional_schedule(); /* sys_exit, many files open */
+ }
+ }
+ }
+diff -Nur c3000_pre/linux/kernel/ksyms.c c3000_test/linux/kernel/ksyms.c
+--- c3000_pre/linux/kernel/ksyms.c 2004-12-19 00:35:59.000000000 +0900
++++ c3000_test/linux/kernel/ksyms.c 2004-12-20 23:07:26.000000000 +0900
+@@ -481,6 +481,13 @@
+ EXPORT_SYMBOL(do_gettimeofday);
+ EXPORT_SYMBOL(do_settimeofday);
+
++#if LOWLATENCY_NEEDED
++EXPORT_SYMBOL(set_running_and_schedule);
++#ifdef CONFIG_LOLAT_SYSCTL
++EXPORT_SYMBOL(__enable_lowlatency);
++#endif
++#endif
++
+ #if !defined(__ia64__)
+ EXPORT_SYMBOL(loops_per_jiffy);
+ #endif
+diff -Nur c3000_pre/linux/kernel/module.c c3000_test/linux/kernel/module.c
+--- c3000_pre/linux/kernel/module.c 2004-08-21 09:49:14.000000000 +0900
++++ c3000_test/linux/kernel/module.c 2004-12-20 22:56:21.000000000 +0900
+@@ -1174,6 +1174,11 @@
+ return ERR_PTR(-ENOMEM);
+ lock_kernel();
+ for (v = module_list, n = *pos; v; n -= v->nsyms, v = v->next) {
++#if 0
++ /* We can't actually do this, because we'd create a
++ * race against module unload. Need a semaphore. */
++ conditional_schedule();
++#endif
+ if (n < v->nsyms) {
+ p->mod = v;
+ p->index = n;
+diff -Nur c3000_pre/linux/kernel/sched.c c3000_test/linux/kernel/sched.c
+--- c3000_pre/linux/kernel/sched.c 2004-08-21 09:49:14.000000000 +0900
++++ c3000_test/linux/kernel/sched.c 2004-12-20 22:56:21.000000000 +0900
+@@ -302,6 +302,17 @@
+ if (tsk->processor != this_cpu)
+ smp_send_reschedule(tsk->processor);
+ }
++#if LOWLATENCY_NEEDED
++ if (enable_lowlatency && (p->policy != SCHED_OTHER)) {
++ struct task_struct *t;
++ for (i = 0; i < smp_num_cpus; i++) {
++ cpu = cpu_logical_map(i);
++ t = cpu_curr(cpu);
++ if (t != tsk)
++ t->need_resched = 1;
++ }
++ }
++#endif
+ return;
+
+
+@@ -1429,3 +1440,93 @@
+ atomic_inc(&init_mm.mm_count);
+ enter_lazy_tlb(&init_mm, current, cpu);
+ }
++
++#if LOWLATENCY_NEEDED
++#if LOWLATENCY_DEBUG
++
++static struct lolat_stats_t *lolat_stats_head;
++static spinlock_t lolat_stats_lock = SPIN_LOCK_UNLOCKED;
++
++void set_running_and_schedule(struct lolat_stats_t *stats)
++{
++ spin_lock(&lolat_stats_lock);
++ if (stats->visited == 0) {
++ stats->visited = 1;
++ stats->next = lolat_stats_head;
++ lolat_stats_head = stats;
++ }
++ stats->count++;
++ spin_unlock(&lolat_stats_lock);
++
++ if (current->state != TASK_RUNNING)
++ set_current_state(TASK_RUNNING);
++ schedule();
++}
++
++void show_lolat_stats(void)
++{
++ struct lolat_stats_t *stats = lolat_stats_head;
++
++ printk("Low latency scheduling stats:\n");
++ while (stats) {
++ printk("%s:%d: %lu\n", stats->file, stats->line, stats->count);
++ stats->count = 0;
++ stats = stats->next;
++ }
++}
++
++#else /* LOWLATENCY_DEBUG */
++
++void set_running_and_schedule()
++{
++ if (current->state != TASK_RUNNING)
++ __set_current_state(TASK_RUNNING);
++ schedule();
++}
++
++#endif /* LOWLATENCY_DEBUG */
++
++int ll_copy_to_user(void *to_user, const void *from, unsigned long len)
++{
++ while (len) {
++ unsigned long n_to_copy = len;
++ unsigned long remainder;
++
++ if (n_to_copy > 4096)
++ n_to_copy = 4096;
++ remainder = copy_to_user(to_user, from, n_to_copy);
++ if (remainder)
++ return remainder + len;
++ to_user = ((char *)to_user) + n_to_copy;
++ from = ((char *)from) + n_to_copy;
++ len -= n_to_copy;
++ conditional_schedule();
++ }
++ return 0;
++}
++
++int ll_copy_from_user(void *to, const void *from_user, unsigned long len)
++{
++ while (len) {
++ unsigned long n_to_copy = len;
++ unsigned long remainder;
++
++ if (n_to_copy > 4096)
++ n_to_copy = 4096;
++ remainder = copy_from_user(to, from_user, n_to_copy);
++ if (remainder)
++ return remainder + len;
++ to = ((char *)to) + n_to_copy;
++ from_user = ((char *)from_user) + n_to_copy;
++ len -= n_to_copy;
++ conditional_schedule();
++ }
++ return 0;
++}
++
++#ifdef CONFIG_LOLAT_SYSCTL
++struct low_latency_enable_struct __enable_lowlatency = { 0, };
++#endif
++
++#endif /* LOWLATENCY_NEEDED */
++
+diff -Nur c3000_pre/linux/kernel/sysctl.c c3000_test/linux/kernel/sysctl.c
+--- c3000_pre/linux/kernel/sysctl.c 2004-08-21 09:49:14.000000000 +0900
++++ c3000_test/linux/kernel/sysctl.c 2004-12-20 22:56:21.000000000 +0900
+@@ -271,6 +271,10 @@
+ {KERN_S390_USER_DEBUG_LOGGING,"userprocess_debug",
+ &sysctl_userprocess_debug,sizeof(int),0644,NULL,&proc_dointvec},
+ #endif
++#ifdef CONFIG_LOLAT_SYSCTL
++ {KERN_LOWLATENCY, "lowlatency", &enable_lowlatency, sizeof (int),
++ 0644, NULL, &proc_dointvec},
++#endif
+ {0}
+ };
+
+diff -Nur c3000_pre/linux/mm/filemap.c c3000_test/linux/mm/filemap.c
+--- c3000_pre/linux/mm/filemap.c 2004-08-21 09:49:15.000000000 +0900
++++ c3000_test/linux/mm/filemap.c 2004-12-20 22:56:21.000000000 +0900
+@@ -179,7 +179,9 @@
+ {
+ struct list_head *head, *curr;
+ struct page * page;
++ int ll_count = 100;
+
++restart:
+ head = &inode->i_mapping->clean_pages;
+
+ spin_lock(&pagemap_lru_lock);
+@@ -190,6 +192,14 @@
+ page = list_entry(curr, struct page, list);
+ curr = curr->next;
+
++ if (conditional_schedule_needed() && ll_count) {
++ spin_unlock(&pagecache_lock);
++ spin_unlock(&pagemap_lru_lock);
++ unconditional_schedule();
++ ll_count--;
++ goto restart;
++ }
++
+ /* We cannot invalidate something in dirty.. */
+ if (PageDirty(page))
+ continue;
+@@ -253,8 +263,7 @@
+ page_cache_release(page);
+ }
+
+-static int FASTCALL(truncate_list_pages(struct list_head *, unsigned long, unsigned *));
+-static int truncate_list_pages(struct list_head *head, unsigned long start, unsigned *partial)
++static int truncate_list_pages(struct list_head *head, unsigned long start, unsigned *partial, int *restart_count)
+ {
+ struct list_head *curr;
+ struct page * page;
+@@ -265,6 +274,17 @@
+ while (curr != head) {
+ unsigned long offset;
+
++ if (conditional_schedule_needed() && *restart_count) {
++ (*restart_count)--;
++ list_del(head);
++ list_add(head, curr); /* Restart on this page */
++ spin_unlock(&pagecache_lock);
++ unconditional_schedule();
++ spin_lock(&pagecache_lock);
++ unlocked = 1;
++ goto restart;
++ }
++
+ page = list_entry(curr, struct page, list);
+ offset = page->index;
+
+@@ -297,13 +317,11 @@
+ } else
+ wait_on_page(page);
+
+- page_cache_release(page);
+-
+- if (current->need_resched) {
+- __set_current_state(TASK_RUNNING);
+- schedule();
++ if (LOWLATENCY_NEEDED) {
++ *restart_count = 4; /* We made progress */
+ }
+
++ page_cache_release(page);
+ spin_lock(&pagecache_lock);
+ goto restart;
+ }
+@@ -326,13 +344,14 @@
+ {
+ unsigned long start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
++ int restart_count = 4;
+ int unlocked;
+
+ spin_lock(&pagecache_lock);
+ do {
+- unlocked = truncate_list_pages(&mapping->clean_pages, start, &partial);
+- unlocked |= truncate_list_pages(&mapping->dirty_pages, start, &partial);
+- unlocked |= truncate_list_pages(&mapping->locked_pages, start, &partial);
++ unlocked = truncate_list_pages(&mapping->clean_pages, start, &partial, &restart_count);
++ unlocked |= truncate_list_pages(&mapping->dirty_pages, start, &partial, &restart_count);
++ unlocked |= truncate_list_pages(&mapping->locked_pages, start, &partial, &restart_count);
+ } while (unlocked);
+ /* Traversed all three lists without dropping the lock */
+ spin_unlock(&pagecache_lock);
+@@ -477,6 +496,7 @@
+
+ page_cache_get(page);
+ spin_unlock(&pagecache_lock);
++ conditional_schedule(); /* sys_msync() (only used by minixfs, udf) */
+ lock_page(page);
+
+ /* The buffers could have been free'd while we waited for the page lock */
+@@ -563,12 +583,14 @@
+ list_del(&page->list);
+ list_add(&page->list, &mapping->locked_pages);
+
+- if (!PageDirty(page))
+- continue;
+-
+ page_cache_get(page);
+ spin_unlock(&pagecache_lock);
+
++ conditional_schedule(); /* sys_msync() */
++
++ if (!PageDirty(page))
++ goto clean;
++
+ lock_page(page);
+
+ if (PageDirty(page)) {
+@@ -579,7 +601,7 @@
+ ret = err;
+ } else
+ UnlockPage(page);
+-
++clean:
+ page_cache_release(page);
+ spin_lock(&pagecache_lock);
+ }
+@@ -597,7 +619,8 @@
+ int filemap_fdatawait(struct address_space * mapping)
+ {
+ int ret = 0;
+-
++ DEFINE_RESCHED_COUNT;
++restart:
+ spin_lock(&pagecache_lock);
+
+ while (!list_empty(&mapping->locked_pages)) {
+@@ -606,6 +629,17 @@
+ list_del(&page->list);
+ list_add(&page->list, &mapping->clean_pages);
+
++ if (TEST_RESCHED_COUNT(32)) {
++ RESET_RESCHED_COUNT();
++ if (conditional_schedule_needed()) {
++ page_cache_get(page);
++ spin_unlock(&pagecache_lock);
++ unconditional_schedule();
++ page_cache_release(page);
++ goto restart;
++ }
++ }
++
+ if (!PageLocked(page))
+ continue;
+
+@@ -706,8 +740,10 @@
+ spin_lock(&pagecache_lock);
+ page = __find_page_nolock(mapping, offset, *hash);
+ spin_unlock(&pagecache_lock);
+- if (page)
++ if (page) {
++ conditional_schedule();
+ return 0;
++ }
+
+ page = page_cache_alloc(mapping);
+ if (!page)
+@@ -963,6 +999,11 @@
+ * the hash-list needs a held write-lock.
+ */
+ repeat:
++ if (conditional_schedule_needed()) {
++ spin_unlock(&pagecache_lock);
++ unconditional_schedule();
++ spin_lock(&pagecache_lock);
++ }
+ page = __find_page_nolock(mapping, offset, hash);
+ if (page) {
+ page_cache_get(page);
+@@ -1413,6 +1454,8 @@
+ page_cache_get(page);
+ spin_unlock(&pagecache_lock);
+
++ conditional_schedule(); /* sys_read() */
++
+ if (!Page_Uptodate(page))
+ goto page_not_up_to_date;
+ generic_file_readahead(reada_ok, filp, inode, page);
+@@ -2114,6 +2157,12 @@
+ address += PAGE_SIZE;
+ pte++;
+ } while (address && (address < end));
++
++ if (conditional_schedule_needed()) {
++ spin_unlock(&vma->vm_mm->page_table_lock);
++ unconditional_schedule(); /* syncing large mapped files */
++ spin_lock(&vma->vm_mm->page_table_lock);
++ }
+ return error;
+ }
+
+@@ -2530,7 +2579,9 @@
+ if (vma->vm_flags & VM_LOCKED)
+ return -EINVAL;
+
+- zap_page_range(vma->vm_mm, start, end - start);
++ zap_page_range(vma->vm_mm, start, end - start,
++ ZPR_COND_RESCHED); /* sys_madvise(MADV_DONTNEED) */
++
+ return 0;
+ }
+
+@@ -3095,6 +3146,9 @@
+ goto sync_failure;
+ page_fault = __copy_from_user(kaddr+offset, buf, bytes);
+ flush_dcache_page(page);
++
++ conditional_schedule();
++
+ status = mapping->a_ops->commit_write(file, page, offset, offset+bytes);
+ if (page_fault)
+ goto fail_write;
+diff -Nur c3000_pre/linux/mm/memory.c c3000_test/linux/mm/memory.c
+--- c3000_pre/linux/mm/memory.c 2004-08-21 09:49:15.000000000 +0900
++++ c3000_test/linux/mm/memory.c 2004-12-20 22:56:21.000000000 +0900
+@@ -370,7 +370,7 @@
+ /*
+ * remove user pages in a given range.
+ */
+-void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
++static void do_zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
+ {
+ mmu_gather_t *tlb;
+ pgd_t * dir;
+@@ -494,6 +494,10 @@
+ struct page *map;
+ while (!(map = follow_page(mm, start, write))) {
+ spin_unlock(&mm->page_table_lock);
++
++ /* Pinning down many physical pages (kiobufs, mlockall) */
++ conditional_schedule();
++
+ switch (handle_mm_fault(mm, vma, start, write)) {
+ case 1:
+ tsk->min_flt++;
+@@ -655,6 +659,21 @@
+ iobuf->locked = 0;
+ }
+
++#define MAX_ZAP_BYTES 256*PAGE_SIZE
++
++void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, int actions)
++{
++ while (size) {
++ unsigned long chunk = size;
++ if (actions & ZPR_COND_RESCHED && chunk > MAX_ZAP_BYTES)
++ chunk = MAX_ZAP_BYTES;
++ do_zap_page_range(mm, address, chunk);
++ if (actions & ZPR_COND_RESCHED)
++ conditional_schedule();
++ address += chunk;
++ size -= chunk;
++ }
++}
+
+ /*
+ * Lock down all of the pages of a kiovec for IO.
+@@ -764,11 +783,18 @@
+ return 0;
+ }
+
+-static inline void zeromap_pte_range(pte_t * pte, unsigned long address,
+- unsigned long size, pgprot_t prot)
++static inline void zeromap_pte_range(struct mm_struct *mm, pte_t * pte,
++ unsigned long address, unsigned long size,
++ pgprot_t prot)
+ {
+ unsigned long end;
+
++ if (conditional_schedule_needed()) {
++ spin_unlock(&mm->page_table_lock);
++ unconditional_schedule(); /* mmap(/dev/zero) */
++ spin_lock(&mm->page_table_lock);
++ }
++
+ address &= ~PMD_MASK;
+ end = address + size;
+ if (end > PMD_SIZE)
+@@ -796,7 +822,7 @@
+ pte_t * pte = pte_alloc(mm, pmd, address);
+ if (!pte)
+ return -ENOMEM;
+- zeromap_pte_range(pte, address, end - address, prot);
++ zeromap_pte_range(mm, pte, address, end - address, prot);
+ address = (address + PMD_SIZE) & PMD_MASK;
+ pmd++;
+ } while (address && (address < end));
+@@ -1044,7 +1070,7 @@
+
+ /* mapping wholly truncated? */
+ if (mpnt->vm_pgoff >= pgoff) {
+- zap_page_range(mm, start, len);
++ zap_page_range(mm, start, len, 0);
+ continue;
+ }
+
+@@ -1057,7 +1083,7 @@
+ /* Ok, partially affected.. */
+ start += diff << PAGE_SHIFT;
+ len = (len - diff) << PAGE_SHIFT;
+- zap_page_range(mm, start, len);
++ zap_page_range(mm, start, len, 0);
+ } while ((mpnt = mpnt->vm_next_share) != NULL);
+ }
+
+diff -Nur c3000_pre/linux/mm/mmap.c c3000_test/linux/mm/mmap.c
+--- c3000_pre/linux/mm/mmap.c 2004-12-16 22:55:54.000000000 +0900
++++ c3000_test/linux/mm/mmap.c 2004-12-20 23:07:25.000000000 +0900
+@@ -598,7 +598,7 @@
+ fput(file);
+
+ /* Undo any partial mapping done by a device driver. */
+- zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start);
++ zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start, 0);
+ free_vma:
+ kmem_cache_free(vm_area_cachep, vma);
+ return error;
+@@ -998,7 +998,7 @@
+ remove_shared_vm_struct(mpnt);
+ mm->map_count--;
+
+- zap_page_range(mm, st, size);
++ zap_page_range(mm, st, size, ZPR_COND_RESCHED); /* sys_munmap() */
+
+ /*
+ * Fix the mapping, and free the old area if it wasn't reused.
+@@ -1158,7 +1158,7 @@
+ }
+ mm->map_count--;
+ remove_shared_vm_struct(mpnt);
+- zap_page_range(mm, start, size);
++ zap_page_range(mm, start, size, ZPR_COND_RESCHED); /* sys_exit() */
+ if (mpnt->vm_file)
+ fput(mpnt->vm_file);
+ kmem_cache_free(vm_area_cachep, mpnt);
+diff -Nur c3000_pre/linux/mm/mremap.c c3000_test/linux/mm/mremap.c
+--- c3000_pre/linux/mm/mremap.c 2004-12-16 22:55:54.000000000 +0900
++++ c3000_test/linux/mm/mremap.c 2004-12-20 23:07:25.000000000 +0900
+@@ -121,7 +121,7 @@
+ flush_cache_range(mm, new_addr, new_addr + len);
+ while ((offset += PAGE_SIZE) < len)
+ move_one_page(mm, new_addr + offset, old_addr + offset);
+- zap_page_range(mm, new_addr, len);
++ zap_page_range(mm, new_addr, len, 0);
+ #ifdef __arm__
+ memc_update_mm(mm);
+ #endif
+diff -Nur c3000_pre/linux/mm/slab.c c3000_test/linux/mm/slab.c
+--- c3000_pre/linux/mm/slab.c 2004-11-16 15:31:09.000000000 +0900
++++ c3000_test/linux/mm/slab.c 2004-12-20 22:56:21.000000000 +0900
+@@ -940,6 +940,7 @@
+ list_del(&slabp->list);
+
+ spin_unlock_irq(&cachep->spinlock);
++ conditional_schedule();
+ kmem_slab_destroy(cachep, slabp);
+ ret++;
+ spin_lock_irq(&cachep->spinlock);
+@@ -1853,6 +1854,7 @@
+ */
+ spin_unlock_irq(&best_cachep->spinlock);
+ kmem_slab_destroy(best_cachep, slabp);
++ conditional_schedule(); /* try_to_free_pages() */
+ spin_lock_irq(&best_cachep->spinlock);
+ }
+ spin_unlock_irq(&best_cachep->spinlock);
+diff -Nur c3000_pre/linux/mm/swapfile.c c3000_test/linux/mm/swapfile.c
+--- c3000_pre/linux/mm/swapfile.c 2004-08-21 09:49:16.000000000 +0900
++++ c3000_test/linux/mm/swapfile.c 2004-12-20 22:56:21.000000000 +0900
+@@ -819,7 +819,7 @@
+ len += sprintf(buf + len, "partition\t");
+
+ usedswap = 0;
+- for (j = 0; j < ptr->max; ++j)
++ for (j = 0; j < ptr->max; ++j) {
+ switch (ptr->swap_map[j]) {
+ case SWAP_MAP_BAD:
+ case 0:
+@@ -827,6 +827,8 @@
+ default:
+ usedswap++;
+ }
++ conditional_schedule();
++ }
+ len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10),
+ usedswap << (PAGE_SHIFT - 10), ptr->prio);
+ }
+@@ -1120,6 +1122,11 @@
+ if (swap_info[i].flags != SWP_USED)
+ continue;
+ for (j = 0; j < swap_info[i].max; ++j) {
++ if (conditional_schedule_needed()) {
++ swap_list_unlock();
++ conditional_schedule();
++ swap_list_lock();
++ }
+ switch (swap_info[i].swap_map[j]) {
+ case 0:
+ case SWAP_MAP_BAD:
+diff -Nur c3000_pre/linux/mm/vmscan.c c3000_test/linux/mm/vmscan.c
+--- c3000_pre/linux/mm/vmscan.c 2004-08-21 09:49:16.000000000 +0900
++++ c3000_test/linux/mm/vmscan.c 2004-12-20 22:56:21.000000000 +0900
+@@ -173,6 +173,7 @@
+ {
+ pte_t * pte;
+ unsigned long pmd_end;
++ DEFINE_RESCHED_COUNT;
+
+ if (pmd_none(*dir))
+ return count;
+@@ -198,11 +199,17 @@
+ address += PAGE_SIZE;
+ break;
+ }
++ if (TEST_RESCHED_COUNT(4)) {
++ if (conditional_schedule_needed())
++ goto out;
++ RESET_RESCHED_COUNT();
++ }
+ }
+ }
+ address += PAGE_SIZE;
+ pte++;
+ } while (address && (address < end));
++out:
+ mm->swap_address = address;
+ return count;
+ }
+@@ -231,6 +238,8 @@
+ count = swap_out_pmd(mm, vma, pmd, address, end, count, classzone);
+ if (!count)
+ break;
++ if (conditional_schedule_needed())
++ return count;
+ address = (address + PMD_SIZE) & PMD_MASK;
+ pmd++;
+ } while (address && (address < end));
+@@ -255,6 +264,8 @@
+ count = swap_out_pgd(mm, vma, pgdir, address, end, count, classzone);
+ if (!count)
+ break;
++ if (conditional_schedule_needed())
++ return count;
+ address = (address + PGDIR_SIZE) & PGDIR_MASK;
+ pgdir++;
+ } while (address && (address < end));
+@@ -276,6 +287,7 @@
+ * Find the proper vm-area after freezing the vma chain
+ * and ptes.
+ */
++continue_scan:
+ spin_lock(&mm->page_table_lock);
+ address = mm->swap_address;
+ if (address == TASK_SIZE || swap_mm != mm) {
+@@ -293,6 +305,12 @@
+ vma = vma->vm_next;
+ if (!vma)
+ break;
++ if (conditional_schedule_needed()) { /* Scanning a large vma */
++ spin_unlock(&mm->page_table_lock);
++ unconditional_schedule();
++ /* Continue from where we left off */
++ goto continue_scan;
++ }
+ if (!count)
+ goto out_unlock;
+ address = vma->vm_start;
+diff -Nur c3000_pre/linux/net/core/iovec.c c3000_test/linux/net/core/iovec.c
+--- c3000_pre/linux/net/core/iovec.c 2004-08-21 11:23:13.000000000 +0900
++++ c3000_test/linux/net/core/iovec.c 2004-12-20 22:56:21.000000000 +0900
+@@ -88,7 +88,7 @@
+ if(iov->iov_len)
+ {
+ int copy = min_t(unsigned int, iov->iov_len, len);
+- if (copy_to_user(iov->iov_base, kdata, copy))
++ if (ll_copy_to_user(iov->iov_base, kdata, copy))
+ goto out;
+ kdata+=copy;
+ len-=copy;
+diff -Nur c3000_pre/linux/net/ipv4/tcp_minisocks.c c3000_test/linux/net/ipv4/tcp_minisocks.c
+--- c3000_pre/linux/net/ipv4/tcp_minisocks.c 2004-08-21 09:49:22.000000000 +0900
++++ c3000_test/linux/net/ipv4/tcp_minisocks.c 2004-12-20 22:56:21.000000000 +0900
+@@ -440,6 +440,9 @@
+ {
+ struct tcp_tw_bucket *tw;
+ int killed = 0;
++#if LOWLATENCY_NEEDED
++ int max_killed = 0;
++#endif
+
+ /* NOTE: compare this to previous version where lock
+ * was released after detaching chain. It was racy,
+@@ -453,6 +456,13 @@
+ goto out;
+
+ while((tw = tcp_tw_death_row[tcp_tw_death_row_slot]) != NULL) {
++#if LOWLATENCY_NEEDED
++ /* This loop takes ~6 usecs per iteration. */
++ if (killed > 100) {
++ max_killed = 1;
++ break;
++ }
++#endif
+ tcp_tw_death_row[tcp_tw_death_row_slot] = tw->next_death;
+ tw->pprev_death = NULL;
+ spin_unlock(&tw_death_lock);
+@@ -463,12 +473,24 @@
+ killed++;
+
+ spin_lock(&tw_death_lock);
++
++ }
++
++#if LOWLATENCY_NEEDED
++ if (max_killed) { /* More to do: do it soon */
++ mod_timer(&tcp_tw_timer, jiffies+2);
++ tcp_tw_count -= killed;
++ }
++ else
++#endif
++ {
++ tcp_tw_death_row_slot =
++ ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
++
++ if ((tcp_tw_count -= killed) != 0)
++ mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
+ }
+- tcp_tw_death_row_slot =
+- ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
+
+- if ((tcp_tw_count -= killed) != 0)
+- mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
+ net_statistics[smp_processor_id()*2].TimeWaited += killed;
+ out:
+ spin_unlock(&tw_death_lock);
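
Note: nearly every hunk in the patch above applies one of two idioms declared in include/linux/low-latency.h: a bare conditional_schedule() at a point where no spinlock is held, or an explicit lock-break (drop the spinlock, call unconditional_schedule(), reacquire, and restart the walk). The snippet below is a minimal userspace sketch of the lock-break idiom using POSIX threads, not part of the patch; the flag name need_resched and the threshold of 32 mirror the kernel code, while the mutex and the work loop are illustrative stand-ins.

/*
 * Userspace analogue of the lock-break pattern used throughout the patch
 * (compare the fs/buffer.c and mm/filemap.c hunks): check a reschedule flag
 * every 32 items and, if it is set, drop the lock, yield the CPU, then
 * reacquire the lock and continue from where the walk stopped.
 */
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int need_resched;            /* stand-in for current->need_resched */

static void process_items(int *items, int nitems)
{
        int done = 0;
        int resched_count = 0;             /* cf. DEFINE_RESCHED_COUNT */

restart:
        pthread_mutex_lock(&list_lock);
        for (; done < nitems; done++) {
                items[done] *= 2;          /* the "real" work done under the lock */

                if (++resched_count > 32 && atomic_load(&need_resched)) {
                        resched_count = 0;                 /* cf. RESET_RESCHED_COUNT() */
                        pthread_mutex_unlock(&list_lock);  /* break the lock... */
                        sched_yield();                     /* cf. unconditional_schedule() */
                        goto restart;      /* resume at the next unprocessed item */
                }
        }
        pthread_mutex_unlock(&list_lock);
}

In the kernel hunks the same shape appears with spin_lock()/spin_unlock() around lru_list_lock, pagecache_lock and similar locks, and with TEST_RESCHED_COUNT()/conditional_schedule_needed() in place of the explicit counter and flag check shown here.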