Diffstat (limited to 'packages/linux/openzaurus-pxa27x-2.4.20-rmk2-embedix20050602/P14-lowlatency_041221.patch')
-rw-r--r--  packages/linux/openzaurus-pxa27x-2.4.20-rmk2-embedix20050602/P14-lowlatency_041221.patch | 1677
1 files changed, 1677 insertions, 0 deletions
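
The patch added below is the 2.4 "low latency scheduling" work (originally by Andrew Morton, per the header in include/linux/low-latency.h) carried over to the openzaurus-pxa27x Embedix kernel. It introduces a conditional_schedule() primitive (include/linux/low-latency.h, kernel/sched.c) plus the CONFIG_LOLAT / CONFIG_LOLAT_SYSCTL options, and then inserts rescheduling points into long lock-held loops in the block, fs, mm and net code. Most hunks follow the same drop-lock/yield/restart shape; the fragment below is a stand-alone sketch of that shape for orientation only, not a hunk from the patch -- some_lock, some_list and process_entry() are placeholder names.

#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/low-latency.h>   /* header added by this patch */

/* Placeholders standing in for lru_list_lock, the inode/dentry LRUs, etc. */
static spinlock_t some_lock = SPIN_LOCK_UNLOCKED;
static LIST_HEAD(some_list);

static void process_entry(struct list_head *p)
{
	/* work done on one entry while some_lock is held */
}

static void scan_list_lowlat(void)
{
	struct list_head *p;

restart:
	spin_lock(&some_lock);
	list_for_each(p, &some_list) {
		if (conditional_schedule_needed()) {
			/*
			 * A reschedule is pending: drop the lock, yield the
			 * CPU, then rescan from the top, since the list may
			 * have changed while we slept.
			 */
			spin_unlock(&some_lock);
			unconditional_schedule();
			goto restart;
		}
		process_entry(p);
	}
	spin_unlock(&some_lock);
}

With CONFIG_LOLAT disabled the macros compile away to nothing; with CONFIG_LOLAT_SYSCTL enabled the behaviour can be toggled at run time through the new KERN_LOWLATENCY ("lowlatency") sysctl added in kernel/sysctl.c.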
diff --git a/packages/linux/openzaurus-pxa27x-2.4.20-rmk2-embedix20050602/P14-lowlatency_041221.patch b/packages/linux/openzaurus-pxa27x-2.4.20-rmk2-embedix20050602/P14-lowlatency_041221.patch new file mode 100644 index 0000000000..edd63fb13b --- /dev/null +++ b/packages/linux/openzaurus-pxa27x-2.4.20-rmk2-embedix20050602/P14-lowlatency_041221.patch @@ -0,0 +1,1677 @@ +diff -Nur c3000_pre/linux/arch/arm/config.in c3000_test/linux/arch/arm/config.in +--- c3000_pre/linux/arch/arm/config.in 2004-12-16 22:55:34.000000000 +0900 ++++ c3000_test/linux/arch/arm/config.in 2004-12-20 23:23:28.000000000 +0900 +@@ -574,6 +574,8 @@ + fi + fi + dep_bool 'Preemptible Kernel support' CONFIG_PREEMPT $CONFIG_CPU_32 ++bool 'Low latency scheduling' CONFIG_LOLAT ++dep_bool 'Control low latency with sysctl' CONFIG_LOLAT_SYSCTL $CONFIG_LOLAT + + endmenu + +diff -Nur c3000_pre/linux/arch/i386/config.in c3000_test/linux/arch/i386/config.in +--- c3000_pre/linux/arch/i386/config.in 2004-08-21 09:48:09.000000000 +0900 ++++ c3000_test/linux/arch/i386/config.in 2004-12-20 22:56:21.000000000 +0900 +@@ -25,6 +25,9 @@ + + mainmenu_option next_comment + comment 'Processor type and features' ++bool 'Low latency scheduling' CONFIG_LOLAT ++dep_bool 'Control low latency with sysctl' CONFIG_LOLAT_SYSCTL $CONFIG_LOLAT ++ + choice 'Processor family' \ + "386 CONFIG_M386 \ + 486 CONFIG_M486 \ +diff -Nur c3000_pre/linux/drivers/block/ll_rw_blk.c c3000_test/linux/drivers/block/ll_rw_blk.c +--- c3000_pre/linux/drivers/block/ll_rw_blk.c 2004-08-21 09:48:24.000000000 +0900 ++++ c3000_test/linux/drivers/block/ll_rw_blk.c 2004-12-20 22:56:21.000000000 +0900 +@@ -1211,6 +1211,7 @@ + kstat.pgpgin += count; + break; + } ++ conditional_schedule(); + } + + /** +diff -Nur c3000_pre/linux/drivers/char/mem.c c3000_test/linux/drivers/char/mem.c +--- c3000_pre/linux/drivers/char/mem.c 2004-08-21 09:48:25.000000000 +0900 ++++ c3000_test/linux/drivers/char/mem.c 2004-12-20 22:56:21.000000000 +0900 +@@ -422,7 +422,7 @@ + if (count > size) + count = size; + +- zap_page_range(mm, addr, count); ++ zap_page_range(mm, addr, count, 0); + zeromap_page_range(addr, count, PAGE_COPY); + + size -= count; +diff -Nur c3000_pre/linux/drivers/char/random.c c3000_test/linux/drivers/char/random.c +--- c3000_pre/linux/drivers/char/random.c 2004-08-21 09:48:25.000000000 +0900 ++++ c3000_test/linux/drivers/char/random.c 2004-12-20 22:56:21.000000000 +0900 +@@ -1374,6 +1374,11 @@ + buf += i; + ret += i; + add_timer_randomness(&extract_timer_state, nbytes); ++#if LOWLATENCY_NEEDED ++ /* This can happen in softirq's, but that's what we want */ ++ if (conditional_schedule_needed()) ++ break; ++#endif + } + + /* Wipe data just returned from memory */ +diff -Nur c3000_pre/linux/drivers/i2c/i2c-core.c c3000_test/linux/drivers/i2c/i2c-core.c +--- c3000_pre/linux/drivers/i2c/i2c-core.c 2004-08-21 09:48:34.000000000 +0900 ++++ c3000_test/linux/drivers/i2c/i2c-core.c 2004-12-20 22:56:21.000000000 +0900 +@@ -761,6 +761,8 @@ + { + int ret; + ++ conditional_schedule(); ++ + if (adap->algo->master_xfer) { + DEB2(printk("i2c-core.o: master_xfer: %s with %d msgs.\n", + adap->name,num)); +@@ -783,6 +785,8 @@ + struct i2c_adapter *adap=client->adapter; + struct i2c_msg msg; + ++ conditional_schedule(); ++ + if (client->adapter->algo->master_xfer) { + msg.addr = client->addr; + msg.flags = client->flags & I2C_M_TEN; +@@ -812,6 +816,9 @@ + struct i2c_adapter *adap=client->adapter; + struct i2c_msg msg; + int ret; ++ ++ conditional_schedule(); ++ + if (client->adapter->algo->master_xfer) { + 
msg.addr = client->addr; + msg.flags = client->flags & I2C_M_TEN; +diff -Nur c3000_pre/linux/fs/buffer.c c3000_test/linux/fs/buffer.c +--- c3000_pre/linux/fs/buffer.c 2004-08-21 09:48:58.000000000 +0900 ++++ c3000_test/linux/fs/buffer.c 2004-12-20 22:56:21.000000000 +0900 +@@ -216,8 +216,10 @@ + + if (dev != NODEV && bh->b_dev != dev) + continue; +- if (test_and_set_bit(BH_Lock, &bh->b_state)) ++ if (test_and_set_bit(BH_Lock, &bh->b_state)) { ++ __refile_buffer(bh); + continue; ++ } + if (atomic_set_buffer_clean(bh)) { + __refile_buffer(bh); + get_bh(bh); +@@ -227,6 +229,7 @@ + + spin_unlock(&lru_list_lock); + write_locked_buffers(array, count); ++ conditional_schedule(); + return -EAGAIN; + } + unlock_buffer(bh); +@@ -260,12 +263,19 @@ + struct buffer_head * next; + int nr; + +- next = lru_list[index]; + nr = nr_buffers_type[index]; ++repeat: ++ next = lru_list[index]; + while (next && --nr >= 0) { + struct buffer_head *bh = next; + next = bh->b_next_free; + ++ if (conditional_schedule_needed()) { ++ spin_unlock(&lru_list_lock); ++ unconditional_schedule(); ++ spin_lock(&lru_list_lock); ++ goto repeat; ++ } + if (!buffer_locked(bh)) { + if (refile) + __refile_buffer(bh); +@@ -273,7 +283,6 @@ + } + if (dev != NODEV && bh->b_dev != dev) + continue; +- + get_bh(bh); + spin_unlock(&lru_list_lock); + wait_on_buffer (bh); +@@ -306,6 +315,15 @@ + { + int err = 0; + ++#if LOWLATENCY_NEEDED ++ /* ++ * syncing devA when there are lots of buffers dirty against ++ * devB is expensive. ++ */ ++ if (enable_lowlatency) ++ dev = NODEV; ++#endif ++ + /* One pass for no-wait, three for wait: + * 0) write out all dirty, unlocked buffers; + * 1) wait for all dirty locked buffers; +@@ -697,6 +715,16 @@ + /* Not hashed? */ + if (!bh->b_pprev) + continue; ++ ++ if (conditional_schedule_needed()) { ++ get_bh(bh); ++ spin_unlock(&lru_list_lock); ++ unconditional_schedule(); ++ spin_lock(&lru_list_lock); ++ put_bh(bh); ++ slept = 1; ++ } ++ + if (buffer_locked(bh)) { + get_bh(bh); + spin_unlock(&lru_list_lock); +@@ -848,12 +876,19 @@ + struct buffer_head *bh; + struct inode tmp; + int err = 0, err2; +- ++ DEFINE_RESCHED_COUNT; ++ + INIT_LIST_HEAD(&tmp.i_dirty_buffers); +- ++ ++repeat: + spin_lock(&lru_list_lock); + + while (!list_empty(list)) { ++ if (conditional_schedule_needed()) { ++ spin_unlock(&lru_list_lock); ++ unconditional_schedule(); ++ goto repeat; ++ } + bh = BH_ENTRY(list->next); + list_del(&bh->b_inode_buffers); + if (!buffer_dirty(bh) && !buffer_locked(bh)) +@@ -878,8 +913,18 @@ + spin_lock(&lru_list_lock); + } + } ++ if (TEST_RESCHED_COUNT(32)) { ++ RESET_RESCHED_COUNT(); ++ if (conditional_schedule_needed()) { ++ spin_unlock(&lru_list_lock); ++ unconditional_schedule(); /* Syncing many dirty buffers */ ++ spin_lock(&lru_list_lock); ++ } ++ } + } + ++ RESET_RESCHED_COUNT(); ++ + while (!list_empty(&tmp.i_dirty_buffers)) { + bh = BH_ENTRY(tmp.i_dirty_buffers.prev); + remove_inode_queue(bh); +@@ -889,6 +934,7 @@ + if (!buffer_uptodate(bh)) + err = -EIO; + brelse(bh); ++ conditional_schedule(); + spin_lock(&lru_list_lock); + } + +@@ -916,11 +962,20 @@ + struct buffer_head *bh; + struct list_head *p; + int err = 0; ++ DEFINE_RESCHED_COUNT; + ++repeat: ++ conditional_schedule(); + spin_lock(&lru_list_lock); + +- repeat: + list_for_each_prev(p, list) { ++ if (TEST_RESCHED_COUNT(32)) { ++ RESET_RESCHED_COUNT(); ++ if (conditional_schedule_needed()) { ++ spin_unlock(&lru_list_lock); ++ goto repeat; ++ } ++ } + bh = BH_ENTRY(p); + if (buffer_locked(bh)) { + get_bh(bh); +@@ -929,7 +984,6 @@ + if 
(!buffer_uptodate(bh)) + err = -EIO; + brelse(bh); +- spin_lock(&lru_list_lock); + goto repeat; + } + } +@@ -946,12 +1000,24 @@ + void invalidate_inode_buffers(struct inode *inode) + { + struct list_head * entry; +- ++ ++repeat: ++ conditional_schedule(); + spin_lock(&lru_list_lock); +- while ((entry = inode->i_dirty_buffers.next) != &inode->i_dirty_buffers) ++ while ((entry = inode->i_dirty_buffers.next) != &inode->i_dirty_buffers) { ++ if (conditional_schedule_needed()) { ++ spin_unlock(&lru_list_lock); ++ goto repeat; ++ } + remove_inode_queue(BH_ENTRY(entry)); +- while ((entry = inode->i_dirty_data_buffers.next) != &inode->i_dirty_data_buffers) ++ } ++ while ((entry = inode->i_dirty_data_buffers.next) != &inode->i_dirty_data_buffers) { ++ if (conditional_schedule_needed()) { ++ spin_unlock(&lru_list_lock); ++ goto repeat; ++ } + remove_inode_queue(BH_ENTRY(entry)); ++ } + spin_unlock(&lru_list_lock); + } + +@@ -974,6 +1040,7 @@ + bh = get_hash_table(dev, block, size); + if (bh) { + touch_buffer(bh); ++ conditional_schedule(); + return bh; + } + +@@ -2831,7 +2898,7 @@ + + DECLARE_WAIT_QUEUE_HEAD(bdflush_wait); + +-void wakeup_bdflush(void) ++void wakeup_bdflush(void) + { + wake_up_interruptible(&bdflush_wait); + } +diff -Nur c3000_pre/linux/fs/dcache.c c3000_test/linux/fs/dcache.c +--- c3000_pre/linux/fs/dcache.c 2004-08-21 09:48:58.000000000 +0900 ++++ c3000_test/linux/fs/dcache.c 2004-12-20 22:56:21.000000000 +0900 +@@ -320,11 +320,23 @@ + + void prune_dcache(int count) + { ++ DEFINE_RESCHED_COUNT; ++ ++redo: + spin_lock(&dcache_lock); + for (;;) { + struct dentry *dentry; + struct list_head *tmp; + ++ if (TEST_RESCHED_COUNT(100)) { ++ RESET_RESCHED_COUNT(); ++ if (conditional_schedule_needed()) { ++ spin_unlock(&dcache_lock); ++ unconditional_schedule(); ++ goto redo; ++ } ++ } ++ + tmp = dentry_unused.prev; + + if (tmp == &dentry_unused) +@@ -479,6 +491,7 @@ + struct dentry *this_parent = parent; + struct list_head *next; + int found = 0; ++ DEFINE_RESCHED_COUNT; + + spin_lock(&dcache_lock); + repeat: +@@ -493,6 +506,13 @@ + list_add(&dentry->d_lru, dentry_unused.prev); + found++; + } ++ ++ if (TEST_RESCHED_COUNT(500) && found > 10) { ++ if (conditional_schedule_needed()) /* Typically sys_rmdir() */ ++ goto out; ++ RESET_RESCHED_COUNT(); ++ } ++ + /* + * Descend a level if the d_subdirs list is non-empty. 
+ */ +@@ -517,6 +537,7 @@ + #endif + goto resume; + } ++out: + spin_unlock(&dcache_lock); + return found; + } +@@ -532,8 +553,10 @@ + { + int found; + +- while ((found = select_parent(parent)) != 0) ++ while ((found = select_parent(parent)) != 0) { + prune_dcache(found); ++ conditional_schedule(); /* Typically sys_rmdir() */ ++ } + } + + /* +diff -Nur c3000_pre/linux/fs/exec.c c3000_test/linux/fs/exec.c +--- c3000_pre/linux/fs/exec.c 2004-08-21 09:48:58.000000000 +0900 ++++ c3000_test/linux/fs/exec.c 2004-12-20 22:56:21.000000000 +0900 +@@ -249,7 +249,7 @@ + memset(kaddr+offset+len, 0, + PAGE_SIZE-offset-len); + } +- err = copy_from_user(kaddr+offset, str, bytes_to_copy); ++ err = ll_copy_from_user(kaddr+offset, str, bytes_to_copy); + if (err) { + ret = -EFAULT; + goto out; +diff -Nur c3000_pre/linux/fs/ext2/dir.c c3000_test/linux/fs/ext2/dir.c +--- c3000_pre/linux/fs/ext2/dir.c 2004-08-21 09:48:59.000000000 +0900 ++++ c3000_test/linux/fs/ext2/dir.c 2004-12-20 22:56:21.000000000 +0900 +@@ -153,6 +153,7 @@ + struct address_space *mapping = dir->i_mapping; + struct page *page = read_cache_page(mapping, n, + (filler_t*)mapping->a_ops->readpage, NULL); ++ conditional_schedule(); /* Scanning large directories */ + if (!IS_ERR(page)) { + wait_on_page(page); + kmap(page); +diff -Nur c3000_pre/linux/fs/ext2/inode.c c3000_test/linux/fs/ext2/inode.c +--- c3000_pre/linux/fs/ext2/inode.c 2004-08-21 09:48:59.000000000 +0900 ++++ c3000_test/linux/fs/ext2/inode.c 2004-12-20 22:56:21.000000000 +0900 +@@ -715,8 +715,13 @@ + { + unsigned long block_to_free = 0, count = 0; + unsigned long nr; ++ DEFINE_RESCHED_COUNT; + + for ( ; p < q ; p++) { ++ if (TEST_RESCHED_COUNT(32)) { ++ RESET_RESCHED_COUNT(); ++ conditional_schedule(); ++ } + nr = le32_to_cpu(*p); + if (nr) { + *p = 0; +@@ -759,6 +764,7 @@ + if (depth--) { + int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); + for ( ; p < q ; p++) { ++ conditional_schedule(); /* Deleting large files */ + nr = le32_to_cpu(*p); + if (!nr) + continue; +diff -Nur c3000_pre/linux/fs/ext3/balloc.c c3000_test/linux/fs/ext3/balloc.c +--- c3000_pre/linux/fs/ext3/balloc.c 2004-08-21 09:48:59.000000000 +0900 ++++ c3000_test/linux/fs/ext3/balloc.c 2004-12-20 22:56:21.000000000 +0900 +@@ -363,6 +363,9 @@ + } + } + #endif ++ /* superblock lock is held, so this is safe */ ++ conditional_schedule(); ++ + BUFFER_TRACE(bitmap_bh, "clear bit"); + if (!ext3_clear_bit (bit + i, bitmap_bh->b_data)) { + ext3_error (sb, __FUNCTION__, +diff -Nur c3000_pre/linux/fs/ext3/inode.c c3000_test/linux/fs/ext3/inode.c +--- c3000_pre/linux/fs/ext3/inode.c 2004-08-21 09:48:59.000000000 +0900 ++++ c3000_test/linux/fs/ext3/inode.c 2004-12-20 22:56:21.000000000 +0900 +@@ -902,6 +902,8 @@ + + prev_blocks = inode->i_blocks; + ++ conditional_schedule(); /* Reading large directories */ ++ + bh = ext3_getblk (handle, inode, block, create, err); + if (!bh) + return bh; +@@ -1605,6 +1607,7 @@ + */ + for (p = first; p < last; p++) { + u32 nr = le32_to_cpu(*p); ++ conditional_schedule(); + if (nr) { + struct buffer_head *bh; + +@@ -1659,6 +1662,7 @@ + } + + for (p = first; p < last; p++) { ++ conditional_schedule(); + nr = le32_to_cpu(*p); + if (nr) { + /* accumulate blocks to free if they're contiguous */ +diff -Nur c3000_pre/linux/fs/ext3/namei.c c3000_test/linux/fs/ext3/namei.c +--- c3000_pre/linux/fs/ext3/namei.c 2004-08-21 09:48:59.000000000 +0900 ++++ c3000_test/linux/fs/ext3/namei.c 2004-12-20 22:56:21.000000000 +0900 +@@ -157,6 +157,7 @@ + if ((bh = bh_use[ra_ptr++]) == NULL) + goto next; + 
wait_on_buffer(bh); ++ conditional_schedule(); + if (!buffer_uptodate(bh)) { + /* read error, skip block & hope for the best */ + brelse(bh); +diff -Nur c3000_pre/linux/fs/inode.c c3000_test/linux/fs/inode.c +--- c3000_pre/linux/fs/inode.c 2004-08-21 09:48:58.000000000 +0900 ++++ c3000_test/linux/fs/inode.c 2004-12-20 23:00:06.000000000 +0900 +@@ -251,6 +251,8 @@ + + filemap_fdatawait(inode->i_mapping); + ++ conditional_schedule(); ++ + spin_lock(&inode_lock); + inode->i_state &= ~I_LOCK; + if (!(inode->i_state & I_FREEING)) { +@@ -561,6 +563,7 @@ + + while ((inode_entry = head->next) != head) + { ++ conditional_schedule(); + list_del(inode_entry); + + inode = list_entry(inode_entry, struct inode, i_list); +@@ -589,9 +592,22 @@ + if (tmp == head) + break; + inode = list_entry(tmp, struct inode, i_list); ++ ++ if (conditional_schedule_needed()) { ++ atomic_inc(&inode->i_count); ++ spin_unlock(&inode_lock); ++ unconditional_schedule(); ++ spin_lock(&inode_lock); ++ atomic_dec(&inode->i_count); ++ } ++ + if (inode->i_sb != sb) + continue; ++ atomic_inc(&inode->i_count); ++ spin_unlock(&inode_lock); + invalidate_inode_buffers(inode); ++ spin_lock(&inode_lock); ++ atomic_dec(&inode->i_count); + if (!atomic_read(&inode->i_count)) { + list_del_init(&inode->i_hash); + list_del(&inode->i_list); +@@ -896,6 +912,8 @@ + if (inode) { + struct inode * old; + ++ conditional_schedule(); /* sync_old_buffers */ ++ + spin_lock(&inode_lock); + /* We released the lock, so.. */ + old = find_inode(sb, ino, head, find_actor, opaque); +@@ -1313,18 +1331,32 @@ + int request=goal; + int loop=0; + #endif ++ int nr_to_scan = inodes_stat.nr_unused; + ++resume: + spin_lock(&inode_lock); + + count = 0; + entry = inode_unused.prev; +- while (entry != &inode_unused) +- { ++ while (entry != &inode_unused && nr_to_scan--) { + struct list_head *tmp = entry; + + #ifdef JFFS2_INODE_DEBUG + loop++; + #endif ++ if (conditional_schedule_needed()) { ++ /* ++ * Need to drop the lock. Reposition ++ * the list head so we start here next time. ++ * This can corrupt the LRU nature of the ++ * unused list, but this isn't very important. 
++ */ ++ list_del(&inode_unused); ++ list_add(&inode_unused, entry); ++ spin_unlock(&inode_lock); ++ unconditional_schedule(); ++ goto resume; ++ } + entry = entry->prev; + inode = INODE(tmp); + if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK)) +diff -Nur c3000_pre/linux/fs/jbd/checkpoint.c c3000_test/linux/fs/jbd/checkpoint.c +--- c3000_pre/linux/fs/jbd/checkpoint.c 2004-08-21 09:48:59.000000000 +0900 ++++ c3000_test/linux/fs/jbd/checkpoint.c 2004-12-20 22:56:21.000000000 +0900 +@@ -431,7 +431,11 @@ + { + transaction_t *transaction, *last_transaction, *next_transaction; + int ret = 0; ++ int ll_retries = 4; /* lowlatency addition */ + ++restart: ++ if (ll_retries-- == 0) ++ goto out; + transaction = journal->j_checkpoint_transactions; + if (transaction == 0) + goto out; +@@ -451,6 +455,12 @@ + jh = next_jh; + next_jh = jh->b_cpnext; + ret += __try_to_free_cp_buf(jh); ++ if (conditional_schedule_needed()) { ++ spin_unlock(&journal_datalist_lock); ++ unconditional_schedule(); ++ spin_lock(&journal_datalist_lock); ++ goto restart; ++ } + } while (jh != last_jh); + } + } while (transaction != last_transaction); +diff -Nur c3000_pre/linux/fs/jbd/commit.c c3000_test/linux/fs/jbd/commit.c +--- c3000_pre/linux/fs/jbd/commit.c 2004-08-21 09:48:59.000000000 +0900 ++++ c3000_test/linux/fs/jbd/commit.c 2004-12-20 22:56:21.000000000 +0900 +@@ -212,6 +212,16 @@ + __journal_remove_journal_head(bh); + refile_buffer(bh); + __brelse(bh); ++ if (conditional_schedule_needed()) { ++ if (commit_transaction->t_sync_datalist) ++ commit_transaction->t_sync_datalist = ++ next_jh; ++ if (bufs) ++ break; ++ spin_unlock(&journal_datalist_lock); ++ unconditional_schedule(); ++ goto write_out_data; ++ } + } + } + if (bufs == ARRAY_SIZE(wbuf)) { +@@ -235,8 +245,7 @@ + journal_brelse_array(wbuf, bufs); + lock_journal(journal); + spin_lock(&journal_datalist_lock); +- if (bufs) +- goto write_out_data_locked; ++ goto write_out_data_locked; + } + + /* +@@ -272,6 +281,14 @@ + */ + while ((jh = commit_transaction->t_async_datalist)) { + struct buffer_head *bh = jh2bh(jh); ++ if (conditional_schedule_needed()) { ++ spin_unlock(&journal_datalist_lock); ++ unlock_journal(journal); ++ unconditional_schedule(); ++ lock_journal(journal); ++ spin_lock(&journal_datalist_lock); ++ continue; /* List may have changed */ ++ } + if (buffer_locked(bh)) { + spin_unlock(&journal_datalist_lock); + unlock_journal(journal); +@@ -486,6 +503,8 @@ + wait_for_iobuf: + while (commit_transaction->t_iobuf_list != NULL) { + struct buffer_head *bh; ++ ++ conditional_schedule(); + jh = commit_transaction->t_iobuf_list->b_tprev; + bh = jh2bh(jh); + if (buffer_locked(bh)) { +@@ -644,6 +663,8 @@ + transaction_t *cp_transaction; + struct buffer_head *bh; + ++ conditional_schedule(); /* journal is locked */ ++ + jh = commit_transaction->t_forget; + J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || + jh->b_transaction == journal->j_running_transaction); +diff -Nur c3000_pre/linux/fs/proc/array.c c3000_test/linux/fs/proc/array.c +--- c3000_pre/linux/fs/proc/array.c 2004-08-21 09:49:01.000000000 +0900 ++++ c3000_test/linux/fs/proc/array.c 2004-12-20 22:56:21.000000000 +0900 +@@ -498,9 +498,11 @@ + if (end > PMD_SIZE) + end = PMD_SIZE; + do { +- pte_t page = *pte; ++ pte_t page; + struct page *ptpage; + ++ conditional_schedule(); /* For `top' and `ps' */ ++ page = *pte; + address += PAGE_SIZE; + pte++; + if (pte_none(page)) +diff -Nur c3000_pre/linux/fs/proc/generic.c c3000_test/linux/fs/proc/generic.c +--- c3000_pre/linux/fs/proc/generic.c 2004-08-21 
09:49:01.000000000 +0900 ++++ c3000_test/linux/fs/proc/generic.c 2004-12-20 22:56:21.000000000 +0900 +@@ -98,6 +98,8 @@ + retval = n; + break; + } ++ ++ conditional_schedule(); /* Some /proc files are large */ + + /* This is a hack to allow mangling of file pos independent + * of actual bytes read. Simply place the data at page, +diff -Nur c3000_pre/linux/fs/reiserfs/buffer2.c c3000_test/linux/fs/reiserfs/buffer2.c +--- c3000_pre/linux/fs/reiserfs/buffer2.c 2004-08-21 09:49:01.000000000 +0900 ++++ c3000_test/linux/fs/reiserfs/buffer2.c 2004-12-20 22:56:21.000000000 +0900 +@@ -54,6 +54,7 @@ + PROC_EXP( unsigned int ctx_switches = kstat.context_swtch ); + + result = bread (super -> s_dev, n_block, n_size); ++ conditional_schedule(); + PROC_INFO_INC( super, breads ); + PROC_EXP( if( kstat.context_swtch != ctx_switches ) + PROC_INFO_INC( super, bread_miss ) ); +diff -Nur c3000_pre/linux/fs/reiserfs/journal.c c3000_test/linux/fs/reiserfs/journal.c +--- c3000_pre/linux/fs/reiserfs/journal.c 2004-08-21 09:49:01.000000000 +0900 ++++ c3000_test/linux/fs/reiserfs/journal.c 2004-12-20 22:56:21.000000000 +0900 +@@ -573,6 +573,7 @@ + /* lock the current transaction */ + inline static void lock_journal(struct super_block *p_s_sb) { + PROC_INFO_INC( p_s_sb, journal.lock_journal ); ++ conditional_schedule(); + while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_wlock)) > 0) { + PROC_INFO_INC( p_s_sb, journal.lock_journal_wait ); + sleep_on(&(SB_JOURNAL(p_s_sb)->j_wait)) ; +@@ -703,6 +704,7 @@ + mark_buffer_dirty(tbh) ; + } + ll_rw_block(WRITE, 1, &tbh) ; ++ conditional_schedule(); + count++ ; + put_bh(tbh) ; /* once for our get_hash */ + } +@@ -832,6 +834,7 @@ + set_bit(BH_Dirty, &(SB_JOURNAL(p_s_sb)->j_header_bh->b_state)) ; + ll_rw_block(WRITE, 1, &(SB_JOURNAL(p_s_sb)->j_header_bh)) ; + wait_on_buffer((SB_JOURNAL(p_s_sb)->j_header_bh)) ; ++ conditional_schedule(); + if (!buffer_uptodate(SB_JOURNAL(p_s_sb)->j_header_bh)) { + printk( "reiserfs: journal-837: IO error during journal replay\n" ); + return -EIO ; +@@ -2125,6 +2128,7 @@ + } + + int journal_begin(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, unsigned long nblocks) { ++ conditional_schedule(); + return do_journal_begin_r(th, p_s_sb, nblocks, 0) ; + } + +@@ -2265,6 +2269,7 @@ + } + + int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { ++ conditional_schedule(); + return do_journal_end(th, p_s_sb, nblocks, 0) ; + } + +@@ -2716,6 +2721,7 @@ + RFALSE( buffer_locked(bh) && cur_tb != NULL, + "waiting while do_balance was running\n") ; + wait_on_buffer(bh) ; ++ conditional_schedule(); + } + PROC_INFO_INC( p_s_sb, journal.prepare_retry ); + retry_count++ ; +@@ -2888,6 +2894,7 @@ + /* copy all the real blocks into log area. 
dirty log blocks */ + if (test_bit(BH_JDirty, &cn->bh->b_state)) { + struct buffer_head *tmp_bh ; ++ conditional_schedule(); + tmp_bh = sb_getblk(p_s_sb, reiserfs_get_journal_block(p_s_sb) + + ((cur_write_start + jindex) % JOURNAL_BLOCK_COUNT)) ; + mark_buffer_uptodate(tmp_bh, 1) ; +diff -Nur c3000_pre/linux/fs/reiserfs/stree.c c3000_test/linux/fs/reiserfs/stree.c +--- c3000_pre/linux/fs/reiserfs/stree.c 2004-08-21 09:49:01.000000000 +0900 ++++ c3000_test/linux/fs/reiserfs/stree.c 2004-12-20 22:56:21.000000000 +0900 +@@ -652,9 +652,8 @@ + stop at leaf level - set to + DISK_LEAF_NODE_LEVEL */ + ) { +- int n_block_number = SB_ROOT_BLOCK (p_s_sb), +- expected_level = SB_TREE_HEIGHT (p_s_sb), +- n_block_size = p_s_sb->s_blocksize; ++ int n_block_number, expected_level; ++ int n_block_size = p_s_sb->s_blocksize; + struct buffer_head * p_s_bh; + struct path_element * p_s_last_element; + int n_node_level, n_retval; +@@ -666,7 +665,8 @@ + #endif + + PROC_INFO_INC( p_s_sb, search_by_key ); +- ++ conditional_schedule(); ++ + /* As we add each node to a path we increase its count. This means that + we must be careful to release all nodes in a path before we either + discard the path struct or re-use the path struct, as we do here. */ +@@ -678,6 +678,8 @@ + /* With each iteration of this loop we search through the items in the + current node, and calculate the next current node(next path element) + for the next iteration of this loop.. */ ++ n_block_number = SB_ROOT_BLOCK (p_s_sb); ++ expected_level = SB_TREE_HEIGHT (p_s_sb); + while ( 1 ) { + + #ifdef CONFIG_REISERFS_CHECK +@@ -1104,6 +1106,8 @@ + for (n_counter = *p_n_removed; + n_counter < n_unfm_number; n_counter++, p_n_unfm_pointer-- ) { + ++ conditional_schedule(); ++ + if (item_moved (&s_ih, p_s_path)) { + need_research = 1 ; + break; +diff -Nur c3000_pre/linux/include/linux/low-latency.h c3000_test/linux/include/linux/low-latency.h +--- c3000_pre/linux/include/linux/low-latency.h 1970-01-01 09:00:00.000000000 +0900 ++++ c3000_test/linux/include/linux/low-latency.h 2004-12-20 22:56:21.000000000 +0900 +@@ -0,0 +1,109 @@ ++/* ++ * include/linux/low-latency.h ++ * ++ * Andrew Morton <akpm@zip.com.au> ++ */ ++ ++#ifndef LOW_LATENCY_H_INCLUDED ++#define LOW_LATENCY_H_INCLUDED ++ ++#if defined(CONFIG_LOLAT) ++#define LOWLATENCY_NEEDED 1 ++#else ++#define LOWLATENCY_NEEDED 0 ++#endif ++ ++#if LOWLATENCY_NEEDED ++ ++#include <linux/cache.h> /* For ____cacheline_aligned */ ++ ++#ifdef CONFIG_LOLAT_SYSCTL ++extern struct low_latency_enable_struct { ++ int yep; ++} ____cacheline_aligned __enable_lowlatency; ++#define enable_lowlatency __enable_lowlatency.yep ++ ++#else ++#define enable_lowlatency 1 ++#endif ++ ++/* ++ * Set this non-zero to generate low-latency instrumentation ++ */ ++#define LOWLATENCY_DEBUG 0 ++ ++/* ++ * Set this non-zero for robustness testing ++ */ ++#define LOWLATENCY_ALWAYS_SCHEDULE 0 ++ ++#if LOWLATENCY_DEBUG ++ ++#if LOWLATENCY_ALWAYS_SCHEDULE ++#define conditional_schedule_needed() ((enable_lowlatency == 2) || (enable_lowlatency && current->need_resched)) ++#else ++#define conditional_schedule_needed() (enable_lowlatency && current->need_resched) ++#endif ++ ++struct lolat_stats_t { ++ unsigned long count; ++ int visited; ++ const char *file; ++ int line; ++ struct lolat_stats_t *next; ++}; ++ ++void set_running_and_schedule(struct lolat_stats_t *stats); ++ ++#define unconditional_schedule() \ ++ do { \ ++ static struct lolat_stats_t stats = { \ ++ file: __FILE__, \ ++ line: __LINE__, \ ++ }; \ ++ 
set_running_and_schedule(&stats); \ ++ } while (0) ++ ++extern void show_lolat_stats(void); ++ ++#else /* LOWLATENCY_DEBUG */ ++ ++#if LOWLATENCY_ALWAYS_SCHEDULE ++#define conditional_schedule_needed() 1 ++#else ++#define conditional_schedule_needed() (current->need_resched) ++#endif ++ ++void set_running_and_schedule(void); ++#define unconditional_schedule() set_running_and_schedule() ++ ++#endif /* LOWLATENCY_DEBUG */ ++ ++#define conditional_schedule() \ ++ do { \ ++ if (conditional_schedule_needed()) \ ++ unconditional_schedule(); \ ++ } while (0) ++ ++#define DEFINE_RESCHED_COUNT int resched_count = 0 ++#define TEST_RESCHED_COUNT(n) (enable_lowlatency && (++resched_count > (n))) ++#define RESET_RESCHED_COUNT() resched_count = 0 ++extern int ll_copy_to_user(void *to_user, const void *from, unsigned long len); ++extern int ll_copy_from_user(void *to, const void *from_user, unsigned long len); ++ ++#else /* LOWLATENCY_NEEDED */ ++ ++#define conditional_schedule_needed() 0 ++#define conditional_schedule() ++#define unconditional_schedule() ++ ++#define DEFINE_RESCHED_COUNT ++#define TEST_RESCHED_COUNT(n) 0 ++#define RESET_RESCHED_COUNT() ++#define ll_copy_to_user(to_user, from, len) copy_to_user((to_user), (from), (len)) ++#define ll_copy_from_user(to, from_user, len) copy_from_user((to), (from_user), (len)) ++ ++#endif /* LOWLATENCY_NEEDED */ ++ ++#endif /* LOW_LATENCY_H_INCLUDED */ ++ +diff -Nur c3000_pre/linux/include/linux/mm.h c3000_test/linux/include/linux/mm.h +--- c3000_pre/linux/include/linux/mm.h 2004-08-21 09:49:13.000000000 +0900 ++++ c3000_test/linux/include/linux/mm.h 2004-12-20 22:56:21.000000000 +0900 +@@ -149,6 +149,8 @@ + */ + extern pgprot_t protection_map[16]; + ++/* Actions for zap_page_range() */ ++#define ZPR_COND_RESCHED 1 /* Do a conditional_schedule() occasionally */ + + /* + * These are the virtual MM functions - opening of an area, closing and +@@ -500,7 +502,7 @@ + extern void shmem_lock(struct file * file, int lock); + extern int shmem_zero_setup(struct vm_area_struct *); + +-extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size); ++extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, int actions); + extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma); + extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); + extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot); +diff -Nur c3000_pre/linux/include/linux/reiserfs_fs.h c3000_test/linux/include/linux/reiserfs_fs.h +--- c3000_pre/linux/include/linux/reiserfs_fs.h 2004-08-21 09:49:13.000000000 +0900 ++++ c3000_test/linux/include/linux/reiserfs_fs.h 2004-12-20 22:56:21.000000000 +0900 +@@ -1197,8 +1197,8 @@ + #define fs_generation(s) ((s)->u.reiserfs_sb.s_generation_counter) + #define get_generation(s) atomic_read (&fs_generation(s)) + #define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen) +-#define fs_changed(gen,s) (gen != get_generation (s)) +- ++#define __fs_changed(gen,s) (gen != get_generation (s)) ++#define fs_changed(gen,s) ({conditional_schedule(); __fs_changed(gen,s);}) + + /***************************************************************************/ + /* FIXATE NODES */ +diff -Nur c3000_pre/linux/include/linux/sched.h c3000_test/linux/include/linux/sched.h +--- c3000_pre/linux/include/linux/sched.h 2004-08-21 09:49:13.000000000 +0900 ++++ c3000_test/linux/include/linux/sched.h 
2004-12-20 22:56:21.000000000 +0900 +@@ -1092,6 +1092,7 @@ + #include <linux/dcache.h> + #include <linux/tqueue.h> + #include <linux/fs_struct.h> ++#include <linux/low-latency.h> + + #endif /* __KERNEL__ */ + #endif +diff -Nur c3000_pre/linux/include/linux/sysctl.h c3000_test/linux/include/linux/sysctl.h +--- c3000_pre/linux/include/linux/sysctl.h 2004-08-21 09:49:13.000000000 +0900 ++++ c3000_test/linux/include/linux/sysctl.h 2004-12-20 22:56:21.000000000 +0900 +@@ -131,6 +131,7 @@ + KERN_CORE_USES_PID=52, /* int: use core or core.%pid */ + KERN_TAINTED=53, /* int: various kernel tainted flags */ + KERN_CADPID=54, /* int: PID of the process to notify on CAD */ ++ KERN_LOWLATENCY=55, /* int: enable low latency scheduling */ + }; + + +diff -Nur c3000_pre/linux/kernel/exit.c c3000_test/linux/kernel/exit.c +--- c3000_pre/linux/kernel/exit.c 2004-08-21 09:49:14.000000000 +0900 ++++ c3000_test/linux/kernel/exit.c 2004-12-20 22:56:21.000000000 +0900 +@@ -196,6 +196,7 @@ + } + i++; + set >>= 1; ++ conditional_schedule(); /* sys_exit, many files open */ + } + } + } +diff -Nur c3000_pre/linux/kernel/ksyms.c c3000_test/linux/kernel/ksyms.c +--- c3000_pre/linux/kernel/ksyms.c 2004-12-19 00:35:59.000000000 +0900 ++++ c3000_test/linux/kernel/ksyms.c 2004-12-20 23:07:26.000000000 +0900 +@@ -481,6 +481,13 @@ + EXPORT_SYMBOL(do_gettimeofday); + EXPORT_SYMBOL(do_settimeofday); + ++#if LOWLATENCY_NEEDED ++EXPORT_SYMBOL(set_running_and_schedule); ++#ifdef CONFIG_LOLAT_SYSCTL ++EXPORT_SYMBOL(__enable_lowlatency); ++#endif ++#endif ++ + #if !defined(__ia64__) + EXPORT_SYMBOL(loops_per_jiffy); + #endif +diff -Nur c3000_pre/linux/kernel/module.c c3000_test/linux/kernel/module.c +--- c3000_pre/linux/kernel/module.c 2004-08-21 09:49:14.000000000 +0900 ++++ c3000_test/linux/kernel/module.c 2004-12-20 22:56:21.000000000 +0900 +@@ -1174,6 +1174,11 @@ + return ERR_PTR(-ENOMEM); + lock_kernel(); + for (v = module_list, n = *pos; v; n -= v->nsyms, v = v->next) { ++#if 0 ++ /* We can't actually do this, because we'd create a ++ * race against module unload. Need a semaphore. 
*/ ++ conditional_schedule(); ++#endif + if (n < v->nsyms) { + p->mod = v; + p->index = n; +diff -Nur c3000_pre/linux/kernel/sched.c c3000_test/linux/kernel/sched.c +--- c3000_pre/linux/kernel/sched.c 2004-08-21 09:49:14.000000000 +0900 ++++ c3000_test/linux/kernel/sched.c 2004-12-20 22:56:21.000000000 +0900 +@@ -302,6 +302,17 @@ + if (tsk->processor != this_cpu) + smp_send_reschedule(tsk->processor); + } ++#if LOWLATENCY_NEEDED ++ if (enable_lowlatency && (p->policy != SCHED_OTHER)) { ++ struct task_struct *t; ++ for (i = 0; i < smp_num_cpus; i++) { ++ cpu = cpu_logical_map(i); ++ t = cpu_curr(cpu); ++ if (t != tsk) ++ t->need_resched = 1; ++ } ++ } ++#endif + return; + + +@@ -1429,3 +1440,93 @@ + atomic_inc(&init_mm.mm_count); + enter_lazy_tlb(&init_mm, current, cpu); + } ++ ++#if LOWLATENCY_NEEDED ++#if LOWLATENCY_DEBUG ++ ++static struct lolat_stats_t *lolat_stats_head; ++static spinlock_t lolat_stats_lock = SPIN_LOCK_UNLOCKED; ++ ++void set_running_and_schedule(struct lolat_stats_t *stats) ++{ ++ spin_lock(&lolat_stats_lock); ++ if (stats->visited == 0) { ++ stats->visited = 1; ++ stats->next = lolat_stats_head; ++ lolat_stats_head = stats; ++ } ++ stats->count++; ++ spin_unlock(&lolat_stats_lock); ++ ++ if (current->state != TASK_RUNNING) ++ set_current_state(TASK_RUNNING); ++ schedule(); ++} ++ ++void show_lolat_stats(void) ++{ ++ struct lolat_stats_t *stats = lolat_stats_head; ++ ++ printk("Low latency scheduling stats:\n"); ++ while (stats) { ++ printk("%s:%d: %lu\n", stats->file, stats->line, stats->count); ++ stats->count = 0; ++ stats = stats->next; ++ } ++} ++ ++#else /* LOWLATENCY_DEBUG */ ++ ++void set_running_and_schedule() ++{ ++ if (current->state != TASK_RUNNING) ++ __set_current_state(TASK_RUNNING); ++ schedule(); ++} ++ ++#endif /* LOWLATENCY_DEBUG */ ++ ++int ll_copy_to_user(void *to_user, const void *from, unsigned long len) ++{ ++ while (len) { ++ unsigned long n_to_copy = len; ++ unsigned long remainder; ++ ++ if (n_to_copy > 4096) ++ n_to_copy = 4096; ++ remainder = copy_to_user(to_user, from, n_to_copy); ++ if (remainder) ++ return remainder + len; ++ to_user = ((char *)to_user) + n_to_copy; ++ from = ((char *)from) + n_to_copy; ++ len -= n_to_copy; ++ conditional_schedule(); ++ } ++ return 0; ++} ++ ++int ll_copy_from_user(void *to, const void *from_user, unsigned long len) ++{ ++ while (len) { ++ unsigned long n_to_copy = len; ++ unsigned long remainder; ++ ++ if (n_to_copy > 4096) ++ n_to_copy = 4096; ++ remainder = copy_from_user(to, from_user, n_to_copy); ++ if (remainder) ++ return remainder + len; ++ to = ((char *)to) + n_to_copy; ++ from_user = ((char *)from_user) + n_to_copy; ++ len -= n_to_copy; ++ conditional_schedule(); ++ } ++ return 0; ++} ++ ++#ifdef CONFIG_LOLAT_SYSCTL ++struct low_latency_enable_struct __enable_lowlatency = { 0, }; ++#endif ++ ++#endif /* LOWLATENCY_NEEDED */ ++ +diff -Nur c3000_pre/linux/kernel/sysctl.c c3000_test/linux/kernel/sysctl.c +--- c3000_pre/linux/kernel/sysctl.c 2004-08-21 09:49:14.000000000 +0900 ++++ c3000_test/linux/kernel/sysctl.c 2004-12-20 22:56:21.000000000 +0900 +@@ -271,6 +271,10 @@ + {KERN_S390_USER_DEBUG_LOGGING,"userprocess_debug", + &sysctl_userprocess_debug,sizeof(int),0644,NULL,&proc_dointvec}, + #endif ++#ifdef CONFIG_LOLAT_SYSCTL ++ {KERN_LOWLATENCY, "lowlatency", &enable_lowlatency, sizeof (int), ++ 0644, NULL, &proc_dointvec}, ++#endif + {0} + }; + +diff -Nur c3000_pre/linux/mm/filemap.c c3000_test/linux/mm/filemap.c +--- c3000_pre/linux/mm/filemap.c 2004-08-21 09:49:15.000000000 +0900 ++++ 
c3000_test/linux/mm/filemap.c 2004-12-20 22:56:21.000000000 +0900 +@@ -179,7 +179,9 @@ + { + struct list_head *head, *curr; + struct page * page; ++ int ll_count = 100; + ++restart: + head = &inode->i_mapping->clean_pages; + + spin_lock(&pagemap_lru_lock); +@@ -190,6 +192,14 @@ + page = list_entry(curr, struct page, list); + curr = curr->next; + ++ if (conditional_schedule_needed() && ll_count) { ++ spin_unlock(&pagecache_lock); ++ spin_unlock(&pagemap_lru_lock); ++ unconditional_schedule(); ++ ll_count--; ++ goto restart; ++ } ++ + /* We cannot invalidate something in dirty.. */ + if (PageDirty(page)) + continue; +@@ -253,8 +263,7 @@ + page_cache_release(page); + } + +-static int FASTCALL(truncate_list_pages(struct list_head *, unsigned long, unsigned *)); +-static int truncate_list_pages(struct list_head *head, unsigned long start, unsigned *partial) ++static int truncate_list_pages(struct list_head *head, unsigned long start, unsigned *partial, int *restart_count) + { + struct list_head *curr; + struct page * page; +@@ -265,6 +274,17 @@ + while (curr != head) { + unsigned long offset; + ++ if (conditional_schedule_needed() && *restart_count) { ++ (*restart_count)--; ++ list_del(head); ++ list_add(head, curr); /* Restart on this page */ ++ spin_unlock(&pagecache_lock); ++ unconditional_schedule(); ++ spin_lock(&pagecache_lock); ++ unlocked = 1; ++ goto restart; ++ } ++ + page = list_entry(curr, struct page, list); + offset = page->index; + +@@ -297,13 +317,11 @@ + } else + wait_on_page(page); + +- page_cache_release(page); +- +- if (current->need_resched) { +- __set_current_state(TASK_RUNNING); +- schedule(); ++ if (LOWLATENCY_NEEDED) { ++ *restart_count = 4; /* We made progress */ + } + ++ page_cache_release(page); + spin_lock(&pagecache_lock); + goto restart; + } +@@ -326,13 +344,14 @@ + { + unsigned long start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); ++ int restart_count = 4; + int unlocked; + + spin_lock(&pagecache_lock); + do { +- unlocked = truncate_list_pages(&mapping->clean_pages, start, &partial); +- unlocked |= truncate_list_pages(&mapping->dirty_pages, start, &partial); +- unlocked |= truncate_list_pages(&mapping->locked_pages, start, &partial); ++ unlocked = truncate_list_pages(&mapping->clean_pages, start, &partial, &restart_count); ++ unlocked |= truncate_list_pages(&mapping->dirty_pages, start, &partial, &restart_count); ++ unlocked |= truncate_list_pages(&mapping->locked_pages, start, &partial, &restart_count); + } while (unlocked); + /* Traversed all three lists without dropping the lock */ + spin_unlock(&pagecache_lock); +@@ -477,6 +496,7 @@ + + page_cache_get(page); + spin_unlock(&pagecache_lock); ++ conditional_schedule(); /* sys_msync() (only used by minixfs, udf) */ + lock_page(page); + + /* The buffers could have been free'd while we waited for the page lock */ +@@ -563,12 +583,14 @@ + list_del(&page->list); + list_add(&page->list, &mapping->locked_pages); + +- if (!PageDirty(page)) +- continue; +- + page_cache_get(page); + spin_unlock(&pagecache_lock); + ++ conditional_schedule(); /* sys_msync() */ ++ ++ if (!PageDirty(page)) ++ goto clean; ++ + lock_page(page); + + if (PageDirty(page)) { +@@ -579,7 +601,7 @@ + ret = err; + } else + UnlockPage(page); +- ++clean: + page_cache_release(page); + spin_lock(&pagecache_lock); + } +@@ -597,7 +619,8 @@ + int filemap_fdatawait(struct address_space * mapping) + { + int ret = 0; +- ++ DEFINE_RESCHED_COUNT; ++restart: + spin_lock(&pagecache_lock); + + while 
(!list_empty(&mapping->locked_pages)) { +@@ -606,6 +629,17 @@ + list_del(&page->list); + list_add(&page->list, &mapping->clean_pages); + ++ if (TEST_RESCHED_COUNT(32)) { ++ RESET_RESCHED_COUNT(); ++ if (conditional_schedule_needed()) { ++ page_cache_get(page); ++ spin_unlock(&pagecache_lock); ++ unconditional_schedule(); ++ page_cache_release(page); ++ goto restart; ++ } ++ } ++ + if (!PageLocked(page)) + continue; + +@@ -706,8 +740,10 @@ + spin_lock(&pagecache_lock); + page = __find_page_nolock(mapping, offset, *hash); + spin_unlock(&pagecache_lock); +- if (page) ++ if (page) { ++ conditional_schedule(); + return 0; ++ } + + page = page_cache_alloc(mapping); + if (!page) +@@ -963,6 +999,11 @@ + * the hash-list needs a held write-lock. + */ + repeat: ++ if (conditional_schedule_needed()) { ++ spin_unlock(&pagecache_lock); ++ unconditional_schedule(); ++ spin_lock(&pagecache_lock); ++ } + page = __find_page_nolock(mapping, offset, hash); + if (page) { + page_cache_get(page); +@@ -1413,6 +1454,8 @@ + page_cache_get(page); + spin_unlock(&pagecache_lock); + ++ conditional_schedule(); /* sys_read() */ ++ + if (!Page_Uptodate(page)) + goto page_not_up_to_date; + generic_file_readahead(reada_ok, filp, inode, page); +@@ -2114,6 +2157,12 @@ + address += PAGE_SIZE; + pte++; + } while (address && (address < end)); ++ ++ if (conditional_schedule_needed()) { ++ spin_unlock(&vma->vm_mm->page_table_lock); ++ unconditional_schedule(); /* syncing large mapped files */ ++ spin_lock(&vma->vm_mm->page_table_lock); ++ } + return error; + } + +@@ -2530,7 +2579,9 @@ + if (vma->vm_flags & VM_LOCKED) + return -EINVAL; + +- zap_page_range(vma->vm_mm, start, end - start); ++ zap_page_range(vma->vm_mm, start, end - start, ++ ZPR_COND_RESCHED); /* sys_madvise(MADV_DONTNEED) */ ++ + return 0; + } + +@@ -3095,6 +3146,9 @@ + goto sync_failure; + page_fault = __copy_from_user(kaddr+offset, buf, bytes); + flush_dcache_page(page); ++ ++ conditional_schedule(); ++ + status = mapping->a_ops->commit_write(file, page, offset, offset+bytes); + if (page_fault) + goto fail_write; +diff -Nur c3000_pre/linux/mm/memory.c c3000_test/linux/mm/memory.c +--- c3000_pre/linux/mm/memory.c 2004-08-21 09:49:15.000000000 +0900 ++++ c3000_test/linux/mm/memory.c 2004-12-20 22:56:21.000000000 +0900 +@@ -370,7 +370,7 @@ + /* + * remove user pages in a given range. + */ +-void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size) ++static void do_zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size) + { + mmu_gather_t *tlb; + pgd_t * dir; +@@ -494,6 +494,10 @@ + struct page *map; + while (!(map = follow_page(mm, start, write))) { + spin_unlock(&mm->page_table_lock); ++ ++ /* Pinning down many physical pages (kiobufs, mlockall) */ ++ conditional_schedule(); ++ + switch (handle_mm_fault(mm, vma, start, write)) { + case 1: + tsk->min_flt++; +@@ -655,6 +659,21 @@ + iobuf->locked = 0; + } + ++#define MAX_ZAP_BYTES 256*PAGE_SIZE ++ ++void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, int actions) ++{ ++ while (size) { ++ unsigned long chunk = size; ++ if (actions & ZPR_COND_RESCHED && chunk > MAX_ZAP_BYTES) ++ chunk = MAX_ZAP_BYTES; ++ do_zap_page_range(mm, address, chunk); ++ if (actions & ZPR_COND_RESCHED) ++ conditional_schedule(); ++ address += chunk; ++ size -= chunk; ++ } ++} + + /* + * Lock down all of the pages of a kiovec for IO. 
+@@ -764,11 +783,18 @@ + return 0; + } + +-static inline void zeromap_pte_range(pte_t * pte, unsigned long address, +- unsigned long size, pgprot_t prot) ++static inline void zeromap_pte_range(struct mm_struct *mm, pte_t * pte, ++ unsigned long address, unsigned long size, ++ pgprot_t prot) + { + unsigned long end; + ++ if (conditional_schedule_needed()) { ++ spin_unlock(&mm->page_table_lock); ++ unconditional_schedule(); /* mmap(/dev/zero) */ ++ spin_lock(&mm->page_table_lock); ++ } ++ + address &= ~PMD_MASK; + end = address + size; + if (end > PMD_SIZE) +@@ -796,7 +822,7 @@ + pte_t * pte = pte_alloc(mm, pmd, address); + if (!pte) + return -ENOMEM; +- zeromap_pte_range(pte, address, end - address, prot); ++ zeromap_pte_range(mm, pte, address, end - address, prot); + address = (address + PMD_SIZE) & PMD_MASK; + pmd++; + } while (address && (address < end)); +@@ -1044,7 +1070,7 @@ + + /* mapping wholly truncated? */ + if (mpnt->vm_pgoff >= pgoff) { +- zap_page_range(mm, start, len); ++ zap_page_range(mm, start, len, 0); + continue; + } + +@@ -1057,7 +1083,7 @@ + /* Ok, partially affected.. */ + start += diff << PAGE_SHIFT; + len = (len - diff) << PAGE_SHIFT; +- zap_page_range(mm, start, len); ++ zap_page_range(mm, start, len, 0); + } while ((mpnt = mpnt->vm_next_share) != NULL); + } + +diff -Nur c3000_pre/linux/mm/mmap.c c3000_test/linux/mm/mmap.c +--- c3000_pre/linux/mm/mmap.c 2004-12-16 22:55:54.000000000 +0900 ++++ c3000_test/linux/mm/mmap.c 2004-12-20 23:07:25.000000000 +0900 +@@ -598,7 +598,7 @@ + fput(file); + + /* Undo any partial mapping done by a device driver. */ +- zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start); ++ zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start, 0); + free_vma: + kmem_cache_free(vm_area_cachep, vma); + return error; +@@ -998,7 +998,7 @@ + remove_shared_vm_struct(mpnt); + mm->map_count--; + +- zap_page_range(mm, st, size); ++ zap_page_range(mm, st, size, ZPR_COND_RESCHED); /* sys_munmap() */ + + /* + * Fix the mapping, and free the old area if it wasn't reused. 
+@@ -1158,7 +1158,7 @@ + } + mm->map_count--; + remove_shared_vm_struct(mpnt); +- zap_page_range(mm, start, size); ++ zap_page_range(mm, start, size, ZPR_COND_RESCHED); /* sys_exit() */ + if (mpnt->vm_file) + fput(mpnt->vm_file); + kmem_cache_free(vm_area_cachep, mpnt); +diff -Nur c3000_pre/linux/mm/mremap.c c3000_test/linux/mm/mremap.c +--- c3000_pre/linux/mm/mremap.c 2004-12-16 22:55:54.000000000 +0900 ++++ c3000_test/linux/mm/mremap.c 2004-12-20 23:07:25.000000000 +0900 +@@ -121,7 +121,7 @@ + flush_cache_range(mm, new_addr, new_addr + len); + while ((offset += PAGE_SIZE) < len) + move_one_page(mm, new_addr + offset, old_addr + offset); +- zap_page_range(mm, new_addr, len); ++ zap_page_range(mm, new_addr, len, 0); + #ifdef __arm__ + memc_update_mm(mm); + #endif +diff -Nur c3000_pre/linux/mm/slab.c c3000_test/linux/mm/slab.c +--- c3000_pre/linux/mm/slab.c 2004-11-16 15:31:09.000000000 +0900 ++++ c3000_test/linux/mm/slab.c 2004-12-20 22:56:21.000000000 +0900 +@@ -940,6 +940,7 @@ + list_del(&slabp->list); + + spin_unlock_irq(&cachep->spinlock); ++ conditional_schedule(); + kmem_slab_destroy(cachep, slabp); + ret++; + spin_lock_irq(&cachep->spinlock); +@@ -1853,6 +1854,7 @@ + */ + spin_unlock_irq(&best_cachep->spinlock); + kmem_slab_destroy(best_cachep, slabp); ++ conditional_schedule(); /* try_to_free_pages() */ + spin_lock_irq(&best_cachep->spinlock); + } + spin_unlock_irq(&best_cachep->spinlock); +diff -Nur c3000_pre/linux/mm/swapfile.c c3000_test/linux/mm/swapfile.c +--- c3000_pre/linux/mm/swapfile.c 2004-08-21 09:49:16.000000000 +0900 ++++ c3000_test/linux/mm/swapfile.c 2004-12-20 22:56:21.000000000 +0900 +@@ -819,7 +819,7 @@ + len += sprintf(buf + len, "partition\t"); + + usedswap = 0; +- for (j = 0; j < ptr->max; ++j) ++ for (j = 0; j < ptr->max; ++j) { + switch (ptr->swap_map[j]) { + case SWAP_MAP_BAD: + case 0: +@@ -827,6 +827,8 @@ + default: + usedswap++; + } ++ conditional_schedule(); ++ } + len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10), + usedswap << (PAGE_SHIFT - 10), ptr->prio); + } +@@ -1120,6 +1122,11 @@ + if (swap_info[i].flags != SWP_USED) + continue; + for (j = 0; j < swap_info[i].max; ++j) { ++ if (conditional_schedule_needed()) { ++ swap_list_unlock(); ++ conditional_schedule(); ++ swap_list_lock(); ++ } + switch (swap_info[i].swap_map[j]) { + case 0: + case SWAP_MAP_BAD: +diff -Nur c3000_pre/linux/mm/vmscan.c c3000_test/linux/mm/vmscan.c +--- c3000_pre/linux/mm/vmscan.c 2004-08-21 09:49:16.000000000 +0900 ++++ c3000_test/linux/mm/vmscan.c 2004-12-20 22:56:21.000000000 +0900 +@@ -173,6 +173,7 @@ + { + pte_t * pte; + unsigned long pmd_end; ++ DEFINE_RESCHED_COUNT; + + if (pmd_none(*dir)) + return count; +@@ -198,11 +199,17 @@ + address += PAGE_SIZE; + break; + } ++ if (TEST_RESCHED_COUNT(4)) { ++ if (conditional_schedule_needed()) ++ goto out; ++ RESET_RESCHED_COUNT(); ++ } + } + } + address += PAGE_SIZE; + pte++; + } while (address && (address < end)); ++out: + mm->swap_address = address; + return count; + } +@@ -231,6 +238,8 @@ + count = swap_out_pmd(mm, vma, pmd, address, end, count, classzone); + if (!count) + break; ++ if (conditional_schedule_needed()) ++ return count; + address = (address + PMD_SIZE) & PMD_MASK; + pmd++; + } while (address && (address < end)); +@@ -255,6 +264,8 @@ + count = swap_out_pgd(mm, vma, pgdir, address, end, count, classzone); + if (!count) + break; ++ if (conditional_schedule_needed()) ++ return count; + address = (address + PGDIR_SIZE) & PGDIR_MASK; + pgdir++; + } while (address && (address < end)); +@@ -276,6 
+287,7 @@ + * Find the proper vm-area after freezing the vma chain + * and ptes. + */ ++continue_scan: + spin_lock(&mm->page_table_lock); + address = mm->swap_address; + if (address == TASK_SIZE || swap_mm != mm) { +@@ -293,6 +305,12 @@ + vma = vma->vm_next; + if (!vma) + break; ++ if (conditional_schedule_needed()) { /* Scanning a large vma */ ++ spin_unlock(&mm->page_table_lock); ++ unconditional_schedule(); ++ /* Continue from where we left off */ ++ goto continue_scan; ++ } + if (!count) + goto out_unlock; + address = vma->vm_start; +diff -Nur c3000_pre/linux/net/core/iovec.c c3000_test/linux/net/core/iovec.c +--- c3000_pre/linux/net/core/iovec.c 2004-08-21 11:23:13.000000000 +0900 ++++ c3000_test/linux/net/core/iovec.c 2004-12-20 22:56:21.000000000 +0900 +@@ -88,7 +88,7 @@ + if(iov->iov_len) + { + int copy = min_t(unsigned int, iov->iov_len, len); +- if (copy_to_user(iov->iov_base, kdata, copy)) ++ if (ll_copy_to_user(iov->iov_base, kdata, copy)) + goto out; + kdata+=copy; + len-=copy; +diff -Nur c3000_pre/linux/net/ipv4/tcp_minisocks.c c3000_test/linux/net/ipv4/tcp_minisocks.c +--- c3000_pre/linux/net/ipv4/tcp_minisocks.c 2004-08-21 09:49:22.000000000 +0900 ++++ c3000_test/linux/net/ipv4/tcp_minisocks.c 2004-12-20 22:56:21.000000000 +0900 +@@ -440,6 +440,9 @@ + { + struct tcp_tw_bucket *tw; + int killed = 0; ++#if LOWLATENCY_NEEDED ++ int max_killed = 0; ++#endif + + /* NOTE: compare this to previous version where lock + * was released after detaching chain. It was racy, +@@ -453,6 +456,13 @@ + goto out; + + while((tw = tcp_tw_death_row[tcp_tw_death_row_slot]) != NULL) { ++#if LOWLATENCY_NEEDED ++ /* This loop takes ~6 usecs per iteration. */ ++ if (killed > 100) { ++ max_killed = 1; ++ break; ++ } ++#endif + tcp_tw_death_row[tcp_tw_death_row_slot] = tw->next_death; + tw->pprev_death = NULL; + spin_unlock(&tw_death_lock); +@@ -463,12 +473,24 @@ + killed++; + + spin_lock(&tw_death_lock); ++ ++ } ++ ++#if LOWLATENCY_NEEDED ++ if (max_killed) { /* More to do: do it soon */ ++ mod_timer(&tcp_tw_timer, jiffies+2); ++ tcp_tw_count -= killed; ++ } ++ else ++#endif ++ { ++ tcp_tw_death_row_slot = ++ ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1)); ++ ++ if ((tcp_tw_count -= killed) != 0) ++ mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD); + } +- tcp_tw_death_row_slot = +- ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1)); + +- if ((tcp_tw_count -= killed) != 0) +- mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD); + net_statistics[smp_processor_id()*2].TimeWaited += killed; + out: + spin_unlock(&tw_death_lock); |