| From: |
| Thomas Schlichter <schlicht@uni-mannheim.de> |
| To: |
| Andrew Morton <akpm@digeo.com> |
| Subject: |
| [RFC] Page prefetch ver. 0.1 for 2.5.68-mm2 |
| Date: |
| Mon, 28 Apr 2003 22:12:16 +0200 |
| Cc: |
| linux-kernel@vger.kernel.org |
diff -urP linux-2.5.68-mm2/arch/i386/Kconfig linux-2.5.68_patched/arch/i386/Kconfig
--- linux-2.5.68-mm2/arch/i386/Kconfig Mon Apr 28 20:41:41 2003
+++ linux-2.5.68_patched/arch/i386/Kconfig Mon Apr 28 12:05:07 2003
@@ -373,6 +373,14 @@
depends on MK8 || MPENTIUM4
default y
+config PAGE_PREFETCH
+ tristate "Prefetch swapped memory pages (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ help
+ This option enables the kernel to prefetch swapped memory pages
+ when idle. This feature is experimental and known not to work
+ with the NFS filesystem!
+
config HUGETLB_PAGE
bool "Huge TLB Page Support"
help
diff -urP linux-2.5.68-mm2/fs/inode.c linux-2.5.68_patched/fs/inode.c
--- linux-2.5.68-mm2/fs/inode.c Sun Apr 20 04:51:13 2003
+++ linux-2.5.68_patched/fs/inode.c Mon Apr 28 12:05:07 2003
@@ -18,6 +18,7 @@
#include <linux/hash.h>
#include <linux/swap.h>
#include <linux/security.h>
+#include <linux/page_prefetch.h>
/*
* This is needed for the following functions:
@@ -176,10 +177,12 @@
INIT_LIST_HEAD(&inode->i_data.clean_pages);
INIT_LIST_HEAD(&inode->i_data.dirty_pages);
INIT_LIST_HEAD(&inode->i_data.locked_pages);
+ INIT_LIST_HEAD(&inode->i_data.swapped_pages);
INIT_LIST_HEAD(&inode->i_data.io_pages);
INIT_LIST_HEAD(&inode->i_dentry);
INIT_LIST_HEAD(&inode->i_devices);
sema_init(&inode->i_sem, 1);
+ INIT_RADIX_TREE(&inode->i_data.swap_tree, GFP_ATOMIC);
INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
spin_lock_init(&inode->i_data.page_lock);
init_MUTEX(&inode->i_data.i_shared_sem);
@@ -227,6 +230,7 @@
void clear_inode(struct inode *inode)
{
+ invalidate_swapped_list(&inode->i_data);
invalidate_inode_buffers(inode);
if (inode->i_data.nrpages)
diff -urP linux-2.5.68-mm2/include/linux/fs.h linux-2.5.68_patched/include/linux/fs.h
--- linux-2.5.68-mm2/include/linux/fs.h Sun Apr 20 04:48:56 2003
+++ linux-2.5.68_patched/include/linux/fs.h Mon Apr 28 12:05:07 2003
@@ -312,11 +312,13 @@
struct backing_dev_info;
struct address_space {
struct inode *host; /* owner: inode, block_device */
+ struct radix_tree_root swap_tree; /* radix tree of swapped pages */
struct radix_tree_root page_tree; /* radix tree of all pages */
spinlock_t page_lock; /* and rwlock protecting it */
struct list_head clean_pages; /* list of clean pages */
struct list_head dirty_pages; /* list of dirty pages */
struct list_head locked_pages; /* list of locked pages */
+ struct list_head swapped_pages; /* list of swapped pages */
struct list_head io_pages; /* being prepared for I/O */
unsigned long nrpages; /* number of total pages */
struct address_space_operations *a_ops; /* methods */
diff -urP linux-2.5.68-mm2/include/linux/page_prefetch.h linux-2.5.68_patched/include/linux/page_prefetch.h
--- linux-2.5.68-mm2/include/linux/page_prefetch.h Thu Jan 1 01:00:00 1970
+++ linux-2.5.68_patched/include/linux/page_prefetch.h Mon Apr 28 12:05:07 2003
@@ -0,0 +1,135 @@
+#ifndef _LINUX_PAGE_PREFETCH_H
+#define _LINUX_PAGE_PREFETCH_H
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/radix-tree.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/swap.h>
+
+/*
+ * One record per page that was dropped from a mapping and may be worth
+ * reading back in later.  Each record is linked three ways, always under
+ * swapped_root.lock:
+ *   - mapping->swap_tree, keyed by the page index,
+ *   - mapping->swapped_pages, through mapping_list,
+ *   - swapped_root.list, through swapped_list (newest record first).
+ */
+struct swapped_entry_t {
+	swp_entry_t swp_entry;		/* .val holds the page index */
+	struct address_space *mapping;	/* mapping the page belonged to */
+	struct list_head mapping_list;	/* link in mapping->swapped_pages */
+	struct list_head swapped_list;	/* link in swapped_root.list */
+};
+
+/* Global bookkeeping shared by all swapped_entry_t records. */
+struct swapped_root_t {
+	spinlock_t lock;	/* guards list, count and every swap_tree */
+	unsigned int count;	/* records currently linked on list */
+	unsigned int maxcount;	/* cap on count; the oldest record is recycled */
+	kmem_cache_t *cache;	/* slab cache the records are allocated from */
+	struct list_head list;	/* every record, most recently added first */
+};
+
+extern struct swapped_root_t swapped_root;
+
+/*
+ * Record that the page at @index of @mapping has just been dropped, so
+ * that it becomes a prefetch candidate.
+ *
+ * Once swapped_root.count has reached maxcount, the oldest record (tail
+ * of swapped_root.list) is unlinked and reused instead of allocating.
+ * Allocation or radix tree insertion failures are not reported: the
+ * page is then simply not tracked.
+ */
+static inline void add_to_swapped_list(struct address_space *mapping,
+				       unsigned long index)
+{
+	struct swapped_entry_t *entry;
+	int error;
+
+	spin_lock(&swapped_root.lock);
+
+	if (swapped_root.count >= swapped_root.maxcount) {
+		/*
+		 * With an empty list there is nothing to recycle: list.prev
+		 * is the list head itself, not a record.  This happens while
+		 * maxcount is still 0, i.e. before it has been set up.
+		 */
+		if (list_empty(&swapped_root.list))
+			goto out_locked;
+
+		entry = list_entry(swapped_root.list.prev,
+				   struct swapped_entry_t, swapped_list);
+		radix_tree_delete(&entry->mapping->swap_tree,
+				  entry->swp_entry.val);
+		list_del(&entry->mapping_list);
+		list_del(&entry->swapped_list);
+		swapped_root.count--;
+	} else {
+		/* GFP_ATOMIC: we are holding a spinlock and must not sleep */
+		entry = kmem_cache_alloc(swapped_root.cache, GFP_ATOMIC);
+		if (!entry)
+			goto out_locked;
+	}
+
+	entry->swp_entry.val = index;
+	entry->mapping = mapping;
+
+	error = radix_tree_insert(&mapping->swap_tree, index, entry);
+	if (!error) {
+		list_add(&entry->mapping_list, &mapping->swapped_pages);
+		list_add(&entry->swapped_list, &swapped_root.list);
+		swapped_root.count++;
+	} else {
+		/* not linked anywhere at this point, so just drop it */
+		kmem_cache_free(swapped_root.cache, entry);
+	}
+
+out_locked:
+	spin_unlock(&swapped_root.lock);
+}
+
+/*
+ * Forget the record for @index of @mapping, if there is one.  Used when
+ * the page is added to, or removed from, the page cache.
+ */
+static inline void remove_from_swapped_list(struct address_space *mapping,
+					    unsigned long index)
+{
+	struct swapped_entry_t *entry;
+
+	spin_lock(&swapped_root.lock);
+	entry = radix_tree_delete(&mapping->swap_tree, index);
+	if (entry) {
+		list_del(&entry->mapping_list);
+		list_del(&entry->swapped_list);
+		swapped_root.count--;
+		kmem_cache_free(swapped_root.cache, entry);
+	}
+	spin_unlock(&swapped_root.lock);
+}
+
+/*
+ * Drop every record that still refers to @mapping, e.g. because the
+ * owning inode is being cleared.
+ */
+static inline void invalidate_swapped_list(struct address_space *mapping)
+{
+	spin_lock(&swapped_root.lock);
+	while (!list_empty(&mapping->swapped_pages)) {
+		struct swapped_entry_t *entry = list_entry(
+				mapping->swapped_pages.next,
+				struct swapped_entry_t, mapping_list);
+
+		radix_tree_delete(&mapping->swap_tree, entry->swp_entry.val);
+		list_del(&entry->mapping_list);
+		list_del(&entry->swapped_list);
+		swapped_root.count--;
+		kmem_cache_free(swapped_root.cache, entry);
+	}
+	spin_unlock(&swapped_root.lock);
+}
+
+#endif /* _LINUX_PAGE_PREFETCH_H */
diff -urP linux-2.5.68-mm2/include/linux/swap.h linux-2.5.68_patched/include/linux/swap.h
--- linux-2.5.68-mm2/include/linux/swap.h Sun Apr 20 04:48:49 2003
+++ linux-2.5.68_patched/include/linux/swap.h Mon Apr 28 12:05:07 2003
@@ -155,6 +155,8 @@
extern unsigned int nr_free_pages_pgdat(pg_data_t *pgdat);
extern unsigned int nr_free_buffer_pages(void);
extern unsigned int nr_free_pagecache_pages(void);
+extern unsigned int nr_avail_buffer_pages(void);
+extern unsigned int nr_avail_pagecache_pages(void);
/* linux/mm/swap.c */
extern void FASTCALL(lru_cache_add(struct page *));
diff -urP linux-2.5.68-mm2/kernel/ksyms.c linux-2.5.68_patched/kernel/ksyms.c
--- linux-2.5.68-mm2/kernel/ksyms.c Mon Apr 28 20:41:52 2003
+++ linux-2.5.68_patched/kernel/ksyms.c Mon Apr 28 12:05:07 2003
@@ -59,6 +59,7 @@
#include <linux/time.h>
#include <linux/backing-dev.h>
#include <linux/percpu_counter.h>
+#include <linux/page_prefetch.h>
#include <asm/checksum.h>
#if defined(CONFIG_PROC_FS)
@@ -71,6 +72,13 @@
extern struct timezone sys_tz;
extern int panic_timeout;
+
+/* needed for page prefetch support */
+EXPORT_SYMBOL(swapped_root);
+EXPORT_SYMBOL(swapper_space);
+EXPORT_SYMBOL(swapin_readahead);
+EXPORT_SYMBOL(do_page_cache_readahead);
+EXPORT_SYMBOL(nr_avail_pagecache_pages);
/* process memory management */
EXPORT_SYMBOL(do_mmap_pgoff);
diff -urP linux-2.5.68-mm2/mm/Makefile linux-2.5.68_patched/mm/Makefile
--- linux-2.5.68-mm2/mm/Makefile Sun Apr 20 04:50:05 2003
+++ linux-2.5.68_patched/mm/Makefile Mon Apr 28 12:05:08 2003
@@ -12,3 +12,5 @@
slab.o swap.o truncate.o vcache.o vmscan.o $(mmu-y)
obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
+
+obj-$(CONFIG_PAGE_PREFETCH) += page_prefetch.o
diff -urP linux-2.5.68-mm2/mm/filemap.c linux-2.5.68_patched/mm/filemap.c
--- linux-2.5.68-mm2/mm/filemap.c Mon Apr 28 20:41:52 2003
+++ linux-2.5.68_patched/mm/filemap.c Mon Apr 28 12:06:39 2003
@@ -36,6 +36,7 @@
* FIXME: remove all knowledge of the buffer layer from the core VM
*/
#include <linux/buffer_head.h> /* for generic_osync_inode */
+#include <linux/page_prefetch.h>
#include <asm/uaccess.h>
#include <asm/mman.h>
@@ -83,6 +84,7 @@
BUG_ON(PageDirty(page) && !PageSwapCache(page));
+ remove_from_swapped_list(mapping, page->index);
radix_tree_delete(&mapping->page_tree, page->index);
list_del(&page->list);
page->mapping = NULL;
@@ -222,8 +224,11 @@
int add_to_page_cache(struct page *page, struct address_space *mapping,
pgoff_t offset, int gfp_mask)
{
- int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
+ int error;
+ remove_from_swapped_list(mapping, offset);
+
+ error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
if (error == 0) {
page_cache_get(page);
spin_lock(&mapping->page_lock);
diff -urP linux-2.5.68-mm2/mm/page_alloc.c linux-2.5.68_patched/mm/page_alloc.c
--- linux-2.5.68-mm2/mm/page_alloc.c Mon Apr 28 20:41:52 2003
+++ linux-2.5.68_patched/mm/page_alloc.c Mon Apr 28 12:05:08 2003
@@ -852,6 +852,48 @@
}
#endif
+static unsigned int nr_avail_zone_pages(int offset) /* offset: index into node_zonelists[] */
+{
+ pg_data_t *pgdat;
+ unsigned long avail = 0; /* best per-zone headroom seen so far */
+ /* Result: the largest (free_pages - low) of any zone on any node, 0 if no zone exceeds low. */
+ for_each_pgdat(pgdat) {
+ struct zonelist *zonelist = pgdat->node_zonelists + offset;
+ struct zone **zonep = zonelist->zones;
+ struct zone *zone;
+ unsigned long low = 0; /* running threshold, restarts at 0 for every node */
+ /* zonelist->zones is walked in order until its NULL terminator */
+ for (zone = *zonep++; zone; zone = *zonep++) {
+ unsigned long local_free = zone->free_pages;
+ unsigned long local_low = zone->pages_low;
+ /* the threshold accumulates across zones: pages_low now, the protection term afterwards */
+ low += local_low;
+ if (local_free > low) {
+ avail = max(avail, local_free - low);
+ }
+ low += local_low * sysctl_lower_zone_protection;
+ }
+ }
+ /* implicitly narrowed from unsigned long to the unsigned int return type */
+ return avail;
+}
+
+/* Largest single-zone page headroom, as computed by nr_avail_zone_pages(),
+ * for the GFP_USER zonelist (ZONE_DMA and ZONE_NORMAL).
+ */
+unsigned int nr_avail_buffer_pages(void)
+{
+ return nr_avail_zone_pages(GFP_USER & GFP_ZONEMASK);
+}
+
+/* Largest single-zone page headroom, as computed by nr_avail_zone_pages(),
+ * for the GFP_HIGHUSER zonelist (all zones).
+ */
+unsigned int nr_avail_pagecache_pages(void)
+{
+ return nr_avail_zone_pages(GFP_HIGHUSER & GFP_ZONEMASK);
+}
+
#ifdef CONFIG_NUMA
static void show_node(struct zone *zone)
{
diff -urP linux-2.5.68-mm2/mm/page_prefetch.c linux-2.5.68_patched/mm/page_prefetch.c
--- linux-2.5.68-mm2/mm/page_prefetch.c Thu Jan 1 01:00:00 1970
+++ linux-2.5.68_patched/mm/page_prefetch.c Mon Apr 28 12:05:08 2003
@@ -0,0 +1,180 @@
+/*
+ * kprefetchd - reads recently dropped pages back in while there is
+ * page cache memory to spare.  A timer wakes the thread every 'interval'
+ * seconds.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+#include <linux/completion.h>
+#include <linux/page_prefetch.h>
+#include <linux/backing-dev.h>
+
+#define RESERVED_PAGES	256	/* leave 1 MByte (at 4 KByte/page) of pagecache free */
+#define INTERVAL	60	/* (secs) Default is 1 minute */
+
+static int reserved_pages = RESERVED_PAGES;
+static int interval = INTERVAL;
+
+MODULE_PARM(reserved_pages,"i");
+MODULE_PARM_DESC(reserved_pages,
+	"count of pagecache pages to let free (default 256)");
+
+MODULE_PARM(interval,"i");
+MODULE_PARM_DESC(interval,
+	"delay in seconds to wait between memory checks (default 60)");
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Thomas Schlichter <thomas.schlichter@web.de>");
+MODULE_DESCRIPTION("prefetches swap pages when there is free memory");
+
+/* Periodic timer that kicks kprefetchd. */
+static void prefetch_timer_handler(unsigned long data);
+static struct timer_list prefetch_timer =
+	TIMER_INITIALIZER(prefetch_timer_handler, 0, 0);
+
+/* kprefetchd sleeps here between two runs. */
+static DECLARE_WAIT_QUEUE_HEAD(kprefetchd_wait);
+
+/* Completed by kprefetchd when it exits; awaited on module unload. */
+static DECLARE_COMPLETION(kprefetchd_exited);
+
+/* Cleared on unload to stop both the timer and the thread. */
+static int running;
+
+/*
+ * Timer callback: wake kprefetchd and re-arm for the next interval,
+ * unless the module is shutting down.
+ */
+static void prefetch_timer_handler(unsigned long data)
+{
+	wake_up_interruptible(&kprefetchd_wait);
+	if (!running)
+		return;
+	prefetch_timer.expires = jiffies + interval * HZ;
+	add_timer(&prefetch_timer);
+}
+
+/*
+ * Unlink and return the most recently added record, or NULL if there is
+ * none.  The caller owns the returned record and has to free it.
+ */
+static struct swapped_entry_t *take_newest_entry(void)
+{
+	struct swapped_entry_t *entry = NULL;
+
+	spin_lock(&swapped_root.lock);
+	if (!list_empty(&swapped_root.list)) {
+		entry = list_entry(swapped_root.list.next,
+				   struct swapped_entry_t, swapped_list);
+		radix_tree_delete(&entry->mapping->swap_tree,
+				  entry->swp_entry.val);
+		list_del(&entry->mapping_list);
+		list_del(&entry->swapped_list);
+		swapped_root.count--;
+	}
+	spin_unlock(&swapped_root.lock);
+
+	return entry;
+}
+
+/* Start readahead for the page described by @entry. */
+static void prefetch_entry(struct swapped_entry_t *entry)
+{
+	/*
+	 * NOTE(review): once the record is unlinked, nothing visible here
+	 * keeps entry->mapping alive; invalidate_swapped_list() can no
+	 * longer find the record when the inode is cleared.  Confirm the
+	 * mapping cannot go away underneath us, or pin its host inode.
+	 */
+	if (entry->mapping == &swapper_space) {
+		swapin_readahead(entry->swp_entry);
+	} else {
+		do_page_cache_readahead(entry->mapping,
+				NULL, entry->swp_entry.val,
+				entry->mapping->backing_dev_info->ra_pages);
+	}
+}
+
+/*
+ * Kernel thread: sleep until woken by the timer (or by module unload),
+ * then prefetch for as long as more than reserved_pages pages are
+ * available and there are records left.
+ */
+static int kprefetchd(void *data)
+{
+	DEFINE_WAIT(wait);
+
+	daemonize("kprefetchd");
+
+	while (running) {
+		struct swapped_entry_t *entry;
+
+		/*
+		 * Re-check 'running' after queueing ourselves, so that a
+		 * wake-up from prefetch_exit() arriving between the loop
+		 * test and prepare_to_wait() is not lost.
+		 */
+		prepare_to_wait(&kprefetchd_wait, &wait, TASK_INTERRUPTIBLE);
+		if (running)
+			schedule();
+		finish_wait(&kprefetchd_wait, &wait);
+
+		while (running &&
+		       nr_avail_pagecache_pages() > (unsigned int)reserved_pages) {
+			entry = take_newest_entry();
+			if (!entry)
+				break;
+			prefetch_entry(entry);
+			kmem_cache_free(swapped_root.cache, entry);
+		}
+	}
+
+	complete_and_exit(&kprefetchd_exited, 0);
+	return 0;	/* not reached */
+}
+
+/*
+ * Validate the parameters, start kprefetchd and arm the timer.
+ * Returns 0, -EINVAL for bad parameters, or the kernel_thread() error.
+ */
+static int __init prefetch_init(void)
+{
+	int pid;
+
+	/* interval <= 0 would fire back to back; reserved_pages is compared unsigned */
+	if (interval <= 0 || reserved_pages < 0)
+		return -EINVAL;
+
+	running = 1;
+	pid = kernel_thread(kprefetchd, NULL, CLONE_KERNEL);
+	if (pid < 0) {
+		running = 0;
+		return pid;
+	}
+
+	prefetch_timer.expires = jiffies + interval * HZ;
+	add_timer(&prefetch_timer);
+	return 0;
+}
+
+/*
+ * Stop the timer and the thread, and do not return before both are
+ * guaranteed to be out of this module's code.
+ */
+static void __exit prefetch_exit(void)
+{
+	/* stop re-arming first, then wait for a handler still in flight */
+	running = 0;
+	del_timer_sync(&prefetch_timer);
+
+	/* the thread executes module text: wait until it has exited */
+	wake_up_interruptible(&kprefetchd_wait);
+	wait_for_completion(&kprefetchd_exited);
+}
+
+module_init(prefetch_init);
+module_exit(prefetch_exit);
diff -urP linux-2.5.68-mm2/mm/swap.c linux-2.5.68_patched/mm/swap.c
--- linux-2.5.68-mm2/mm/swap.c Mon Apr 28 20:41:52 2003
+++ linux-2.5.68_patched/mm/swap.c Mon Apr 28 12:05:08 2003
@@ -23,6 +23,13 @@
#include <linux/mm_inline.h>
#include <linux/buffer_head.h> /* for try_to_release_page() */
#include <linux/percpu.h>
+#include <linux/page_prefetch.h>
+
+struct swapped_root_t swapped_root = { /* .cache and .maxcount are assigned at run time further down in this file */
+ .lock = SPIN_LOCK_UNLOCKED,
+ .count = 0, /* no records yet */
+ .list = LIST_HEAD_INIT(swapped_root.list), /* starts out empty */
+};
/* How many pages do we try to swap or page in/out together? */
int page_cluster;
@@ -390,4 +397,17 @@
* Right now other parts of the system means that we
* _really_ don't want to cluster much more
*/
+
+ /*
+ * Create kmem cache for swapped entries
+ */
+ swapped_root.cache = kmem_cache_create("swapped_entry",
+ sizeof(struct swapped_entry_t), 0, 0, NULL, NULL);
+ if(!swapped_root.cache)
+ panic("swap_setup(): cannot create swapped_entry SLAB cache");
+
+ /*
+ * Set max count of swapped entries
+ */
+ swapped_root.maxcount = nr_free_pagecache_pages();
}
diff -urP linux-2.5.68-mm2/mm/swap_state.c linux-2.5.68_patched/mm/swap_state.c
--- linux-2.5.68-mm2/mm/swap_state.c Mon Apr 28 20:41:52 2003
+++ linux-2.5.68_patched/mm/swap_state.c Mon Apr 28 12:05:08 2003
@@ -32,12 +32,14 @@
extern struct address_space_operations swap_aops;
struct address_space swapper_space = {
+ .swap_tree = RADIX_TREE_INIT(GFP_ATOMIC),
.page_tree = RADIX_TREE_INIT(GFP_ATOMIC),
.page_lock = SPIN_LOCK_UNLOCKED,
.clean_pages = LIST_HEAD_INIT(swapper_space.clean_pages),
.dirty_pages = LIST_HEAD_INIT(swapper_space.dirty_pages),
.io_pages = LIST_HEAD_INIT(swapper_space.io_pages),
.locked_pages = LIST_HEAD_INIT(swapper_space.locked_pages),
+ .swapped_pages = LIST_HEAD_INIT(swapper_space.swapped_pages),
.host = &swapper_inode,
.a_ops = &swap_aops,
.backing_dev_info = &swap_backing_dev_info,
diff -urP linux-2.5.68-mm2/mm/vmscan.c linux-2.5.68_patched/mm/vmscan.c
--- linux-2.5.68-mm2/mm/vmscan.c Mon Apr 28 20:41:52 2003
+++ linux-2.5.68_patched/mm/vmscan.c Mon Apr 28 12:05:08 2003
@@ -28,6 +28,7 @@
#include <linux/pagevec.h>
#include <linux/backing-dev.h>
#include <linux/rmap-locking.h>
+#include <linux/page_prefetch.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
@@ -414,6 +415,8 @@
free_it:
unlock_page(page);
ret++;
+ if(mapping)
+ add_to_swapped_list(mapping, page->index);
if (!pagevec_add(&freed_pvec, page))
__pagevec_release_nonlru(&freed_pvec);
continue;