LWN.net Logo

Page prefetch ver. 0.1 for 2.5.68-mm2

From:  Thomas Schlichter <schlicht@uni-mannheim.de>
To:  Andrew Morton <akpm@digeo.com>
Subject:  [RFC] Page prefetch ver. 0.1 for 2.5.68-mm2
Date:  Mon, 28 Apr 2003 22:12:16 +0200
Cc:  linux-kernel@vger.kernel.org

diff -urP linux-2.5.68-mm2/arch/i386/Kconfig linux-2.5.68_patched/arch/i386/Kconfig
--- linux-2.5.68-mm2/arch/i386/Kconfig	Mon Apr 28 20:41:41 2003
+++ linux-2.5.68_patched/arch/i386/Kconfig	Mon Apr 28 12:05:07 2003
@@ -373,6 +373,14 @@
 	depends on MK8 || MPENTIUM4
 	default y
 
+config PAGE_PREFETCH
+	tristate "Prefetch swapped memory pages (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	help
+	  This option enables the kernel to prefetch swapped memory pages
+	  when idle. This feature is experimental and known not to work
+	  with the NFS filesystem!
+
 config HUGETLB_PAGE
 	bool "Huge TLB Page Support"
 	help
diff -urP linux-2.5.68-mm2/fs/inode.c linux-2.5.68_patched/fs/inode.c
--- linux-2.5.68-mm2/fs/inode.c	Sun Apr 20 04:51:13 2003
+++ linux-2.5.68_patched/fs/inode.c	Mon Apr 28 12:05:07 2003
@@ -18,6 +18,7 @@
 #include <linux/hash.h>
 #include <linux/swap.h>
 #include <linux/security.h>
+#include <linux/page_prefetch.h>
 
 /*
  * This is needed for the following functions:
@@ -176,10 +177,12 @@
 	INIT_LIST_HEAD(&inode->i_data.clean_pages);
 	INIT_LIST_HEAD(&inode->i_data.dirty_pages);
 	INIT_LIST_HEAD(&inode->i_data.locked_pages);
+	INIT_LIST_HEAD(&inode->i_data.swapped_pages);
 	INIT_LIST_HEAD(&inode->i_data.io_pages);
 	INIT_LIST_HEAD(&inode->i_dentry);
 	INIT_LIST_HEAD(&inode->i_devices);
 	sema_init(&inode->i_sem, 1);
+	INIT_RADIX_TREE(&inode->i_data.swap_tree, GFP_ATOMIC);
 	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
 	spin_lock_init(&inode->i_data.page_lock);
 	init_MUTEX(&inode->i_data.i_shared_sem);
@@ -227,6 +230,7 @@
  
 void clear_inode(struct inode *inode)
 {
+	invalidate_swapped_list(&inode->i_data);
 	invalidate_inode_buffers(inode);
        
 	if (inode->i_data.nrpages)
diff -urP linux-2.5.68-mm2/include/linux/fs.h linux-2.5.68_patched/include/linux/fs.h
--- linux-2.5.68-mm2/include/linux/fs.h	Sun Apr 20 04:48:56 2003
+++ linux-2.5.68_patched/include/linux/fs.h	Mon Apr 28 12:05:07 2003
@@ -312,11 +312,13 @@
 struct backing_dev_info;
 struct address_space {
 	struct inode		*host;		/* owner: inode, block_device */
+	struct radix_tree_root	swap_tree;	/* radix tree of swapped pages */
 	struct radix_tree_root	page_tree;	/* radix tree of all pages */
 	spinlock_t		page_lock;	/* and rwlock protecting it */
 	struct list_head	clean_pages;	/* list of clean pages */
 	struct list_head	dirty_pages;	/* list of dirty pages */
 	struct list_head	locked_pages;	/* list of locked pages */
+	struct list_head	swapped_pages;	/* list of swapped pages */
 	struct list_head	io_pages;	/* being prepared for I/O */
 	unsigned long		nrpages;	/* number of total pages */
 	struct address_space_operations *a_ops;	/* methods */
diff -urP linux-2.5.68-mm2/include/linux/page_prefetch.h linux-2.5.68_patched/include/linux/page_prefetch.h
--- linux-2.5.68-mm2/include/linux/page_prefetch.h	Thu Jan  1 01:00:00 1970
+++ linux-2.5.68_patched/include/linux/page_prefetch.h	Mon Apr 28 12:05:07 2003
@@ -0,0 +1,125 @@
+#ifndef _LINUX_PAGE_PREFETCH_H
+#define _LINUX_PAGE_PREFETCH_H
+
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/radix-tree.h>
+
+/*
+ * One record per page that was dropped from its mapping and is a
+ * candidate for being read back in later.
+ */
+struct swapped_entry_t {
+	swp_entry_t		swp_entry;	/* .val holds the page index */
+	struct address_space	*mapping;	/* mapping the page was in */
+	struct list_head	mapping_list;	/* on mapping->swapped_pages */
+	struct list_head	swapped_list;	/* on swapped_root.list */
+};
+
+/*
+ * Global state.  In the code of this patch 'lock' is held around every
+ * access to the fields below and to each mapping's swap_tree and
+ * swapped_pages list.
+ */
+struct swapped_root_t {
+	spinlock_t		lock;
+	unsigned int		count;		/* entries on 'list' */
+	unsigned int		maxcount;	/* upper bound for 'count' */
+	kmem_cache_t		*cache;		/* slab cache for the entries */
+	struct list_head	list;		/* all entries, newest first */
+};
+
+extern struct swapped_root_t	swapped_root;
+
+/*
+ * Record that the page at @index of @mapping has been dropped.  When the
+ * global list is full, the oldest entry (the list tail) is recycled for
+ * the new page.  Allocation or insertion failures are silently ignored;
+ * the page is then simply not a prefetch candidate.
+ */
+static inline void add_to_swapped_list(struct address_space *mapping,
+							unsigned long index)
+{
+	struct swapped_entry_t *entry;
+	int error;
+
+	spin_lock(&swapped_root.lock);
+
+	/*
+	 * maxcount is 0 until mm/swap.c has created the slab cache and set
+	 * the limit.  Without this check the "full" branch below would
+	 * treat the empty list head as an entry.
+	 */
+	if (!swapped_root.maxcount)
+		goto out_locked;
+
+	if (swapped_root.count >= swapped_root.maxcount) {
+		/* full: reuse the oldest entry instead of allocating */
+		entry = list_entry(swapped_root.list.prev,
+				struct swapped_entry_t, swapped_list);
+		radix_tree_delete(&entry->mapping->swap_tree,
+				entry->swp_entry.val);
+		list_del(&entry->mapping_list);
+		list_del(&entry->swapped_list);
+		swapped_root.count--;
+	} else {
+		entry = kmem_cache_alloc(swapped_root.cache, GFP_ATOMIC);
+		if (!entry)
+			goto out_locked;
+	}
+
+	entry->swp_entry.val = index;
+	entry->mapping       = mapping;
+
+	error = radix_tree_insert(&mapping->swap_tree, index, entry);
+	if (!error) {
+		list_add(&entry->mapping_list, &mapping->swapped_pages);
+		list_add(&entry->swapped_list, &swapped_root.list);
+		swapped_root.count++;
+	} else {
+		kmem_cache_free(swapped_root.cache, entry);
+	}
+
+out_locked:
+	spin_unlock(&swapped_root.lock);
+}
+
+/*
+ * Forget the entry for @index of @mapping, if there is one.
+ */
+static inline void remove_from_swapped_list(struct address_space *mapping,
+							unsigned long index)
+{
+	struct swapped_entry_t *entry;
+
+	spin_lock(&swapped_root.lock);
+	entry = radix_tree_delete(&mapping->swap_tree, index);
+	if (entry) {
+		list_del(&entry->mapping_list);
+		list_del(&entry->swapped_list);
+		swapped_root.count--;
+		kmem_cache_free(swapped_root.cache, entry);
+	}
+	spin_unlock(&swapped_root.lock);
+}
+
+/*
+ * Drop and free every entry that still refers to @mapping.
+ */
+static inline void invalidate_swapped_list(struct address_space *mapping)
+{
+	spin_lock(&swapped_root.lock);
+	while (!list_empty(&mapping->swapped_pages)) {
+		struct swapped_entry_t *entry = list_entry(
+					mapping->swapped_pages.next,
+					struct swapped_entry_t, mapping_list);
+		radix_tree_delete(&mapping->swap_tree, entry->swp_entry.val);
+		list_del(&entry->mapping_list);
+		list_del(&entry->swapped_list);
+		swapped_root.count--;
+		kmem_cache_free(swapped_root.cache, entry);
+	}
+	spin_unlock(&swapped_root.lock);
+}
+
+#endif /* _LINUX_PAGE_PREFETCH_H */
diff -urP linux-2.5.68-mm2/include/linux/swap.h linux-2.5.68_patched/include/linux/swap.h
--- linux-2.5.68-mm2/include/linux/swap.h	Sun Apr 20 04:48:49 2003
+++ linux-2.5.68_patched/include/linux/swap.h	Mon Apr 28 12:05:07 2003
@@ -155,6 +155,8 @@
 extern unsigned int nr_free_pages_pgdat(pg_data_t *pgdat);
 extern unsigned int nr_free_buffer_pages(void);
 extern unsigned int nr_free_pagecache_pages(void);
+extern unsigned int nr_avail_buffer_pages(void);
+extern unsigned int nr_avail_pagecache_pages(void);
 
 /* linux/mm/swap.c */
 extern void FASTCALL(lru_cache_add(struct page *));
diff -urP linux-2.5.68-mm2/kernel/ksyms.c linux-2.5.68_patched/kernel/ksyms.c
--- linux-2.5.68-mm2/kernel/ksyms.c	Mon Apr 28 20:41:52 2003
+++ linux-2.5.68_patched/kernel/ksyms.c	Mon Apr 28 12:05:07 2003
@@ -59,6 +59,7 @@
 #include <linux/time.h>
 #include <linux/backing-dev.h>
 #include <linux/percpu_counter.h>
+#include <linux/page_prefetch.h>
 #include <asm/checksum.h>
 
 #if defined(CONFIG_PROC_FS)
@@ -71,6 +72,13 @@
 extern struct timezone sys_tz;
 
 extern int panic_timeout;
+
+/* used by mm/page_prefetch.c, which can be built as a module */
+EXPORT_SYMBOL(swapped_root);
+EXPORT_SYMBOL(swapper_space);
+EXPORT_SYMBOL(swapin_readahead);
+EXPORT_SYMBOL(do_page_cache_readahead);
+EXPORT_SYMBOL(nr_avail_pagecache_pages);
 
 /* process memory management */
 EXPORT_SYMBOL(do_mmap_pgoff);
diff -urP linux-2.5.68-mm2/mm/Makefile linux-2.5.68_patched/mm/Makefile
--- linux-2.5.68-mm2/mm/Makefile	Sun Apr 20 04:50:05 2003
+++ linux-2.5.68_patched/mm/Makefile	Mon Apr 28 12:05:08 2003
@@ -12,3 +12,5 @@
 			   slab.o swap.o truncate.o vcache.o vmscan.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o
+
+obj-$(CONFIG_PAGE_PREFETCH)	+= page_prefetch.o
diff -urP linux-2.5.68-mm2/mm/filemap.c linux-2.5.68_patched/mm/filemap.c
--- linux-2.5.68-mm2/mm/filemap.c	Mon Apr 28 20:41:52 2003
+++ linux-2.5.68_patched/mm/filemap.c	Mon Apr 28 12:06:39 2003
@@ -36,6 +36,7 @@
  * FIXME: remove all knowledge of the buffer layer from the core VM
  */
 #include <linux/buffer_head.h> /* for generic_osync_inode */
+#include <linux/page_prefetch.h>
 
 #include <asm/uaccess.h>
 #include <asm/mman.h>
@@ -83,6 +84,7 @@
 
 	BUG_ON(PageDirty(page) && !PageSwapCache(page));
 
+	remove_from_swapped_list(mapping, page->index);
 	radix_tree_delete(&mapping->page_tree, page->index);
 	list_del(&page->list);
 	page->mapping = NULL;
@@ -222,8 +224,11 @@
 int add_to_page_cache(struct page *page, struct address_space *mapping,
 		pgoff_t offset, int gfp_mask)
 {
-	int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
+	int error;
 
+	remove_from_swapped_list(mapping, offset);
+
+	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 	if (error == 0) {
 		page_cache_get(page);
 		spin_lock(&mapping->page_lock);
diff -urP linux-2.5.68-mm2/mm/page_alloc.c linux-2.5.68_patched/mm/page_alloc.c
--- linux-2.5.68-mm2/mm/page_alloc.c	Mon Apr 28 20:41:52 2003
+++ linux-2.5.68_patched/mm/page_alloc.c	Mon Apr 28 12:05:08 2003
@@ -852,6 +852,48 @@
 }
 #endif
 
+/*
+ * Largest per-zone surplus of free pages over the accumulated low mark.
+ */
+static unsigned int nr_avail_zone_pages(int offset)
+{
+	unsigned long avail = 0;
+	pg_data_t *pgdat;
+
+	for_each_pgdat(pgdat) {
+		struct zone **zonep = pgdat->node_zonelists[offset].zones;
+		unsigned long low = 0;
+		struct zone *zone;
+
+		while ((zone = *zonep++) != NULL) {
+			unsigned long nr_free = zone->free_pages;
+			unsigned long nr_low = zone->pages_low;
+
+			low += nr_low;
+			if (nr_free > low)
+				avail = max(avail, nr_free - low);
+			low += nr_low * sysctl_lower_zone_protection;
+		}
+	}
+	return avail;
+}
+
+/*
+ * Available pages (see nr_avail_zone_pages) in ZONE_DMA and ZONE_NORMAL
+ */
+unsigned int nr_avail_buffer_pages(void)
+{
+	return nr_avail_zone_pages(GFP_USER & GFP_ZONEMASK);
+}
+
+/*
+ * Available pages (see nr_avail_zone_pages) within all zones
+ */
+unsigned int nr_avail_pagecache_pages(void)
+{
+	return nr_avail_zone_pages(GFP_HIGHUSER & GFP_ZONEMASK);
+}
+
 #ifdef CONFIG_NUMA
 static void show_node(struct zone *zone)
 {
diff -urP linux-2.5.68-mm2/mm/page_prefetch.c linux-2.5.68_patched/mm/page_prefetch.c
--- linux-2.5.68-mm2/mm/page_prefetch.c	Thu Jan  1 01:00:00 1970
+++ linux-2.5.68_patched/mm/page_prefetch.c	Mon Apr 28 12:05:08 2003
@@ -0,0 +1,173 @@
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/completion.h>
+#include <linux/page_prefetch.h>
+#include <linux/backing-dev.h>
+
+#define RESERVED_PAGES	256		/* leave 1 MByte of pagecache free */
+#define INTERVAL	60		/* (secs) Default is 1 minute */
+
+static int reserved_pages = RESERVED_PAGES;
+static int interval       = INTERVAL;
+
+MODULE_PARM(reserved_pages,"i");
+MODULE_PARM_DESC(reserved_pages,
+	"count of pagecache pages to leave free (default 256)");
+
+MODULE_PARM(interval,"i");
+MODULE_PARM_DESC(interval,
+	"delay in seconds to wait between memory checks (default 60)");
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Thomas Schlichter <thomas.schlichter@web.de>");
+MODULE_DESCRIPTION("prefetches swap pages when there is free memory");
+
+/*
+ *	Our timer
+ */
+static void prefetch_timer_handler(unsigned long data);
+static struct timer_list prefetch_timer =
+		TIMER_INITIALIZER(prefetch_timer_handler, 0, 0);
+
+/*
+ *	Our waitqueue
+ */
+static DECLARE_WAIT_QUEUE_HEAD(kprefetchd_wait);
+
+/*
+ *	Cleared by prefetch_exit() to stop both the timer and the thread
+ */
+static int running;
+
+/*
+ *	Completed by kprefetchd right before it exits, so that
+ *	prefetch_exit() can wait until the thread is gone
+ */
+static DECLARE_COMPLETION(kprefetchd_exited);
+
+/*
+ *	If the timer expires, wake up the kernel thread and re-arm
+ */
+static void prefetch_timer_handler(unsigned long data)
+{
+	wake_up_interruptible(&kprefetchd_wait);
+
+	/* do not re-arm once prefetch_exit() has cleared 'running' */
+	if (running) {
+		prefetch_timer.expires = jiffies + interval * HZ;
+		add_timer(&prefetch_timer);
+	}
+}
+
+/*
+ *	The kernel thread: sleeps until woken up, then reads remembered
+ *	pages back in while more than reserved_pages pages are available
+ */
+static int kprefetchd(void *data)
+{
+	DEFINE_WAIT(wait);
+
+	daemonize("kprefetchd");
+
+	for (;;) {
+		/*
+		 * Test 'running' only after being queued on the waitqueue;
+		 * otherwise the wake-up from prefetch_exit() could arrive
+		 * between the test and the sleep and be lost.
+		 */
+		prepare_to_wait(&kprefetchd_wait, &wait, TASK_INTERRUPTIBLE);
+		if (running)
+			schedule();
+		finish_wait(&kprefetchd_wait, &wait);
+
+		if (!running)
+			break;
+
+		while (nr_avail_pagecache_pages() > reserved_pages) {
+			struct swapped_entry_t *entry;
+
+			spin_lock(&swapped_root.lock);
+
+			if (list_empty(&swapped_root.list)) {
+				spin_unlock(&swapped_root.lock);
+				break;
+			}
+
+			/* take the entry at the list head off all lists */
+			entry = list_entry(swapped_root.list.next,
+					struct swapped_entry_t, swapped_list);
+			radix_tree_delete(&entry->mapping->swap_tree,
+					entry->swp_entry.val);
+			list_del(&entry->mapping_list);
+			list_del(&entry->swapped_list);
+			swapped_root.count--;
+
+			spin_unlock(&swapped_root.lock);
+
+			/*
+			 * FIXME(review): entry->mapping is used below without
+			 * the lock held.  Once the entry is unlinked,
+			 * invalidate_swapped_list() no longer finds it, so
+			 * nothing visible here keeps the mapping alive.
+			 */
+			if (entry->mapping == &swapper_space) {
+				swapin_readahead(entry->swp_entry);
+			} else {
+				do_page_cache_readahead(entry->mapping,
+					NULL, entry->swp_entry.val,
+					entry->mapping->backing_dev_info->ra_pages);
+			}
+
+			kmem_cache_free(swapped_root.cache, entry);
+		}
+	}
+
+	complete_and_exit(&kprefetchd_exited, 0);
+	return 0;
+}
+
+/*
+ *	Start the thread and arm the timer for the first time
+ */
+static int __init prefetch_init(void)
+{
+	int pid;
+
+	/*
+	 * A non-positive interval would make the timer expire at once
+	 * every time, and a negative reserved_pages turns into a huge
+	 * value in the unsigned comparison in kprefetchd().
+	 */
+	if (interval <= 0)
+		interval = INTERVAL;
+	if (reserved_pages < 0)
+		reserved_pages = RESERVED_PAGES;
+
+	running = 1;
+	pid = kernel_thread(kprefetchd, NULL, CLONE_KERNEL);
+	if (pid < 0) {
+		running = 0;
+		return pid;
+	}
+
+	prefetch_timer.expires = jiffies + interval * HZ;
+	add_timer(&prefetch_timer);
+	return 0;
+}
+
+/*
+ *	Stop the timer and the thread before the module goes away
+ */
+static void __exit prefetch_exit(void)
+{
+	/* stop the handler from re-arming, then wait for it to finish */
+	running = 0;
+	del_timer_sync(&prefetch_timer);
+
+	/* the thread must have exited before its code is unloaded */
+	wake_up_interruptible(&kprefetchd_wait);
+	wait_for_completion(&kprefetchd_exited);
+}
+
+module_init(prefetch_init);
+module_exit(prefetch_exit);
diff -urP linux-2.5.68-mm2/mm/swap.c linux-2.5.68_patched/mm/swap.c
--- linux-2.5.68-mm2/mm/swap.c	Mon Apr 28 20:41:52 2003
+++ linux-2.5.68_patched/mm/swap.c	Mon Apr 28 12:05:08 2003
@@ -23,6 +23,13 @@
 #include <linux/mm_inline.h>
 #include <linux/buffer_head.h>	/* for try_to_release_page() */
 #include <linux/percpu.h>
+#include <linux/page_prefetch.h>
+
+struct swapped_root_t swapped_root = {
+	.lock  = SPIN_LOCK_UNLOCKED,
+	.count = 0,	/* .cache and .maxcount are assigned later in this file */
+	.list  = LIST_HEAD_INIT(swapped_root.list),
+};
 
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
@@ -390,4 +397,17 @@
 	 * Right now other parts of the system means that we
 	 * _really_ don't want to cluster much more
 	 */
+
+	/*
+	 * Create kmem cache for swapped entries
+	 */
+ 	swapped_root.cache = kmem_cache_create("swapped_entry",
+		sizeof(struct swapped_entry_t), 0, 0, NULL, NULL);
+	if(!swapped_root.cache)
+		panic("swap_setup(): cannot create swapped_entry SLAB cache");
+
+	/*
+	 * Set max count of swapped entries
+	 */
+	 swapped_root.maxcount = nr_free_pagecache_pages();
 }
diff -urP linux-2.5.68-mm2/mm/swap_state.c linux-2.5.68_patched/mm/swap_state.c
--- linux-2.5.68-mm2/mm/swap_state.c	Mon Apr 28 20:41:52 2003
+++ linux-2.5.68_patched/mm/swap_state.c	Mon Apr 28 12:05:08 2003
@@ -32,12 +32,14 @@
 extern struct address_space_operations swap_aops;
 
 struct address_space swapper_space = {
+	.swap_tree	= RADIX_TREE_INIT(GFP_ATOMIC),
 	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC),
 	.page_lock	= SPIN_LOCK_UNLOCKED,
 	.clean_pages	= LIST_HEAD_INIT(swapper_space.clean_pages),
 	.dirty_pages	= LIST_HEAD_INIT(swapper_space.dirty_pages),
 	.io_pages	= LIST_HEAD_INIT(swapper_space.io_pages),
 	.locked_pages	= LIST_HEAD_INIT(swapper_space.locked_pages),
+	.swapped_pages	= LIST_HEAD_INIT(swapper_space.swapped_pages),
 	.host		= &swapper_inode,
 	.a_ops		= &swap_aops,
 	.backing_dev_info = &swap_backing_dev_info,
diff -urP linux-2.5.68-mm2/mm/vmscan.c linux-2.5.68_patched/mm/vmscan.c
--- linux-2.5.68-mm2/mm/vmscan.c	Mon Apr 28 20:41:52 2003
+++ linux-2.5.68_patched/mm/vmscan.c	Mon Apr 28 12:05:08 2003
@@ -28,6 +28,7 @@
 #include <linux/pagevec.h>
 #include <linux/backing-dev.h>
 #include <linux/rmap-locking.h>
+#include <linux/page_prefetch.h>
 
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
@@ -414,6 +415,8 @@
 free_it:
 		unlock_page(page);
 		ret++;
+		if(mapping)
+			add_to_swapped_list(mapping, page->index);
 		if (!pagevec_add(&freed_pvec, page))
 			__pagevec_release_nonlru(&freed_pvec);
 		continue;

Copyright © 2003, Eklektix, Inc.
Comments and public postings are copyrighted by their creators.
Linux is a registered trademark of Linus Torvalds