LWN.net Logo

hot-n-cold pages: free and allocate hints

From:  Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
To:  BK Commits List:;
Subject:  [PATCH] hot-n-cold pages: free and allocate hints
Date:  Wed, 30 Oct 2002 23:36:13 +0000

ChangeSet 1.911, 2002/10/30 15:36:13-08:00, akpm@digeo.com

	[PATCH] hot-n-cold pages: free and allocate hints
	
	Add a `cold' hint to struct pagevec, and teach truncate and page
	reclaim to use it.
	
	Empirical testing showed that truncate's pages tend to be hot.  Page
	reclaim's pages, on the other hand, are certainly cold.


# This patch includes the following deltas:
#	           ChangeSet	1.910   -> 1.911  
#	include/linux/pagemap.h	1.29    -> 1.30   
#	      mm/readahead.c	1.19    -> 1.20   
#	     mm/page_alloc.c	1.120   -> 1.121  
#	         mm/vmscan.c	1.123   -> 1.124  
#	          fs/mpage.c	1.27    -> 1.28   
#	        mm/filemap.c	1.152   -> 1.153  
#	           mm/swap.c	1.38    -> 1.39   
#	     mm/swap_state.c	1.48    -> 1.49   
#	       mm/truncate.c	1.3     -> 1.4    
#	include/linux/pagevec.h	1.5     -> 1.6    
#

 fs/mpage.c              |    4 ++--
 include/linux/pagemap.h |    2 +-
 include/linux/pagevec.h |   10 +++++++++-
 mm/filemap.c            |    2 +-
 mm/page_alloc.c         |    2 +-
 mm/readahead.c          |    2 +-
 mm/swap.c               |   15 ++++++++-------
 mm/swap_state.c         |    2 +-
 mm/truncate.c           |   10 +++++++---
 mm/vmscan.c             |    6 +++---
 10 files changed, 34 insertions(+), 21 deletions(-)


diff -Nru a/fs/mpage.c b/fs/mpage.c
--- a/fs/mpage.c	Wed Oct 30 16:22:01 2002
+++ b/fs/mpage.c	Wed Oct 30 16:22:01 2002
@@ -263,7 +263,7 @@
 	sector_t last_block_in_bio = 0;
 	struct pagevec lru_pvec;
 
-	pagevec_init(&lru_pvec);
+	pagevec_init(&lru_pvec, 0);
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = list_entry(pages->prev, struct page, list);
 
@@ -560,7 +560,7 @@
 	if (get_block == NULL)
 		writepage = mapping->a_ops->writepage;
 
-	pagevec_init(&pvec);
+	pagevec_init(&pvec, 0);
 	write_lock(&mapping->page_lock);
 
 	list_splice_init(&mapping->dirty_pages, &mapping->io_pages);
diff -Nru a/include/linux/pagemap.h b/include/linux/pagemap.h
--- a/include/linux/pagemap.h	Wed Oct 30 16:22:01 2002
+++ b/include/linux/pagemap.h	Wed Oct 30 16:22:01 2002
@@ -24,7 +24,7 @@
 
 #define page_cache_get(page)		get_page(page)
 #define page_cache_release(page)	put_page(page)
-void release_pages(struct page **pages, int nr);
+void release_pages(struct page **pages, int nr, int cold);
 
 static inline struct page *page_cache_alloc(struct address_space *x)
 {
diff -Nru a/include/linux/pagevec.h b/include/linux/pagevec.h
--- a/include/linux/pagevec.h	Wed Oct 30 16:22:01 2002
+++ b/include/linux/pagevec.h	Wed Oct 30 16:22:01 2002
@@ -12,6 +12,7 @@
 
 struct pagevec {
 	unsigned nr;
+	int cold;
 	struct page *pages[PAGEVEC_SIZE];
 };
 
@@ -25,7 +26,13 @@
 unsigned int pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
 		pgoff_t start, unsigned int nr_pages);
 
-static inline void pagevec_init(struct pagevec *pvec)
+static inline void pagevec_init(struct pagevec *pvec, int cold)
+{
+	pvec->nr = 0;
+	pvec->cold = cold;
+}
+
+static inline void pagevec_reinit(struct pagevec *pvec)
 {
 	pvec->nr = 0;
 }
@@ -48,6 +55,7 @@
 	pvec->pages[pvec->nr++] = page;
 	return pagevec_space(pvec);
 }
+
 
 static inline void pagevec_release(struct pagevec *pvec)
 {
diff -Nru a/mm/filemap.c b/mm/filemap.c
--- a/mm/filemap.c	Wed Oct 30 16:22:01 2002
+++ b/mm/filemap.c	Wed Oct 30 16:22:01 2002
@@ -1449,7 +1449,7 @@
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = mapping->backing_dev_info;
 
-	pagevec_init(&lru_pvec);
+	pagevec_init(&lru_pvec, 0);
 
 	if (unlikely(file->f_error)) {
 		err = file->f_error;
diff -Nru a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c	Wed Oct 30 16:22:01 2002
+++ b/mm/page_alloc.c	Wed Oct 30 16:22:01 2002
@@ -548,7 +548,7 @@
 	int i = pagevec_count(pvec);
 
 	while (--i >= 0)
-		free_hot_page(pvec->pages[i]);
+		free_hot_cold_page(pvec->pages[i], pvec->cold);
 }
 
 void __free_pages(struct page *page, unsigned int order)
diff -Nru a/mm/readahead.c b/mm/readahead.c
--- a/mm/readahead.c	Wed Oct 30 16:22:01 2002
+++ b/mm/readahead.c	Wed Oct 30 16:22:01 2002
@@ -49,7 +49,7 @@
 	unsigned page_idx;
 	struct pagevec lru_pvec;
 
-	pagevec_init(&lru_pvec);
+	pagevec_init(&lru_pvec, 0);
 
 	if (mapping->a_ops->readpages)
 		return mapping->a_ops->readpages(mapping, pages, nr_pages);
diff -Nru a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c	Wed Oct 30 16:22:01 2002
+++ b/mm/swap.c	Wed Oct 30 16:22:01 2002
@@ -99,13 +99,13 @@
  * page count inside the lock to see whether shrink_cache grabbed the page
  * via the LRU.  If it did, give up: shrink_cache will free it.
  */
-void release_pages(struct page **pages, int nr)
+void release_pages(struct page **pages, int nr, int cold)
 {
 	int i;
 	struct pagevec pages_to_free;
 	struct zone *zone = NULL;
 
-	pagevec_init(&pages_to_free);
+	pagevec_init(&pages_to_free, cold);
 	for (i = 0; i < nr; i++) {
 		struct page *page = pages[i];
 		struct zone *pagezone;
@@ -126,7 +126,7 @@
 			if (!pagevec_add(&pages_to_free, page)) {
 				spin_unlock_irq(&zone->lru_lock);
 				__pagevec_free(&pages_to_free);
-				pagevec_init(&pages_to_free);
+				pagevec_reinit(&pages_to_free);
 				zone = NULL;	/* No lock is held */
 			}
 		}
@@ -139,8 +139,8 @@
 
 void __pagevec_release(struct pagevec *pvec)
 {
-	release_pages(pvec->pages, pagevec_count(pvec));
-	pagevec_init(pvec);
+	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
+	pagevec_reinit(pvec);
 }
 
 /*
@@ -153,7 +153,8 @@
 	int i;
 	struct pagevec pages_to_free;
 
-	pagevec_init(&pages_to_free);
+	pagevec_init(&pages_to_free, pvec->cold);
+	pages_to_free.cold = pvec->cold;
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
 
@@ -162,7 +163,7 @@
 			pagevec_add(&pages_to_free, page);
 	}
 	pagevec_free(&pages_to_free);
-	pagevec_init(pvec);
+	pagevec_reinit(pvec);
 }
 
 /*
diff -Nru a/mm/swap_state.c b/mm/swap_state.c
--- a/mm/swap_state.c	Wed Oct 30 16:22:01 2002
+++ b/mm/swap_state.c	Wed Oct 30 16:22:01 2002
@@ -301,7 +301,7 @@
 
 		for (i = 0; i < todo; i++)
 			free_swap_cache(pagep[i]);
-		release_pages(pagep, todo);
+		release_pages(pagep, todo, 0);
 		pagep += todo;
 		nr -= todo;
 	}
diff -Nru a/mm/truncate.c b/mm/truncate.c
--- a/mm/truncate.c	Wed Oct 30 16:22:01 2002
+++ b/mm/truncate.c	Wed Oct 30 16:22:01 2002
@@ -100,6 +100,10 @@
  * When looking at page->index outside the page lock we need to be careful to
  * copy it into a local to avoid races (it could change at any time).
  *
+ * We pass down the cache-hot hint to the page freeing code.  Even if the
+ * mapping is large, it is probably the case that the final pages are the most
+ * recently touched, and freeing happens in ascending file offset order.
+ *
  * Called under (and serialised by) inode->i_sem.
  */
 void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
@@ -110,7 +114,7 @@
 	pgoff_t next;
 	int i;
 
-	pagevec_init(&pvec);
+	pagevec_init(&pvec, 0);
 	next = start;
 	while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 		for (i = 0; i < pagevec_count(&pvec); i++) {
@@ -185,7 +189,7 @@
 	pgoff_t next = 0;
 	int i;
 
-	pagevec_init(&pvec);
+	pagevec_init(&pvec, 0);
 	while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
@@ -226,7 +230,7 @@
 	pgoff_t next = 0;
 	int i;
 
-	pagevec_init(&pvec);
+	pagevec_init(&pvec, 0);
 	while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
diff -Nru a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c	Wed Oct 30 16:22:01 2002
+++ b/mm/vmscan.c	Wed Oct 30 16:22:01 2002
@@ -203,7 +203,7 @@
 	int pgactivate = 0;
 	int ret = 0;
 
-	pagevec_init(&freed_pvec);
+	pagevec_init(&freed_pvec, 1);
 	while (!list_empty(page_list)) {
 		struct page *page;
 		int may_enter_fs;
@@ -433,7 +433,7 @@
 	if (nr_to_process < SWAP_CLUSTER_MAX)
 		nr_to_process = SWAP_CLUSTER_MAX;
 
-	pagevec_init(&pvec);
+	pagevec_init(&pvec, 1);
 
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
@@ -617,7 +617,7 @@
 		pgdeactivate++;
 	}
 
-	pagevec_init(&pvec);
+	pagevec_init(&pvec, 1);
 	spin_lock_irq(&zone->lru_lock);
 	while (!list_empty(&l_inactive)) {
 		page = list_entry(l_inactive.prev, struct page, lru);
-
To unsubscribe from this list: send the line "unsubscribe bk-commits-head" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Copyright © 2002, Eklektix, Inc.
Comments and public postings are copyrighted by their creators.
Linux is a registered trademark of Linus Torvalds