| From: |
| Mingming Cao <cmm@us.ibm.com> |
| To: |
| ltc-interlock@linux.ibm.com, lse-tech@lists.sourceforge.net |
| Subject: |
| [Lse-tech] [PATCH] reiserfs direct I/O support for 2.5 kernel |
| Date: |
| 14 Mar 2003 14:46:02 -0800 |
The patch below adds direct I/O support to ReiserFS for the 2.5 kernel.
Thanks,
Mingming
diff -urN linux-2.5.64/fs/reiserfs/inode.c 2564-rf-dio/fs/reiserfs/inode.c
--- linux-2.5.64/fs/reiserfs/inode.c Tue Mar 4 19:29:15 2003
+++ 2564-rf-dio/fs/reiserfs/inode.c Thu Mar 13 16:59:07 2003
@@ -304,7 +304,7 @@
** read old data off disk. Set the up to date bit on the buffer instead
** and jump to the end
*/
- if (PageUptodate(bh_result->b_page)) {
+ if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
set_buffer_uptodate(bh_result);
goto finished ;
}
@@ -418,6 +418,40 @@
return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ;
}
+static int reiserfs_get_block_direct_io (struct inode * inode,
+ sector_t iblock, unsigned long max_blocks,
+ struct buffer_head * bh_result, int create) {
+ int ret ;
+ /* get_block callback handed to blockdev_direct_IO(): maps a single block per call; max_blocks is not consulted. */
+ bh_result->b_size = (1 << inode->i_blkbits);
+ bh_result->b_page = NULL; /* no page attached: the NULL b_page marks a direct I/O request for the mapping code */
+
+ ret = reiserfs_get_block(inode, iblock, bh_result, create) ;
+
+ if (ret != 0)
+ return ret;
+
+ /* don't allow direct io onto tail pages: a mapped buffer with block number 0 is rejected */
+ if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
+ /* make sure future calls to the direct io funcs for this offset
+ ** in the file fail by unmapping the buffer
+ */
+ reiserfs_unmap_buffer(bh_result);
+ ret = -EINVAL ;
+ }
+
+ /* Possible unpacked tail. Flush the data before pages have
+ disappeared, then clear the flag. This also runs when -EINVAL was set above. */
+ if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
+ lock_kernel();
+ reiserfs_commit_for_inode(inode);
+ REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
+ unlock_kernel();
+ }
+ /* 0 on success, -EINVAL for a rejected tail block; reiserfs_get_block errors were already returned above */
+ return ret ;
+}
+
/*
** helper function for when reiserfs_get_block is called for a hole
** but the file tail is still in a direct item
@@ -446,7 +480,7 @@
tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ;
index = tail_offset >> PAGE_CACHE_SHIFT ;
- if (index != hole_page->index) {
+ if (!hole_page || index != hole_page->index) {
tail_page = grab_cache_page(inode->i_mapping, index) ;
retval = -ENOMEM;
if (!tail_page) {
@@ -552,7 +586,15 @@
return ret;
}
- REISERFS_I(inode)->i_flags |= i_pack_on_close_mask ;
+ /* If the file is small enough that it might have a tail, and tails are
+ ** enabled, mark it as possibly needing tail packing on close.
+ */
+ if ( (have_large_tails (inode->i_sb) &&
+ inode->i_size < i_block_size (inode)*4) ||
+ (have_small_tails (inode->i_sb) &&
+ inode->i_size < i_block_size(inode)) )
+
+ REISERFS_I(inode)->i_flags |= i_pack_on_close_mask ;
windex = push_journal_writer("reiserfs_get_block") ;
@@ -742,22 +784,27 @@
** the disk
*/
set_buffer_uptodate (unbh);
-
- /* we've converted the tail, so we must
- ** flush unbh before the transaction commits
- */
- add_to_flushlist(inode, unbh) ;
-
- /* mark it dirty now to prevent commit_write from adding
- ** this buffer to the inode's dirty buffer list
- */
+ /* unbh->b_page == NULL in case of DIRECT_IO request, this means
+ buffer will disappear shortly, so it should not be added to
+ any of our lists.
+ */
+ if ( unbh->b_page ) {
+ /* we've converted the tail, so we must
+ ** flush unbh before the transaction commits
+ */
+ add_to_flushlist(inode, unbh) ;
+
+ /* mark it dirty now to prevent commit_write from adding
+ ** this buffer to the inode's dirty buffer list
+ */
/*
* AKPM: changed __mark_buffer_dirty to mark_buffer_dirty().
* It's still atomic, but it sets the page dirty too,
* which makes it eligible for writeback at any time by the
* VM (which was also the case with __mark_buffer_dirty())
*/
- mark_buffer_dirty(unbh) ;
+ mark_buffer_dirty(unbh) ;
+ }
//inode->i_blocks += inode->i_sb->s_blocksize / 512;
//mark_tail_converted (inode);
@@ -2156,6 +2203,15 @@
if (pos > inode->i_size) {
struct reiserfs_transaction_handle th ;
reiserfs_write_lock(inode->i_sb);
+ /* If the file has already grown past the size at which it can still
+ ** have a tail, it no longer needs tail packing on close. i_size is
+ ** still the pre-write size here, so this errs towards keeping the flag. */
+ if ( (have_large_tails (inode->i_sb) &&
+ inode->i_size > i_block_size(inode)*4) ||
+ (have_small_tails (inode->i_sb) &&
+ inode->i_size > i_block_size(inode)) )
+ REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask ;
+
journal_begin(&th, inode->i_sb, 1) ;
reiserfs_update_inode_transaction(inode) ;
inode->i_size = pos ;
@@ -2214,6 +2270,17 @@
return ret ;
}
+static int reiserfs_direct_IO(int rw, struct kiocb *iocb,
+ const struct iovec *iov, loff_t offset,
+ unsigned long nr_segs)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_dentry->d_inode->i_mapping->host; /* host inode of the file's mapping */
+ /* direct_IO handler: forward the request unchanged to blockdev_direct_IO() on the inode's block device, mapping blocks via reiserfs_get_block_direct_io */
+ return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+ offset, nr_segs, reiserfs_get_block_direct_io);
+}
+
struct address_space_operations reiserfs_address_space_operations = {
.writepage = reiserfs_writepage,
.readpage = reiserfs_readpage,
@@ -2222,5 +2289,6 @@
.sync_page = block_sync_page,
.prepare_write = reiserfs_prepare_write,
.commit_write = reiserfs_commit_write,
- .bmap = reiserfs_aop_bmap
+ .bmap = reiserfs_aop_bmap,
+ .direct_IO = reiserfs_direct_IO
} ;
diff -urN linux-2.5.64/fs/reiserfs/tail_conversion.c 2564-rf-dio/fs/reiserfs/tail_conversion.c
--- linux-2.5.64/fs/reiserfs/tail_conversion.c Tue Mar 4 19:29:57 2003
+++ 2564-rf-dio/fs/reiserfs/tail_conversion.c Thu Mar 13 16:59:07 2003
@@ -105,8 +105,10 @@
/* we only send the unbh pointer if the buffer is not up to date.
** this avoids overwriting good data from writepage() with old data
** from the disk or buffer cache
+ ** Special case: unbh->b_page will be NULL if we are coming through
+ ** DIRECT_IO handler here.
*/
- if (buffer_uptodate(unbh) || PageUptodate(unbh->b_page)) {
+ if (!unbh->b_page || buffer_uptodate(unbh) || PageUptodate(unbh->b_page)) {
up_to_date_bh = NULL ;
} else {
up_to_date_bh = unbh ;