| From: |
| NeilBrown <neilb@cse.unsw.edu.au> |
| To: |
| Linus Torvalds <torvalds@transmeta.com> |
| Subject: |
| [NFS] [PATCH] kNFSd - 2 of 2 - Support zero-copy read for NFSD |
| Date: |
| Fri, 1 Nov 2002 15:19:35 +1100 |
| Cc: |
| nfs@lists.sourceforge.net |
### Comments for ChangeSet
From: Hirokazu Takahashi <taka@valinux.co.jp>
This patch changes read and readdir in nfsd.
read:
If the file supports readpage, we use it to collect pages out of the page
cache and to attach them directly to the outgoing nfs reply.
This reduces the number of copies by one, and if the filesystem/device
driver didn't copy the data, and if the network card can support not copying
the data, then you get zero-copy reads.
readdir:
A separate page is used for storing the readdir response so that a full
PAGE_SIZE bytes of reply can be supported.
----------- Diffstat output ------------
./fs/nfsd/nfs3proc.c | 27 ++++------------
./fs/nfsd/nfs3xdr.c | 21 ++++++++++--
./fs/nfsd/nfsproc.c | 11 +-----
./fs/nfsd/nfsxdr.c | 13 ++++++-
./fs/nfsd/vfs.c | 70 ++++++++++++++++++++++++++++++++++++++++---
./include/linux/nfsd/xdr.h | 1
./include/linux/nfsd/xdr3.h | 1
./include/linux/sunrpc/svc.h | 13 +++++++
8 files changed, 119 insertions(+), 38 deletions(-)
--- ./fs/nfsd/vfs.c 2002/10/30 22:41:15 1.3
+++ ./fs/nfsd/vfs.c 2002/11/01 04:13:04 1.4
@@ -13,6 +13,7 @@
* dentry, don't worry--they have been taken care of.
*
* Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de>
+ * Zerocopy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
*/
#include <linux/config.h>
@@ -28,6 +29,7 @@
#include <linux/net.h>
#include <linux/unistd.h>
#include <linux/slab.h>
+#include <linux/pagemap.h>
#include <linux/in.h>
#include <linux/module.h>
#include <linux/namei.h>
@@ -571,6 +573,61 @@ found:
}
/*
+ * Grab and keep cached pages associated with a file in the svc_rqst
+ * so that they can be passed to the network sendmsg/sendpage routines
+ * directly. They will be released after the sending has completed.
+ */
+static int
+nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset , unsigned long size)
+{
+ unsigned long count = desc->count;
+ struct svc_rqst *rqstp = (struct svc_rqst *)desc->buf;
+
+ if (size > count)
+ size = count;
+
+ if (rqstp->rq_res.page_len == 0) {
+ get_page(page);
+ rqstp->rq_respages[rqstp->rq_resused++] = page;
+ rqstp->rq_res.page_base = offset;
+ rqstp->rq_res.page_len = size;
+ } else if (page != rqstp->rq_respages[rqstp->rq_resused-1]) {
+ get_page(page);
+ rqstp->rq_respages[rqstp->rq_resused++] = page;
+ rqstp->rq_res.page_len += size;
+ } else {
+ rqstp->rq_res.page_len += size;
+ }
+
+ desc->count = count - size;
+ desc->written += size;
+ return size;
+}
+
+static inline ssize_t
+nfsd_getpages(struct file *filp, struct svc_rqst *rqstp, unsigned long count)
+{
+ read_descriptor_t desc;
+ ssize_t retval;
+
+ if (!count)
+ return 0;
+
+ svc_pushback_unused_pages(rqstp);
+
+ desc.written = 0;
+ desc.count = count;
+ desc.buf = (char *)rqstp;
+ desc.error = 0;
+ do_generic_file_read(filp, &filp->f_pos, &desc, nfsd_read_actor);
+
+ retval = desc.written;
+ if (!retval)
+ retval = desc.error;
+ return retval;
+}
+
+/*
* Read data from a file. count must contain the requested read count
* on entry. On return, *count contains the number of bytes actually read.
* N.B. After this call fhp needs an fh_put
@@ -601,10 +658,15 @@ nfsd_read(struct svc_rqst *rqstp, struct
if (ra)
file.f_ra = ra->p_ra;
- oldfs = get_fs();
- set_fs(KERNEL_DS);
- err = vfs_readv(&file, vec, vlen, *count, &offset);
- set_fs(oldfs);
+ if (inode->i_mapping->a_ops->readpage) {
+ file.f_pos = offset;
+ err = nfsd_getpages(&file, rqstp, *count);
+ } else {
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ err = vfs_readv(&file, vec, vlen, *count, &offset);
+ set_fs(oldfs);
+ }
/* Write back readahead params */
if (ra)
--- ./fs/nfsd/nfsxdr.c 2002/11/01 04:03:15 1.4
+++ ./fs/nfsd/nfsxdr.c 2002/11/01 04:13:05 1.5
@@ -337,6 +337,11 @@ nfssvc_decode_readdirargs(struct svc_rqs
return 0;
args->cookie = ntohl(*p++);
args->count = ntohl(*p++);
+ if (args->count > PAGE_SIZE)
+ args->count = PAGE_SIZE;
+
+ svc_take_page(rqstp);
+ args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
return xdr_argsize_check(rqstp, p);
}
@@ -385,7 +390,6 @@ nfssvc_encode_readres(struct svc_rqst *r
xdr_ressize_check(rqstp, p);
/* now update rqstp->rq_res to reflect data aswell */
- rqstp->rq_res.page_base = 0;
rqstp->rq_res.page_len = resp->count;
if (resp->count & 3) {
/* need to pad the tail */
@@ -404,11 +408,16 @@ int
nfssvc_encode_readdirres(struct svc_rqst *rqstp, u32 *p,
struct nfsd_readdirres *resp)
{
+ xdr_ressize_check(rqstp, p);
p = resp->buffer;
*p++ = 0; /* no more entries */
*p++ = htonl((resp->common.err == nfserr_eof));
+ rqstp->rq_res.page_len = ((unsigned long)p & ~PAGE_MASK);
+ rqstp->rq_res.len =
+ rqstp->rq_res.head[0].iov_len+
+ rqstp->rq_res.page_len;
- return xdr_ressize_check(rqstp, p);
+ return 1;
}
int
--- ./fs/nfsd/nfs3xdr.c 2002/11/01 04:03:15 1.4
+++ ./fs/nfsd/nfs3xdr.c 2002/11/01 04:13:05 1.5
@@ -490,6 +490,12 @@ nfs3svc_decode_readdirargs(struct svc_rq
args->dircount = ~0;
args->count = ntohl(*p++);
+ if (args->count > PAGE_SIZE)
+ args->count = PAGE_SIZE;
+
+ svc_take_page(rqstp);
+ args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+
return xdr_argsize_check(rqstp, p);
}
@@ -504,6 +510,9 @@ nfs3svc_decode_readdirplusargs(struct sv
args->dircount = ntohl(*p++);
args->count = ntohl(*p++);
+ svc_take_page(rqstp);
+ args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+
return xdr_argsize_check(rqstp, p);
}
@@ -600,7 +609,6 @@ nfs3svc_encode_readres(struct svc_rqst *
*p++ = htonl(resp->count); /* xdr opaque count */
xdr_ressize_check(rqstp, p);
/* now update rqstp->rq_res to reflect data aswell */
- rqstp->rq_res.page_base = 0;
rqstp->rq_res.page_len = resp->count;
if (resp->count & 3) {
/* need to pad the tail */
@@ -676,12 +684,17 @@ nfs3svc_encode_readdirres(struct svc_rqs
if (resp->status == 0) {
/* stupid readdir cookie */
memcpy(p, resp->verf, 8); p += 2;
+ xdr_ressize_check(rqstp, p);
p = resp->buffer;
*p++ = 0; /* no more entries */
*p++ = htonl(resp->common.err == nfserr_eof);
- }
-
- return xdr_ressize_check(rqstp, p);
+ rqstp->rq_res.page_len = ((unsigned long)p & ~PAGE_MASK);
+ rqstp->rq_res.len =
+ rqstp->rq_res.head[0].iov_len+
+ rqstp->rq_res.page_len;
+ return 1;
+ } else
+ return xdr_ressize_check(rqstp, p);
}
/*
--- ./fs/nfsd/nfsproc.c 2002/10/30 22:41:16 1.3
+++ ./fs/nfsd/nfsproc.c 2002/11/01 04:13:05 1.4
@@ -467,7 +467,6 @@ static int
nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
struct nfsd_readdirres *resp)
{
- u32 * buffer;
int nfserr, count;
loff_t offset;
@@ -475,19 +474,15 @@ nfsd_proc_readdir(struct svc_rqst *rqstp
SVCFH_fmt(&argp->fh),
argp->count, argp->cookie);
- /* Reserve buffer space for status */
- svcbuf_reserve(&rqstp->rq_res, &buffer, &count, 1);
-
/* Shrink to the client read size */
- if (count > (argp->count >> 2))
- count = argp->count >> 2;
+ count = (argp->count >> 2) - 2;
/* Make sure we've room for the NULL ptr & eof flag */
count -= 2;
if (count < 0)
count = 0;
- resp->buffer = buffer;
+ resp->buffer = argp->buffer;
resp->offset = NULL;
resp->buflen = count;
resp->common.err = nfs_ok;
@@ -496,7 +491,7 @@ nfsd_proc_readdir(struct svc_rqst *rqstp
nfserr = nfsd_readdir(rqstp, &argp->fh, &offset,
&resp->common, nfssvc_encode_entry);
- resp->count = resp->buffer - buffer;
+ resp->count = resp->buffer - argp->buffer;
if (resp->offset)
*resp->offset = (u32)offset;
--- ./fs/nfsd/nfs3proc.c 2002/10/30 22:41:16 1.3
+++ ./fs/nfsd/nfs3proc.c 2002/11/01 04:13:05 1.4
@@ -436,35 +436,28 @@ static int
nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
struct nfsd3_readdirres *resp)
{
- u32 * buffer;
int nfserr, count;
- unsigned int want;
dprintk("nfsd: READDIR(3) %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
argp->count, (u32) argp->cookie);
- /* Reserve buffer space for status, attributes and verifier */
- svcbuf_reserve(&rqstp->rq_res, &buffer, &count,
- 1 + NFS3_POST_OP_ATTR_WORDS + 2);
-
/* Make sure we've room for the NULL ptr & eof flag, and shrink to
* client read size */
- if ((count -= 2) > (want = (argp->count >> 2) - 2))
- count = want;
+ count = (argp->count >> 2) - 2;
/* Read directory and encode entries on the fly */
fh_copy(&resp->fh, &argp->fh);
resp->buflen = count;
resp->common.err = nfs_ok;
- resp->buffer = buffer;
+ resp->buffer = argp->buffer;
resp->offset = NULL;
resp->rqstp = rqstp;
nfserr = nfsd_readdir(rqstp, &resp->fh, (loff_t*) &argp->cookie,
&resp->common, nfs3svc_encode_entry);
memcpy(resp->verf, argp->verf, 8);
- resp->count = resp->buffer - buffer;
+ resp->count = resp->buffer - argp->buffer;
if (resp->offset)
xdr_encode_hyper(resp->offset, argp->cookie);
@@ -479,35 +472,29 @@ static int
nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
struct nfsd3_readdirres *resp)
{
- u32 * buffer;
- int nfserr, count, want;
+ int nfserr, count;
loff_t offset;
dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
argp->count, (u32) argp->cookie);
- /* Reserve buffer space for status, attributes and verifier */
- svcbuf_reserve(&rqstp->rq_res, &buffer, &count,
- 1 + NFS3_POST_OP_ATTR_WORDS + 2);
-
/* Make sure we've room for the NULL ptr & eof flag, and shrink to
* client read size */
- if ((count -= 2) > (want = argp->count >> 2))
- count = want;
+ count = (argp->count >> 2) - 2;
/* Read directory and encode entries on the fly */
fh_copy(&resp->fh, &argp->fh);
resp->buflen = count;
resp->common.err = nfs_ok;
- resp->buffer = buffer;
+ resp->buffer = argp->buffer;
resp->rqstp = rqstp;
offset = argp->cookie;
nfserr = nfsd_readdir(rqstp, &resp->fh, &offset,
&resp->common, nfs3svc_encode_entry_plus);
memcpy(resp->verf, argp->verf, 8);
- resp->count = resp->buffer - buffer;
+ resp->count = resp->buffer - argp->buffer;
if (resp->offset)
xdr_encode_hyper(resp->offset, offset);
--- ./include/linux/sunrpc/svc.h 2002/11/01 04:03:15 1.3
+++ ./include/linux/sunrpc/svc.h 2002/11/01 04:13:05 1.4
@@ -192,6 +192,19 @@ static void inline svc_pushback_allpages
}
}
+static void inline svc_pushback_unused_pages(struct svc_rqst *rqstp)
+{
+ while (rqstp->rq_resused) {
+ if (rqstp->rq_respages[--rqstp->rq_resused] != NULL) {
+ rqstp->rq_argpages[rqstp->rq_arghi++] =
+ rqstp->rq_respages[rqstp->rq_resused];
+ rqstp->rq_respages[rqstp->rq_resused] = NULL;
+ }
+ if (rqstp->rq_res.pages == &rqstp->rq_respages[rqstp->rq_resused])
+ break;
+ }
+}
+
static void inline svc_free_allpages(struct svc_rqst *rqstp)
{
while (rqstp->rq_resused) {
--- ./include/linux/nfsd/xdr.h 2002/10/30 22:41:16 1.3
+++ ./include/linux/nfsd/xdr.h 2002/11/01 04:13:05 1.4
@@ -77,6 +77,7 @@ struct nfsd_readdirargs {
struct svc_fh fh;
__u32 cookie;
__u32 count;
+ u32 * buffer;
};
struct nfsd_attrstat {
--- ./include/linux/nfsd/xdr3.h 2002/10/30 22:41:16 1.3
+++ ./include/linux/nfsd/xdr3.h 2002/11/01 04:13:05 1.4
@@ -96,6 +96,7 @@ struct nfsd3_readdirargs {
__u32 dircount;
__u32 count;
__u32 * verf;
+ u32 * buffer;
};
struct nfsd3_commitargs {
-------------------------------------------------------
This sf.net email is sponsored by: Influence the future
of Java(TM) technology. Join the Java Community
Process(SM) (JCP(SM)) program now.
http://ads.sourceforge.net/cgi-bin/redirect.pl?sunm0004en
_______________________________________________
NFS maillist - NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs