From owner-FreeBSD-users-jp@jp.freebsd.org  Mon Oct 21 15:11:37 1996
Received: by mail.jp.freebsd.org (8.7.3+2.6Wbeta5/8.7.3) id PAA09991
	Mon, 21 Oct 1996 15:11:37 +0900 (JST)
Received: by mail.jp.freebsd.org (8.7.3+2.6Wbeta5/8.7.3) with ESMTP id PAA09986
	for <FreeBSD-users-jp@jp.freebsd.org>; Mon, 21 Oct 1996 15:11:35 +0900 (JST)
Received: by uno.sat.t.u-tokyo.ac.jp (8.7.3+2.6Wbeta5/8.7.3) with ESMTP
	id PAA05596; Mon, 21 Oct 1996 15:11:35 +0900 (JST)
To: FreeBSD-users-jp@jp.freebsd.org
X-Mailer: Mew version 1.06 on Emacs 19.28.1, Mule 2.3
Mime-Version: 1.0
Content-Type: Multipart/Mixed;
	boundary="--Next_Part(Mon_Oct_21_15:10:12_1996)--"
Date: Mon, 21 Oct 1996 15:11:34 +0900
Message-ID: <5594.845878294@sat.t.u-tokyo.ac.jp>
From: Hidetoshi Shimokawa <simokawa@sat.t.u-tokyo.ac.jp>
Reply-To: FreeBSD-users-jp@jp.freebsd.org
X-Distribute: distribute [version 2.1 (Alpha) patchlevel=19]
X-Sequence: FreeBSD-users-jp 5428
Subject: [FreeBSD-users-jp 5428] Re: NFS V3 performance
Errors-To: owner-FreeBSD-users-jp@jp.freebsd.org
Sender: owner-FreeBSD-users-jp@jp.freebsd.org

----Next_Part(Mon_Oct_21_15:10:12_1996)--
Content-Type: Text/Plain; charset=iso-2022-jp

下川＠東大です．

うむうむ，また違った問題が出できますね．

kawamura> Solaris2.5 と NFS Ver3 で接続した場合のレポートです。server の cache 
kawamura> に hit する場合とそうでない場合がわかれば十分なので、auto で測定してみ
kawamura> ました。
kawamura> 
kawamura> [(1) の場合]
kawamura>         1       8192    172738              979691              
kawamura>         16      8192    112463              980586              
kawamura> [(2) の場合]
kawamura>         1       8192    138798              44739242            
kawamura>         16      8192    115195              800105              
kawamura> [(3) の場合]
kawamura>         1       8192    169895              44739242            
kawamura>         16      8192    126665              799212              
$B$^$:!$(B(2),(3)$B$N(Bread$B$,B.$$$N$O(Bread ahead$B$,$A$c$s$H8z$$$F$$$k$;$$$G$7$g$&!%(B
$B$D$^$j(BSolaris2.5$B$,==J,B.$$$N$G$7$g$&$+!%(B
$B!t@nB<$5$s$N<+Bp$N7k2L$H$O$^$C$?$/@5H?BP$G$9$M!%(B

(1)$B$N>l9g$K$O$?$V$s(Bbio$B$,(Bblock$B$5$l$F$7$^$C$F!$(Bread ahead$B$,8z$$$F$J$$$N(B
$B$G$7$g$&!%@nB<$5$s$N<+Bp$N>l9g$O(Bserver$B$,CY$$$N$G5U$K!$$3$N$*$+$2$G!$(B
request$B$,(Bduplicate$B$7$F$$$J$/$F!$NI$/$J$C$F$$$k$h$&$J5$$,$7$^$9!%(B

write$B$K4X$7$F$O!$(B-current$B$N(Bcode$B$O$*$+$7$$$i$7$$$N$G!$$J$s$H$b$$$($^$;(B
$B$s!%$J$s$+(Bcommit$B$9$k$@$1$G$$$$$b$N$r$b$&0l2s=q$-$K9T$C$F$$$k$i$7$$$G$9!%(B
$B8e$K(Bpatch$B$rIU$1$^$9$N$G!$M&5$$N$"$k$+$?$O;n$7$F$_$F$/$@$5$$!%(B

kawamura> また (1) の下川さんのパッチなしの場合、
kawamura> 
kawamura>         MB      reclen  bytes/sec written   bytes/sec read      
kawamura>         1       512     159025              19173961            
kawamura>         1       1024    126740              979691              
kawamura>         1       2048    127948              44739242            
kawamura>         1       4096    120916              67108864            
kawamura>         1       8192    172738              979691              
kawamura> 
kawamura> のように大きなバラツキがみられました。
$B$3$l$O$J$s$J$s$G$7$g$&$M!%$A$g$C$H$o$+$j$^$;$s!%(B
$B!t(Bnetwork$B$H$+(Bserver$B$NIi2Y$H$+$O!$$"$^$j4X78$J$$$N$G$7$g$&$+!)(B

$B2H$G!$(BDHU+XJack Ether+PAO$B$N$H$C$F$b8E$$$d$D(B $B$r(Bclient$B$K$7$?$i!$(B
reclen$B$rA}$9$H(Bread$B$,$I$s$I$s0-$/$J$C$F$$$-$^$7$?!%(B
sn0: kernel memory allocation failed
とか出ていたので，また別の原因なんでしょうが．

/\ Hidetoshi Shimokawa
\/  simokawa@sat.t.u-tokyo.ac.jp
PGP public key: finger -l simokawa@sat.t.u-tokyo.ac.jp

----Next_Part(Mon_Oct_21_15:10:12_1996)--
Content-Type: Text/Plain; charset=us-ascii

> I also like to know why NFSv3 client is so slow.

NFSv3 in current has a bug where uncommitted unstably written buffers can
later be rewritten to the server synchronously.  This patch fixes this bug
as well as improving the code which sends commit rpcs to the server to
reduce the number of rpcs needed.  It also marks buffers of uncommitted
data so that they can be cluster-committed automatically by the bio
system.

Index: nfs_bio.c
===================================================================
RCS file: /home/ncvs/src/sys/nfs/nfs_bio.c,v
retrieving revision 1.25
diff -u -r1.25 nfs_bio.c
--- nfs_bio.c	1996/09/19 18:20:54	1.25
+++ nfs_bio.c	1996/10/09 09:10:07
@@ -905,9 +905,11 @@
 		    iomode = NFSV3WRITE_FILESYNC;
 		bp->b_flags |= B_WRITEINPROG;
 		error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
-		if (!error && iomode == NFSV3WRITE_UNSTABLE)
+		if (!error && iomode == NFSV3WRITE_UNSTABLE) {
 		    bp->b_flags |= B_NEEDCOMMIT;
-		else
+		    if (bp->b_dirtyoff == 0 && bp->b_dirtyend == bp->b_bufsize)
+			bp->b_flags |= B_CLUSTEROK;
+		} else
 		    bp->b_flags &= ~B_NEEDCOMMIT;
 		bp->b_flags &= ~B_WRITEINPROG;
 
Index: nfs_vnops.c
===================================================================
RCS file: /home/ncvs/src/sys/nfs/nfs_vnops.c,v
retrieving revision 1.35
diff -u -r1.35 nfs_vnops.c
--- nfs_vnops.c	1996/09/19 18:21:01	1.35
+++ nfs_vnops.c	1996/10/09 09:10:15
@@ -1210,7 +1210,10 @@
 		tsiz -= len;
 	}
 nfsmout:
-	*iomode = committed;
+	if (vp->v_mount->mnt_flag & MNT_ASYNC)
+		*iomode = NFSV3WRITE_FILESYNC;
+	else
+		*iomode = committed;
 	if (error)
 		uiop->uio_resid = tsiz;
 	return (error);
@@ -2607,6 +2610,9 @@
 	int error = 0, wccflag = NFSV3_WCCRATTR;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	
+#ifdef NFS_DEBUG
+	printf("nfs_commit(%x, %d, %d, %x, %x)\n", vp, (int) offset, cnt, cred, procp);
+#endif
 	if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0)
 		return (0);
 	nfsstats.rpccnt[NFSPROC_COMMIT]++;
@@ -2757,13 +2763,14 @@
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
 	int passone = 1;
-	u_quad_t off = (u_quad_t)-1, endoff = 0, toff;
+	u_quad_t off, endoff, toff;
 	struct ucred* wcred = NULL;
-#ifndef NFS_COMMITBVECSIZ
-#define NFS_COMMITBVECSIZ	20
-#endif
-	struct buf *bvec[NFS_COMMITBVECSIZ];
+	struct buf **bvec = NULL;
+	int bvecsize = 0, bveccount;
 
+#ifdef NFS_DEBUG
+	printf("nfs_flush(%x, %x, %d, %x, %d)\n", vp, cred, waitfor, p, commit);
+#endif
 	if (nmp->nm_flag & NFSMNT_INT)
 		slpflag = PCATCH;
 	if (!commit)
@@ -2776,12 +2783,41 @@
 	 * job.
 	 */
 again:
+	off = (u_quad_t)-1;
+	endoff = 0;
 	bvecpos = 0;
 	if (NFS_ISV3(vp) && commit) {
 		s = splbio();
+		/*
+		 * Count up how many buffers waiting for a commit.
+		 */
+		bveccount = 0;
+		for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
+			nbp = bp->b_vnbufs.le_next;
+			if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
+			    == (B_DELWRI | B_NEEDCOMMIT))
+				bveccount++;
+		}
+		/*
+		 * Allocate space to remember the list of bufs to commit.  It is
+		 * important to use M_NOWAIT here to avoid a race with nfs_write.
+		 * If we can't get memory (for whatever reason), we will end up
+		 * committing the buffers one-by-one in the loop below.
+		 */
+		if (bveccount > bvecsize) {
+			if (bvec != NULL)
+				free(bvec, M_TEMP);
+			bvec = (struct buf **)
+				malloc(bveccount * sizeof(struct buf *),
+				       M_TEMP, M_NOWAIT);
+			if (bvec == NULL)
+				bvecsize = 0;
+			else
+				bvecsize = bveccount;
+		}
 		for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
 			nbp = bp->b_vnbufs.le_next;
-			if (bvecpos >= NFS_COMMITBVECSIZ)
+			if (bvecpos >= bvecsize)
 				break;
 			if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
 				!= (B_DELWRI | B_NEEDCOMMIT))
@@ -2822,10 +2858,14 @@
 		 * one call for all of them, otherwise commit each one
 		 * separately.
 		 */
-		if (wcred != NOCRED)
+		if (wcred != NOCRED) {
+#ifdef NFS_DEBUG
+printf("nfs_flush: calling nfs_commit(%x, %d, %d, %x, %x)\n",
+	vp, (int) off, (int) (endoff - off), wcred, p);
+#endif
 			retv = nfs_commit(vp, off, (int)(endoff - off),
 					  wcred, p);
-		else {
+		} else {
 			retv = 0;
 			for (i = 0; i < bvecpos; i++) {
 				off_t off, size;
@@ -2879,8 +2919,10 @@
 				"nfsfsync", slptimeo);
 			splx(s);
 			if (error) {
-			    if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
-				return (EINTR);
+			    if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
+				error = EINTR;
+				goto done;
+			    }
 			    if (slpflag == PCATCH) {
 				slpflag = 0;
 				slptimeo = 2 * hz;
@@ -2892,6 +2934,9 @@
 			panic("nfs_fsync: not dirty");
 		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT))
 			continue;
+#ifdef NFS_DEBUG
+printf("nfs_flush: writing bp=%x, bp->b_flags=%x\n", bp, bp->b_flags);
+#endif
 		bremfree(bp);
 		if (passone || !commit)
 		    bp->b_flags |= (B_BUSY|B_ASYNC);
@@ -2912,8 +2957,10 @@
 			error = tsleep((caddr_t)&vp->v_numoutput,
 				slpflag | (PRIBIO + 1), "nfsfsync", slptimeo);
 			if (error) {
-			    if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
-				return (EINTR);
+			    if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
+				error = EINTR;
+				goto done;
+			    }
 			    if (slpflag == PCATCH) {
 				slpflag = 0;
 				slptimeo = 2 * hz;
@@ -2928,6 +2975,9 @@
 		error = np->n_error;
 		np->n_flag &= ~NWRITEERR;
 	}
+done:
+	if (bvec)
+		free(bvec, M_TEMP);
 	return (error);
 }
 
@@ -3129,8 +3179,9 @@
 	 * an actual write will have to be scheduled via. VOP_STRATEGY().
 	 * If B_WRITEINPROG is already set, then push it with a write anyhow.
 	 */
-	if (oldflags & (B_NEEDCOMMIT | B_WRITEINPROG) == B_NEEDCOMMIT) {
+	if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) {
 		off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
+		vfs_busy_pages(bp, 1);
 		bp->b_flags |= B_WRITEINPROG;
 		retv = nfs_commit(bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff,
 			bp->b_wcred, bp->b_proc);
@@ -3139,8 +3190,10 @@
 			bp->b_dirtyoff = bp->b_dirtyend = 0;
 			bp->b_flags &= ~B_NEEDCOMMIT;
 			biodone(bp);
-		} else if (retv == NFSERR_STALEWRITEVERF)
+		} else if (retv == NFSERR_STALEWRITEVERF) {
 			nfs_clearcommit(bp->b_vp->v_mount);
+			vfs_unbusy_pages(bp);
+		}
 	}
 	if (retv) {
 		if (force)


--
Doug Rabson, Microsoft RenderMorphics Ltd.	Mail:  dfr@render.com
						Phone: +44 171 734 3761
						FAX:   +44 171 734 6426



----Next_Part(Mon_Oct_21_15:10:12_1996)----
