⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 vfs_cluster.c

📁 早期freebsd实现
💻 C
📖 第 1 页 / 共 2 页
字号:
/*- * Copyright (c) 1993 *	The Regents of the University of California.  All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in the *    documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software *    must display the following acknowledgement: *	This product includes software developed by the University of *	California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors *    may be used to endorse or promote products derived from this software *    without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * *	@(#)vfs_cluster.c	8.7 (Berkeley) 2/13/94 */#include <sys/param.h>#include <sys/proc.h>#include <sys/buf.h>#include <sys/vnode.h>#include <sys/mount.h>#include <sys/trace.h>#include <sys/malloc.h>#include <sys/resourcevar.h>#include <libkern/libkern.h>#ifdef DEBUG#include <vm/vm.h>#include <sys/sysctl.h>int doreallocblks = 1;struct ctldebug debug13 = { "doreallocblks", &doreallocblks };#else/* XXX for cluster_write */#define doreallocblks 1#endif/* * Local declarations */struct buf *cluster_newbuf __P((struct vnode *, struct buf *, long, daddr_t,	    daddr_t, long, int));struct buf *cluster_rbuild __P((struct vnode *, u_quad_t, struct buf *,	    daddr_t, daddr_t, long, int, long));void	    cluster_wbuild __P((struct vnode *, struct buf *, long,	    daddr_t, int, daddr_t));struct cluster_save *cluster_collectbufs __P((struct vnode *, struct buf *));#ifdef DIAGNOSTIC/* * Set to 1 if reads of block zero should cause readahead to be done. * Set to 0 treats a read of block zero as a non-sequential read. * * Setting to one assumes that most reads of block zero of files are due to * sequential passes over the files (e.g. cat, sum) where additional blocks * will soon be needed.  Setting to zero assumes that the majority are * surgical strikes to get particular info (e.g. size, file) where readahead * blocks will not be used and, in fact, push out other potentially useful * blocks from the cache.  The former seems intuitive, but some quick tests * showed that the latter performed better from a system-wide point of view. */int	doclusterraz = 0;#define ISSEQREAD(vp, blk) \	(((blk) != 0 || doclusterraz) && \	 ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))#else#define ISSEQREAD(vp, blk) \	((blk) != 0 && ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))#endif/* * This replaces bread.  If this is a bread at the beginning of a file and * lastr is 0, we assume this is the first read and we'll read up to two * blocks if they are sequential.  After that, we'll do regular read ahead * in clustered chunks. * * There are 4 or 5 cases depending on how you count: *	Desired block is in the cache: *	    1 Not sequential access (0 I/Os). *	    2 Access is sequential, do read-ahead (1 ASYNC). *	Desired block is not in cache: *	    3 Not sequential access (1 SYNC). *	    4 Sequential access, next block is contiguous (1 SYNC). *	    5 Sequential access, next block is not contiguous (1 SYNC, 1 ASYNC) * * There are potentially two buffers that require I/O. * 	bp is the block requested. *	rbp is the read-ahead block. *	If either is NULL, then you don't have to do the I/O. */cluster_read(vp, filesize, lblkno, size, cred, bpp)	struct vnode *vp;	u_quad_t filesize;	daddr_t lblkno;	long size;	struct ucred *cred;	struct buf **bpp;{	struct buf *bp, *rbp;	daddr_t blkno, ioblkno;	long flags;	int error, num_ra, alreadyincore;#ifdef DIAGNOSTIC	if (size == 0)		panic("cluster_read: size = 0");#endif	error = 0;	flags = B_READ;	*bpp = bp = getblk(vp, lblkno, size, 0, 0);	if (bp->b_flags & B_CACHE) {		/*		 * Desired block is in cache; do any readahead ASYNC.		 * Case 1, 2.		 */		trace(TR_BREADHIT, pack(vp, size), lblkno);		flags |= B_ASYNC;		ioblkno = lblkno + (vp->v_ralen ? vp->v_ralen : 1);		alreadyincore = (int)incore(vp, ioblkno);		bp = NULL;	} else {		/* Block wasn't in cache, case 3, 4, 5. */		trace(TR_BREADMISS, pack(vp, size), lblkno);		bp->b_flags |= B_READ;		ioblkno = lblkno;		alreadyincore = 0;		curproc->p_stats->p_ru.ru_inblock++;		/* XXX */	}	/*	 * XXX	 * Replace 1 with a window size based on some permutation of	 * maxcontig and rot_delay.  This will let you figure out how	 * many blocks you should read-ahead (case 2, 4, 5).	 *	 * If the access isn't sequential, reset the window to 1.	 * Note that a read to the same block is considered sequential.	 * This catches the case where the file is being read sequentially,	 * but at smaller than the filesystem block size.	 */	rbp = NULL;	if (!ISSEQREAD(vp, lblkno)) {		vp->v_ralen = 0;		vp->v_maxra = lblkno;	} else if ((ioblkno + 1) * size <= filesize && !alreadyincore &&	    !(error = VOP_BMAP(vp, ioblkno, NULL, &blkno, &num_ra)) &&	    blkno != -1) {		/*		 * Reading sequentially, and the next block is not in the		 * cache.  We are going to try reading ahead.		 */		if (num_ra) {			/*			 * If our desired readahead block had been read			 * in a previous readahead but is no longer in			 * core, then we may be reading ahead too far			 * or are not using our readahead very rapidly.			 * In this case we scale back the window.			 */			if (!alreadyincore && ioblkno <= vp->v_maxra)				vp->v_ralen = max(vp->v_ralen >> 1, 1);			/*			 * There are more sequential blocks than our current			 * window allows, scale up.  Ideally we want to get			 * in sync with the filesystem maxcontig value.			 */			else if (num_ra > vp->v_ralen && lblkno != vp->v_lastr)				vp->v_ralen = vp->v_ralen ?					min(num_ra, vp->v_ralen << 1) : 1;			if (num_ra > vp->v_ralen)				num_ra = vp->v_ralen;		}		if (num_ra)				/* case 2, 4 */			rbp = cluster_rbuild(vp, filesize,			    bp, ioblkno, blkno, size, num_ra, flags);		else if (ioblkno == lblkno) {			bp->b_blkno = blkno;			/* Case 5: check how many blocks to read ahead */			++ioblkno;			if ((ioblkno + 1) * size > filesize ||			    incore(vp, ioblkno) || (error = VOP_BMAP(vp,			     ioblkno, NULL, &blkno, &num_ra)) || blkno == -1)				goto skip_readahead;			/*			 * Adjust readahead as above			 */			if (num_ra) {				if (!alreadyincore && ioblkno <= vp->v_maxra)					vp->v_ralen = max(vp->v_ralen >> 1, 1);				else if (num_ra > vp->v_ralen &&					 lblkno != vp->v_lastr)					vp->v_ralen = vp->v_ralen ?						min(num_ra,vp->v_ralen<<1) : 1;				if (num_ra > vp->v_ralen)					num_ra = vp->v_ralen;			}			flags |= B_ASYNC;			if (num_ra)				rbp = cluster_rbuild(vp, filesize,				    NULL, ioblkno, blkno, size, num_ra, flags);			else {				rbp = getblk(vp, ioblkno, size, 0, 0);				rbp->b_flags |= flags;				rbp->b_blkno = blkno;			}		} else {			/* case 2; read ahead single block */			rbp = getblk(vp, ioblkno, size, 0, 0);			rbp->b_flags |= flags;			rbp->b_blkno = blkno;		}		if (rbp == bp)			/* case 4 */			rbp = NULL;		else if (rbp) {			/* case 2, 5 */			trace(TR_BREADMISSRA,			    pack(vp, (num_ra + 1) * size), ioblkno);			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */		}	}	/* XXX Kirk, do we need to make sure the bp has creds? */skip_readahead:	if (bp)		if (bp->b_flags & (B_DONE | B_DELWRI))			panic("cluster_read: DONE bp");		else 			error = VOP_STRATEGY(bp);	if (rbp)		if (error || rbp->b_flags & (B_DONE | B_DELWRI)) {			rbp->b_flags &= ~(B_ASYNC | B_READ);			brelse(rbp);		} else			(void) VOP_STRATEGY(rbp);	/*	 * Recalculate our maximum readahead	 */	if (rbp == NULL)		rbp = bp;	if (rbp)		vp->v_maxra = rbp->b_lblkno + (rbp->b_bufsize / size) - 1;	if (bp)		return(biowait(bp));	return(error);}/* * If blocks are contiguous on disk, use this to provide clustered * read ahead.  We will read as many blocks as possible sequentially * and then parcel them up into logical blocks in the buffer hash table. */struct buf *cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags)	struct vnode *vp;	u_quad_t filesize;	struct buf *bp;	daddr_t lbn;	daddr_t blkno;	long size;	int run;	long flags;{	struct cluster_save *b_save;	struct buf *tbp;	daddr_t bn;	int i, inc;#ifdef DIAGNOSTIC	if (size != vp->v_mount->mnt_stat.f_iosize)		panic("cluster_rbuild: size %d != filesize %d\n",			size, vp->v_mount->mnt_stat.f_iosize);#endif	if (size * (lbn + run + 1) > filesize)		--run;	if (run == 0) {		if (!bp) {			bp = getblk(vp, lbn, size, 0, 0);			bp->b_blkno = blkno;			bp->b_flags |= flags;		}		return(bp);	}	bp = cluster_newbuf(vp, bp, flags, blkno, lbn, size, run + 1);	if (bp->b_flags & (B_DONE | B_DELWRI))		return (bp);	b_save = malloc(sizeof(struct buf *) * run + sizeof(struct cluster_save),	    M_SEGMENT, M_WAITOK);	b_save->bs_bufsize = b_save->bs_bcount = size;	b_save->bs_nchildren = 0;	b_save->bs_children = (struct buf **)(b_save + 1);	b_save->bs_saveaddr = bp->b_saveaddr;	bp->b_saveaddr = (caddr_t) b_save;	inc = btodb(size);	for (bn = blkno + inc, i = 1; i <= run; ++i, bn += inc) {		if (incore(vp, lbn + i)) {			if (i == 1) {				bp->b_saveaddr = b_save->bs_saveaddr;				bp->b_flags &= ~B_CALL;				bp->b_iodone = NULL;				allocbuf(bp, size);				free(b_save, M_SEGMENT);			} else				allocbuf(bp, size * i);			break;		}		tbp = getblk(vp, lbn + i, 0, 0, 0);		/*		 * getblk may return some memory in the buffer if there were		 * no empty buffers to shed it to.  If there is currently		 * memory in the buffer, we move it down size bytes to make		 * room for the valid pages that cluster_callback will insert.		 * We do this now so we don't have to do it at interrupt time		 * in the callback routine.		 */		if (tbp->b_bufsize != 0) {			caddr_t bdata = (char *)tbp->b_data;			if (tbp->b_bufsize + size > MAXBSIZE)				panic("cluster_rbuild: too much memory");			if (tbp->b_bufsize > size) {				/*				 * XXX if the source and destination regions				 * overlap we have to copy backward to avoid				 * clobbering any valid pages (i.e. pagemove				 * implementations typically can't handle				 * overlap).				 */				bdata += tbp->b_bufsize;				while (bdata > (char *)tbp->b_data) {					bdata -= CLBYTES;					pagemove(bdata, bdata + size, CLBYTES);				}			} else 				pagemove(bdata, bdata + size, tbp->b_bufsize);		}		tbp->b_blkno = bn;		tbp->b_flags |= flags | B_READ | B_ASYNC;		++b_save->bs_nchildren;		b_save->bs_children[i - 1] = tbp;	}	return(bp);}/* * Either get a new buffer or grow the existing one. */struct buf *cluster_newbuf(vp, bp, flags, blkno, lblkno, size, run)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -