⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 md.c

📁 PostgreSQL7.4.6 for Linux
💻 C
📖 第 1 页 / 共 2 页
字号:
/*------------------------------------------------------------------------- * * md.c *	  This code manages relations that reside on magnetic disk. * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION *	  $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.98 2003/08/04 02:40:04 momjian Exp $ * *------------------------------------------------------------------------- */#include "postgres.h"#include <errno.h>#include <unistd.h>#include <fcntl.h>#include <sys/file.h>#include "catalog/catalog.h"#include "miscadmin.h"#include "storage/smgr.h"#include "utils/inval.h"#include "utils/memutils.h"#undef DIAGNOSTIC/* *	The magnetic disk storage manager keeps track of open file descriptors *	in its own descriptor pool.  This happens for two reasons.	First, at *	transaction boundaries, we walk the list of descriptors and flush *	anything that we've dirtied in the current transaction.  Second, we want *	to support relations larger than the OS' file size limit (often 2GBytes). *	In order to do that, we break relations up into chunks of < 2GBytes *	and store one chunk in each of several files that represent the relation. *	See the BLCKSZ and RELSEG_SIZE configuration constants in include/pg_config.h. * *	The file descriptor stored in the relation cache (see RelationGetFile()) *	is actually an index into the Md_fdvec array.  -1 indicates not open. * *	When a relation is broken into multiple chunks, only the first chunk *	has its own entry in the Md_fdvec array; the remaining chunks have *	palloc'd MdfdVec objects that are chained onto the first chunk via the *	mdfd_chain links.  All chunks except the last MUST have size exactly *	equal to RELSEG_SIZE blocks --- see mdnblocks() and mdtruncate(). */typedef struct _MdfdVec{	int			mdfd_vfd;		/* fd number in vfd pool */	int			mdfd_flags;		/* fd status flags *//* these are the assigned bits in mdfd_flags: */#define MDFD_FREE	(1 << 0)	/* unused entry */	int			mdfd_nextFree;	/* link to next freelist member, if free */#ifndef LET_OS_MANAGE_FILESIZE	struct _MdfdVec *mdfd_chain;	/* for large relations */#endif} MdfdVec;static int	Nfds = 100;			/* initial/current size of Md_fdvec array */static MdfdVec *Md_fdvec = (MdfdVec *) NULL;static int	Md_Free = -1;		/* head of freelist of unused fdvec								 * entries */static int	CurFd = 0;			/* first never-used fdvec index */static MemoryContext MdCxt;		/* context for all my allocations *//* routines declared here */static void mdclose_fd(int fd);static int	_mdfd_getrelnfd(Relation reln);static MdfdVec *_mdfd_openseg(Relation reln, BlockNumber segno, int oflags);static MdfdVec *_mdfd_getseg(Relation reln, BlockNumber blkno);static int	_mdfd_blind_getseg(RelFileNode rnode, BlockNumber blkno);static int	_fdvec_alloc(void);static void _fdvec_free(int);static BlockNumber _mdnblocks(File file, Size blcksz);/* *	mdinit() -- Initialize private state for magnetic disk storage manager. * *		We keep a private table of all file descriptors.  Whenever we do *		a write to one, we mark it dirty in our table.	Whenever we force *		changes to disk, we mark the file descriptor clean.  At transaction *		commit, we force changes to disk for all dirty file descriptors. *		This routine allocates and initializes the table. * *		Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. */intmdinit(void){	int			i;	MdCxt = AllocSetContextCreate(TopMemoryContext,								  "MdSmgr",								  ALLOCSET_DEFAULT_MINSIZE,								  ALLOCSET_DEFAULT_INITSIZE,								  ALLOCSET_DEFAULT_MAXSIZE);	Md_fdvec = (MdfdVec *) MemoryContextAlloc(MdCxt, Nfds * sizeof(MdfdVec));	MemSet(Md_fdvec, 0, Nfds * sizeof(MdfdVec));	/* Set free list */	for (i = 0; i < Nfds; i++)	{		Md_fdvec[i].mdfd_nextFree = i + 1;		Md_fdvec[i].mdfd_flags = MDFD_FREE;	}	Md_Free = 0;	Md_fdvec[Nfds - 1].mdfd_nextFree = -1;	return SM_SUCCESS;}intmdcreate(Relation reln){	char	   *path;	int			fd,				vfd;	Assert(reln->rd_fd < 0);	path = relpath(reln->rd_node);	fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);	if (fd < 0)	{		int			save_errno = errno;		/*		 * During bootstrap, there are cases where a system relation will		 * be accessed (by internal backend processes) before the		 * bootstrap script nominally creates it.  Therefore, allow the		 * file to exist already, but in bootstrap mode only.  (See also		 * mdopen)		 */		if (IsBootstrapProcessingMode())			fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);		if (fd < 0)		{			pfree(path);			/* be sure to return the error reported by create, not open */			errno = save_errno;			return -1;		}		errno = 0;	}	pfree(path);	vfd = _fdvec_alloc();	if (vfd < 0)		return -1;	Md_fdvec[vfd].mdfd_vfd = fd;	Md_fdvec[vfd].mdfd_flags = (uint16) 0;#ifndef LET_OS_MANAGE_FILESIZE	Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL;#endif	return vfd;}/* *	mdunlink() -- Unlink a relation. */intmdunlink(RelFileNode rnode){	int			status = SM_SUCCESS;	int			save_errno = 0;	char	   *path;	path = relpath(rnode);	/* Delete the first segment, or only segment if not doing segmenting */	if (unlink(path) < 0)	{		status = SM_FAIL;		save_errno = errno;	}#ifndef LET_OS_MANAGE_FILESIZE	/* Get the additional segments, if any */	if (status == SM_SUCCESS)	{		char	   *segpath = (char *) palloc(strlen(path) + 12);		BlockNumber segno;		for (segno = 1;; segno++)		{			sprintf(segpath, "%s.%u", path, segno);			if (unlink(segpath) < 0)			{				/* ENOENT is expected after the last segment... */				if (errno != ENOENT)				{					status = SM_FAIL;					save_errno = errno;				}				break;			}		}		pfree(segpath);	}#endif	pfree(path);	errno = save_errno;	return status;}/* *	mdextend() -- Add a block to the specified relation. * *		The semantics are basically the same as mdwrite(): write at the *		specified position.  However, we are expecting to extend the *		relation (ie, blocknum is the current EOF), and so in case of *		failure we clean up by truncating. * *		This routine returns SM_FAIL or SM_SUCCESS, with errno set as *		appropriate. * * Note: this routine used to call mdnblocks() to get the block position * to write at, but that's pretty silly since the caller needs to know where * the block will be written, and accordingly must have done mdnblocks() * already.  Might as well pass in the position and save a seek. */intmdextend(Relation reln, BlockNumber blocknum, char *buffer){	long		seekpos;	int			nbytes;	MdfdVec    *v;	v = _mdfd_getseg(reln, blocknum);#ifndef LET_OS_MANAGE_FILESIZE	seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));#ifdef DIAGNOSTIC	if (seekpos >= BLCKSZ * RELSEG_SIZE)		elog(FATAL, "seekpos too big");#endif#else	seekpos = (long) (BLCKSZ * (blocknum));#endif	/*	 * Note: because caller obtained blocknum by calling mdnblocks, which	 * did a seek(SEEK_END), this seek is often redundant and will be	 * optimized away by fd.c.	It's not redundant, however, if there is a	 * partial page at the end of the file.  In that case we want to try	 * to overwrite the partial page with a full page.	It's also not	 * redundant if bufmgr.c had to dump another buffer of the same file	 * to make room for the new page's buffer.	 */	if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)		return SM_FAIL;	if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)	{		if (nbytes > 0)		{			int			save_errno = errno;			/* Remove the partially-written page */			FileTruncate(v->mdfd_vfd, seekpos);			FileSeek(v->mdfd_vfd, seekpos, SEEK_SET);			errno = save_errno;		}		return SM_FAIL;	}#ifndef LET_OS_MANAGE_FILESIZE#ifdef DIAGNOSTIC	if (_mdnblocks(v->mdfd_vfd, BLCKSZ) > ((BlockNumber) RELSEG_SIZE))		elog(FATAL, "segment too big");#endif#endif	return SM_SUCCESS;}/* *	mdopen() -- Open the specified relation. */intmdopen(Relation reln){	char	   *path;	int			fd;	int			vfd;	Assert(reln->rd_fd < 0);	path = relpath(reln->rd_node);	fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);	if (fd < 0)	{		/*		 * During bootstrap, there are cases where a system relation will		 * be accessed (by internal backend processes) before the		 * bootstrap script nominally creates it.  Therefore, accept		 * mdopen() as a substitute for mdcreate() in bootstrap mode only.		 * (See mdcreate)		 */		if (IsBootstrapProcessingMode())			fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);		if (fd < 0)		{			pfree(path);			return -1;		}	}	pfree(path);	vfd = _fdvec_alloc();	if (vfd < 0)		return -1;	Md_fdvec[vfd].mdfd_vfd = fd;	Md_fdvec[vfd].mdfd_flags = (uint16) 0;#ifndef LET_OS_MANAGE_FILESIZE	Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL;#ifdef DIAGNOSTIC	if (_mdnblocks(fd, BLCKSZ) > ((BlockNumber) RELSEG_SIZE))		elog(FATAL, "segment too big");#endif#endif	return vfd;}/* *	mdclose() -- Close the specified relation, if it isn't closed already. * *		AND FREE fd vector! It may be re-used for other relation! *		reln should be flushed from cache after closing !.. * *		Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. */intmdclose(Relation reln){	int			fd;	fd = RelationGetFile(reln);	if (fd < 0)		return SM_SUCCESS;		/* already closed, so no work */	mdclose_fd(fd);	reln->rd_fd = -1;	return SM_SUCCESS;}static voidmdclose_fd(int fd){	MdfdVec    *v;#ifndef LET_OS_MANAGE_FILESIZE	for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL;)	{		MdfdVec    *ov = v;		/* if not closed already */		if (v->mdfd_vfd >= 0)			FileClose(v->mdfd_vfd);		/* Now free vector */		v = v->mdfd_chain;		if (ov != &Md_fdvec[fd])			pfree(ov);	}	Md_fdvec[fd].mdfd_chain = (MdfdVec *) NULL;#else	v = &Md_fdvec[fd];	if (v != (MdfdVec *) NULL)	{		if (v->mdfd_vfd >= 0)			FileClose(v->mdfd_vfd);	}#endif	_fdvec_free(fd);}/* *	mdread() -- Read the specified block from a relation. * *		Returns SM_SUCCESS or SM_FAIL. */intmdread(Relation reln, BlockNumber blocknum, char *buffer){	int			status;	long		seekpos;	int			nbytes;	MdfdVec    *v;	v = _mdfd_getseg(reln, blocknum);#ifndef LET_OS_MANAGE_FILESIZE	seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));#ifdef DIAGNOSTIC	if (seekpos >= BLCKSZ * RELSEG_SIZE)		elog(FATAL, "seekpos too big");#endif#else	seekpos = (long) (BLCKSZ * (blocknum));#endif	if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)		return SM_FAIL;	status = SM_SUCCESS;	if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)	{		/*		 * If we are at or past EOF, return zeroes without complaining.		 * Also substitute zeroes if we found a partial block at EOF.		 *		 * XXX this is really ugly, bad design.  However the current		 * implementation of hash indexes requires it, because hash index		 * pages are initialized out-of-order.		 */		if (nbytes == 0 ||			(nbytes > 0 && mdnblocks(reln) == blocknum))			MemSet(buffer, 0, BLCKSZ);		else			status = SM_FAIL;	}	return status;}/* *	mdwrite() -- Write the supplied block at the appropriate location. * *		Returns SM_SUCCESS or SM_FAIL. */intmdwrite(Relation reln, BlockNumber blocknum, char *buffer){	long		seekpos;	MdfdVec    *v;	v = _mdfd_getseg(reln, blocknum);#ifndef LET_OS_MANAGE_FILESIZE	seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));#ifdef DIAGNOSTIC	if (seekpos >= BLCKSZ * RELSEG_SIZE)		elog(FATAL, "seekpos too big");#endif#else	seekpos = (long) (BLCKSZ * (blocknum));#endif	if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)		return SM_FAIL;	if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)		return SM_FAIL;	return SM_SUCCESS;}/* *	mdblindwrt() -- Write a block to disk blind. * *		We have to be able to do this using only the rnode of the relation *		in which the block belongs.  Otherwise this is much like mdwrite(). */intmdblindwrt(RelFileNode rnode,		   BlockNumber blkno,		   char *buffer){

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -