md.c

来自「PostgreSQL 8.1.4的源码适用于Linux下的开源数据库系统」· C语言代码 · 共 977 行 · 第 1/2 页
977 行
/*------------------------------------------------------------------------- * * md.c *	  This code manages relations that reside on magnetic disk. * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION *	  $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.118 2005/10/15 02:49:26 momjian Exp $ * *------------------------------------------------------------------------- */#include "postgres.h"#include <errno.h>#include <unistd.h>#include <fcntl.h>#include <sys/file.h>#include "catalog/catalog.h"#include "miscadmin.h"#include "postmaster/bgwriter.h"#include "storage/fd.h"#include "storage/smgr.h"#include "utils/hsearch.h"#include "utils/memutils.h"/* *	The magnetic disk storage manager keeps track of open file *	descriptors in its own descriptor pool.  This is done to make it *	easier to support relations that are larger than the operating *	system's file size limit (often 2GBytes).  In order to do that, *	we break relations up into chunks of < 2GBytes and store one chunk *	in each of several files that represent the relation.  See the *	BLCKSZ and RELSEG_SIZE configuration constants in pg_config_manual.h. *	All chunks except the last MUST have size exactly equal to RELSEG_SIZE *	blocks --- see mdnblocks() and mdtruncate(). * *	The file descriptor pointer (md_fd field) stored in the SMgrRelation *	cache is, therefore, just the head of a list of MdfdVec objects. *	But note the md_fd pointer can be NULL, indicating relation not open. * *	Note that mdfd_chain == NULL does not necessarily mean the relation *	doesn't have another segment after this one; we may just not have *	opened the next segment yet.  (We could not have "all segments are *	in the chain" as an invariant anyway, since another backend could *	extend the relation when we weren't looking.) * *	All MdfdVec objects are palloc'd in the MdCxt memory context. 
*/typedef struct _MdfdVec{	File		mdfd_vfd;		/* fd number in fd.c's pool */	BlockNumber mdfd_segno;		/* segment number, from 0 */#ifndef LET_OS_MANAGE_FILESIZE	/* for large relations */	struct _MdfdVec *mdfd_chain;	/* next segment, or NULL */#endif} MdfdVec;static MemoryContext MdCxt;		/* context for all md.c allocations *//* * In some contexts (currently, standalone backends and the bgwriter process) * we keep track of pending fsync operations: we need to remember all relation * segments that have been written since the last checkpoint, so that we can * fsync them down to disk before completing the next checkpoint.  This hash * table remembers the pending operations.	We use a hash table not because * we want to look up individual operations, but simply as a convenient way * of eliminating duplicate requests. * * (Regular backends do not track pending operations locally, but forward * them to the bgwriter.) * * XXX for WIN32, may want to expand this to track pending deletes, too. */typedef struct{	RelFileNode rnode;			/* the targeted relation */	BlockNumber segno;			/* which segment */} PendingOperationEntry;static HTAB *pendingOpsTable = NULL;/* local routines */static MdfdVec *mdopen(SMgrRelation reln, bool allowNotFound);static bool register_dirty_segment(SMgrRelation reln, MdfdVec *seg);static MdfdVec *_fdvec_alloc(void);#ifndef LET_OS_MANAGE_FILESIZEstatic MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno,			  int oflags);#endifstatic MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno,			 bool allowNotFound);static BlockNumber _mdnblocks(File file, Size blcksz);/* *	mdinit() -- Initialize private state for magnetic disk storage manager. */boolmdinit(void){	MdCxt = AllocSetContextCreate(TopMemoryContext,								  "MdSmgr",								  ALLOCSET_DEFAULT_MINSIZE,								  ALLOCSET_DEFAULT_INITSIZE,								  ALLOCSET_DEFAULT_MAXSIZE);	/*	 * Create pending-operations hashtable if we need it.  
Currently, we need	 * it if we are standalone (not under a postmaster) OR if we are a	 * bootstrap-mode subprocess of a postmaster (that is, a startup or	 * bgwriter process).	 */	if (!IsUnderPostmaster || IsBootstrapProcessingMode())	{		HASHCTL		hash_ctl;		MemSet(&hash_ctl, 0, sizeof(hash_ctl));		hash_ctl.keysize = sizeof(PendingOperationEntry);		hash_ctl.entrysize = sizeof(PendingOperationEntry);		hash_ctl.hash = tag_hash;		hash_ctl.hcxt = MdCxt;		pendingOpsTable = hash_create("Pending Ops Table",									  100L,									  &hash_ctl,								   HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);	}	return true;}/* *	mdcreate() -- Create a new relation on magnetic disk. * * If isRedo is true, it's okay for the relation to exist already. */boolmdcreate(SMgrRelation reln, bool isRedo){	char	   *path;	File		fd;	if (isRedo && reln->md_fd != NULL)		return true;			/* created and opened already... */	Assert(reln->md_fd == NULL);	path = relpath(reln->smgr_rnode);	fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);	if (fd < 0)	{		int			save_errno = errno;		/*		 * During bootstrap, there are cases where a system relation will be		 * accessed (by internal backend processes) before the bootstrap		 * script nominally creates it.  Therefore, allow the file to exist		 * already, even if isRedo is not set.	(See also mdopen)		 */		if (isRedo || IsBootstrapProcessingMode())			fd = PathNameOpenFile(path, O_RDWR | PG_BINARY, 0600);		if (fd < 0)		{			pfree(path);			/* be sure to return the error reported by create, not open */			errno = save_errno;			return false;		}		errno = 0;	}	pfree(path);	reln->md_fd = _fdvec_alloc();	reln->md_fd->mdfd_vfd = fd;	reln->md_fd->mdfd_segno = 0;#ifndef LET_OS_MANAGE_FILESIZE	reln->md_fd->mdfd_chain = NULL;#endif	return true;}/* *	mdunlink() -- Unlink a relation. * * Note that we're passed a RelFileNode --- by the time this is called, * there won't be an SMgrRelation hashtable entry anymore. 
* * If isRedo is true, it's okay for the relation to be already gone. */boolmdunlink(RelFileNode rnode, bool isRedo){	bool		status = true;	int			save_errno = 0;	char	   *path;	path = relpath(rnode);	/* Delete the first segment, or only segment if not doing segmenting */	if (unlink(path) < 0)	{		if (!isRedo || errno != ENOENT)		{			status = false;			save_errno = errno;		}	}#ifndef LET_OS_MANAGE_FILESIZE	/* Get the additional segments, if any */	if (status)	{		char	   *segpath = (char *) palloc(strlen(path) + 12);		BlockNumber segno;		for (segno = 1;; segno++)		{			sprintf(segpath, "%s.%u", path, segno);			if (unlink(segpath) < 0)			{				/* ENOENT is expected after the last segment... */				if (errno != ENOENT)				{					status = false;					save_errno = errno;				}				break;			}		}		pfree(segpath);	}#endif	pfree(path);	errno = save_errno;	return status;}/* *	mdextend() -- Add a block to the specified relation. * *		The semantics are basically the same as mdwrite(): write at the *		specified position.  However, we are expecting to extend the *		relation (ie, blocknum is the current EOF), and so in case of *		failure we clean up by truncating. * *		This routine returns true or false, with errno set as appropriate. * * Note: this routine used to call mdnblocks() to get the block position * to write at, but that's pretty silly since the caller needs to know where * the block will be written, and accordingly must have done mdnblocks() * already.  Might as well pass in the position and save a seek. 
*/boolmdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp){	long		seekpos;	int			nbytes;	MdfdVec    *v;	v = _mdfd_getseg(reln, blocknum, false);#ifndef LET_OS_MANAGE_FILESIZE	seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));	Assert(seekpos < BLCKSZ * RELSEG_SIZE);#else	seekpos = (long) (BLCKSZ * (blocknum));#endif	/*	 * Note: because caller obtained blocknum by calling _mdnblocks, which did	 * a seek(SEEK_END), this seek is often redundant and will be optimized	 * away by fd.c.  It's not redundant, however, if there is a partial page	 * at the end of the file.	In that case we want to try to overwrite the	 * partial page with a full page.  It's also not redundant if bufmgr.c had	 * to dump another buffer of the same file to make room for the new page's	 * buffer.	 */	if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)		return false;	if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)	{		if (nbytes > 0)		{			int			save_errno = errno;			/* Remove the partially-written page */			FileTruncate(v->mdfd_vfd, seekpos);			FileSeek(v->mdfd_vfd, seekpos, SEEK_SET);			errno = save_errno;		}		return false;	}	if (!isTemp)	{		if (!register_dirty_segment(reln, v))			return false;	}#ifndef LET_OS_MANAGE_FILESIZE	Assert(_mdnblocks(v->mdfd_vfd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));#endif	return true;}/* *	mdopen() -- Open the specified relation.  ereport's on failure. *		(Optionally, can return NULL instead of ereport for ENOENT.) * * Note we only open the first segment, when there are multiple segments. 
*/static MdfdVec *mdopen(SMgrRelation reln, bool allowNotFound){	MdfdVec    *mdfd;	char	   *path;	File		fd;	/* No work if already open */	if (reln->md_fd)		return reln->md_fd;	path = relpath(reln->smgr_rnode);	fd = PathNameOpenFile(path, O_RDWR | PG_BINARY, 0600);	if (fd < 0)	{		/*		 * During bootstrap, there are cases where a system relation will be		 * accessed (by internal backend processes) before the bootstrap		 * script nominally creates it.  Therefore, accept mdopen() as a		 * substitute for mdcreate() in bootstrap mode only. (See mdcreate)		 */		if (IsBootstrapProcessingMode())			fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);		if (fd < 0)		{			pfree(path);			if (allowNotFound && errno == ENOENT)				return NULL;			ereport(ERROR,					(errcode_for_file_access(),					 errmsg("could not open relation %u/%u/%u: %m",							reln->smgr_rnode.spcNode,							reln->smgr_rnode.dbNode,							reln->smgr_rnode.relNode)));		}	}	pfree(path);	reln->md_fd = mdfd = _fdvec_alloc();	mdfd->mdfd_vfd = fd;	mdfd->mdfd_segno = 0;#ifndef LET_OS_MANAGE_FILESIZE	mdfd->mdfd_chain = NULL;	Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));#endif	return mdfd;}/* *	mdclose() -- Close the specified relation, if it isn't closed already. * *		Returns true or false with errno set as appropriate. */boolmdclose(SMgrRelation reln){	MdfdVec    *v = reln->md_fd;	/* No work if already closed */	if (v == NULL)		return true;	reln->md_fd = NULL;			/* prevent dangling pointer after error */#ifndef LET_OS_MANAGE_FILESIZE	while (v != NULL)	{		MdfdVec    *ov = v;		/* if not closed already */		if (v->mdfd_vfd >= 0)			FileClose(v->mdfd_vfd);		/* Now free vector */		v = v->mdfd_chain;		pfree(ov);	}#else	if (v->mdfd_vfd >= 0)		FileClose(v->mdfd_vfd);	pfree(v);#endif	return true;}/* *	mdread() -- Read the specified block from a relation. 
*/boolmdread(SMgrRelation reln, BlockNumber blocknum, char *buffer){	bool		status;	long		seekpos;	int			nbytes;	MdfdVec    *v;	v = _mdfd_getseg(reln, blocknum, false);#ifndef LET_OS_MANAGE_FILESIZE	seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));	Assert(seekpos < BLCKSZ * RELSEG_SIZE);#else	seekpos = (long) (BLCKSZ * (blocknum));#endif	if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)		return false;	status = true;	if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)	{		/*		 * If we are at or past EOF, return zeroes without complaining. Also		 * substitute zeroes if we found a partial block at EOF.		 *		 * XXX this is really ugly, bad design.  However the current		 * implementation of hash indexes requires it, because hash index		 * pages are initialized out-of-order.		 */		if (nbytes == 0 ||			(nbytes > 0 && mdnblocks(reln) == blocknum))			MemSet(buffer, 0, BLCKSZ);		else			status = false;	}	return status;}/* *	mdwrite() -- Write the supplied block at the appropriate location. */boolmdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp){	long		seekpos;	MdfdVec    *v;	v = _mdfd_getseg(reln, blocknum, false);#ifndef LET_OS_MANAGE_FILESIZE	seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));	Assert(seekpos < BLCKSZ * RELSEG_SIZE);#else	seekpos = (long) (BLCKSZ * (blocknum));#endif	if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)		return false;	if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)		return false;	if (!isTemp)	{		if (!register_dirty_segment(reln, v))
md.c - 源码说明

本页面展示了「PostgreSQL 8.1.4的源码适用于Linux下的开源数据库系统」中的 md.c 源码文件，采用 C语言编程语言编写，共 977 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与PostgreSQL相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?