📄 md.c

📁 postgresql8.3.4源码,开源数据库
💻 C
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
 * * If there is a local pending-ops table, just make an entry in it for * mdsync to process later.  Otherwise, try to pass off the fsync request * to the background writer process.  If that fails, just do the fsync * locally before returning (we expect this will not happen often enough * to be a performance problem). */static voidregister_dirty_segment(SMgrRelation reln, MdfdVec *seg){	if (pendingOpsTable)	{		/* push it into local pending-ops table */		RememberFsyncRequest(reln->smgr_rnode, seg->mdfd_segno);	}	else	{		if (ForwardFsyncRequest(reln->smgr_rnode, seg->mdfd_segno))			return;				/* passed it off successfully */		if (FileSync(seg->mdfd_vfd) < 0)			ereport(ERROR,					(errcode_for_file_access(),				errmsg("could not fsync segment %u of relation %u/%u/%u: %m",					   seg->mdfd_segno,					   reln->smgr_rnode.spcNode,					   reln->smgr_rnode.dbNode,					   reln->smgr_rnode.relNode)));	}}/* * register_unlink() -- Schedule a file to be deleted after next checkpoint * * As with register_dirty_segment, this could involve either a local or * a remote pending-ops table. */static voidregister_unlink(RelFileNode rnode){	if (pendingOpsTable)	{		/* push it into local pending-ops table */		RememberFsyncRequest(rnode, UNLINK_RELATION_REQUEST);	}	else	{		/*		 * Notify the bgwriter about it.  If we fail to queue the request		 * message, we have to sleep and try again, because we can't simply		 * delete the file now.  Ugly, but hopefully won't happen often.		 *		 * XXX should we just leave the file orphaned instead?		 */		Assert(IsUnderPostmaster);		while (!ForwardFsyncRequest(rnode, UNLINK_RELATION_REQUEST))			pg_usleep(10000L);	/* 10 msec seems a good number */	}}/* * RememberFsyncRequest() -- callback from bgwriter side of fsync request * * We stuff most fsync requests into the local hash table for execution * during the bgwriter's next checkpoint.  UNLINK requests go into a * separate linked list, however, because they get processed separately. * * The range of possible segment numbers is way less than the range of * BlockNumber, so we can reserve high values of segno for special purposes. * We define three: * - FORGET_RELATION_FSYNC means to cancel pending fsyncs for a relation * - FORGET_DATABASE_FSYNC means to cancel pending fsyncs for a whole database * - UNLINK_RELATION_REQUEST is a request to delete the file after the next *	 checkpoint. * * (Handling the FORGET_* requests is a tad slow because the hash table has * to be searched linearly, but it doesn't seem worth rethinking the table * structure for them.) */voidRememberFsyncRequest(RelFileNode rnode, BlockNumber segno){	Assert(pendingOpsTable);	if (segno == FORGET_RELATION_FSYNC)	{		/* Remove any pending requests for the entire relation */		HASH_SEQ_STATUS hstat;		PendingOperationEntry *entry;		hash_seq_init(&hstat, pendingOpsTable);		while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)		{			if (RelFileNodeEquals(entry->tag.rnode, rnode))			{				/* Okay, cancel this entry */				entry->canceled = true;			}		}	}	else if (segno == FORGET_DATABASE_FSYNC)	{		/* Remove any pending requests for the entire database */		HASH_SEQ_STATUS hstat;		PendingOperationEntry *entry;		ListCell   *cell, 				   *prev,				   *next;		/* Remove fsync requests */		hash_seq_init(&hstat, pendingOpsTable);		while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)		{			if (entry->tag.rnode.dbNode == rnode.dbNode)			{				/* Okay, cancel this entry */				entry->canceled = true;			}		}			/* Remove unlink requests */		prev = NULL;		for (cell = list_head(pendingUnlinks); cell; cell = next)		{			PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(cell);			next = lnext(cell);			if (entry->rnode.dbNode == rnode.dbNode) 			{				pendingUnlinks = list_delete_cell(pendingUnlinks, cell, prev);				pfree(entry);			}			else				prev = cell;		}	}	else if (segno == UNLINK_RELATION_REQUEST)	{		/* Unlink request: put it in the linked list */		MemoryContext oldcxt = MemoryContextSwitchTo(MdCxt);		PendingUnlinkEntry *entry;		entry = palloc(sizeof(PendingUnlinkEntry));		entry->rnode = rnode;		entry->cycle_ctr = mdckpt_cycle_ctr;		pendingUnlinks = lappend(pendingUnlinks, entry);		MemoryContextSwitchTo(oldcxt);	}	else	{		/* Normal case: enter a request to fsync this segment */		PendingOperationTag key;		PendingOperationEntry *entry;		bool		found;		/* ensure any pad bytes in the hash key are zeroed */		MemSet(&key, 0, sizeof(key));		key.rnode = rnode;		key.segno = segno;		entry = (PendingOperationEntry *) hash_search(pendingOpsTable,													  &key,													  HASH_ENTER,													  &found);		/* if new or previously canceled entry, initialize it */		if (!found || entry->canceled)		{			entry->canceled = false;			entry->cycle_ctr = mdsync_cycle_ctr;		}		/*		 * NB: it's intentional that we don't change cycle_ctr if the entry		 * already exists.	The fsync request must be treated as old, even		 * though the new request will be satisfied too by any subsequent		 * fsync.		 *		 * However, if the entry is present but is marked canceled, we should		 * act just as though it wasn't there.  The only case where this could		 * happen would be if a file had been deleted, we received but did not		 * yet act on the cancel request, and the same relfilenode was then		 * assigned to a new file.	We mustn't lose the new request, but it		 * should be considered new not old.		 */	}}/* * ForgetRelationFsyncRequests -- forget any fsyncs for a rel */voidForgetRelationFsyncRequests(RelFileNode rnode){	if (pendingOpsTable)	{		/* standalone backend or startup process: fsync state is local */		RememberFsyncRequest(rnode, FORGET_RELATION_FSYNC);	}	else if (IsUnderPostmaster)	{		/*		 * Notify the bgwriter about it.  If we fail to queue the revoke		 * message, we have to sleep and try again ... ugly, but hopefully		 * won't happen often.		 *		 * XXX should we CHECK_FOR_INTERRUPTS in this loop?  Escaping with an		 * error would leave the no-longer-used file still present on disk,		 * which would be bad, so I'm inclined to assume that the bgwriter		 * will always empty the queue soon.		 */		while (!ForwardFsyncRequest(rnode, FORGET_RELATION_FSYNC))			pg_usleep(10000L);	/* 10 msec seems a good number */		/*		 * Note we don't wait for the bgwriter to actually absorb the revoke		 * message; see mdsync() for the implications.		 */	}}/* * ForgetDatabaseFsyncRequests -- forget any fsyncs and unlinks for a DB */voidForgetDatabaseFsyncRequests(Oid dbid){	RelFileNode rnode;	rnode.dbNode = dbid;	rnode.spcNode = 0;	rnode.relNode = 0;	if (pendingOpsTable)	{		/* standalone backend or startup process: fsync state is local */		RememberFsyncRequest(rnode, FORGET_DATABASE_FSYNC);	}	else if (IsUnderPostmaster)	{		/* see notes in ForgetRelationFsyncRequests */		while (!ForwardFsyncRequest(rnode, FORGET_DATABASE_FSYNC))			pg_usleep(10000L);	/* 10 msec seems a good number */	}}/* *	_fdvec_alloc() -- Make a MdfdVec object. */static MdfdVec *_fdvec_alloc(void){	return (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec));}#ifndef LET_OS_MANAGE_FILESIZE/* * Open the specified segment of the relation, * and make a MdfdVec object for it.  Returns NULL on failure. */static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags){	MdfdVec    *v;	int			fd;	char	   *path,			   *fullpath;	path = relpath(reln->smgr_rnode);	if (segno > 0)	{		/* be sure we have enough space for the '.segno' */		fullpath = (char *) palloc(strlen(path) + 12);		sprintf(fullpath, "%s.%u", path, segno);		pfree(path);	}	else		fullpath = path;	/* open the file */	fd = PathNameOpenFile(fullpath, O_RDWR | PG_BINARY | oflags, 0600);	pfree(fullpath);	if (fd < 0)		return NULL;	/* allocate an mdfdvec entry for it */	v = _fdvec_alloc();	/* fill the entry */	v->mdfd_vfd = fd;	v->mdfd_segno = segno;	v->mdfd_chain = NULL;	Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE));	/* all done */	return v;}#endif   /* LET_OS_MANAGE_FILESIZE *//* *	_mdfd_getseg() -- Find the segment of the relation holding the *		specified block. * * If the segment doesn't exist, we ereport, return NULL, or create the * segment, according to "behavior".  Note: isTemp need only be correct * in the EXTENSION_CREATE case. */static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,			 ExtensionBehavior behavior){	MdfdVec    *v = mdopen(reln, behavior);#ifndef LET_OS_MANAGE_FILESIZE	BlockNumber targetseg;	BlockNumber nextsegno;	if (!v)		return NULL;			/* only possible if EXTENSION_RETURN_NULL */	targetseg = blkno / ((BlockNumber) RELSEG_SIZE);	for (nextsegno = 1; nextsegno <= targetseg; nextsegno++)	{		Assert(nextsegno == v->mdfd_segno + 1);		if (v->mdfd_chain == NULL)		{			/*			 * Normally we will create new segments only if authorized by the			 * caller (i.e., we are doing mdextend()).	But when doing WAL			 * recovery, create segments anyway; this allows cases such as			 * replaying WAL data that has a write into a high-numbered			 * segment of a relation that was later deleted.  We want to go			 * ahead and create the segments so we can finish out the replay.			 *			 * We have to maintain the invariant that segments before the last			 * active segment are of size RELSEG_SIZE; therefore, pad them out			 * with zeroes if needed.  (This only matters if caller is			 * extending the relation discontiguously, but that can happen in			 * hash indexes.)			 */			if (behavior == EXTENSION_CREATE || InRecovery)			{				if (_mdnblocks(reln, v) < RELSEG_SIZE)				{					char	   *zerobuf = palloc0(BLCKSZ);					mdextend(reln, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,							 zerobuf, isTemp);					pfree(zerobuf);				}				v->mdfd_chain = _mdfd_openseg(reln, nextsegno, O_CREAT);			}			else			{				/* We won't create segment if not existent */				v->mdfd_chain = _mdfd_openseg(reln, nextsegno, 0);			}			if (v->mdfd_chain == NULL)			{				if (behavior == EXTENSION_RETURN_NULL &&					FILE_POSSIBLY_DELETED(errno))					return NULL;				ereport(ERROR,						(errcode_for_file_access(),						 errmsg("could not open segment %u of relation %u/%u/%u (target block %u): %m",								nextsegno,								reln->smgr_rnode.spcNode,								reln->smgr_rnode.dbNode,								reln->smgr_rnode.relNode,								blkno)));			}		}		v = v->mdfd_chain;	}#endif	return v;}/* * Get number of blocks present in a single disk file */static BlockNumber_mdnblocks(SMgrRelation reln, MdfdVec *seg){	long		len;	len = FileSeek(seg->mdfd_vfd, 0L, SEEK_END);	if (len < 0)		ereport(ERROR,				(errcode_for_file_access(),		errmsg("could not seek to end of segment %u of relation %u/%u/%u: %m",			   seg->mdfd_segno,			   reln->smgr_rnode.spcNode,			   reln->smgr_rnode.dbNode,			   reln->smgr_rnode.relNode)));	/* note that this calculation will ignore any partial block at EOF */	return (BlockNumber) (len / BLCKSZ);}
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -