📄 md.c
字号:
int status; long seekpos; int fd; fd = _mdfd_blind_getseg(rnode, blkno); if (fd < 0) return SM_FAIL;#ifndef LET_OS_MANAGE_FILESIZE seekpos = (long) (BLCKSZ * (blkno % ((BlockNumber) RELSEG_SIZE)));#ifdef DIAGNOSTIC if (seekpos >= BLCKSZ * RELSEG_SIZE) elog(FATAL, "seekpos too big");#endif#else seekpos = (long) (BLCKSZ * (blkno));#endif errno = 0; if (lseek(fd, seekpos, SEEK_SET) != seekpos) { elog(LOG, "lseek(%ld) failed: %m", seekpos); close(fd); return SM_FAIL; } status = SM_SUCCESS; /* write the block */ errno = 0; if (write(fd, buffer, BLCKSZ) != BLCKSZ) { /* if write didn't set errno, assume problem is no disk space */ if (errno == 0) errno = ENOSPC; elog(LOG, "write() failed: %m"); status = SM_FAIL; } if (close(fd) < 0) { elog(LOG, "close() failed: %m"); status = SM_FAIL; } return status;}/* * mdnblocks() -- Get the number of blocks stored in a relation. * * Important side effect: all segments of the relation are opened * and added to the mdfd_chain list. If this routine has not been * called, then only segments up to the last one actually touched * are present in the chain... * * Returns # of blocks, ereport's on error. */BlockNumbermdnblocks(Relation reln){ int fd; MdfdVec *v;#ifndef LET_OS_MANAGE_FILESIZE BlockNumber nblocks; BlockNumber segno;#endif fd = _mdfd_getrelnfd(reln); v = &Md_fdvec[fd];#ifndef LET_OS_MANAGE_FILESIZE segno = 0; /* * Skip through any segments that aren't the last one, to avoid * redundant seeks on them. We have previously verified that these * segments are exactly RELSEG_SIZE long, and it's useless to recheck * that each time. (NOTE: this assumption could only be wrong if * another backend has truncated the relation. We rely on higher code * levels to handle that scenario by closing and re-opening the md * fd.) */ while (v->mdfd_chain != (MdfdVec *) NULL) { segno++; v = v->mdfd_chain; } for (;;) { nblocks = _mdnblocks(v->mdfd_vfd, BLCKSZ); if (nblocks > ((BlockNumber) RELSEG_SIZE)) elog(FATAL, "segment too big"); if (nblocks < ((BlockNumber) RELSEG_SIZE)) return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks; /* * If segment is exactly RELSEG_SIZE, advance to next one. */ segno++; if (v->mdfd_chain == (MdfdVec *) NULL) { /* * Because we pass O_CREAT, we will create the next segment * (with zero length) immediately, if the last segment is of * length REL_SEGSIZE. This is unnecessary but harmless, and * testing for the case would take more cycles than it seems * worth. */ v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT); if (v->mdfd_chain == (MdfdVec *) NULL) elog(ERROR, "could not count blocks of \"%s\": %m", RelationGetRelationName(reln)); } v = v->mdfd_chain; }#else return _mdnblocks(v->mdfd_vfd, BLCKSZ);#endif}/* * mdtruncate() -- Truncate relation to specified number of blocks. * * Returns # of blocks or InvalidBlockNumber on error. */BlockNumbermdtruncate(Relation reln, BlockNumber nblocks){ int fd; MdfdVec *v; BlockNumber curnblk;#ifndef LET_OS_MANAGE_FILESIZE BlockNumber priorblocks;#endif /* * NOTE: mdnblocks makes sure we have opened all existing segments, so * that truncate/delete loop will get them all! */ curnblk = mdnblocks(reln); if (nblocks > curnblk) return InvalidBlockNumber; /* bogus request */ if (nblocks == curnblk) return nblocks; /* no work */ fd = _mdfd_getrelnfd(reln); v = &Md_fdvec[fd];#ifndef LET_OS_MANAGE_FILESIZE priorblocks = 0; while (v != (MdfdVec *) NULL) { MdfdVec *ov = v; if (priorblocks > nblocks) { /* * This segment is no longer wanted at all (and has already * been unlinked from the mdfd_chain). We truncate the file * before deleting it because if other backends are holding * the file open, the unlink will fail on some platforms. * Better a zero-size file gets left around than a big file... */ FileTruncate(v->mdfd_vfd, 0); FileUnlink(v->mdfd_vfd); v = v->mdfd_chain; Assert(ov != &Md_fdvec[fd]); /* we never drop the 1st * segment */ pfree(ov); } else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks) { /* * This is the last segment we want to keep. Truncate the file * to the right length, and clear chain link that points to * any remaining segments (which we shall zap). NOTE: if * nblocks is exactly a multiple K of RELSEG_SIZE, we will * truncate the K+1st segment to 0 length but keep it. This is * mainly so that the right thing happens if nblocks==0. */ BlockNumber lastsegblocks = nblocks - priorblocks; if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0) return InvalidBlockNumber; v = v->mdfd_chain; ov->mdfd_chain = (MdfdVec *) NULL; } else { /* * We still need this segment and 0 or more blocks beyond it, * so nothing to do here. */ v = v->mdfd_chain; } priorblocks += RELSEG_SIZE; }#else if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0) return InvalidBlockNumber;#endif return nblocks;}/* * mdcommit() -- Commit a transaction. * * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. */intmdcommit(void){ /* * We don't actually have to do anything here... */ return SM_SUCCESS;}/* * mdabort() -- Abort a transaction. * * Changes need not be forced to disk at transaction abort. */intmdabort(void){ /* * We don't actually have to do anything here... */ return SM_SUCCESS;}/* * mdsync() -- Sync previous writes to stable storage. */intmdsync(void){ sync(); if (IsUnderPostmaster) sleep(2); sync(); return SM_SUCCESS;}/* * _fdvec_alloc () -- grab a free (or new) md file descriptor vector. */static int_fdvec_alloc(void){ MdfdVec *nvec; int fdvec, i; if (Md_Free >= 0) /* get from free list */ { fdvec = Md_Free; Md_Free = Md_fdvec[fdvec].mdfd_nextFree; Assert(Md_fdvec[fdvec].mdfd_flags == MDFD_FREE); Md_fdvec[fdvec].mdfd_flags = 0; if (fdvec >= CurFd) { Assert(fdvec == CurFd); CurFd++; } return fdvec; } /* Must allocate more room */ if (Nfds != CurFd) elog(FATAL, "_fdvec_alloc error"); Nfds *= 2; nvec = (MdfdVec *) MemoryContextAlloc(MdCxt, Nfds * sizeof(MdfdVec)); MemSet(nvec, 0, Nfds * sizeof(MdfdVec)); memcpy(nvec, (char *) Md_fdvec, CurFd * sizeof(MdfdVec)); pfree(Md_fdvec); Md_fdvec = nvec; /* Set new free list */ for (i = CurFd; i < Nfds; i++) { Md_fdvec[i].mdfd_nextFree = i + 1; Md_fdvec[i].mdfd_flags = MDFD_FREE; } Md_fdvec[Nfds - 1].mdfd_nextFree = -1; Md_Free = CurFd + 1; fdvec = CurFd; CurFd++; Md_fdvec[fdvec].mdfd_flags = 0; return fdvec;}/* * _fdvec_free () -- free md file descriptor vector. * */staticvoid_fdvec_free(int fdvec){ Assert(Md_Free < 0 || Md_fdvec[Md_Free].mdfd_flags == MDFD_FREE); Assert(Md_fdvec[fdvec].mdfd_flags != MDFD_FREE); Md_fdvec[fdvec].mdfd_nextFree = Md_Free; Md_fdvec[fdvec].mdfd_flags = MDFD_FREE; Md_Free = fdvec;}static MdfdVec *_mdfd_openseg(Relation reln, BlockNumber segno, int oflags){ MdfdVec *v; int fd; char *path, *fullpath; /* be sure we have enough space for the '.segno', if any */ path = relpath(reln->rd_node); if (segno > 0) { fullpath = (char *) palloc(strlen(path) + 12); sprintf(fullpath, "%s.%u", path, segno); pfree(path); } else fullpath = path; /* open the file */ fd = FileNameOpenFile(fullpath, O_RDWR | PG_BINARY | oflags, 0600); pfree(fullpath); if (fd < 0) return (MdfdVec *) NULL; /* allocate an mdfdvec entry for it */ v = (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec)); /* fill the entry */ v->mdfd_vfd = fd; v->mdfd_flags = (uint16) 0;#ifndef LET_OS_MANAGE_FILESIZE v->mdfd_chain = (MdfdVec *) NULL;#ifdef DIAGNOSTIC if (_mdnblocks(fd, BLCKSZ) > ((BlockNumber) RELSEG_SIZE)) elog(FATAL, "segment too big");#endif#endif /* all done */ return v;}/* Get the fd for the relation, opening it if it's not already open */static int_mdfd_getrelnfd(Relation reln){ int fd; fd = RelationGetFile(reln); if (fd < 0) { if ((fd = mdopen(reln)) < 0) elog(ERROR, "could not open relation \"%s\": %m", RelationGetRelationName(reln)); reln->rd_fd = fd; } return fd;}/* Find the segment of the relation holding the specified block */static MdfdVec *_mdfd_getseg(Relation reln, BlockNumber blkno){ MdfdVec *v; int fd;#ifndef LET_OS_MANAGE_FILESIZE BlockNumber segno; BlockNumber i;#endif fd = _mdfd_getrelnfd(reln);#ifndef LET_OS_MANAGE_FILESIZE for (v = &Md_fdvec[fd], segno = blkno / ((BlockNumber) RELSEG_SIZE), i = 1; segno > 0; i++, segno--) { if (v->mdfd_chain == (MdfdVec *) NULL) { /* * We will create the next segment only if the target block is * within it. This prevents Sorcerer's Apprentice syndrome if * a bug at higher levels causes us to be handed a * ridiculously large blkno --- otherwise we could create many * thousands of empty segment files before reaching the * "target" block. We should never need to create more than * one new segment per call, so this restriction seems * reasonable. */ v->mdfd_chain = _mdfd_openseg(reln, i, (segno == 1) ? O_CREAT : 0); if (v->mdfd_chain == (MdfdVec *) NULL) elog(ERROR, "could not open segment %u of relation \"%s\" (target block %u): %m", i, RelationGetRelationName(reln), blkno); } v = v->mdfd_chain; }#else v = &Md_fdvec[fd];#endif return v;}/* * Find the segment of the relation holding the specified block. * * This performs the same work as _mdfd_getseg() except that we must work * "blind" with no Relation struct. We assume that we are not likely to * touch the same relation again soon, so we do not create an FD entry for * the relation --- we just open a kernel file descriptor which will be * used and promptly closed. We also assume that the target block already * exists, ie, we need not extend the relation. * * The return value is the kernel descriptor, or -1 on failure. */static int_mdfd_blind_getseg(RelFileNode rnode, BlockNumber blkno){ char *path; int fd;#ifndef LET_OS_MANAGE_FILESIZE BlockNumber segno;#endif path = relpath(rnode);#ifndef LET_OS_MANAGE_FILESIZE /* append the '.segno', if needed */ segno = blkno / ((BlockNumber) RELSEG_SIZE); if (segno > 0) { char *segpath = (char *) palloc(strlen(path) + 12); sprintf(segpath, "%s.%u", path, segno); pfree(path); path = segpath; }#endif /* call fd.c to allow other FDs to be closed if needed */ fd = BasicOpenFile(path, O_RDWR | PG_BINARY, 0600); if (fd < 0) elog(LOG, "could not open \"%s\": %m", path); pfree(path); return fd;}static BlockNumber_mdnblocks(File file, Size blcksz){ long len; len = FileSeek(file, 0L, SEEK_END); if (len < 0) return 0; /* on failure, assume file is empty */ return (BlockNumber) (len / blcksz);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -