📄 md.c
字号:
int status; char *path;#ifndef LET_OS_MANAGE_FILESIZE int nchars; /* be sure we have enough space for the '.segno', if any */ segno = blkno / RELSEG_SIZE; if (segno > 0) nchars = 10; else nchars = 0; /* construct the path to the file and open it */ /* system table? then put in system area... */ if (dbid == (Oid) 0) { path = (char *) palloc(strlen(DataDir) + sizeof(NameData) + 2 + nchars); if (segno == 0) sprintf(path, "%s/%s", DataDir, relstr); else sprintf(path, "%s/%s.%d", DataDir, relstr, segno); } /* user table? then put in user database area... */ else if (dbid == MyDatabaseId) { extern char *DatabasePath; path = (char *) palloc(strlen(DatabasePath) + 2 * sizeof(NameData) + 2 + nchars); if (segno == 0) sprintf(path, "%s%c%s", DatabasePath, SEP_CHAR, relstr); else sprintf(path, "%s%c%s.%d", DatabasePath, SEP_CHAR, relstr, segno); } else/* this is work arround only !!! */ { char dbpath[MAXPGPATH + 1]; int4 owner; Oid id; char *tmpPath; int tmpEncoding; GetRawDatabaseInfo(dbstr, &owner, &id, dbpath, &tmpEncoding); if (id != dbid) elog(FATAL, "mdblindwrt: oid of db %s is not %u", dbstr, dbid); tmpPath = ExpandDatabasePath(dbpath); if (tmpPath == NULL) elog(FATAL, "mdblindwrt: can't expand path for db %s", dbstr); path = (char *) palloc(strlen(tmpPath) + 2 * sizeof(NameData) + 2 + nchars); if (segno == 0) sprintf(path, "%s%c%s", tmpPath, SEP_CHAR, relstr); else sprintf(path, "%s%c%s.%d", tmpPath, SEP_CHAR, relstr, segno); pfree(tmpPath); }#else /* construct the path to the file and open it */ /* system table? then put in system area... */ if (dbid == (Oid) 0) { path = (char *) palloc(strlen(DataDir) + sizeof(NameData) + 2); sprintf(path, "%s/%s", DataDir, relstr); } /* user table? then put in user database area... */ else if (dbid == MyDatabaseId) { extern char *DatabasePath; path = (char *) palloc(strlen(DatabasePath) + 2 * sizeof(NameData) + 2); sprintf(path, "%s%c%s", DatabasePath, SEP_CHAR, relstr); } else/* this is work arround only !!! */ { char dbpath[MAXPGPATH + 1]; int4 owner; Oid id; char *tmpPath; int tmpEncoding; GetRawDatabaseInfo(dbstr, &owner, &id, dbpath, &tmpEncoding); if (id != dbid) elog(FATAL, "mdblindwrt: oid of db %s is not %u", dbstr, dbid); tmpPath = ExpandDatabasePath(dbpath); if (tmpPath == NULL) elog(FATAL, "mdblindwrt: can't expand path for db %s", dbstr); path = (char *) palloc(strlen(tmpPath) + 2 * sizeof(NameData) + 2); sprintf(path, "%s%c%s", tmpPath, SEP_CHAR, relstr); pfree(tmpPath); }#endif#ifndef __CYGWIN32__ if ((fd = open(path, O_RDWR, 0600)) < 0)#else if ((fd = open(path, O_RDWR | O_BINARY, 0600)) < 0)#endif return SM_FAIL; /* seek to the right spot */#ifndef LET_OS_MANAGE_FILESIZE seekpos = (long) (BLCKSZ * (blkno % RELSEG_SIZE));#else seekpos = (long) (BLCKSZ * (blkno));#endif if (lseek(fd, seekpos, SEEK_SET) != seekpos) { close(fd); return SM_FAIL; } status = SM_SUCCESS; /* write and sync the block */ if (write(fd, buffer, BLCKSZ) != BLCKSZ || (pg_fsync(fd) < 0)) status = SM_FAIL; if (close(fd) < 0) status = SM_FAIL; pfree(path); return status;}/* * mdnblocks() -- Get the number of blocks stored in a relation. * * Important side effect: all segments of the relation are opened * and added to the mdfd_chain list. If this routine has not been * called, then only segments up to the last one actually touched * are present in the chain... * * Returns # of blocks, elog's on error. */intmdnblocks(Relation reln){ int fd; MdfdVec *v;#ifndef LET_OS_MANAGE_FILESIZE int nblocks; int segno;#endif fd = _mdfd_getrelnfd(reln); v = &Md_fdvec[fd];#ifndef LET_OS_MANAGE_FILESIZE segno = 0; for (;;) { nblocks = _mdnblocks(v->mdfd_vfd, BLCKSZ); if (nblocks > RELSEG_SIZE) elog(FATAL, "segment too big in mdnblocks!"); v->mdfd_lstbcnt = nblocks; if (nblocks == RELSEG_SIZE) { segno++; if (v->mdfd_chain == (MdfdVec *) NULL) { v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT); if (v->mdfd_chain == (MdfdVec *) NULL) elog(ERROR, "cannot count blocks for %s -- open failed", RelationGetRelationName(reln)); } v = v->mdfd_chain; } else return (segno * RELSEG_SIZE) + nblocks; }#else return _mdnblocks(v->mdfd_vfd, BLCKSZ);#endif}/* * mdtruncate() -- Truncate relation to specified number of blocks. * * Returns # of blocks or -1 on error. */intmdtruncate(Relation reln, int nblocks){ int curnblk; int fd; MdfdVec *v;#ifndef LET_OS_MANAGE_FILESIZE MemoryContext oldcxt; int priorblocks;#endif /* NOTE: mdnblocks makes sure we have opened all existing segments, * so that truncate/delete loop will get them all! */ curnblk = mdnblocks(reln); if (nblocks < 0 || nblocks > curnblk) return -1; /* bogus request */ if (nblocks == curnblk) return nblocks; /* no work */ fd = _mdfd_getrelnfd(reln); v = &Md_fdvec[fd];#ifndef LET_OS_MANAGE_FILESIZE oldcxt = MemoryContextSwitchTo(MdCxt); priorblocks = 0; while (v != (MdfdVec *) NULL) { MdfdVec *ov = v; if (priorblocks > nblocks) { /* This segment is no longer wanted at all (and has already been * unlinked from the mdfd_chain). * We truncate the file before deleting it because if other * backends are holding the file open, the unlink will fail on * some platforms. Better a zero-size file gets left around than * a big file... */ FileTruncate(v->mdfd_vfd, 0); /* In 6.5, it is not safe to unlink apparently-unused segments, * because another backend could store tuples in one of those * segments before it notices the shared-cache-invalidation * message that would warn it to re-open the file. So, don't * unlink 'em, just truncate 'em. This is fixed properly for 6.6 * but back-patching the changes was judged too risky. */#if 0 FileUnlink(v->mdfd_vfd);#endif v = v->mdfd_chain; Assert(ov != &Md_fdvec[fd]); /* we never drop the 1st segment */ pfree(ov); } else if (priorblocks + RELSEG_SIZE > nblocks) { /* This is the last segment we want to keep. * Truncate the file to the right length, and clear chain link * that points to any remaining segments (which we shall zap). * NOTE: if nblocks is exactly a multiple K of RELSEG_SIZE, * we will truncate the K+1st segment to 0 length but keep it. * This is mainly so that the right thing happens if nblocks=0. */ int lastsegblocks = nblocks - priorblocks; if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0) return -1; v->mdfd_lstbcnt = lastsegblocks; v = v->mdfd_chain; ov->mdfd_chain = (MdfdVec *) NULL; } else { /* We still need this segment and 0 or more blocks beyond it, * so nothing to do here. */ v = v->mdfd_chain; } priorblocks += RELSEG_SIZE; } MemoryContextSwitchTo(oldcxt);#else if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0) return -1; v->mdfd_lstbcnt = nblocks;#endif return nblocks;} /* mdtruncate *//* * mdcommit() -- Commit a transaction. * * All changes to magnetic disk relations must be forced to stable * storage. This routine makes a pass over the private table of * file descriptors. Any descriptors to which we have done writes, * but not synced, are synced here. * * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. */intmdcommit(){ int i; MdfdVec *v; for (i = 0; i < CurFd; i++) {#ifndef LET_OS_MANAGE_FILESIZE for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain)#else v = &Md_fdvec[i]; if (v != (MdfdVec *) NULL)#endif { if (v->mdfd_flags & MDFD_DIRTY) { if (FileSync(v->mdfd_vfd) < 0) return SM_FAIL; v->mdfd_flags &= ~MDFD_DIRTY; } } } return SM_SUCCESS;}/* * mdabort() -- Abort a transaction. * * Changes need not be forced to disk at transaction abort. We mark * all file descriptors as clean here. Always returns SM_SUCCESS. */intmdabort(){ int i; MdfdVec *v; for (i = 0; i < CurFd; i++) {#ifndef LET_OS_MANAGE_FILESIZE for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain) v->mdfd_flags &= ~MDFD_DIRTY;#else v = &Md_fdvec[i]; v->mdfd_flags &= ~MDFD_DIRTY;#endif } return SM_SUCCESS;}/* * _fdvec_alloc () -- grab a free (or new) md file descriptor vector. * */staticint_fdvec_alloc(){ MdfdVec *nvec; int fdvec, i; MemoryContext oldcxt; if (Md_Free >= 0) /* get from free list */ { fdvec = Md_Free; Md_Free = Md_fdvec[fdvec].mdfd_nextFree; Assert(Md_fdvec[fdvec].mdfd_flags == MDFD_FREE); Md_fdvec[fdvec].mdfd_flags = 0; if (fdvec >= CurFd) { Assert(fdvec == CurFd); CurFd++; } return fdvec; } /* Must allocate more room */ if (Nfds != CurFd) elog(FATAL, "_fdvec_alloc error"); Nfds *= 2; oldcxt = MemoryContextSwitchTo(MdCxt); nvec = (MdfdVec *) palloc(Nfds * sizeof(MdfdVec)); MemSet(nvec, 0, Nfds * sizeof(MdfdVec)); memmove(nvec, (char *) Md_fdvec, CurFd * sizeof(MdfdVec)); pfree(Md_fdvec); MemoryContextSwitchTo(oldcxt); Md_fdvec = nvec; /* Set new free list */ for (i = CurFd; i < Nfds; i++) { Md_fdvec[i].mdfd_nextFree = i + 1; Md_fdvec[i].mdfd_flags = MDFD_FREE; } Md_fdvec[Nfds - 1].mdfd_nextFree = -1; Md_Free = CurFd + 1; fdvec = CurFd; CurFd++; Md_fdvec[fdvec].mdfd_flags = 0; return fdvec;}/* * _fdvec_free () -- free md file descriptor vector. * */staticvoid_fdvec_free(int fdvec){ Assert(Md_Free < 0 || Md_fdvec[Md_Free].mdfd_flags == MDFD_FREE); Assert(Md_fdvec[fdvec].mdfd_flags != MDFD_FREE); Md_fdvec[fdvec].mdfd_nextFree = Md_Free; Md_fdvec[fdvec].mdfd_flags = MDFD_FREE; Md_Free = fdvec;}static MdfdVec *_mdfd_openseg(Relation reln, int segno, int oflags){ MemoryContext oldcxt; MdfdVec *v; int fd; bool dofree; char *path, *fullpath; /* be sure we have enough space for the '.segno', if any */ path = relpath(RelationGetRelationName(reln)->data); dofree = false; if (segno > 0) { dofree = true; fullpath = (char *) palloc(strlen(path) + 12); sprintf(fullpath, "%s.%d", path, segno); } else fullpath = path; /* open the file */#ifndef __CYGWIN32__ fd = FileNameOpenFile(fullpath, O_RDWR | oflags, 0600);#else fd = FileNameOpenFile(fullpath, O_RDWR | O_BINARY | oflags, 0600);#endif if (dofree) pfree(fullpath); if (fd < 0) return (MdfdVec *) NULL; /* allocate an mdfdvec entry for it */ oldcxt = MemoryContextSwitchTo(MdCxt); v = (MdfdVec *) palloc(sizeof(MdfdVec)); MemoryContextSwitchTo(oldcxt); /* fill the entry */ v->mdfd_vfd = fd; v->mdfd_flags = (uint16) 0; v->mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);#ifndef LET_OS_MANAGE_FILESIZE v->mdfd_chain = (MdfdVec *) NULL;#ifdef DIAGNOSTIC if (v->mdfd_lstbcnt > RELSEG_SIZE) elog(FATAL, "segment too big on open!");#endif#endif /* all done */ return v;}/* Get the fd for the relation, opening it if it's not already open */static int_mdfd_getrelnfd(Relation reln){ int fd; fd = RelationGetFile(reln); if (fd < 0) { if ((fd = mdopen(reln)) < 0) elog(ERROR, "cannot open relation %s", RelationGetRelationName(reln)); reln->rd_fd = fd; } return fd;}/* Find the segment of the relation holding the specified block */static MdfdVec *_mdfd_getseg(Relation reln, int blkno){ MdfdVec *v; int segno; int fd; int i; fd = _mdfd_getrelnfd(reln);#ifndef LET_OS_MANAGE_FILESIZE for (v = &Md_fdvec[fd], segno = blkno / RELSEG_SIZE, i = 1; segno > 0; i++, segno--) { if (v->mdfd_chain == (MdfdVec *) NULL) { v->mdfd_chain = _mdfd_openseg(reln, i, O_CREAT); if (v->mdfd_chain == (MdfdVec *) NULL) elog(ERROR, "cannot open segment %d of relation %s", i, RelationGetRelationName(reln)); } v = v->mdfd_chain; }#else v = &Md_fdvec[fd];#endif return v;}static BlockNumber_mdnblocks(File file, Size blcksz){ long len; len = FileSeek(file, 0L, SEEK_END) - 1; return (BlockNumber) ((len < 0) ? 0 : 1 + len / blcksz);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -