📄 fd.c
字号:
/*-------------------------------------------------------------------------
 *
 * fd.c
 *    Virtual file descriptor code.
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.121.2.3 2006/01/17 23:52:50 tgl Exp $
 *
 * NOTES:
 *
 * This code manages a cache of 'virtual' file descriptors (VFDs).
 * The server opens many file descriptors for a variety of reasons,
 * including base tables, scratch files (e.g., sort and hash spool
 * files), and random calls to C library routines like system(3); it
 * is quite easy to exceed system limits on the number of open files a
 * single process can have.  (This is around 256 on many modern
 * operating systems, but can be as low as 32 on others.)
 *
 * VFDs are managed as an LRU pool, with actual OS file descriptors
 * being opened and closed as needed.  Obviously, if a file is
 * opened using these interfaces, all subsequent operations must also
 * be through these interfaces (the File type is not a real file
 * descriptor).
 *
 * For this scheme to work, most (if not all) routines throughout the
 * server should use these interfaces instead of calling the C library
 * routines (e.g., open(2) and fopen(3)) themselves.  Otherwise, we
 * may find ourselves short of real file descriptors anyway.
 *
 * This file used to contain a bunch of stuff to support RAID levels 0
 * (jbod), 1 (duplex) and 5 (xor parity).  That stuff is all gone
 * because the parallel query processing code that called it is all
 * gone.  If you really need it you could get it from the original
 * POSTGRES source.
*------------------------------------------------------------------------- */#include "postgres.h"#include <sys/file.h>#include <sys/param.h>#include <sys/stat.h>#include <unistd.h>#include <fcntl.h>#include "miscadmin.h"#include "access/xact.h"#include "storage/fd.h"#include "storage/ipc.h"/* * We must leave some file descriptors free for system(), the dynamic loader, * and other code that tries to open files without consulting fd.c. This * is the number left free. (While we can be pretty sure we won't get * EMFILE, there's never any guarantee that we won't get ENFILE due to * other processes chewing up FDs. So it's a bad idea to try to open files * without consulting fd.c. Nonetheless we cannot control all code.) * * Because this is just a fixed setting, we are effectively assuming that * no such code will leave FDs open over the long term; otherwise the slop * is likely to be insufficient. Note in particular that we expect that * loading a shared library does not result in any permanent increase in * the number of open files. (This appears to be true on most if not * all platforms as of Feb 2004.) */#define NUM_RESERVED_FDS 10/* * If we have fewer than this many usable FDs after allowing for the reserved * ones, choke. */#define FD_MINFREE 10/* * A number of platforms allow individual processes to open many more files * than they can really support when *many* processes do the same thing. * This GUC parameter lets the DBA limit max_safe_fds to something less than * what the postmaster's initial probe suggests will work. */int max_files_per_process = 1000;/* * Maximum number of file descriptors to open for either VFD entries or * AllocateFile/AllocateDir operations. This is initialized to a conservative * value, and remains that way indefinitely in bootstrap or standalone-backend * cases. In normal postmaster operation, the postmaster calls * set_max_safe_fds() late in initialization to update the value, and that * value is then inherited by forked subprocesses. 
* * Note: the value of max_files_per_process is taken into account while * setting this variable, and so need not be tested separately. */static int max_safe_fds = 32; /* default if not changed *//* Debugging.... */#ifdef FDDEBUG#define DO_DB(A) A#else#define DO_DB(A) /* A */#endif#define VFD_CLOSED (-1)#define FileIsValid(file) \ ((file) > 0 && (file) < (int) SizeVfdCache && VfdCache[file].fileName != NULL)#define FileIsNotOpen(file) (VfdCache[file].fd == VFD_CLOSED)#define FileUnknownPos (-1L)/* these are the assigned bits in fdstate below: */#define FD_TEMPORARY (1 << 0) /* T = delete when closed */#define FD_XACT_TEMPORARY (1 << 1) /* T = delete at eoXact */typedef struct vfd{ signed short fd; /* current FD, or VFD_CLOSED if none */ unsigned short fdstate; /* bitflags for VFD's state */ SubTransactionId create_subid; /* for TEMPORARY fds, creating subxact */ File nextFree; /* link to next free VFD, if in freelist */ File lruMoreRecently; /* doubly linked recency-of-use list */ File lruLessRecently; long seekPos; /* current logical file position */ char *fileName; /* name of file, or NULL for unused VFD */ /* NB: fileName is malloc'd, and must be free'd when closing the VFD */ int fileFlags; /* open(2) flags for (re)opening the file */ int fileMode; /* mode to pass to open(2) */} Vfd;/* * Virtual File Descriptor array pointer and size. This grows as * needed. 'File' values are indexes into this array. * Note that VfdCache[0] is not a usable VFD, just a list header. */static Vfd *VfdCache;static Size SizeVfdCache = 0;/* * Number of file descriptors known to be in use by VFD entries. */static int nfile = 0;/* * List of stdio FILEs and <dirent.h> DIRs opened with AllocateFile * and AllocateDir. * * Since we don't want to encourage heavy use of AllocateFile or AllocateDir, * it seems OK to put a pretty small maximum limit on the number of * simultaneously allocated descs. 
*/#define MAX_ALLOCATED_DESCS 32typedef enum{ AllocateDescFile, AllocateDescDir} AllocateDescKind;typedef struct{ AllocateDescKind kind; union { FILE *file; DIR *dir; } desc; SubTransactionId create_subid;} AllocateDesc;static int numAllocatedDescs = 0;static AllocateDesc allocatedDescs[MAX_ALLOCATED_DESCS];/* * Number of temporary files opened during the current session; * this is used in generation of tempfile names. */static long tempFileCounter = 0;/*-------------------- * * Private Routines * * Delete - delete a file from the Lru ring * LruDelete - remove a file from the Lru ring and close its FD * Insert - put a file at the front of the Lru ring * LruInsert - put a file at the front of the Lru ring and open it * ReleaseLruFile - Release an fd by closing the last entry in the Lru ring * AllocateVfd - grab a free (or new) file record (from VfdArray) * FreeVfd - free a file record * * The Least Recently Used ring is a doubly linked list that begins and * ends on element zero. Element zero is special -- it doesn't represent * a file and its "fd" field always == VFD_CLOSED. Element zero is just an * anchor that shows us the beginning/end of the ring. * Only VFD elements that are currently really open (have an FD assigned) are * in the Lru ring. Elements that are "virtually" open can be recognized * by having a non-null fileName field. 
* * example: * * /--less----\ /---------\ * v \ v \ * #0 --more---> LeastRecentlyUsed --more-\ \ * ^\ | | * \\less--> MostRecentlyUsedFile <---/ | * \more---/ \--less--/ * *-------------------- */static void Delete(File file);static void LruDelete(File file);static void Insert(File file);static int LruInsert(File file);static bool ReleaseLruFile(void);static File AllocateVfd(void);static void FreeVfd(File file);static int FileAccess(File file);static char *make_database_relative(const char *filename);static void AtProcExit_Files(int code, Datum arg);static void CleanupTempFiles(bool isProcExit);static void RemovePgTempFilesInDir(const char *tmpdirname);/* * pg_fsync --- do fsync with or without writethrough */intpg_fsync(int fd){#ifndef HAVE_FSYNC_WRITETHROUGH_ONLY if (sync_method != SYNC_METHOD_FSYNC_WRITETHROUGH) return pg_fsync_no_writethrough(fd); else#endif return pg_fsync_writethrough(fd);}/* * pg_fsync_no_writethrough --- same as fsync except does nothing if * enableFsync is off */intpg_fsync_no_writethrough(int fd){ if (enableFsync) return fsync(fd); else return 0;}/* * pg_fsync_writethrough */intpg_fsync_writethrough(int fd){ if (enableFsync) {#ifdef WIN32 return _commit(fd);#elif defined(F_FULLFSYNC) return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0;#else return -1;#endif } else return 0;}/* * pg_fdatasync --- same as fdatasync except does nothing if enableFsync is off * * Not all platforms have fdatasync; treat as fsync if not available. */intpg_fdatasync(int fd){ if (enableFsync) {#ifdef HAVE_FDATASYNC return fdatasync(fd);#else return fsync(fd);#endif } else return 0;}/* * InitFileAccess --- initialize this module during backend startup * * This is called during either normal or standalone backend start. * It is *not* called in the postmaster. 
*/voidInitFileAccess(void){ Assert(SizeVfdCache == 0); /* call me only once */ /* initialize cache header entry */ VfdCache = (Vfd *) malloc(sizeof(Vfd)); if (VfdCache == NULL) ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); MemSet((char *) &(VfdCache[0]), 0, sizeof(Vfd)); VfdCache->fd = VFD_CLOSED; SizeVfdCache = 1; /* register proc-exit hook to ensure temp files are dropped at exit */ on_proc_exit(AtProcExit_Files, 0);}/* * count_usable_fds --- count how many FDs the system will let us open, * and estimate how many are already open. * * We stop counting if usable_fds reaches max_to_probe. Note: a small * value of max_to_probe might result in an underestimate of already_open; * we must fill in any "gaps" in the set of used FDs before the calculation * of already_open will give the right answer. In practice, max_to_probe * of a couple of dozen should be enough to ensure good results. * * We assume stdin (FD 0) is available for dup'ing */static voidcount_usable_fds(int max_to_probe, int *usable_fds, int *already_open){ int *fd; int size; int used = 0; int highestfd = 0; int j; size = 1024; fd = (int *) palloc(size * sizeof(int)); /* dup until failure or probe limit reached */ for (;;) { int thisfd; thisfd = dup(0); if (thisfd < 0) { /* Expect EMFILE or ENFILE, else it's fishy */ if (errno != EMFILE && errno != ENFILE) elog(WARNING, "dup(0) failed after %d successes: %m", used); break; } if (used >= size) { size *= 2; fd = (int *) repalloc(fd, size * sizeof(int)); } fd[used++] = thisfd; if (highestfd < thisfd) highestfd = thisfd; if (used >= max_to_probe) break; } /* release the files we opened */ for (j = 0; j < used; j++) close(fd[j]); pfree(fd); /* * Return results. usable_fds is just the number of successful dups. We * assume that the system limit is highestfd+1 (remember 0 is a legal FD * number) and so already_open is highestfd+1 - usable_fds. 
*/ *usable_fds = used; *already_open = highestfd + 1 - used;}/* * set_max_safe_fds * Determine number of filedescriptors that fd.c is allowed to use */voidset_max_safe_fds(void){ int usable_fds; int already_open; /*---------- * We want to set max_safe_fds to * MIN(usable_fds, max_files_per_process - already_open) * less the slop factor for files that are opened without consulting * fd.c. This ensures that we won't exceed either max_files_per_process * or the experimentally-determined EMFILE limit. *---------- */ count_usable_fds(max_files_per_process, &usable_fds, &already_open); max_safe_fds = Min(usable_fds, max_files_per_process - already_open); /* * Take off the FDs reserved for system() etc. */ max_safe_fds -= NUM_RESERVED_FDS; /* * Make sure we still have enough to get by. */ if (max_safe_fds < FD_MINFREE) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("insufficient file descriptors available to start server process"), errdetail("System allows %d, we need at least %d.", max_safe_fds + NUM_RESERVED_FDS, FD_MINFREE + NUM_RESERVED_FDS))); elog(DEBUG2, "max_safe_fds = %d, usable_fds = %d, already_open = %d", max_safe_fds, usable_fds, already_open);}/* * BasicOpenFile --- same as open(2) except can free other FDs if needed * * This is exported for use by places that really want a plain kernel FD, * but need to be proof against running out of FDs. Once an FD has been * successfully returned, it is the caller's responsibility to ensure that * it will not be leaked on ereport()! Most users should *not* call this * routine directly, but instead use the VFD abstraction level, which * provides protection against descriptor leaks as well as management of * files that need to be open for more than a short period of time. * * Ideally this should be the *only* direct call of open() in the backend. * In practice, the postmaster calls open() directly, and there are some * direct open() calls done early in backend startup. 
Those are OK since * this module wouldn't have any open files to close at that point anyway. */intBasicOpenFile(FileName fileName, int fileFlags, int fileMode){ int fd;tryAgain: fd = open(fileName, fileFlags, fileMode); if (fd >= 0) return fd; /* success! */ if (errno == EMFILE || errno == ENFILE) { int save_errno = errno; ereport(LOG, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("out of file descriptors: %m; release and retry"))); errno = 0; if (ReleaseLruFile()) goto tryAgain; errno = save_errno; } return -1; /* failure */}#if defined(FDDEBUG)static void_dump_lru(void){ int mru = VfdCache[0].lruLessRecently; Vfd *vfdP = &VfdCache[mru]; char buf[2048]; snprintf(buf, sizeof(buf), "LRU: MOST %d ", mru); while (mru != 0) { mru = vfdP->lruLessRecently; vfdP = &VfdCache[mru]; snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%d ", mru); } snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "LEAST"); elog(LOG, buf);}#endif /* FDDEBUG */static voidDelete(File file){ Vfd *vfdP; Assert(file != 0); DO_DB(elog(LOG, "Delete %d (%s)", file, VfdCache[file].fileName)); DO_DB(_dump_lru()); vfdP = &VfdCache[file]; VfdCache[vfdP->lruLessRecently].lruMoreRecently = vfdP->lruMoreRecently; VfdCache[vfdP->lruMoreRecently].lruLessRecently = vfdP->lruLessRecently; DO_DB(_dump_lru());}static voidLruDelete(File file){ Vfd *vfdP; Assert(file != 0); DO_DB(elog(LOG, "LruDelete %d (%s)", file, VfdCache[file].fileName)); vfdP = &VfdCache[file]; /* delete the vfd record from the LRU ring */ Delete(file); /* save the seek position */ vfdP->seekPos = (long) lseek(vfdP->fd, 0L, SEEK_CUR); Assert(vfdP->seekPos != -1L); /* close the file */ if (close(vfdP->fd)) elog(ERROR, "failed to close \"%s\": %m", vfdP->fileName); --nfile; vfdP->fd = VFD_CLOSED;}static voidInsert(File file){ Vfd *vfdP; Assert(file != 0); DO_DB(elog(LOG, "Insert %d (%s)", file, VfdCache[file].fileName)); DO_DB(_dump_lru()); vfdP = &VfdCache[file]; vfdP->lruMoreRecently = 0; vfdP->lruLessRecently = 
VfdCache[0].lruLessRecently; VfdCache[0].lruLessRecently = file; VfdCache[vfdP->lruLessRecently].lruMoreRecently = file; DO_DB(_dump_lru());}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -