📄 os_unix.c
字号:
/*** 2004 May 22**** The author disclaims copyright to this source code. In place of** a legal notice, here is a blessing:**** May you do good and not evil.** May you find forgiveness for yourself and forgive others.** May you share freely, never taking more than you give.************************************************************************************ This file contains code that is specific to Unix systems.*/#include "sqliteInt.h"#include "os.h"#if OS_UNIX /* This file is used on unix only */#include <time.h>#include <errno.h>#include <unistd.h>/*** Do not include any of the File I/O interface procedures if the** SQLITE_OMIT_DISKIO macro is defined (indicating that the database** will be in-memory only)*/#ifndef SQLITE_OMIT_DISKIO/*** Define various macros that are missing from some systems.*/#ifndef O_LARGEFILE# define O_LARGEFILE 0#endif#ifdef SQLITE_DISABLE_LFS# undef O_LARGEFILE# define O_LARGEFILE 0#endif#ifndef O_NOFOLLOW# define O_NOFOLLOW 0#endif#ifndef O_BINARY# define O_BINARY 0#endif/*** The DJGPP compiler environment looks mostly like Unix, but it** lacks the fcntl() system call. So redefine fcntl() to be something** that always succeeds. This means that locking does not occur under** DJGPP. But it's DOS - what did you expect?*/#ifdef __DJGPP__# define fcntl(A,B,C) 0#endif/*** Macros used to determine whether or not to use threads. 
The** SQLITE_UNIX_THREADS macro is defined if we are synchronizing for** Posix threads and SQLITE_W32_THREADS is defined if we are** synchronizing using Win32 threads.*/#if defined(THREADSAFE) && THREADSAFE# include <pthread.h># define SQLITE_UNIX_THREADS 1#endif/*** Include code that is common to all os_*.c files*/#include "os_common.h"/*** On thread-safe Linux builds, redefine getpid so that every later use** of getpid in this file expands to pthread_self.*/#if defined(THREADSAFE) && THREADSAFE && defined(__linux__)#define getpid pthread_self#endif/*** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)** section 6.5.2.2 lines 483 through 490 specify that when a process** sets or clears a lock, that operation overrides any prior locks set** by the same process. It does not explicitly say so, but this implies** that it overrides locks set by the same process using a different** file descriptor. Consider this test case:**** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);**** Suppose ./file1 and ./file2 are really the same file (because** one is a hard or symbolic link to the other) then if you set** an exclusive lock on fd1, then try to get an exclusive lock** on fd2, it works. I would have expected the second lock to** fail since there was already a lock on the file due to fd1.** But not so. Since both locks came from the same process, the** second overrides the first, even though they were on different** file descriptors opened on different file names.**** Bummer. If you ask me, this is broken. Badly broken. It means** that we cannot use POSIX locks to synchronize file access among** competing threads of the same process. POSIX locks will work fine** to synchronize access for threads in separate processes, but not** threads within the same process.**** To work around the problem, SQLite has to manage file locks internally** on its own. 
Whenever a new database is opened, we have to find the** specific inode of the database file (the inode is determined by the** st_dev and st_ino fields of the stat structure that fstat() fills in)** and check for locks already existing on that inode. When locks are** created or removed, we have to look at our own internal record of the** locks to see if another thread has previously set a lock on that same** inode.**** The OsFile structure for POSIX is no longer just an integer file** descriptor. It is now a structure that holds the integer file** descriptor and a pointer to a structure that describes the internal** locks on the corresponding inode. There is one locking structure** per inode, so if the same inode is opened twice, both OsFile structures** point to the same locking structure. The locking structure keeps** a reference count (so we will know when to delete it) and a "cnt"** field that tells us its internal lock status. cnt==0 means the** file is unlocked. cnt==-1 means the file has an exclusive lock.** cnt>0 means there are cnt shared locks on the file.**** Any attempt to lock or unlock a file first checks the locking** structure. The fcntl() system call is only invoked to set a ** POSIX lock if the internal lock structure transitions between** a locked and an unlocked state.**** 2004-Jan-11:** More recent discoveries about POSIX advisory locks. (The more** I discover, the more I realize that POSIX advisory locks are** an abomination.)**** If you close a file descriptor that points to a file that has locks,** all locks on that file that are owned by the current process are** released. To work around this problem, each OsFile structure contains** a pointer to an openCnt structure. There is one openCnt structure** per open inode, which means that multiple OsFiles can point to a single** openCnt. 
When an attempt is made to close an OsFile, if there are** other OsFiles open on the same inode that are holding locks, the call** to close() the file descriptor is deferred until all of the locks clear.** The openCnt structure keeps a list of file descriptors that need to** be closed and that list is walked (and cleared) when the last lock** clears.**** First, under Linux threads, because each thread has a separate** process ID, lock operations in one thread do not override locks** to the same file in other threads. Linux threads behave like** separate processes in this respect. But, if you close a file** descriptor in linux threads, all locks are cleared, even locks** on other threads and even though the other threads have different** process IDs. Linux threads is inconsistent in this respect.** (I'm beginning to think that linux threads is an abomination too.)** The consequence of this all is that the hash table for the lockInfo** structure has to include the process id as part of its key because** locks in different threads are treated as distinct. But the ** openCnt structure should not include the process id in its** key because close() clears locks on all threads, not just the current** thread. Were it not for this goofiness in linux threads, we could** combine the lockInfo and openCnt structures into a single structure.**** 2004-Jun-28:** On some versions of linux, threads can override each other's locks.** On others not. Sometimes you can change the behavior on the same** system by setting the LD_ASSUME_KERNEL environment variable. The** POSIX standard is silent as to which behavior is correct, as far** as I can tell, so other versions of unix might show the same** inconsistency. There is little doubt in my mind that posix** advisory locks and linux threads are profoundly broken.**** To work around the inconsistencies, we have to test at runtime ** whether or not threads can override each other's locks. 
This test** is run once, the first time any lock is attempted. A static ** variable is set to record the results of this test for future** use.*//*** An instance of the following structure serves as the key used** to locate a particular lockInfo structure given its inode.**** If threads cannot override each others locks, then we set the** lockKey.tid field to the thread ID. If threads can override** each others locks then tid is always set to zero. tid is also** set to zero if we compile without threading support.*/struct lockKey { dev_t dev; /* Device number */ ino_t ino; /* Inode number */#ifdef SQLITE_UNIX_THREADS pthread_t tid; /* Thread ID or zero if threads cannot override each other */#endif};/*** An instance of the following structure is allocated for each open** inode on each thread with a different process ID. (Threads have** different process IDs on linux, but not on most other unixes.)**** A single inode can have multiple file descriptors, so each OsFile** structure contains a pointer to an instance of this object and this** object keeps a count of the number of OsFiles pointing to it.*/struct lockInfo { struct lockKey key; /* The lookup key */ int cnt; /* Number of SHARED locks held */ int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */ int nRef; /* Number of pointers to this structure */};/*** An instance of the following structure serves as the key used** to locate a particular openCnt structure given its inode. This** is the same as the lockKey except that the thread ID is omitted.*/struct openKey { dev_t dev; /* Device number */ ino_t ino; /* Inode number */};/*** An instance of the following structure is allocated for each open** inode. This structure keeps track of the number of locks on that** inode. 
If a close is attempted against an inode that is holding** locks, the close is deferred until all locks clear by adding the** file descriptor to be closed to the pending list.*/struct openCnt { struct openKey key; /* The lookup key */ int nRef; /* Number of pointers to this structure */ int nLock; /* Number of outstanding locks */ int nPending; /* Number of pending close() operations */ int *aPending; /* Malloced space holding fd's awaiting a close() */};/* ** These hash table maps inodes and process IDs into lockInfo and openCnt** structures. Access to these hash tables must be protected by a mutex.*/static Hash lockHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };static Hash openHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };#ifdef SQLITE_UNIX_THREADS/*** This variable records whether or not threads can override each others** locks.**** 0: No. Threads cannot override each others locks.** 1: Yes. Threads can override each others locks.** -1: We don't know yet.*/static int threadsOverrideEachOthersLocks = -1;/*** This structure holds information passed into individual test** threads by the testThreadLockingBehavior() routine.*/struct threadTestData { int fd; /* File to be locked */ struct flock lock; /* The locking operation */ int result; /* Result of the locking operation */};/*** The testThreadLockingBehavior() routine launches two separate** threads on this routine. This routine attempts to lock a file** descriptor then returns. 
The success or failure of that attempt** allows the testThreadLockingBehavior() procedure to determine** whether or not threads can override each others locks.*/static void *threadLockingTest(void *pArg){ struct threadTestData *pData = (struct threadTestData*)pArg; pData->result = fcntl(pData->fd, F_SETLK, &pData->lock); return pArg;}/*** This procedure attempts to determine whether or not threads** can override each others locks then sets the ** threadsOverrideEachOthersLocks variable appropriately.*/static void testThreadLockingBehavior(fd_orig){ int fd; struct threadTestData d[2]; pthread_t t[2]; fd = dup(fd_orig); if( fd<0 ) return; memset(d, 0, sizeof(d)); d[0].fd = fd; d[0].lock.l_type = F_RDLCK; d[0].lock.l_len = 1; d[0].lock.l_start = 0; d[0].lock.l_whence = SEEK_SET; d[1] = d[0]; d[1].lock.l_type = F_WRLCK; pthread_create(&t[0], 0, threadLockingTest, &d[0]); pthread_create(&t[1], 0, threadLockingTest, &d[1]); pthread_join(t[0], 0); pthread_join(t[1], 0); close(fd); threadsOverrideEachOthersLocks = d[0].result==0 && d[1].result==0;}#endif /* SQLITE_UNIX_THREADS *//*** Release a lockInfo structure previously allocated by findLockInfo().*/static void releaseLockInfo(struct lockInfo *pLock){ pLock->nRef--; if( pLock->nRef==0 ){ sqlite3HashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0); sqliteFree(pLock); }}/*** Release a openCnt structure previously allocated by findLockInfo().*/static void releaseOpenCnt(struct openCnt *pOpen){ pOpen->nRef--; if( pOpen->nRef==0 ){ sqlite3HashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0); sqliteFree(pOpen->aPending); sqliteFree(pOpen); }}/*** Given a file descriptor, locate lockInfo and openCnt structures that** describes that file descriptor. Create a new ones if necessary. 
The** return values might be unset if an error occurs.**** Return the number of errors.*/static int findLockInfo( int fd, /* The file descriptor used in the key */ struct lockInfo **ppLock, /* Return the lockInfo structure here */ struct openCnt **ppOpen /* Return the openCnt structure here */){ int rc; struct lockKey key1; struct openKey key2; struct stat statbuf; struct lockInfo *pLock; struct openCnt *pOpen; rc = fstat(fd, &statbuf); if( rc!=0 ) return 1; memset(&key1, 0, sizeof(key1)); key1.dev = statbuf.st_dev; key1.ino = statbuf.st_ino;#ifdef SQLITE_UNIX_THREADS if( threadsOverrideEachOthersLocks<0 ){ testThreadLockingBehavior(fd); } key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();#endif memset(&key2, 0, sizeof(key2)); key2.dev = statbuf.st_dev; key2.ino = statbuf.st_ino; pLock = (struct lockInfo*)sqlite3HashFind(&lockHash, &key1, sizeof(key1)); if( pLock==0 ){ struct lockInfo *pOld; pLock = sqliteMallocRaw( sizeof(*pLock) ); if( pLock==0 ) return 1; pLock->key = key1; pLock->nRef = 1; pLock->cnt = 0; pLock->locktype = 0; pOld = sqlite3HashInsert(&lockHash, &pLock->key, sizeof(key1), pLock); if( pOld!=0 ){ assert( pOld==pLock ); sqliteFree(pLock); return 1; } }else{ pLock->nRef++; } *ppLock = pLock; pOpen = (struct openCnt*)sqlite3HashFind(&openHash, &key2, sizeof(key2)); if( pOpen==0 ){ struct openCnt *pOld; pOpen = sqliteMallocRaw( sizeof(*pOpen) ); if( pOpen==0 ){ releaseLockInfo(pLock); return 1; } pOpen->key = key2; pOpen->nRef = 1; pOpen->nLock = 0; pOpen->nPending = 0; pOpen->aPending = 0; pOld = sqlite3HashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen); if( pOld!=0 ){ assert( pOld==pOpen ); sqliteFree(pOpen); releaseLockInfo(pLock); return 1; } }else{ pOpen->nRef++; } *ppOpen = pOpen; return 0;}/*** Delete the named file*/int sqlite3OsDelete(const char *zFilename){ unlink(zFilename); return SQLITE_OK;}/*** Return TRUE if the named file exists.*/int sqlite3OsFileExists(const char *zFilename){ return access(zFilename, 0)==0;}/*** 
Attempt to open a file for both reading and writing. If that** fails, try opening it read-only. If the file does not exist,** try to create it.**** On success, a handle for the open file is written to *id** and *pReadonly is set to 0 if the file was opened for reading and** writing or 1 if the file was opened read-only. The function returns** SQLITE_OK.**** On failure, the function returns SQLITE_CANTOPEN and leaves** *id and *pReadonly unchanged.*/int sqlite3OsOpenReadWrite( const char *zFilename, OsFile *id, int *pReadonly){ int rc; assert( !id->isOpen ); id->dirfd = -1; id->h = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY, SQLITE_DEFAULT_FILE_PERMISSIONS); if( id->h<0 ){#ifdef EISDIR
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -