📄 tablespace.c
字号:
/*------------------------------------------------------------------------- * * tablespace.c * Commands to manipulate table spaces * * Tablespaces in PostgreSQL are designed to allow users to determine * where the data file(s) for a given database object reside on the file * system. * * A tablespace represents a directory on the file system. At tablespace * creation time, the directory must be empty. To simplify things and * remove the possibility of having file name conflicts, we isolate * files within a tablespace into database-specific subdirectories. * * To support file access via the information given in RelFileNode, we * maintain a symbolic-link map in $PGDATA/pg_tblspc. The symlinks are * named by tablespace OIDs and point to the actual tablespace directories. * Thus the full path to an arbitrary file is * $PGDATA/pg_tblspc/spcoid/dboid/relfilenode * * There are two tablespaces created at initdb time: pg_global (for shared * tables) and pg_default (for everything else). For backwards compatibility * and to remain functional on platforms without symlinks, these tablespaces * are accessed specially: they are respectively * $PGDATA/global/relfilenode * $PGDATA/base/dboid/relfilenode * * To allow CREATE DATABASE to give a new database a default tablespace * that's different from the template database's default, we make the * provision that a zero in pg_class.reltablespace means the database's * default tablespace. Without this, CREATE DATABASE would have to go in * and munge the system catalogs of the new database. * * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.28.2.2 2006/03/29 15:15:50 tgl Exp $ * *------------------------------------------------------------------------- */#include "postgres.h"#include <unistd.h>#include <dirent.h>#include <sys/types.h>#include <sys/stat.h>#include "access/heapam.h"#include "catalog/catalog.h"#include "catalog/dependency.h"#include "catalog/indexing.h"#include "catalog/pg_namespace.h"#include "catalog/pg_tablespace.h"#include "commands/tablespace.h"#include "miscadmin.h"#include "storage/fd.h"#include "storage/smgr.h"#include "utils/acl.h"#include "utils/builtins.h"#include "utils/fmgroids.h"#include "utils/guc.h"#include "utils/lsyscache.h"#include "utils/syscache.h"/* GUC variable */char *default_tablespace = NULL;static bool remove_tablespace_directories(Oid tablespaceoid, bool redo);static void set_short_version(const char *path);/* * Each database using a table space is isolated into its own name space * by a subdirectory named for the database OID. On first creation of an * object in the tablespace, create the subdirectory. If the subdirectory * already exists, just fall through quietly. * * isRedo indicates that we are creating an object during WAL replay. * In this case we will cope with the possibility of the tablespace * directory not being there either --- this could happen if we are * replaying an operation on a table in a subsequently-dropped tablespace. * We handle this by making a directory in the place where the tablespace * symlink would normally be. This isn't an exact replay of course, but * it's the best we can do given the available information. * * If tablespaces are not supported, you might think this could be a no-op, * but you'd be wrong: we still need it in case we have to re-create a * database subdirectory (of $PGDATA/base) during WAL replay. */voidTablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo){ struct stat st; char *dir; /* * The global tablespace doesn't have per-database subdirectories, so * nothing to do for it. */ if (spcNode == GLOBALTABLESPACE_OID) return; Assert(OidIsValid(spcNode)); Assert(OidIsValid(dbNode)); dir = GetDatabasePath(dbNode, spcNode); if (stat(dir, &st) < 0) { if (errno == ENOENT) { /* * Acquire TablespaceCreateLock to ensure that no DROP TABLESPACE * or TablespaceCreateDbspace is running concurrently. */ LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE); /* * Recheck to see if someone created the directory while we were * waiting for lock. */ if (stat(dir, &st) == 0 && S_ISDIR(st.st_mode)) { /* need not do anything */ } else { /* OK, go for it */ if (mkdir(dir, S_IRWXU) < 0) { char *parentdir; if (errno != ENOENT || !isRedo) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", dir))); /* Try to make parent directory too */ parentdir = pstrdup(dir); get_parent_directory(parentdir); if (mkdir(parentdir, S_IRWXU) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", parentdir))); pfree(parentdir); if (mkdir(dir, S_IRWXU) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", dir))); } } LWLockRelease(TablespaceCreateLock); } else { ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat directory \"%s\": %m", dir))); } } else { /* be paranoid */ if (!S_ISDIR(st.st_mode)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" exists but is not a directory", dir))); } pfree(dir);}/* * Create a table space * * Only superusers can create a tablespace. This seems a reasonable restriction * since we're determining the system layout and, anyway, we probably have * root if we're doing this kind of activity */voidCreateTableSpace(CreateTableSpaceStmt *stmt){#ifdef HAVE_SYMLINK Relation rel; Datum values[Natts_pg_tablespace]; char nulls[Natts_pg_tablespace]; HeapTuple tuple; Oid tablespaceoid; char *location; char *linkloc; Oid ownerId; /* validate */ /* don't call this in a transaction block */ PreventTransactionChain((void *) stmt, "CREATE TABLESPACE"); /* Must be super user */ if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied to create tablespace \"%s\"", stmt->tablespacename), errhint("Must be superuser to create a tablespace."))); /* However, the eventual owner of the tablespace need not be */ if (stmt->owner) ownerId = get_roleid_checked(stmt->owner); else ownerId = GetUserId(); /* Unix-ify the offered path, and strip any trailing slashes */ location = pstrdup(stmt->location); canonicalize_path(location); /* disallow quotes, else CREATE DATABASE would be at risk */ if (strchr(location, '\'')) ereport(ERROR, (errcode(ERRCODE_INVALID_NAME), errmsg("tablespace location may not contain single quotes"))); /* * Allowing relative paths seems risky * * this also helps us ensure that location is not empty or whitespace */ if (!is_absolute_path(location)) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("tablespace location must be an absolute path"))); /* * Check that location isn't too long. Remember that we're going to append * '/<dboid>/<relid>.<nnn>' (XXX but do we ever form the whole path * explicitly? This may be overly conservative.) */ if (strlen(location) >= (MAXPGPATH - 1 - 10 - 1 - 10 - 1 - 10)) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("tablespace location \"%s\" is too long", location))); /* * Disallow creation of tablespaces named "pg_xxx"; we reserve this * namespace for system purposes. */ if (!allowSystemTableMods && IsReservedName(stmt->tablespacename)) ereport(ERROR, (errcode(ERRCODE_RESERVED_NAME), errmsg("unacceptable tablespace name \"%s\"", stmt->tablespacename), errdetail("The prefix \"pg_\" is reserved for system tablespaces."))); /* * Check that there is no other tablespace by this name. (The unique * index would catch this anyway, but might as well give a friendlier * message.) */ if (OidIsValid(get_tablespace_oid(stmt->tablespacename))) ereport(ERROR, (errcode(ERRCODE_DUPLICATE_OBJECT), errmsg("tablespace \"%s\" already exists", stmt->tablespacename))); /* * Insert tuple into pg_tablespace. The purpose of doing this first is to * lock the proposed tablename against other would-be creators. The * insertion will roll back if we find problems below. */ rel = heap_open(TableSpaceRelationId, RowExclusiveLock); MemSet(nulls, ' ', Natts_pg_tablespace); values[Anum_pg_tablespace_spcname - 1] = DirectFunctionCall1(namein, CStringGetDatum(stmt->tablespacename)); values[Anum_pg_tablespace_spcowner - 1] = ObjectIdGetDatum(ownerId); values[Anum_pg_tablespace_spclocation - 1] = DirectFunctionCall1(textin, CStringGetDatum(location)); nulls[Anum_pg_tablespace_spcacl - 1] = 'n'; tuple = heap_formtuple(rel->rd_att, values, nulls); tablespaceoid = simple_heap_insert(rel, tuple); CatalogUpdateIndexes(rel, tuple); heap_freetuple(tuple); /* Record dependency on owner */ recordDependencyOnOwner(TableSpaceRelationId, tablespaceoid, ownerId); /* * Attempt to coerce target directory to safe permissions. If this fails, * it doesn't exist or has the wrong owner. */ if (chmod(location, 0700) != 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not set permissions on directory \"%s\": %m", location))); /* * Check the target directory is empty. */ if (!directory_is_empty(location)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("directory \"%s\" is not empty", location))); /* * Create the PG_VERSION file in the target directory. This has several * purposes: to make sure we can write in the directory, to prevent * someone from creating another tablespace pointing at the same directory * (the emptiness check above will fail), and to label tablespace * directories by PG version. */ set_short_version(location); /* * All seems well, create the symlink */ linkloc = (char *) palloc(10 + 10 + 1); sprintf(linkloc, "pg_tblspc/%u", tablespaceoid); if (symlink(location, linkloc) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create symbolic link \"%s\": %m", linkloc))); /* Record the filesystem change in XLOG */ { xl_tblspc_create_rec xlrec; XLogRecData rdata[2]; xlrec.ts_id = tablespaceoid; rdata[0].data = (char *) &xlrec; rdata[0].len = offsetof(xl_tblspc_create_rec, ts_path); rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); rdata[1].data = (char *) location; rdata[1].len = strlen(location) + 1; rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE, rdata); } pfree(linkloc); pfree(location); /* We keep the lock on pg_tablespace until commit */ heap_close(rel, NoLock);#else /* !HAVE_SYMLINK */ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("tablespaces are not supported on this platform")));#endif /* HAVE_SYMLINK */}/* * Drop a table space * * Be careful to check that the tablespace is empty. */voidDropTableSpace(DropTableSpaceStmt *stmt){#ifdef HAVE_SYMLINK char *tablespacename = stmt->tablespacename; HeapScanDesc scandesc; Relation rel; HeapTuple tuple; ScanKeyData entry[1]; Oid tablespaceoid; /* don't call this in a transaction block */ PreventTransactionChain((void *) stmt, "DROP TABLESPACE"); /* * Find the target tuple */ rel = heap_open(TableSpaceRelationId, RowExclusiveLock); ScanKeyInit(&entry[0], Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(tablespacename)); scandesc = heap_beginscan(rel, SnapshotNow, 1, entry); tuple = heap_getnext(scandesc, ForwardScanDirection); if (!HeapTupleIsValid(tuple)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("tablespace \"%s\" does not exist", tablespacename))); tablespaceoid = HeapTupleGetOid(tuple); /* Must be tablespace owner */ if (!pg_tablespace_ownercheck(tablespaceoid, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TABLESPACE, tablespacename); /* Disallow drop of the standard tablespaces, even by superuser */ if (tablespaceoid == GLOBALTABLESPACE_OID || tablespaceoid == DEFAULTTABLESPACE_OID) aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_TABLESPACE, tablespacename); /* * Remove the pg_tablespace tuple (this will roll back if we fail below) */ simple_heap_delete(rel, &tuple->t_self); heap_endscan(scandesc); /* * Remove dependency on owner. */ deleteSharedDependencyRecordsFor(TableSpaceRelationId, tablespaceoid); /* * Acquire TablespaceCreateLock to ensure that no TablespaceCreateDbspace * is running concurrently. */ LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE); /* * Try to remove the physical infrastructure */ if (!remove_tablespace_directories(tablespaceoid, false)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("tablespace \"%s\" is not empty", tablespacename))); /* Record the filesystem change in XLOG */ { xl_tblspc_drop_rec xlrec; XLogRecData rdata[1]; xlrec.ts_id = tablespaceoid; rdata[0].data = (char *) &xlrec; rdata[0].len = sizeof(xl_tblspc_drop_rec); rdata[0].buffer = InvalidBuffer; rdata[0].next = NULL; (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP, rdata); } /* * Allow TablespaceCreateDbspace again. */ LWLockRelease(TablespaceCreateLock); /* We keep the lock on pg_tablespace until commit */ heap_close(rel, NoLock);#else /* !HAVE_SYMLINK */ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("tablespaces are not supported on this platform")));#endif /* HAVE_SYMLINK */}/* * remove_tablespace_directories: attempt to remove filesystem infrastructure * * Returns TRUE if successful, FALSE if some subdirectory is not empty * * redo indicates we are redoing a drop from XLOG; okay if nothing there */static boolremove_tablespace_directories(Oid tablespaceoid, bool redo){ char *location; DIR *dirdesc; struct dirent *de; char *subfile; struct stat st; location = (char *) palloc(10 + 10 + 1); sprintf(location, "pg_tblspc/%u", tablespaceoid); /* * Check if the tablespace still contains any files. We try to rmdir each * per-database directory we find in it. rmdir failure implies there are * still files in that subdirectory, so give up. (We do not have to worry * about undoing any already completed rmdirs, since the next attempt to * use the tablespace from that database will simply recreate the * subdirectory via TablespaceCreateDbspace.) * * Since we hold TablespaceCreateLock, no one else should be creating any * fresh subdirectories in parallel. It is possible that new files are * being created within subdirectories, though, so the rmdir call could * fail. Worst consequence is a less friendly error message. */ dirdesc = AllocateDir(location); if (dirdesc == NULL) { if (redo && errno == ENOENT) { pfree(location); return true; } /* else let ReadDir report the error */ } while ((de = ReadDir(dirdesc, location)) != NULL) { /* Note we ignore PG_VERSION for the nonce */ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0 || strcmp(de->d_name, "PG_VERSION") == 0) continue; subfile = palloc(strlen(location) + 1 + strlen(de->d_name) + 1); sprintf(subfile, "%s/%s", location, de->d_name);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -