📄 env_region.c
字号:
/*- * See the file LICENSE for redistribution information. * * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */#include "db_config.h"#ifndef lintstatic const char revid[] = "$Id: env_region.c,v 11.64 2002/07/17 15:09:19 bostic Exp $";#endif /* not lint */#ifndef NO_SYSTEM_INCLUDES#include <sys/types.h>#include <ctype.h>#include <string.h>#include <unistd.h>#endif#include "db_int.h"#include "dbinc/db_shash.h"#include "dbinc/lock.h"#include "dbinc/log.h"#include "dbinc/mp.h"#include "dbinc/rep.h"#include "dbinc/txn.h"static int __db_des_destroy __P((DB_ENV *, REGION *));static int __db_des_get __P((DB_ENV *, REGINFO *, REGINFO *, REGION **));static int __db_e_remfile __P((DB_ENV *));static int __db_faultmem __P((DB_ENV *, void *, size_t, int));static void __db_region_destroy __P((DB_ENV *, REGINFO *));/* * __db_e_attach * Join/create the environment * * PUBLIC: int __db_e_attach __P((DB_ENV *, u_int32_t *)); */int__db_e_attach(dbenv, init_flagsp) DB_ENV *dbenv; u_int32_t *init_flagsp;{ REGENV *renv; REGENV_REF ref; REGINFO *infop; REGION *rp, tregion; size_t size; size_t nrw; u_int32_t mbytes, bytes; int retry_cnt, ret, segid; char buf[sizeof(DB_REGION_FMT) + 20];#if !defined(HAVE_MUTEX_THREADS) /* * !!! * If we don't have spinlocks, we need a file descriptor for fcntl(2) * locking. We use the file handle from the REGENV file for this * purpose. * * Since we may be using shared memory regions, e.g., shmget(2), and * not a mapped-in regular file, the backing file may be only a few * bytes in length. So, this depends on the ability to call fcntl to * lock file offsets much larger than the actual physical file. I * think that's safe -- besides, very few systems actually need this * kind of support, SunOS is the only one still in wide use of which * I'm aware. * * The error case is if an application lacks spinlocks and wants to be * threaded. That doesn't work because fcntl may lock the underlying * process, including all its threads. */ if (F_ISSET(dbenv, DB_ENV_THREAD)) { __db_err(dbenv,"architecture lacks fast mutexes: applications cannot be threaded"); return (EINVAL); }#endif /* Initialization */ retry_cnt = 0; /* Repeated initialization. */loop: renv = NULL; /* Set up the DB_ENV's REG_INFO structure. */ if ((ret = __os_calloc(dbenv, 1, sizeof(REGINFO), &infop)) != 0) return (ret); infop->type = REGION_TYPE_ENV; infop->id = REGION_ID_ENV; infop->mode = dbenv->db_mode; infop->flags = REGION_JOIN_OK; if (F_ISSET(dbenv, DB_ENV_CREATE)) F_SET(infop, REGION_CREATE_OK); /* * We have to single-thread the creation of the REGENV region. Once * it exists, we can do locking using locks in the region, but until * then we have to be the only player in the game. * * If this is a private environment, we are only called once and there * are no possible race conditions. * * If this is a public environment, we use the filesystem to ensure * the creation of the environment file is single-threaded. */ if (F_ISSET(dbenv, DB_ENV_PRIVATE)) { if ((ret = __os_strdup(dbenv, "process-private", &infop->name)) != 0) goto err; goto creation; } /* Build the region name. */ (void)snprintf(buf, sizeof(buf), "%s", DB_REGION_ENV); if ((ret = __db_appname(dbenv, DB_APP_NONE, buf, 0, NULL, &infop->name)) != 0) goto err; /* * Try to create the file, if we have the authority. We have to ensure * that multiple threads/processes attempting to simultaneously create * the file are properly ordered. Open using the O_CREAT and O_EXCL * flags so that multiple attempts to create the region will return * failure in all but one. POSIX 1003.1 requires that EEXIST be the * errno return value -- I sure hope they're right. */ if (F_ISSET(dbenv, DB_ENV_CREATE)) { if ((ret = __os_open(dbenv, infop->name, DB_OSO_CREATE | DB_OSO_DIRECT | DB_OSO_EXCL | DB_OSO_REGION, dbenv->db_mode, dbenv->lockfhp)) == 0) goto creation; if (ret != EEXIST) { __db_err(dbenv, "%s: %s", infop->name, db_strerror(ret)); goto err; } } /* * If we couldn't create the file, try and open it. (If that fails, * we're done.) */ if ((ret = __os_open(dbenv, infop->name, DB_OSO_REGION | DB_OSO_DIRECT, dbenv->db_mode, dbenv->lockfhp)) != 0) goto err; /* * !!! * The region may be in system memory not backed by the filesystem * (more specifically, not backed by this file), and we're joining * it. In that case, the process that created it will have written * out a REGENV_REF structure as its only contents. We read that * structure before we do anything further, e.g., we can't just map * that file in and then figure out what's going on. * * All of this noise is because some systems don't have a coherent VM * and buffer cache, and what's worse, when you mix operations on the * VM and buffer cache, half the time you hang the system. * * If the file is the size of an REGENV_REF structure, then we know * the real region is in some other memory. (The only way you get a * file that size is to deliberately write it, as it's smaller than * any possible disk sector created by writing a file or mapping the * file into memory.) In which case, retrieve the structure from the * file and use it to acquire the referenced memory. * * If the structure is larger than a REGENV_REF structure, then this * file is backing the shared memory region, and we just map it into * memory. * * And yes, this makes me want to take somebody and kill them. (I * digress -- but you have no freakin' idea. This is unbelievably * stupid and gross, and I've probably spent six months of my life, * now, trying to make different versions of it work.) */ if ((ret = __os_ioinfo(dbenv, infop->name, dbenv->lockfhp, &mbytes, &bytes, NULL)) != 0) { __db_err(dbenv, "%s: %s", infop->name, db_strerror(ret)); goto err; } /* * !!! * A size_t is OK -- regions get mapped into memory, and so can't * be larger than a size_t. */ size = mbytes * MEGABYTE + bytes; /* * If the size is less than the size of a REGENV_REF structure, the * region (or, possibly, the REGENV_REF structure) has not yet been * completely written. Wait awhile and try again. * * Otherwise, if the size is the size of a REGENV_REF structure, * read it into memory and use it as a reference to the real region. */ if (size <= sizeof(ref)) { if (size != sizeof(ref)) goto retry; if ((ret = __os_read(dbenv, dbenv->lockfhp, &ref, sizeof(ref), &nrw)) != 0 || nrw < (size_t)sizeof(ref)) { if (ret == 0) ret = EIO; __db_err(dbenv, "%s: unable to read system-memory information from: %s", infop->name, db_strerror(ret)); goto err; } size = ref.size; segid = ref.segid; F_SET(dbenv, DB_ENV_SYSTEM_MEM); } else if (F_ISSET(dbenv, DB_ENV_SYSTEM_MEM)) { ret = EINVAL; __db_err(dbenv, "%s: existing environment not created in system memory: %s", infop->name, db_strerror(ret)); goto err; } else segid = INVALID_REGION_SEGID; /* * If not doing thread locking, we need to save the file handle for * fcntl(2) locking. Otherwise, discard the handle, we no longer * need it, and the less contact between the buffer cache and the VM, * the better. */#ifdef HAVE_MUTEX_THREADS __os_closehandle(dbenv, dbenv->lockfhp);#endif /* Call the region join routine to acquire the region. */ memset(&tregion, 0, sizeof(tregion)); tregion.size = (roff_t)size; tregion.segid = segid; if ((ret = __os_r_attach(dbenv, infop, &tregion)) != 0) goto err; /* * The environment's REGENV structure has to live at offset 0 instead * of the usual shalloc information. Set the primary reference and * correct the "addr" value to reference the shalloc region. Note, * this means that all of our offsets (R_ADDR/R_OFFSET) get shifted * as well, but that should be fine. */ infop->primary = R_ADDR(infop, 0); infop->addr = (u_int8_t *)infop->addr + sizeof(REGENV); /* * Check if the environment has had a catastrophic failure. * * Check the magic number to ensure the region is initialized. If the * magic number isn't set, the lock may not have been initialized, and * an attempt to use it could lead to random behavior. * * The panic and magic values aren't protected by any lock, so we never * use them in any check that's more complex than set/not-set. * * !!! * I'd rather play permissions games using the underlying file, but I * can't because Windows/NT filesystems won't open files mode 0. */ renv = infop->primary; if (renv->envpanic && !F_ISSET(dbenv, DB_ENV_NOPANIC)) { ret = __db_panic_msg(dbenv); goto err; } if (renv->magic != DB_REGION_MAGIC) goto retry; /* Make sure the region matches our build. */ if (renv->majver != DB_VERSION_MAJOR || renv->minver != DB_VERSION_MINOR || renv->patch != DB_VERSION_PATCH) { __db_err(dbenv, "Program version %d.%d.%d doesn't match environment version %d.%d.%d", DB_VERSION_MAJOR, DB_VERSION_MINOR, DB_VERSION_PATCH, renv->majver, renv->minver, renv->patch);#ifndef DIAGNOSTIC ret = EINVAL; goto err;#endif } /* Lock the environment. */ MUTEX_LOCK(dbenv, &renv->mutex); /* * Finally! We own the environment now. Repeat the panic check, it's * possible that it was set while we waited for the lock. */ if (renv->envpanic && !F_ISSET(dbenv, DB_ENV_NOPANIC)) { ret = __db_panic_msg(dbenv); goto err_unlock; } /* * Get a reference to the underlying REGION information for this * environment. */ if ((ret = __db_des_get(dbenv, infop, infop, &rp)) != 0 || rp == NULL) { MUTEX_UNLOCK(dbenv, &renv->mutex); goto find_err; } infop->rp = rp; /* * There's still a possibility for inconsistent data. When we acquired * the size of the region and attached to it, it might have still been * growing as part of its creation. We can detect this by checking the * size we originally found against the region's current size. (The * region's current size has to be final, the creator finished growing * it before releasing the environment for us to lock.) */ if (rp->size != size) {err_unlock: MUTEX_UNLOCK(dbenv, &renv->mutex); goto retry; } /* Increment the reference count. */ ++renv->refcnt; /* * If our caller wants them, return the flags this environment was * initialized with. */ if (init_flagsp != NULL) *init_flagsp = renv->init_flags; /* Discard our lock. */ MUTEX_UNLOCK(dbenv, &renv->mutex); /* * Fault the pages into memory. Note, do this AFTER releasing the * lock, because we're only reading the pages, not writing them. */ (void)__db_faultmem(dbenv, infop->primary, rp->size, 0); /* Everything looks good, we're done. */ dbenv->reginfo = infop; return (0);creation: /* Create the environment region. */ F_SET(infop, REGION_CREATE); /* * Allocate room for 50 REGION structures plus overhead (we're going * to use this space for last-ditch allocation requests), although we * should never need anything close to that. * * Encryption passwds are stored in the env region. Add that in too. */ memset(&tregion, 0, sizeof(tregion)); tregion.size = (roff_t)(50 * sizeof(REGION) + dbenv->passwd_len + 2048); tregion.segid = INVALID_REGION_SEGID; if ((ret = __os_r_attach(dbenv, infop, &tregion)) != 0) goto err; /* * Fault the pages into memory. Note, do this BEFORE we initialize * anything, because we're writing the pages, not just reading them. */ (void)__db_faultmem(dbenv, infop->addr, tregion.size, 1); /* * The first object in the region is the REGENV structure. This is * different from the other regions, and, from everything else in * this region, where all objects are allocated from the pool, i.e., * there aren't any fixed locations. The remaining space is made * available for later allocation. * * The allocation space must be size_t aligned, because that's what * the initialization routine is going to store there. To make sure * that happens, the REGENV structure was padded with a final size_t. * No other region needs to worry about it because all of them treat * the entire region as allocation space. * * Set the primary reference and correct the "addr" value to reference * the shalloc region. Note, this requires that we "uncorrect" it at * region detach, and that all of our offsets (R_ADDR/R_OFFSET) will be * shifted as well, but that should be fine. */ infop->primary = R_ADDR(infop, 0); infop->addr = (u_int8_t *)infop->addr + sizeof(REGENV); __db_shalloc_init(infop->addr, tregion.size - sizeof(REGENV)); /* * Initialize the rest of the REGENV structure, except for the magic * number which validates the file/environment. */ renv = infop->primary; renv->envpanic = 0; db_version(&renv->majver, &renv->minver, &renv->patch); SH_LIST_INIT(&renv->regionq); renv->refcnt = 1; renv->cipher_off = INVALID_ROFF; renv->rep_off = INVALID_ROFF; /* * Initialize init_flags to store the flags that any other environment * handle that uses DB_JOINENV to join this environment will need. */ renv->init_flags = (init_flagsp == NULL) ? 0 : *init_flagsp; /* * Lock the environment. * * Check the lock call return. This is the first lock we initialize * and acquire, and we have to know if it fails. (It CAN fail, e.g., * SunOS, when using fcntl(2) for locking and using an in-memory * filesystem as the database home. But you knew that, I'm sure -- it * probably wasn't even worth mentioning.)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -