📄 txn.c
字号:
/*- * See the file LICENSE for redistribution information. * * Copyright (c) 1996-2004 * Sleepycat Software. All rights reserved. *//* * Copyright (c) 1995, 1996 * The President and Fellows of Harvard University. All rights reserved. * * This code is derived from software contributed to Berkeley by * Margo Seltzer. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $Id: txn.c,v 11.249 2004/10/15 16:59:44 bostic Exp $ */#include "db_config.h"#ifndef NO_SYSTEM_INCLUDES#include <sys/types.h>#include <stdlib.h>#if TIME_WITH_SYS_TIME#include <sys/time.h>#include <time.h>#else#if HAVE_SYS_TIME_H#include <sys/time.h>#else#include <time.h>#endif#endif#include <string.h>#endif#include "db_int.h"#include "dbinc/crypto.h"#include "dbinc/hmac.h"#include "dbinc/db_page.h"#include "dbinc/db_shash.h"#include "dbinc/hash.h"#include "dbinc/lock.h"#include "dbinc/log.h"#include "dbinc/mp.h"#include "dbinc/txn.h"#define SET_LOG_FLAGS(dbenv, txnp, lflags) \ do { \ lflags = DB_LOG_COMMIT | DB_LOG_PERM; \ if (F_ISSET(txnp, TXN_SYNC)) \ lflags |= DB_FLUSH; \ else if (!F_ISSET(txnp, TXN_NOSYNC) && \ !F_ISSET(dbenv, DB_ENV_TXN_NOSYNC)) { \ if (F_ISSET(dbenv, DB_ENV_TXN_WRITE_NOSYNC)) \ lflags |= DB_LOG_WRNOSYNC; \ else \ lflags |= DB_FLUSH; \ } \ } while (0)/* * __txn_isvalid enumerated types. We cannot simply use the transaction * statuses, because different statuses need to be handled differently * depending on the caller. */typedef enum { TXN_OP_ABORT, TXN_OP_COMMIT, TXN_OP_DISCARD, TXN_OP_PREPARE} txnop_t;static int __txn_abort_pp __P((DB_TXN *));static int __txn_begin_int __P((DB_TXN *, int));static int __txn_commit_pp __P((DB_TXN *, u_int32_t));static int __txn_discard_pp __P((DB_TXN *, u_int32_t));static int __txn_end __P((DB_TXN *, int));static int __txn_isvalid __P((const DB_TXN *, TXN_DETAIL **, txnop_t));static int __txn_undo __P((DB_TXN *));static int __txn_dispatch_undo __P((DB_ENV *, DB_TXN *, DBT *, DB_LSN *, void *));static void __txn_set_begin_lsnp __P((DB_TXN *txn, DB_LSN **));/* * __txn_begin_pp -- * DB_ENV->txn_begin pre/post processing. * * PUBLIC: int __txn_begin_pp __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t)); */int__txn_begin_pp(dbenv, parent, txnpp, flags) DB_ENV *dbenv; DB_TXN *parent, **txnpp; u_int32_t flags;{ int rep_check, ret; PANIC_CHECK(dbenv); ENV_REQUIRES_CONFIG(dbenv, dbenv->tx_handle, "txn_begin", DB_INIT_TXN); if ((ret = __db_fchk(dbenv, "txn_begin", flags, DB_DEGREE_2 | DB_DIRTY_READ | DB_TXN_NOWAIT | DB_TXN_NOSYNC | DB_TXN_SYNC)) != 0) return (ret); if ((ret = __db_fcchk(dbenv, "txn_begin", flags, DB_TXN_NOSYNC, DB_TXN_SYNC)) != 0) return (ret); if (parent == NULL) { rep_check = IS_ENV_REPLICATED(dbenv) ? 1 : 0; if (rep_check) __op_rep_enter(dbenv); } else rep_check = 0; ret = __txn_begin(dbenv, parent, txnpp, flags); /* * We only decrement the count if the operation fails. * Otherwise the count will be decremented when the * txn is resolved by txn_commit, txn_abort, etc. */ if (ret != 0 && rep_check) __op_rep_exit(dbenv); return (ret);}/* * __txn_begin -- * DB_ENV->txn_begin. * * This is a wrapper to the actual begin process. Normal transaction begin * allocates a DB_TXN structure for the caller, while XA transaction begin * does not. Other than that, both call into common __txn_begin_int code. * * Internally, we use TXN_DETAIL structures, but the DB_TXN structure * provides access to the transaction ID and the offset in the transaction * region of the TXN_DETAIL structure. * * PUBLIC: int __txn_begin __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t)); */int__txn_begin(dbenv, parent, txnpp, flags) DB_ENV *dbenv; DB_TXN *parent, **txnpp; u_int32_t flags;{ DB_LOCKREGION *region; DB_TXN *txn; int ret; *txnpp = NULL; if ((ret = __os_calloc(dbenv, 1, sizeof(DB_TXN), &txn)) != 0) return (ret); txn->mgrp = dbenv->tx_handle; txn->parent = parent; TAILQ_INIT(&txn->kids); TAILQ_INIT(&txn->events); STAILQ_INIT(&txn->logs); txn->flags = TXN_MALLOC; if (LF_ISSET(DB_DEGREE_2)) F_SET(txn, TXN_DEGREE_2); if (LF_ISSET(DB_DIRTY_READ)) F_SET(txn, TXN_DIRTY_READ); if (LF_ISSET(DB_TXN_NOSYNC)) F_SET(txn, TXN_NOSYNC); if (LF_ISSET(DB_TXN_SYNC)) F_SET(txn, TXN_SYNC); if (LF_ISSET(DB_TXN_NOWAIT)) F_SET(txn, TXN_NOWAIT); if ((ret = __txn_begin_int(txn, 0)) != 0) goto err; if (parent != NULL) TAILQ_INSERT_HEAD(&parent->kids, txn, klinks); if (LOCKING_ON(dbenv)) { region = ((DB_LOCKTAB *)dbenv->lk_handle)->reginfo.primary; if (parent != NULL) { ret = __lock_inherit_timeout(dbenv, parent->txnid, txn->txnid); /* No parent locker set yet. */ if (ret == EINVAL) { parent = NULL; ret = 0; } if (ret != 0) goto err; } /* * Parent is NULL if we have no parent * or it has no timeouts set. */ if (parent == NULL && region->tx_timeout != 0) if ((ret = __lock_set_timeout(dbenv, txn->txnid, region->tx_timeout, DB_SET_TXN_TIMEOUT)) != 0) goto err; } *txnpp = txn; return (0);err: __os_free(dbenv, txn); return (ret);}/* * __txn_xa_begin -- * XA version of txn_begin. * * PUBLIC: int __txn_xa_begin __P((DB_ENV *, DB_TXN *)); */int__txn_xa_begin(dbenv, txn) DB_ENV *dbenv; DB_TXN *txn;{ PANIC_CHECK(dbenv); /* * We need to initialize the transaction structure, but must be careful * not to smash the links. We manually initialize the structure. */ txn->mgrp = dbenv->tx_handle; TAILQ_INIT(&txn->kids); TAILQ_INIT(&txn->events); STAILQ_INIT(&txn->logs); txn->parent = NULL; ZERO_LSN(txn->last_lsn); txn->txnid = TXN_INVALID; txn->tid = 0; txn->cursors = 0; memset(&txn->lock_timeout, 0, sizeof(db_timeout_t)); memset(&txn->expire, 0, sizeof(db_timeout_t)); return (__txn_begin_int(txn, 0));}/* * __txn_compensate_begin * Begin an compensation transaction. This is a special interface * that is used only for transactions that must be started to compensate * for actions during an abort. Currently only used for allocations. * * PUBLIC: int __txn_compensate_begin __P((DB_ENV *, DB_TXN **txnp)); */int__txn_compensate_begin(dbenv, txnpp) DB_ENV *dbenv; DB_TXN **txnpp;{ DB_TXN *txn; int ret; PANIC_CHECK(dbenv); if ((ret = __os_calloc(dbenv, 1, sizeof(DB_TXN), &txn)) != 0) return (ret); txn->mgrp = dbenv->tx_handle; TAILQ_INIT(&txn->kids); TAILQ_INIT(&txn->events); STAILQ_INIT(&txn->logs); txn->flags = TXN_COMPENSATE | TXN_MALLOC; *txnpp = txn; return (__txn_begin_int(txn, 1));}/* * __txn_begin_int -- * Normal DB version of txn_begin. */static int__txn_begin_int(txn, internal) DB_TXN *txn; int internal;{ DB_ENV *dbenv; DB_LSN null_lsn; DB_TXNMGR *mgr; DB_TXNREGION *region; TXN_DETAIL *td; size_t off; u_int32_t id, *ids; int nids, ret; mgr = txn->mgrp; dbenv = mgr->dbenv; region = mgr->reginfo.primary; R_LOCK(dbenv, &mgr->reginfo); if (!F_ISSET(txn, TXN_COMPENSATE) && F_ISSET(region, TXN_IN_RECOVERY)) { __db_err(dbenv, "operation not permitted during recovery"); ret = EINVAL; goto err; } /* Make sure that we aren't still recovering prepared transactions. */ if (!internal && region->stat.st_nrestores != 0) { __db_err(dbenv, "recovery of prepared but not yet committed transactions is incomplete"); ret = EINVAL; goto err; } /* * Allocate a new transaction id. Our current valid range can span * the maximum valid value, so check for it and wrap manually. */ if (region->last_txnid == TXN_MAXIMUM && region->cur_maxid != TXN_MAXIMUM) region->last_txnid = TXN_MINIMUM - 1; if (region->last_txnid == region->cur_maxid) { if ((ret = __os_malloc(dbenv, sizeof(u_int32_t) * region->maxtxns, &ids)) != 0) goto err; nids = 0; for (td = SH_TAILQ_FIRST(®ion->active_txn, __txn_detail); td != NULL; td = SH_TAILQ_NEXT(td, links, __txn_detail)) ids[nids++] = td->txnid; region->last_txnid = TXN_MINIMUM - 1; region->cur_maxid = TXN_MAXIMUM; if (nids != 0) __db_idspace(ids, nids, ®ion->last_txnid, ®ion->cur_maxid); __os_free(dbenv, ids); if (DBENV_LOGGING(dbenv) && (ret = __txn_recycle_log(dbenv, NULL, &null_lsn, 0, region->last_txnid + 1, region->cur_maxid)) != 0) goto err; } /* Allocate a new transaction detail structure. */ if ((ret = __db_shalloc(&mgr->reginfo, sizeof(TXN_DETAIL), 0, &td)) != 0) { __db_err(dbenv, "Unable to allocate memory for transaction detail"); goto err; } /* Place transaction on active transaction list. */ SH_TAILQ_INSERT_HEAD(®ion->active_txn, td, links, __txn_detail); id = ++region->last_txnid; ++region->stat.st_nbegins; if (++region->stat.st_nactive > region->stat.st_maxnactive) region->stat.st_maxnactive = region->stat.st_nactive; td->txnid = id; ZERO_LSN(td->last_lsn); ZERO_LSN(td->begin_lsn); if (txn->parent != NULL) td->parent = txn->parent->off; else td->parent = INVALID_ROFF; td->status = TXN_RUNNING; td->flags = 0; td->xa_status = 0; off = R_OFFSET(&mgr->reginfo, td); R_UNLOCK(dbenv, &mgr->reginfo); ZERO_LSN(txn->last_lsn); txn->txnid = id; txn->off = (u_int32_t)off; txn->abort = __txn_abort_pp; txn->commit = __txn_commit_pp; txn->discard = __txn_discard_pp; txn->id = __txn_id; txn->prepare = __txn_prepare; txn->set_timeout = __txn_set_timeout; txn->set_begin_lsnp = __txn_set_begin_lsnp; /* * If this is a transaction family, we must link the child to the * maximal grandparent in the lock table for deadlock detection. */ if (txn->parent != NULL && LOCKING_ON(dbenv)) if ((ret = __lock_addfamilylocker(dbenv, txn->parent->txnid, txn->txnid)) != 0) return (ret); if (F_ISSET(txn, TXN_MALLOC)) { MUTEX_THREAD_LOCK(dbenv, mgr->mutexp); TAILQ_INSERT_TAIL(&mgr->txn_chain, txn, links); MUTEX_THREAD_UNLOCK(dbenv, mgr->mutexp); } return (0);err: R_UNLOCK(dbenv, &mgr->reginfo); return (ret);}/* * __txn_commit_pp -- * Interface routine to TXN->commit. */static int__txn_commit_pp(txnp, flags) DB_TXN *txnp; u_int32_t flags;{ DB_ENV *dbenv; int not_child, ret; dbenv = txnp->mgrp->dbenv; not_child = txnp->parent == NULL; ret = __txn_commit(txnp, flags); if (not_child && IS_ENV_REPLICATED(dbenv)) __op_rep_exit(dbenv); return (ret);}/* * __txn_commit -- * Commit a transaction. * * PUBLIC: int __txn_commit __P((DB_TXN *, u_int32_t)); */int__txn_commit(txnp, flags) DB_TXN *txnp; u_int32_t flags;{ DBT list_dbt; DB_ENV *dbenv; DB_LOCKREQ request; DB_TXN *kid; TXN_DETAIL *td; u_int32_t lflags; int ret, t_ret; dbenv = txnp->mgrp->dbenv; PANIC_CHECK(dbenv); if ((ret = __txn_isvalid(txnp, &td, TXN_OP_COMMIT)) != 0) return (ret); /* * We clear flags that are incorrect, ignoring any flag errors, and * default to synchronous operations. By definition, transaction * handles are dead when we return, and this error should never * happen, but we don't want to fail in the field 'cause the app is * specifying the wrong flag for some reason. */ if (__db_fchk(dbenv, "DB_TXN->commit", flags, DB_TXN_NOSYNC | DB_TXN_SYNC) != 0) flags = DB_TXN_SYNC; if (__db_fcchk(dbenv, "DB_TXN->commit", flags, DB_TXN_NOSYNC, DB_TXN_SYNC) != 0) flags = DB_TXN_SYNC; if (LF_ISSET(DB_TXN_NOSYNC)) { F_CLR(txnp, TXN_SYNC); F_SET(txnp, TXN_NOSYNC); } if (LF_ISSET(DB_TXN_SYNC)) { F_CLR(txnp, TXN_NOSYNC); F_SET(txnp, TXN_SYNC); } /* * Commit any unresolved children. If anyone fails to commit, * then try to abort the rest of the kids and then abort the parent. * Abort should never fail; if it does, we bail out immediately. */ while ((kid = TAILQ_FIRST(&txnp->kids)) != NULL) if ((ret = __txn_commit(kid, flags)) != 0) while ((kid = TAILQ_FIRST(&txnp->kids)) != NULL) if ((t_ret = __txn_abort(kid)) != 0) return (__db_panic(dbenv, t_ret)); /* * If there are any log records, write a log record and sync the log, * else do no log writes. If the commit is for a child transaction, * we do not need to commit the child synchronously since it may still * abort (if its parent aborts), and otherwise its parent or ultimate * ancestor will write synchronously. */ if (DBENV_LOGGING(dbenv) && (!IS_ZERO_LSN(txnp->last_lsn) || STAILQ_FIRST(&txnp->logs) != NULL)) { if (txnp->parent == NULL) { /* * We are about to free all the read locks for this * transaction below. Some of those locks might be * handle locks which should not be freed, because * they will be freed when the handle is closed. Check * the events and preprocess any trades now so we don't * release the locks below. */ if ((ret = __txn_doevents(dbenv, txnp, TXN_PREPARE, 1)) != 0) goto err; memset(&request, 0, sizeof(request)); if (LOCKING_ON(dbenv)) { request.op = DB_LOCK_PUT_READ; if (IS_REP_MASTER(dbenv) && !IS_ZERO_LSN(txnp->last_lsn)) { memset(&list_dbt, 0, sizeof(list_dbt)); request.obj = &list_dbt; } ret = __lock_vec(dbenv, txnp->txnid, 0, &request, 1, NULL); } if (ret == 0 && !IS_ZERO_LSN(txnp->last_lsn)) { SET_LOG_FLAGS(dbenv, txnp, lflags); ret = __txn_regop_log(dbenv, txnp, &txnp->last_lsn, lflags, TXN_COMMIT, (int32_t)time(NULL), request.obj); } if (request.obj != NULL && request.obj->data != NULL) __os_free(dbenv, request.obj->data); if (ret != 0) goto err; } else { /* Log the commit in the parent! */ if (!IS_ZERO_LSN(txnp->last_lsn) && (ret = __txn_child_log(dbenv, txnp->parent, &txnp->parent->last_lsn, 0, txnp->txnid, &txnp->last_lsn)) != 0) { goto err; } if (STAILQ_FIRST(&txnp->logs) != NULL) { /* * Put the child first so we back it out first.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -