📄 db_dispatch.c
字号:
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996-2002
 *	Sleepycat Software. All rights reserved.
 */
/*
 * Copyright (c) 1995, 1996
 *	The President and Fellows of Harvard University. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Margo Seltzer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "db_config.h"

#ifndef lint
static const char revid[] = "$Id: db_dispatch.c,v 11.121 2002/09/07 17:36:31 ubell Exp $";
#endif /* not lint */

#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>

#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#endif

#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/hash.h"
#include "dbinc/log.h"
#include "dbinc/fop.h"
#include "dbinc/rep.h"
#include "dbinc/txn.h"

static int __db_limbo_fix __P((DB *,
    DB_TXN *, DB_TXNLIST *, db_pgno_t *, DBMETA *));
static int __db_limbo_bucket __P((DB_ENV *, DB_TXN *, DB_TXNLIST *));
static int __db_limbo_move __P((DB_ENV *, DB_TXN *, DB_TXN *, DB_TXNLIST *));
static int __db_lock_move __P((DB_ENV *,
    u_int8_t *, db_pgno_t, db_lockmode_t, DB_TXN *, DB_TXN *));
static int __db_default_getpgnos __P((DB_ENV *, DB_LSN *lsnp, void *));
static int __db_txnlist_find_internal __P((DB_ENV *, void *, db_txnlist_type,
    u_int32_t, u_int8_t [DB_FILE_ID_LEN], DB_TXNLIST **, int));
static int __db_txnlist_pgnoadd __P((DB_ENV *, DB_TXNHEAD *,
    int32_t, u_int8_t [DB_FILE_ID_LEN], char *, db_pgno_t));

/*
 * __db_dispatch --
 *
 * This is the transaction dispatch function used by the db access methods.
 * It is designed to handle the record format used by all the access
 * methods (the one automatically generated by the db_{h,log,read}.sh
 * scripts in the tools directory). An application using a different
 * recovery paradigm will supply a different dispatch function to txn_open.
 *
 * The record in db->data is read as: a u_int32_t record type, followed by
 * a u_int32_t transaction id, followed (only consulted for
 * DB_TXN_OPENFILES) by the DB_LSN of the previous record.
 *
 * Depending on the operation (redo) and the record type, the record is
 * either skipped, handed to the application's app_dispatch function
 * (record types >= DB_user_BEGIN when one is configured), or handed to
 * dtab[rectype].
 *
 * Returns 0 when no handler needed to be called, the handler's return
 * value when one was called, EINVAL when the record type has no entry in
 * the dispatch table, or the result of __db_unknown_flag for an
 * unrecognized operation.  Errors from the transaction list routines are
 * returned as-is.
 *
 * PUBLIC: int __db_dispatch __P((DB_ENV *,
 * PUBLIC:     int (**)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)),
 * PUBLIC:     size_t, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_dispatch(dbenv, dtab, dtabsize, db, lsnp, redo, info)
	DB_ENV *dbenv;		/* The environment. */
	int (**dtab)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
	size_t dtabsize;	/* Size of the dtab. */
	DBT *db;		/* The log record upon which to dispatch. */
	DB_LSN *lsnp;		/* The lsn of the record being dispatched. */
	db_recops redo;		/* Redo this op (or undo it). */
	void *info;
{
	DB_LSN prev_lsn;
	u_int32_t rectype, txnid;
	int make_call, ret;

	/*
	 * Copy the header fields out rather than casting db->data, so no
	 * alignment is required of the record buffer.
	 */
	memcpy(&rectype, db->data, sizeof(rectype));
	memcpy(&txnid, (u_int8_t *)db->data + sizeof(rectype), sizeof(txnid));
	make_call = ret = 0;

	/* If we don't have a dispatch table, it's hard to dispatch. */
	DB_ASSERT(dtab != NULL);

	/*
	 * If we find a record that is in the user's number space and they
	 * have specified a recovery routine, let them handle it. If they
	 * didn't specify a recovery routine, then we expect that they've
	 * followed all our rules and registered new recovery functions.
	 */
	switch (redo) {
	case DB_TXN_ABORT:
	case DB_TXN_APPLY:
	case DB_TXN_PRINT:
		make_call = 1;
		break;
	case DB_TXN_OPENFILES:
		/*
		 * We collect all the transactions that have
		 * "begin" records, those with no previous LSN,
		 * so that we do not abort partial transactions.
		 * These are known to be undone, otherwise the
		 * log would not have been freeable.
		 */
		memcpy(&prev_lsn, (u_int8_t *)db->data +
		    sizeof(rectype) + sizeof(txnid), sizeof(prev_lsn));
		if (txnid != 0 && prev_lsn.file == 0 && (ret =
		    __db_txnlist_add(dbenv, info, txnid, TXN_OK, NULL)) != 0)
			return (ret);

		/* FALLTHROUGH */
	case DB_TXN_POPENFILES:
		if (rectype == DB___dbreg_register ||
		    rectype == DB___txn_ckp || rectype == DB___txn_recycle)
			return (dtab[rectype](dbenv, db, lsnp, redo, info));
		break;
	case DB_TXN_BACKWARD_ROLL:
		/*
		 * Running full recovery in the backward pass. If we've
		 * seen this txnid before and added to it our commit list,
		 * then we do nothing during this pass, unless this is a child
		 * commit record, in which case we need to process it. If
		 * we've never seen it, then we call the appropriate recovery
		 * routine.
		 *
		 * We need to always undo DB___db_noop records, so that we
		 * properly handle any aborts before the file was closed.
		 */
		switch(rectype) {
		case DB___txn_regop:
		case DB___txn_recycle:
		case DB___txn_ckp:
		case DB___db_noop:
		case DB___fop_file_remove:
		case DB___txn_child:
			make_call = 1;
			break;

		case DB___dbreg_register:
			if (txnid == 0) {
				make_call = 1;
				break;
			}
			/* FALLTHROUGH */
		default:
			if (txnid != 0 && (ret =
			    __db_txnlist_find(dbenv,
			    info, txnid)) != TXN_COMMIT && ret != TXN_IGNORE) {
				/*
				 * If not found then, this is an incomplete
				 * abort.
				 */
				if (ret == TXN_NOTFOUND)
					return (__db_txnlist_add(dbenv,
					    info, txnid, TXN_IGNORE, lsnp));
				make_call = 1;
				if (ret == TXN_OK &&
				    (ret = __db_txnlist_update(dbenv,
				    info, txnid,
				    rectype == DB___txn_xa_regop ?
				    TXN_PREPARE : TXN_ABORT, NULL)) != 0)
					return (ret);
			}
		}
		break;
	case DB_TXN_FORWARD_ROLL:
		/*
		 * In the forward pass, if we haven't seen the transaction,
		 * do nothing, else recover it.
		 *
		 * We need to always redo DB___db_noop records, so that we
		 * properly handle any commits after the file was closed.
		 */
		switch(rectype) {
		case DB___txn_recycle:
		case DB___txn_ckp:
		case DB___db_noop:
			make_call = 1;
			break;

		default:
			/*
			 * When txnid is 0 the lookup is skipped and ret still
			 * holds its initial 0 for the tests below.
			 */
			if (txnid != 0 && (ret = __db_txnlist_find(dbenv,
			    info, txnid)) == TXN_COMMIT)
				make_call = 1;
			else if (ret != TXN_IGNORE &&
			    (rectype == DB___ham_metagroup ||
			    rectype == DB___ham_groupalloc ||
			    rectype == DB___db_pg_alloc)) {
				/*
				 * Because we cannot undo file extensions
				 * all allocation records must be reprocessed
				 * during rollforward in case the file was
				 * just created. It may not have been
				 * present during the backward pass.
				 */
				make_call = 1;
				redo = DB_TXN_BACKWARD_ALLOC;
			} else if (rectype == DB___dbreg_register) {
				/*
				 * This may be a transaction dbreg_register.
				 * If it is, we only make the call on a COMMIT,
				 * which we checked above. If it's not, then we
				 * should always make the call, because we need
				 * the file open information.
				 */
				if (txnid == 0)
					make_call = 1;
			}
		}
		break;
	case DB_TXN_GETPGNOS:
		/*
		 * If this is one of DB's own log records, we simply
		 * dispatch.
		 */
		if (rectype < DB_user_BEGIN) {
			make_call = 1;
			break;
		}

		/*
		 * If we're still here, this is a custom record in an
		 * application that's doing app-specific logging. Such a
		 * record doesn't have a getpgno function for the user
		 * dispatch function to call--the getpgnos functions return
		 * which pages replication needs to lock using the TXN_RECS
		 * structure, which is private and not something we want to
		 * document.
		 *
		 * Thus, we leave any necessary locking for the app's
		 * recovery function to do during the upcoming
		 * DB_TXN_APPLY. Fill in default getpgnos info (we need
		 * a stub entry for every log record that will get
		 * DB_TXN_APPLY'd) and return success.
		 */
		return (__db_default_getpgnos(dbenv, lsnp, info));
	default:
		return (__db_unknown_flag(dbenv, "__db_dispatch", redo));
	}

	/*
	 * The switch statement uses ret to receive the return value of
	 * __db_txnlist_find, which returns a large number of different
	 * statuses, none of which we will be returning. For safety,
	 * let's reset this here in case we ever do a "return(ret)"
	 * below in the future.
	 */
	ret = 0;

	if (make_call) {
		if (rectype >= DB_user_BEGIN && dbenv->app_dispatch != NULL)
			return (dbenv->app_dispatch(dbenv, db, lsnp, redo));
		else {
			/*
			 * The size of the dtab table argument is the same as
			 * the standard table, use the standard table's size
			 * as our sanity check.
			 *
			 * dtabsize is an element count (see
			 * __db_add_recovery), so the last valid index is
			 * dtabsize - 1; rectype == dtabsize must be rejected
			 * too or we would read one slot past the table.
			 */
			if (rectype >= dtabsize || dtab[rectype] == NULL) {
				__db_err(dbenv,
				    "Illegal record type %lu in log",
				    (u_long)rectype);
				return (EINVAL);
			}
			return (dtab[rectype](dbenv, db, lsnp, redo, info));
		}
	}

	return (0);
}

/*
 * __db_add_recovery --
 *	Register func as the handler for record type ndx in the dispatch
 *	table *dtab.  If ndx does not fit in the current table (*dtabsize
 *	entries), the table is reallocated to ndx + 40 entries, the new
 *	entries are set to NULL and *dtabsize is updated.
 *
 *	Returns 0 on success or the error returned by __os_realloc.
 *
 * PUBLIC: int __db_add_recovery __P((DB_ENV *,
 * PUBLIC:   int (***)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *), size_t *,
 * PUBLIC:   int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *), u_int32_t));
 */
int
__db_add_recovery(dbenv, dtab, dtabsize, func, ndx)
	DB_ENV *dbenv;
	int (***dtab) __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
	size_t *dtabsize;
	int (*func) __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
	u_int32_t ndx;
{
	size_t i, nsize;
	int ret;

	/* Check if we have to grow the table. */
	if (ndx >= *dtabsize) {
		/* Grow past ndx so nearby indices don't each reallocate. */
		nsize = ndx + 40;
		if ((ret =
		    __os_realloc(dbenv, nsize * sizeof((*dtab)[0]), dtab)) != 0)
			return (ret);
		/* Only the newly added tail needs clearing. */
		for (i = *dtabsize; i < nsize; ++i)
			(*dtab)[i] = NULL;
		*dtabsize = nsize;
	}

	(*dtab)[ndx] = func;
	return (0);
}

/*
 * __db_txnlist_init --
 *	Initialize transaction linked list.
 *
 *	Allocates a zeroed DB_TXNHEAD with a hash table sized from the
 *	low_txn/hi_txn range, allocates its generation array (8 entries, the
 *	first covering TXN_MINIMUM..TXN_MAXIMUM as generation 0), records
 *	*trunc_lsn (or a zero LSN when trunc_lsn is NULL), and stores the
 *	new head through retp, which is treated as a pointer to a pointer.
 *
 *	Returns 0 on success or the error returned by __os_malloc; nothing
 *	is stored through retp on failure.
 *
 * PUBLIC: int __db_txnlist_init __P((DB_ENV *,
 * PUBLIC:     u_int32_t, u_int32_t, DB_LSN *, void *));
 */
int
__db_txnlist_init(dbenv, low_txn, hi_txn, trunc_lsn, retp)
	DB_ENV *dbenv;
	u_int32_t low_txn, hi_txn;
	DB_LSN *trunc_lsn;
	void *retp;
{
	DB_TXNHEAD *headp;
	u_int32_t tmp;
	int ret, size;

	/*
	 * Size a hash table.
	 *	If low is zero then we are being called during rollback
	 * and we need only one slot.
	 *	Hi maybe lower than low if we have recycled txnid's.
	 *	The numbers here are guesses about txn density, we can afford
	 * to look at a few entries in each slot.
	 */
	if (low_txn == 0)
		size = 1;
	else {
		if (hi_txn < low_txn) {
			tmp = hi_txn;
			hi_txn = low_txn;
			low_txn = tmp;
		}
		tmp = hi_txn - low_txn;
		/* See if we wrapped around. */
		if (tmp > (TXN_MAXIMUM - TXN_MINIMUM) / 2)
			tmp = (low_txn - TXN_MINIMUM) + (TXN_MAXIMUM - hi_txn);
		size = tmp / 5;
		if (size < 100)
			size = 100;
	}
	if ((ret = __os_malloc(dbenv,
	    sizeof(DB_TXNHEAD) + size * sizeof(headp->head), &headp)) != 0)
		return (ret);

	memset(headp, 0, sizeof(DB_TXNHEAD) + size * sizeof(headp->head));
	/* Note: hi_txn is the larger of the two ids after the swap above. */
	headp->maxid = hi_txn;
	headp->generation = 0;
	headp->nslots = size;
	headp->gen_alloc = 8;
	if ((ret = __os_malloc(dbenv, headp->gen_alloc *
	    sizeof(headp->gen_array[0]), &headp->gen_array)) != 0) {
		/* Don't leak the head if the generation array fails. */
		__os_free(dbenv, headp);
		return (ret);
	}
	headp->gen_array[0].generation = 0;
	headp->gen_array[0].txn_min = TXN_MINIMUM;
	headp->gen_array[0].txn_max = TXN_MAXIMUM;
	if (trunc_lsn != NULL)
		headp->trunc_lsn = *trunc_lsn;
	else
		ZERO_LSN(headp->trunc_lsn);
	ZERO_LSN(headp->maxlsn);
	ZERO_LSN(headp->ckplsn);

	*(void **)retp = headp;
	return (0);
}

/*
 * __db_txnlist_add --
 *	Add an element to our transaction linked list.
 *
 *	Allocates a TXNLIST_TXNID entry for txnid with the given status and
 *	the head's current generation, and links it at the front of the hash
 *	bucket selected by DB_TXNLIST_MASK.  The head's maxid is raised if
 *	txnid exceeds it, and maxlsn is set from *lsn the first time a
 *	TXN_COMMIT entry is added with a non-NULL lsn.
 *
 *	Returns 0 on success or the error returned by __os_malloc.
 *
 * PUBLIC: int __db_txnlist_add __P((DB_ENV *,
 * PUBLIC:     void *, u_int32_t, int32_t, DB_LSN *));
 */
int
__db_txnlist_add(dbenv, listp, txnid, status, lsn)
	DB_ENV *dbenv;
	void *listp;
	u_int32_t txnid;
	int32_t status;
	DB_LSN *lsn;
{
	DB_TXNHEAD *hp;
	DB_TXNLIST *elp;
	int ret;

	if ((ret = __os_malloc(dbenv, sizeof(DB_TXNLIST), &elp)) != 0)
		return (ret);

	hp = (DB_TXNHEAD *)listp;
	LIST_INSERT_HEAD(&hp->head[DB_TXNLIST_MASK(hp, txnid)], elp, links);

	elp->type = TXNLIST_TXNID;
	elp->u.t.txnid = txnid;
	elp->u.t.status = status;
	elp->u.t.generation = hp->generation;
	if (txnid > hp->maxid)
		hp->maxid = txnid;
	/* Only the first committed LSN seen is recorded as maxlsn. */
	if (lsn != NULL && IS_ZERO_LSN(hp->maxlsn) && status == TXN_COMMIT)
		hp->maxlsn = *lsn;

	DB_ASSERT(lsn == NULL ||
	    status != TXN_COMMIT || log_compare(&hp->maxlsn, lsn) >= 0);

	return (0);
}

/*
 * __db_txnlist_remove --
 *	Remove an element from our transaction linked list.
 *
 *	Looks txnid up through __db_txnlist_find_internal, passing 1 as the
 *	final argument (presumably the "delete the entry" flag -- confirm
 *	against __db_txnlist_find_internal).
 *
 *	Returns TXN_NOTFOUND if the lookup reported TXN_NOTFOUND, otherwise
 *	TXN_OK.
 *
 * PUBLIC: int __db_txnlist_remove __P((DB_ENV *, void *, u_int32_t));
 */
int
__db_txnlist_remove(dbenv, listp, txnid)
	DB_ENV *dbenv;
	void *listp;
	u_int32_t txnid;
{
	DB_TXNLIST *entry;

	return (__db_txnlist_find_internal(dbenv,
	    listp, TXNLIST_TXNID, txnid,
	    NULL, &entry, 1) == TXN_NOTFOUND ? TXN_NOTFOUND : TXN_OK);
}

/*
 * __db_txnlist_ckp --
 *	Used to record the maximum checkpoint that will be retained
 * after recovery. Typically this is simply the max checkpoint, but
 * if we are doing client replication recovery or timestamp-based
 * recovery, we are going to virtually truncate the log and we need
 * to retain the last checkpoint before the truncation point.
 *
 * PUBLIC: void __db_txnlist_ckp __P((DB_ENV *, void *, DB_LSN *));
 */
void
__db_txnlist_ckp(dbenv, listp, ckp_lsn)
	DB_ENV *dbenv;
	void *listp;
	DB_LSN *ckp_lsn;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -