📄 rep_util.c
字号:
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 2001-2002
 *	Sleepycat Software.  All rights reserved.
 */
#include "db_config.h"

#ifndef lint
static const char revid[] = "$Id: rep_util.c,v 1.51 2002/09/05 02:30:00 margo Exp $";
#endif /* not lint */

#ifndef NO_SYSTEM_INCLUDES
#include <stdlib.h>
#include <string.h>
#endif

#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/btree.h"
#include "dbinc/fop.h"
#include "dbinc/hash.h"
#include "dbinc/log.h"
#include "dbinc/qam.h"
#include "dbinc/rep.h"
#include "dbinc/txn.h"

/*
 * rep_util.c:
 *	Miscellaneous replication-related utility functions, including
 *	those called by other subsystems.
 */
static int __rep_cmp_bylsn __P((const void *, const void *));
static int __rep_cmp_bypage __P((const void *, const void *));

#ifdef REP_DIAGNOSTIC
static void __rep_print_logmsg __P((DB_ENV *, const DBT *, DB_LSN *));
#endif

/*
 * __rep_check_alloc --
 *	Make sure the array of TXN_REC entries has room for n entries
 *	beyond the r->npages already in use, growing it if necessary.
 *	(This function is called by the __*_getpgnos() functions in
 *	*.src.)
 *
 *	Returns 0 on success, or the error returned by __os_realloc.
 *
 * PUBLIC: int __rep_check_alloc __P((DB_ENV *, TXN_RECS *, int));
 */
int
__rep_check_alloc(dbenv, r, n)
	DB_ENV *dbenv;
	TXN_RECS *r;
	int n;
{
	int nalloc, ret;

	while (r->nalloc < r->npages + n) {
		/* Start at 20 entries, then double until large enough. */
		nalloc = r->nalloc == 0 ? 20 : r->nalloc * 2;

		if ((ret = __os_realloc(dbenv, nalloc * sizeof(LSN_PAGE),
		    &r->array)) != 0)
			return (ret);

		/* Only record the new size once the realloc succeeded. */
		r->nalloc = nalloc;
	}

	return (0);
}

/*
 * __rep_send_message --
 *	This is a wrapper for sending a message.  It takes care of constructing
 *	the REP_CONTROL structure and calling the user's specified send function.
 *
 *	A NULL lsnp sends a zeroed LSN; a NULL dbtp sends an empty record.
 *	Returns whatever the send function returns.
 *
 * PUBLIC: int __rep_send_message __P((DB_ENV *, int,
 * PUBLIC:     u_int32_t, DB_LSN *, const DBT *, u_int32_t));
 */
int
__rep_send_message(dbenv, eid, rtype, lsnp, dbtp, flags)
	DB_ENV *dbenv;
	int eid;
	u_int32_t rtype;
	DB_LSN *lsnp;
	const DBT *dbtp;
	u_int32_t flags;
{
	DB_REP *db_rep;
	REP *rep;
	DBT cdbt, scrap_dbt;
	REP_CONTROL cntrl;
	u_int32_t send_flags;
	int ret;

	db_rep = dbenv->rep_handle;
	rep = db_rep->region;

	/* Set up control structure. */
	memset(&cntrl, 0, sizeof(cntrl));
	if (lsnp == NULL)
		ZERO_LSN(cntrl.lsn);
	else
		cntrl.lsn = *lsnp;
	cntrl.rectype = rtype;
	cntrl.flags = flags;
	cntrl.rep_version = DB_REPVERSION;
	cntrl.log_version = DB_LOGVERSION;

	/* The generation number is read under the replication mutex. */
	MUTEX_LOCK(dbenv, db_rep->mutexp);
	cntrl.gen = rep->gen;
	MUTEX_UNLOCK(dbenv, db_rep->mutexp);

	memset(&cdbt, 0, sizeof(cdbt));
	cdbt.data = &cntrl;
	cdbt.size = sizeof(cntrl);

	/* Don't assume the send function will be tolerant of NULL records. */
	if (dbtp == NULL) {
		memset(&scrap_dbt, 0, sizeof(DBT));
		dbtp = &scrap_dbt;
	}

	send_flags = (LF_ISSET(DB_PERMANENT) ? DB_REP_PERMANENT : 0);

#if 0
	__rep_print_message(dbenv, eid, &cntrl, "rep_send_message");
#endif
#ifdef REP_DIAGNOSTIC
	if (rtype == REP_LOG)
		__rep_print_logmsg(dbenv, dbtp, lsnp);
#endif
	ret = db_rep->rep_send(dbenv, &cdbt, dbtp, eid, send_flags);

	/*
	 * We don't hold the rep lock, so this could miscount if we race.
	 * I don't think it's worth grabbing the mutex for that bit of
	 * extra accuracy.
	 */
	if (ret == 0)
		rep->stat.st_msgs_sent++;
	else
		rep->stat.st_msgs_send_failures++;

	return (ret);
}

#ifdef REP_DIAGNOSTIC
/*
 * __rep_print_logmsg --
 *	This is a debugging routine for printing out log records that
 *	we are about to transmit to a client.
 */
static void
__rep_print_logmsg(dbenv, logdbt, lsnp)
	DB_ENV *dbenv;
	const DBT *logdbt;
	DB_LSN *lsnp;
{
	/*
	 * Static structures to hold the printing functions.  Both the table
	 * and its size must persist across calls: if the size were reset
	 * to zero on each call, the table would be re-registered every time
	 * a log record is printed.
	 */
	static int (**ptab)__P((DB_ENV *,
	    DBT *, DB_LSN *, db_recops, void *)) = NULL;
	static size_t ptabsize = 0;

	if (ptabsize == 0) {
		/* Initialize the table. */
		(void)__bam_init_print(dbenv, &ptab, &ptabsize);
		(void)__crdel_init_print(dbenv, &ptab, &ptabsize);
		(void)__db_init_print(dbenv, &ptab, &ptabsize);
		(void)__dbreg_init_print(dbenv, &ptab, &ptabsize);
		(void)__fop_init_print(dbenv, &ptab, &ptabsize);
		(void)__qam_init_print(dbenv, &ptab, &ptabsize);
		(void)__ham_init_print(dbenv, &ptab, &ptabsize);
		(void)__txn_init_print(dbenv, &ptab, &ptabsize);
	}

	(void)__db_dispatch(dbenv,
	    ptab, ptabsize, (DBT *)logdbt, lsnp, DB_TXN_PRINT, NULL);
}
#endif

/*
 * __rep_new_master --
 *	Called after a master election to sync back up with a new master.
 * It's possible that we already know of this new master in which case
 * we don't need to do anything.
 *
 * This is written assuming that this message came from the master; we
 * need to enforce that in __rep_process_record, but right now, we have
 * no way to identify the master.
 *
 * Returns 0 if nothing changed, DB_REP_NEWMASTER once the new master
 * has been recorded and the follow-up request sent, or an error.
 *
 * PUBLIC: int __rep_new_master __P((DB_ENV *, REP_CONTROL *, int));
 */
int
__rep_new_master(dbenv, cntrl, eid)
	DB_ENV *dbenv;
	REP_CONTROL *cntrl;
	int eid;
{
	DB_LOG *dblp;
	DB_LOGC *logc;
	DB_LSN last_lsn, lsn;
	DB_REP *db_rep;
	DBT dbt;
	LOG *lp;
	REP *rep;
	int change, ret, t_ret;

	db_rep = dbenv->rep_handle;
	rep = db_rep->region;

	/* Record the new generation/master under the replication mutex. */
	MUTEX_LOCK(dbenv, db_rep->mutexp);
	ELECTION_DONE(rep);
	change = rep->gen != cntrl->gen || rep->master_id != eid;
	if (change) {
		rep->gen = cntrl->gen;
		rep->master_id = eid;
		F_SET(rep, REP_F_RECOVER);
		rep->stat.st_master_changes++;
	}
	MUTEX_UNLOCK(dbenv, db_rep->mutexp);

	if (!change)
		return (0);

	/*
	 * If the master changed, we need to start the process of
	 * figuring out what our last valid log record is.  However,
	 * if both the master and we agree that the max LSN is 0,0,
	 * then there is no recovery to be done.  If we are at 0 and
	 * the master is not, then we just need to request all the log
	 * records from the master.
	 */
	dblp = dbenv->lg_handle;
	lp = dblp->reginfo.primary;
	R_LOCK(dbenv, &dblp->reginfo);
	last_lsn = lsn = lp->lsn;
	/* Back up by lp->len unless we sit at the start of a log file. */
	if (last_lsn.offset > sizeof(LOGP))
		last_lsn.offset -= lp->len;
	R_UNLOCK(dbenv, &dblp->reginfo);

	if (IS_INIT_LSN(lsn) || IS_ZERO_LSN(lsn)) {
empty:		MUTEX_LOCK(dbenv, db_rep->mutexp);
		F_CLR(rep, REP_F_RECOVER);
		MUTEX_UNLOCK(dbenv, db_rep->mutexp);

		if (IS_INIT_LSN(cntrl->lsn))
			ret = 0;
		else
			ret = __rep_send_message(dbenv, rep->master_id,
			    REP_ALL_REQ, &lsn, NULL, 0);

		if (ret == 0)
			ret = DB_REP_NEWMASTER;
		return (ret);
	} else if (last_lsn.offset <= sizeof(LOGP)) {
		/*
		 * We have just changed log files and need to set lastlsn
		 * to the last record in the previous log files.
		 */
		if ((ret = dbenv->log_cursor(dbenv, &logc, 0)) != 0)
			return (ret);
		memset(&dbt, 0, sizeof(dbt));
		ret = logc->get(logc, &last_lsn, &dbt, DB_LAST);
		/* Always close the cursor; keep the first error seen. */
		if ((t_ret = logc->close(logc, 0)) != 0 && ret == 0)
			ret = t_ret;
		if (ret == DB_NOTFOUND)
			goto empty;
		if (ret != 0)
			return (ret);
	}

	R_LOCK(dbenv, &dblp->reginfo);
	lp->verify_lsn = last_lsn;
	R_UNLOCK(dbenv, &dblp->reginfo);
	if ((ret = __rep_send_message(dbenv,
	    eid, REP_VERIFY_REQ, &last_lsn, NULL, 0)) != 0)
		return (ret);

	return (DB_REP_NEWMASTER);
}

/*
 * __rep_lockpgno_init
 *	Create a dispatch table for acquiring locks on each log record.
 *
 *	On entry the table pointer and size are reset; each access method's
 *	getpgnos initializer is then called in turn.  Returns 0 on success
 *	or the first non-zero value returned by an initializer.
 *
 * PUBLIC: int __rep_lockpgno_init __P((DB_ENV *,
 * PUBLIC:     int (***)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *),
 * PUBLIC:     size_t *));
 */
int
__rep_lockpgno_init(dbenv, dtabp, dtabsizep)
	DB_ENV *dbenv;
	int (***dtabp)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
	size_t *dtabsizep;
{
	int ret;

	/* Initialize dispatch table. */
	*dtabsizep = 0;
	*dtabp = NULL;
	if ((ret = __bam_init_getpgnos(dbenv, dtabp, dtabsizep)) != 0 ||
	    (ret = __crdel_init_getpgnos(dbenv, dtabp, dtabsizep)) != 0 ||
	    (ret = __db_init_getpgnos(dbenv, dtabp, dtabsizep)) != 0 ||
	    (ret = __dbreg_init_getpgnos(dbenv, dtabp, dtabsizep)) != 0 ||
	    (ret = __fop_init_getpgnos(dbenv, dtabp, dtabsizep)) != 0 ||
	    (ret = __qam_init_getpgnos(dbenv, dtabp, dtabsizep)) != 0 ||
	    (ret = __ham_init_getpgnos(dbenv, dtabp, dtabsizep)) != 0 ||
	    (ret = __txn_init_getpgnos(dbenv, dtabp, dtabsizep)) != 0)
		return (ret);

	return (0);
}

/*
 * __rep_unlockpages --
 *	Unlock the pages locked in __rep_lockpages.
 *
 *	Issues a single DB_LOCK_PUT_ALL request for locker lid and returns
 *	the result of lock_vec.
 *
 * PUBLIC: int __rep_unlockpages __P((DB_ENV *, u_int32_t));
 */
int
__rep_unlockpages(dbenv, lid)
	DB_ENV *dbenv;
	u_int32_t lid;
{
	DB_LOCKREQ req, *lvp;

	req.op = DB_LOCK_PUT_ALL;
	return (dbenv->lock_vec(dbenv, lid, 0, &req, 1, &lvp));
}

/*
 * __rep_lockpages --
 *	Called to gather and lock pages in preparation for both
 *	single transaction apply as well as client synchronization
 *	with a new master.  A non-NULL key_lsn means that we're locking
 *	in order to apply a single log record during client recovery
 *	to the joint LSN.  A non-NULL max_lsn means that we are applying
 *	a transaction whose commit is at max_lsn.
 *
 * PUBLIC: int __rep_lockpages __P((DB_ENV *,
 * PUBLIC:     int (**)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *),
 * PUBLIC:     size_t, DB_LSN *, DB_LSN *, TXN_RECS *, u_int32_t));
 */
int
__rep_lockpages(dbenv, dtab, dtabsize, key_lsn, max_lsn, recs, lid)
	DB_ENV *dbenv;
	int (**dtab)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
	size_t dtabsize;
	DB_LSN *key_lsn, *max_lsn;
	TXN_RECS *recs;
	u_int32_t lid;
{
	DBT data_dbt, lo;
	DB_LOCK l;
	DB_LOCKREQ *lvp;
	DB_LOGC *logc;
	DB_LSN tmp_lsn;
	TXN_RECS tmp, *t;
	db_pgno_t cur_pgno;
	linfo_t locks;
	int i, ret, t_ret, unique;
	u_int32_t cur_fid;

	/*
	 * There are two phases:  First, we have to traverse backwards through
	 * the log records gathering the list of all the pages accessed.  Once
	 * we have this information we can acquire all the locks we need.
	 */

	/* Initialization */
	memset(&locks, 0, sizeof(locks));
	ret = 0;

	/* Use the caller's record list if given, else a local scratch one. */
	t = recs != NULL ? recs : &tmp;
	t->npages = t->nalloc = 0;
	t->array = NULL;

	/*
	 * We've got to be in one mode or the other; else life will either
	 * be excessively boring or overly exciting.
	 */
	DB_ASSERT(key_lsn != NULL || max_lsn != NULL);
	DB_ASSERT(key_lsn == NULL || max_lsn == NULL);

	/*
	 * Phase 1:  Fill in the pgno array.
	 */
	memset(&data_dbt, 0, sizeof(data_dbt));
	if (F_ISSET(dbenv, DB_ENV_THREAD))
		F_SET(&data_dbt, DB_DBT_REALLOC);

	/* Single transaction apply. */
	if (max_lsn != NULL) {
		DB_ASSERT(0); /* XXX */
		/*
		tmp_lsn = *max_lsn;
		if ((ret = __rep_apply_thread(dbenv, dtab, dtabsize,
		    &data_dbt, &tmp_lsn, t)) != 0)
			goto err;
			*/
	}

	/* In recovery. */
	if (key_lsn != NULL) {
		if ((ret = dbenv->log_cursor(dbenv, &logc, 0)) != 0)
			goto err;
		/*
		 * NOTE(review): the return value of this get is overwritten
		 * by the dispatch call below without being tested -- confirm
		 * whether a failed get should skip the dispatch.
		 */
		ret = logc->get(logc, key_lsn, &data_dbt, DB_SET);

		/* Save lsn values, since dispatch functions can change them. */
		tmp_lsn = *key_lsn;
		ret = __db_dispatch(dbenv,
		    dtab, dtabsize, &data_dbt, &tmp_lsn, DB_TXN_GETPGNOS, t);

		if ((t_ret = logc->close(logc, 0)) != 0 && ret == 0)
			ret = t_ret;

		/*
		 * If ret == DB_DELETED, this record refers to a temporary
		 * file and there's nothing to apply.
		 */
		if (ret == DB_DELETED) {
			ret = 0;
			goto out;
		} else if (ret != 0)
			goto err;
	}

	if (t->npages == 0)
		goto out;

	/* Phase 2: Write lock all the pages. */

	/* Sort the entries in the array by page number. */
	qsort(t->array, t->npages, sizeof(LSN_PAGE), __rep_cmp_bypage);

	/* Count the number of unique pages. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -