📄 log_put.c
字号:
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996-2004
 *	Sleepycat Software.  All rights reserved.
 *
 * $Id: log_put.c,v 11.168 2004/10/15 16:59:42 bostic Exp $
 */

#include "db_config.h"

#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>

#if TIME_WITH_SYS_TIME
#include <sys/time.h>
#include <time.h>
#else
#if HAVE_SYS_TIME_H
#include <sys/time.h>
#else
#include <time.h>
#endif
#endif

#include <stdio.h>
#include <string.h>
#endif

#include "db_int.h"
#include "dbinc/crypto.h"
#include "dbinc/hmac.h"
#include "dbinc/log.h"
#include "dbinc/txn.h"

static int __log_encrypt_record __P((DB_ENV *, DBT *, HDR *, u_int32_t));
static int __log_file __P((DB_ENV *, const DB_LSN *, char *, size_t));
static int __log_fill __P((DB_LOG *, DB_LSN *, void *, u_int32_t));
static int __log_flush_commit __P((DB_ENV *, const DB_LSN *, u_int32_t));
static int __log_newfh __P((DB_LOG *, int));
static int __log_put_next __P((DB_ENV *,
    DB_LSN *, const DBT *, HDR *, DB_LSN *));
static int __log_putr __P((DB_LOG *,
    DB_LSN *, const DBT *, u_int32_t, HDR *));
static int __log_write __P((DB_LOG *, void *, u_int32_t));

/*
 * __log_put_pp --
 *	DB_ENV->log_put pre/post processing.
 *
 *	Validates the environment and the flags, refuses the call on a
 *	replication client, and then wraps the call to __log_put in the
 *	replication enter/exit calls when the environment is replicated.
 *
 *	dbenv:	environment handle; must have a log handle configured.
 *	lsnp:	passed through to __log_put, which stores the new record's
 *		LSN in it.
 *	udbt:	the caller's log record.
 *	flags:	any of DB_LOG_CHKPNT, DB_LOG_COMMIT, DB_FLUSH, DB_LOG_NOCOPY,
 *		DB_LOG_PERM and DB_LOG_WRNOSYNC; DB_FLUSH and DB_LOG_WRNOSYNC
 *		may not be combined.
 *
 *	Returns the flag-check error if the flags are invalid, EINVAL on a
 *	replication client, otherwise whatever __log_put returns.
 *
 * PUBLIC: int __log_put_pp __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t));
 */
int
__log_put_pp(dbenv, lsnp, udbt, flags)
	DB_ENV *dbenv;
	DB_LSN *lsnp;
	const DBT *udbt;
	u_int32_t flags;
{
	int rep_check, ret;

	PANIC_CHECK(dbenv);
	ENV_REQUIRES_CONFIG(dbenv,
	    dbenv->lg_handle, "DB_ENV->log_put", DB_INIT_LOG);

	/* Validate arguments: check for allowed flags. */
	if ((ret = __db_fchk(dbenv, "DB_ENV->log_put", flags,
	    DB_LOG_CHKPNT | DB_LOG_COMMIT |
	    DB_FLUSH | DB_LOG_NOCOPY | DB_LOG_PERM | DB_LOG_WRNOSYNC)) != 0)
		return (ret);

	/* DB_LOG_WRNOSYNC and DB_FLUSH are mutually exclusive. */
	if (LF_ISSET(DB_LOG_WRNOSYNC) && LF_ISSET(DB_FLUSH))
		return (__db_ferr(dbenv, "DB_ENV->log_put", 1));

	/* Replication clients should never write log records. */
	if (IS_REP_CLIENT(dbenv)) {
		__db_err(dbenv,
		    "DB_ENV->log_put is illegal on replication clients");
		return (EINVAL);
	}

	/*
	 * Remember whether we entered the replication subsystem so that the
	 * matching exit call is made even if the put itself fails.
	 */
	rep_check = IS_ENV_REPLICATED(dbenv) ? 1 : 0;
	if (rep_check)
		__env_rep_enter(dbenv);
	ret = __log_put(dbenv, lsnp, udbt, flags);
	if (rep_check)
		__env_db_rep_exit(dbenv);
	return (ret);
}

/*
 * __log_put --
 *	DB_ENV->log_put.
 *
 *	Optionally copies the record, lets __log_encrypt_record process it,
 *	checksums it, appends it to the log under the log region lock, sends
 *	it to replication clients when we are a replication master, and
 *	flushes or writes the log buffer when the flags ask for it.
 *
 *	dbenv:	environment handle.
 *	lsnp:	set to the LSN at which the record was placed; assigned while
 *		the region lock is still held.
 *	udbt:	the caller's record; never modified unless DB_LOG_NOCOPY is
 *		set and we are not a replication master.
 *	flags:	the DB_ENV->log_put flags (see __log_put_pp).
 *
 *	Returns 0 on success or a non-zero error.  On a replication master a
 *	failure to put or flush the record is routed through __db_panic.
 *
 * PUBLIC: int __log_put __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t));
 */
int
__log_put(dbenv, lsnp, udbt, flags)
	DB_ENV *dbenv;
	DB_LSN *lsnp;
	const DBT *udbt;
	u_int32_t flags;
{
	DB_CIPHER *db_cipher;
	DBT *dbt, t;
	DB_LOG *dblp;
	DB_LSN lsn, old_lsn;
	HDR hdr;
	LOG *lp;
	int lock_held, need_free, ret;
	u_int8_t *key;

	dblp = dbenv->lg_handle;
	lp = dblp->reginfo.primary;
	db_cipher = dbenv->crypto_handle;

	/* Work on a private DBT so the caller's DBT is never altered. */
	dbt = &t;
	t = *udbt;
	lock_held = need_free = 0;
	ZERO_LSN(old_lsn);

	/*
	 * If we are coming from the logging code, we use an internal flag,
	 * DB_LOG_NOCOPY, because we know we can overwrite/encrypt the log
	 * record in place.  Otherwise, if a user called log_put then we
	 * must copy it to new memory so that we know we can write it.
	 *
	 * We also must copy it to new memory if we are a replication master
	 * so that we retain an unencrypted copy of the log record to send
	 * to clients.
	 */
	if (!LF_ISSET(DB_LOG_NOCOPY) || IS_REP_MASTER(dbenv)) {
		/* Leave room for whatever the cipher adds to the record. */
		if (CRYPTO_ON(dbenv))
			t.size += db_cipher->adj_size(udbt->size);
		if ((ret = __os_calloc(dbenv, 1, t.size, &t.data)) != 0)
			goto err;
		need_free = 1;
		memcpy(t.data, udbt->data, udbt->size);
	}
	if ((ret = __log_encrypt_record(dbenv, dbt, &hdr, udbt->size)) != 0)
		goto err;

	/* The checksum is keyed with the MAC key only when crypto is on. */
	if (CRYPTO_ON(dbenv))
		key = db_cipher->mac_key;
	else
		key = NULL;

	/* Otherwise, we actually have a record to put.  Put it. */

	/* Before we grab the region lock, calculate the record's checksum. */
	__db_chksum(dbt->data, dbt->size, key, hdr.chksum);

	R_LOCK(dbenv, &dblp->reginfo);
	lock_held = 1;

	if ((ret = __log_put_next(dbenv, &lsn, dbt, &hdr, &old_lsn)) != 0)
		goto panic_check;

	/*
	 * If we are not a rep application, but are sharing a master rep env,
	 * we should not be writing log records.
	 */
	if (IS_REP_MASTER(dbenv) && dbenv->rep_send == NULL) {
		__db_err(dbenv, "%s %s",
		    "Non-replication DB_ENV handle attempting",
		    "to modify a replicated environment");
		ret = EINVAL;
		goto err;
	}

	/*
	 * Assign the return LSN before dropping the region lock.  Necessary
	 * in case the lsn is a begin_lsn from a TXN_DETAIL structure passed
	 * in by the logging routines.
	 */
	*lsnp = lsn;

	if (IS_REP_MASTER(dbenv)) {
		/*
		 * Replication masters need to drop the lock to send messages,
		 * but want to drop and reacquire it a minimal number of times.
		 */
		R_UNLOCK(dbenv, &dblp->reginfo);
		lock_held = 0;

		/*
		 * If we changed files and we're in a replicated environment,
		 * we need to inform our clients now that we've dropped the
		 * region lock.
		 *
		 * Note that a failed NEWFILE send is a dropped message that
		 * our client can handle, so we can ignore it.  It's possible
		 * that the record we already put is a commit, so we don't just
		 * want to return failure.
		 */
		if (!IS_ZERO_LSN(old_lsn))
			(void)__rep_send_message(dbenv,
			    DB_EID_BROADCAST, REP_NEWFILE, &old_lsn, NULL, 0);

		/*
		 * Then send the log record itself on to our clients.
		 *
		 * If the send fails and we're a commit or checkpoint,
		 * there's nothing we can do; the record's in the log.
		 * Flush it, even if we're running with TXN_NOSYNC, on the
		 * grounds that it should be in durable form somewhere.
		 */
		/*
		 * !!!
		 * In the crypto case, we MUST send the udbt, not the
		 * now-encrypted dbt.  Clients have no way to decrypt
		 * without the header.
		 */
		if ((__rep_send_message(dbenv,
		    DB_EID_BROADCAST, REP_LOG, &lsn, udbt, flags) != 0) &&
		    LF_ISSET(DB_LOG_PERM))
			LF_SET(DB_FLUSH);
	}

	/*
	 * If needed, do a flush.  Note that failures at this point
	 * are only permissible if we know we haven't written a commit
	 * record; __log_flush_commit is responsible for enforcing this.
	 *
	 * If a flush is not needed, see if WRITE_NOSYNC was set and we
	 * need to write out the log buffer.
	 */
	if (LF_ISSET(DB_FLUSH | DB_LOG_WRNOSYNC)) {
		/* The master path above may have released the lock. */
		if (!lock_held) {
			R_LOCK(dbenv, &dblp->reginfo);
			lock_held = 1;
		}
		if ((ret = __log_flush_commit(dbenv, &lsn, flags)) != 0)
			goto panic_check;
	}

	/*
	 * If flushed a checkpoint record, reset the "bytes since the last
	 * checkpoint" counters.
	 */
	if (LF_ISSET(DB_LOG_CHKPNT))
		lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0;

	/*
	 * The "if (0)" body is skipped on the success path; it is entered
	 * only by jumping to the panic_check label.
	 */
	if (0) {
panic_check:	/*
		 * Writing log records cannot fail if we're a replication
		 * master.  The reason is that once we send the record to
		 * replication clients, the transaction can no longer
		 * abort, otherwise the master would be out of sync with
		 * the rest of the replication group.  Panic the system.
		 */
		if (ret != 0 && IS_REP_MASTER(dbenv))
			ret = __db_panic(dbenv, ret);
	}

err:	if (lock_held)
		R_UNLOCK(dbenv, &dblp->reginfo);
	if (need_free)
		__os_free(dbenv, dbt->data);

	/*
	 * If auto-remove is set and we switched files, remove unnecessary
	 * log files.
	 */
	if (ret == 0 && !IS_ZERO_LSN(old_lsn) && lp->db_log_autoremove)
		__log_autoremove(dbenv);

	return (ret);
}

/*
 * __log_txn_lsn --
 *	Return the LSN of the last entry in the log and, optionally, the
 *	number of bytes put into the log since the last checkpoint.  The
 *	values are read while holding the log region lock.
 *
 *	dbenv:	 environment handle.
 *	lsnp:	 set to the computed LSN.
 *	mbytesp: if not NULL, set to the megabytes-since-checkpoint counter.
 *	bytesp:	 written only when mbytesp is not NULL; set to the
 *		 bytes-since-checkpoint counter plus the current buffer
 *		 offset.
 *
 * PUBLIC: void __log_txn_lsn
 * PUBLIC:     __P((DB_ENV *, DB_LSN *, u_int32_t *, u_int32_t *));
 */
void
__log_txn_lsn(dbenv, lsnp, mbytesp, bytesp)
	DB_ENV *dbenv;
	DB_LSN *lsnp;
	u_int32_t *mbytesp, *bytesp;
{
	DB_LOG *dblp;
	LOG *lp;

	dblp = dbenv->lg_handle;
	lp = dblp->reginfo.primary;

	R_LOCK(dbenv, &dblp->reginfo);

	/*
	 * We are trying to get the LSN of the last entry in the log.  We use
	 * this in two places: 1) DB_ENV->txn_checkpoint uses it as a first
	 * value when trying to compute an LSN such that all transactions begun
	 * before it are complete.   2) DB_ENV->txn_begin uses it as the
	 * begin_lsn.
	 *
	 * Typically, it's easy to get the last written LSN, you simply look
	 * at the current log pointer and back up the number of bytes of the
	 * last log record.  However, if the last thing we did was write the
	 * log header of a new log file, then, this doesn't work, so we return
	 * the first log record that will be written in this new file.
	 */
	*lsnp = lp->lsn;
	if (lp->lsn.offset > lp->len)
		lsnp->offset -= lp->len;

	/*
	 * Since we're holding the log region lock, return the bytes put into
	 * the log since the last checkpoint, transaction checkpoint needs it.
	 *
	 * We add the current buffer offset so as to count bytes that have not
	 * yet been written, but are sitting in the log buffer.
	 */
	if (mbytesp != NULL) {
		*mbytesp = lp->stat.st_wc_mbytes;
		/*
		 * Guard bytesp separately: a caller supplying only mbytesp
		 * must not cause a NULL dereference.
		 */
		if (bytesp != NULL)
			*bytesp =
			    (u_int32_t)(lp->stat.st_wc_bytes + lp->b_off);
	}

	R_UNLOCK(dbenv, &dblp->reginfo);
}

/*
 * __log_put_next --
 *	Put the given record as the next in the log, wherever that may
 *	turn out to be.
 *
 *	dbenv:	  environment handle.
 *	lsn:	  set to the LSN at which the record is placed.
 *	dbt:	  the record to write.
 *	hdr:	  the record header; hdr->size is used in the space checks.
 *	old_lsnp: written only when a switch to a new log file happened, in
 *		  which case it receives the log LSN from before the switch.
 *
 *	Returns EINVAL if the record cannot fit in a log file, the error
 *	from __log_newfile if the file switch fails, otherwise whatever
 *	__log_putr returns.
 */
static int
__log_put_next(dbenv, lsn, dbt, hdr, old_lsnp)
	DB_ENV *dbenv;
	DB_LSN *lsn;
	const DBT *dbt;
	HDR *hdr;
	DB_LSN *old_lsnp;
{
	DB_LOG *dblp;
	DB_LSN old_lsn;
	LOG *lp;
	int newfile, ret;

	dblp = dbenv->lg_handle;
	lp = dblp->reginfo.primary;

	/*
	 * Save a copy of lp->lsn before we might decide to switch log
	 * files and change it.  If we do switch log files, and we're
	 * doing replication, we'll need to tell our clients about the
	 * switch, and they need to receive a NEWFILE message
	 * with this "would-be" LSN in order to know they're not
	 * missing any log records.
	 */
	old_lsn = lp->lsn;
	newfile = 0;

	/*
	 * If this information won't fit in the file, or if the current
	 * offset is zero (presumably no log file has been started yet),
	 * swap files.
	 */
	if (lp->lsn.offset == 0 ||
	    lp->lsn.offset + hdr->size + dbt->size > lp->log_size) {
		if (hdr->size + sizeof(LOGP) + dbt->size > lp->log_size) {
			/*
			 * Cast the whole sum, not just its first term: the
			 * sum has type size_t where that is wider than
			 * u_long, which would not match the %lu conversion.
			 */
			__db_err(dbenv,
	    "DB_ENV->log_put: record larger than maximum file size (%lu > %lu)",
			    (u_long)(hdr->size + sizeof(LOGP) + dbt->size),
			    (u_long)lp->log_size);
			return (EINVAL);
		}

		if ((ret = __log_newfile(dblp, NULL, 0)) != 0)
			return (ret);

		/*
		 * Flag that we switched files, in case we're a master
		 * and need to send this information to our clients.
		 * We postpone doing the actual send until we can
		 * safely release the log region lock and are doing so
		 * anyway.
		 */
		newfile = 1;
	}

	/*
	 * The offset into the log file at this point is the LSN where
	 * we're about to put this record, and is the LSN the caller wants.
	 */
	*lsn = lp->lsn;

	/* If we switched log files, let our caller know where. */
	if (newfile)
		*old_lsnp = old_lsn;

	/* Actually put the record. */
	return (__log_putr(dblp, lsn, dbt, lp->lsn.offset - lp->len, hdr));
}

/*
 * __log_flush_commit --
 *	Flush a record.
 *
 *	dbenv:	environment handle.
 *	lsnp:	LSN of the record to be flushed.
 *	flags:	the log_put flags; DB_FLUSH selects a flush, otherwise the
 *		log buffer is written out if it is non-empty and the log is
 *		not in-memory.  DB_LOG_COMMIT selects the commit handling on
 *		failure described below.
 *
 *	Returns 0 on success.  On failure returns the error, except that 0
 *	is returned for a commit record that lies in a different log file
 *	or before the current write offset.
 */
static int
__log_flush_commit(dbenv, lsnp, flags)
	DB_ENV *dbenv;
	const DB_LSN *lsnp;
	u_int32_t flags;
{
	DB_LOG *dblp;
	DB_LSN flush_lsn;
	LOG *lp;
	int ret;

	dblp = dbenv->lg_handle;
	lp = dblp->reginfo.primary;
	flush_lsn = *lsnp;

	ret = 0;

	/*
	 * DB_FLUSH:
	 *	Flush a record for which the DB_FLUSH flag to log_put was set.
	 *
	 * DB_LOG_WRNOSYNC:
	 *	If there's anything in the current log buffer, write it out.
	 */
	if (LF_ISSET(DB_FLUSH))
		ret = __log_flush_int(dblp, &flush_lsn, 1);
	else if (!lp->db_log_inmemory && lp->b_off != 0)
		if ((ret = __log_write(dblp,
		    dblp->bufp, (u_int32_t)lp->b_off)) == 0)
			lp->b_off = 0;

	/*
	 * If a flush supporting a transaction commit fails, we must abort the
	 * transaction.  (If we aren't doing a commit, return the failure; if
	 * the commit we care about made it to disk successfully, we just
	 * ignore the failure, because there's no way to undo the commit.)
	 */
	if (ret == 0 || !LF_ISSET(DB_LOG_COMMIT))
		return (ret);

	if (flush_lsn.file != lp->lsn.file || flush_lsn.offset < lp->w_off)
		return (0);

	/*
	 * Else, make sure that the commit record does not get out after we
	 * abort the transaction.  Do this by overwriting the commit record
	 * in the buffer.  (Note that other commits in this buffer will
	 * wait until a successful write happens, we do not wake them.)  We
	 * point at the right part of the buffer and write an abort record
	 * over the commit.  We must then try and flush the buffer again,
	 * since the interesting part of the buffer may have actually made
	 * it out to disk before there was a failure, we can't know for sure.
	 */
	if (__txn_force_abort(dbenv,
	    dblp->bufp + flush_lsn.offset - lp->w_off) == 0)
		(void)__log_flush_int(dblp, &flush_lsn, 0);

	return (ret);
}

/*
 * __log_newfile --
 *	Initialize and switch to a new log file.  (Note that this is
 *	called both when no log yet exists and when we fill a log file.)
 *
 * PUBLIC: int __log_newfile __P((DB_LOG *, DB_LSN *, u_int32_t));
 */
int
__log_newfile(dblp, lsnp, logfile)
	DB_LOG *dblp;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -