📄 log_put.c
字号:
DB_LSN *lsnp; u_int32_t logfile;{ DB_CIPHER *db_cipher; DB_ENV *dbenv; DB_LSN lsn; DBT t; HDR hdr; LOG *lp; int need_free, ret; u_int32_t lastoff; size_t tsize; u_int8_t *tmp; dbenv = dblp->dbenv; lp = dblp->reginfo.primary; DB_ASSERT(logfile == 0 || logfile > lp->lsn.file); /* If we're not at the beginning of a file already, start a new one. */ if (lp->lsn.offset != 0) { /* * Flush the log so this file is out and can be closed. We * cannot release the region lock here because we need to * protect the end of the file while we switch. In * particular, a thread with a smaller record than ours * could detect that there is space in the log. Even * blocking that event by declaring the file full would * require all threads to wait here so that the lsn.file * can be moved ahead after the flush completes. This * probably can be changed if we had an lsn for the * previous file and one for the current, but it does not * seem like this would get much more throughput, if any. */ if ((ret = __log_flush_int(dblp, NULL, 0)) != 0) return (ret); /* * Save the last known offset from the previous file, we'll * need it to initialize the persistent header information. */ lastoff = lp->lsn.offset; /* Point the current LSN to the new file. */ ++lp->lsn.file; lp->lsn.offset = 0; /* Reset the file write offset. */ lp->w_off = 0; } else lastoff = 0; /* * Replication may require we reset the log file name space entirely. * In that case we also force a file switch so that replication can * clean up old files. */ if (logfile != 0) { lp->lsn.file = logfile; lp->lsn.offset = 0; if ((ret = __log_newfh(dblp, 1)) != 0) return (ret); } DB_ASSERT(lp->db_log_inmemory || lp->b_off == 0); if (lp->db_log_inmemory && (ret = __log_inmem_newfile(dblp, lp->lsn.file)) != 0) return (ret); /* * Insert persistent information as the first record in every file. * Note that the previous length is wrong for the very first record * of the log, but that's okay, we check for it during retrieval. */ memset(&t, 0, sizeof(t)); memset(&hdr, 0, sizeof(HDR)); need_free = 0; tsize = sizeof(LOGP); db_cipher = dbenv->crypto_handle; if (CRYPTO_ON(dbenv)) tsize += db_cipher->adj_size(tsize); if ((ret = __os_calloc(dbenv, 1, tsize, &tmp)) != 0) return (ret); lp->persist.log_size = lp->log_size = lp->log_nsize; memcpy(tmp, &lp->persist, sizeof(LOGP)); t.data = tmp; t.size = (u_int32_t)tsize; need_free = 1; if ((ret = __log_encrypt_record(dbenv, &t, &hdr, (u_int32_t)tsize)) != 0) goto err; __db_chksum(t.data, t.size, (CRYPTO_ON(dbenv)) ? db_cipher->mac_key : NULL, hdr.chksum); lsn = lp->lsn; if ((ret = __log_putr(dblp, &lsn, &t, lastoff == 0 ? 0 : lastoff - lp->len, &hdr)) != 0) goto err; /* Update the LSN information returned to the caller. */ if (lsnp != NULL) *lsnp = lp->lsn;err: if (need_free) __os_free(dbenv, tmp); return (ret);}/* * __log_putr -- * Actually put a record into the log. */static int__log_putr(dblp, lsn, dbt, prev, h) DB_LOG *dblp; DB_LSN *lsn; const DBT *dbt; u_int32_t prev; HDR *h;{ DB_CIPHER *db_cipher; DB_ENV *dbenv; DB_LSN f_lsn; LOG *lp; HDR tmp, *hdr; int ret, t_ret; size_t b_off, nr; u_int32_t w_off; dbenv = dblp->dbenv; lp = dblp->reginfo.primary; /* * If we weren't given a header, use a local one. */ db_cipher = dbenv->crypto_handle; if (h == NULL) { hdr = &tmp; memset(hdr, 0, sizeof(HDR)); if (CRYPTO_ON(dbenv)) hdr->size = HDR_CRYPTO_SZ; else hdr->size = HDR_NORMAL_SZ; } else hdr = h; /* Save our position in case we fail. */ b_off = lp->b_off; w_off = lp->w_off; f_lsn = lp->f_lsn; /* * Initialize the header. If we just switched files, lsn.offset will * be 0, and what we really want is the offset of the previous record * in the previous file. Fortunately, prev holds the value we want. */ hdr->prev = prev; hdr->len = (u_int32_t)hdr->size + dbt->size; /* * If we were passed in a nonzero checksum, our caller calculated * the checksum before acquiring the log mutex, as an optimization. * * If our caller calculated a real checksum of 0, we'll needlessly * recalculate it. C'est la vie; there's no out-of-bounds value * here. */ if (hdr->chksum[0] == 0) __db_chksum(dbt->data, dbt->size, (CRYPTO_ON(dbenv)) ? db_cipher->mac_key : NULL, hdr->chksum); if (lp->db_log_inmemory && (ret = __log_inmem_chkspace(dblp, (u_int32_t)hdr->size + dbt->size)) != 0) goto err; if ((ret = __log_fill(dblp, lsn, hdr, (u_int32_t)hdr->size)) != 0) goto err; if ((ret = __log_fill(dblp, lsn, dbt->data, dbt->size)) != 0) goto err; lp->len = (u_int32_t)(hdr->size + dbt->size); lp->lsn.offset += (u_int32_t)(hdr->size + dbt->size); return (0);err: /* * If we wrote more than one buffer before failing, get the * first one back. The extra buffers will fail the checksums * and be ignored. */ if (w_off + lp->buffer_size < lp->w_off) { DB_ASSERT(!lp->db_log_inmemory); if ((t_ret = __os_seek(dbenv, dblp->lfhp, 0, 0, w_off, 0, DB_OS_SEEK_SET)) != 0 || (t_ret = __os_read(dbenv, dblp->lfhp, dblp->bufp, b_off, &nr)) != 0) return (__db_panic(dbenv, t_ret)); if (nr != b_off) { __db_err(dbenv, "Short read while restoring log"); return (__db_panic(dbenv, EIO)); } } /* Reset to where we started. */ lp->w_off = w_off; lp->b_off = b_off; lp->f_lsn = f_lsn; return (ret);}/* * __log_flush_pp -- * DB_ENV->log_flush pre/post processing. * * PUBLIC: int __log_flush_pp __P((DB_ENV *, const DB_LSN *)); */int__log_flush_pp(dbenv, lsn) DB_ENV *dbenv; const DB_LSN *lsn;{ int rep_check, ret; PANIC_CHECK(dbenv); ENV_REQUIRES_CONFIG(dbenv, dbenv->lg_handle, "DB_ENV->log_flush", DB_INIT_LOG); rep_check = IS_ENV_REPLICATED(dbenv) ? 1 : 0; if (rep_check) __env_rep_enter(dbenv); ret = __log_flush(dbenv, lsn); if (rep_check) __env_db_rep_exit(dbenv); return (ret);}/* * __log_flush -- * DB_ENV->log_flush * * PUBLIC: int __log_flush __P((DB_ENV *, const DB_LSN *)); */int__log_flush(dbenv, lsn) DB_ENV *dbenv; const DB_LSN *lsn;{ DB_LOG *dblp; int ret; dblp = dbenv->lg_handle; R_LOCK(dbenv, &dblp->reginfo); ret = __log_flush_int(dblp, lsn, 1); R_UNLOCK(dbenv, &dblp->reginfo); return (ret);}/* * __log_flush_int -- * Write all records less than or equal to the specified LSN; internal * version. * * PUBLIC: int __log_flush_int __P((DB_LOG *, const DB_LSN *, int)); */int__log_flush_int(dblp, lsnp, release) DB_LOG *dblp; const DB_LSN *lsnp; int release;{ struct __db_commit *commit; DB_ENV *dbenv; DB_LSN flush_lsn, f_lsn; DB_MUTEX *flush_mutexp; LOG *lp; size_t b_off; u_int32_t ncommit, w_off; int do_flush, first, ret; dbenv = dblp->dbenv; lp = dblp->reginfo.primary; flush_mutexp = R_ADDR(&dblp->reginfo, lp->flush_mutex_off); ncommit = 0; ret = 0; if (lp->db_log_inmemory) { lp->s_lsn = lp->lsn; ++lp->stat.st_scount; return (0); } /* * If no LSN specified, flush the entire log by setting the flush LSN * to the last LSN written in the log. Otherwise, check that the LSN * isn't a non-existent record for the log. */ if (lsnp == NULL) { flush_lsn.file = lp->lsn.file; flush_lsn.offset = lp->lsn.offset - lp->len; } else if (lsnp->file > lp->lsn.file || (lsnp->file == lp->lsn.file && lsnp->offset > lp->lsn.offset - lp->len)) { __db_err(dbenv, "DB_ENV->log_flush: LSN of %lu/%lu past current end-of-log of %lu/%lu", (u_long)lsnp->file, (u_long)lsnp->offset, (u_long)lp->lsn.file, (u_long)lp->lsn.offset); __db_err(dbenv, "%s %s %s", "Database environment corrupt; the wrong log files may", "have been removed or incompatible database files imported", "from another environment"); return (__db_panic(dbenv, DB_RUNRECOVERY)); } else { /* * See if we need to wait. s_lsn is not locked so some * care is needed. The sync point can only move forward. * The lsnp->file cannot be greater than the s_lsn.file. * If the file we want is in the past we are done. * If the file numbers are the same check the offset. * This all assumes we can read an integer in one * state or the other, not in transition. */ if (lp->s_lsn.file > lsnp->file) return (0); if (lp->s_lsn.file == lsnp->file && lp->s_lsn.offset > lsnp->offset) return (0); flush_lsn = *lsnp; } /* * If a flush is in progress and we're allowed to do so, drop * the region lock and block waiting for the next flush. */ if (release && lp->in_flush != 0) { if ((commit = SH_TAILQ_FIRST( &lp->free_commits, __db_commit)) == NULL) { if ((ret = __db_shalloc(&dblp->reginfo, sizeof(struct __db_commit), MUTEX_ALIGN, &commit)) != 0) goto flush; memset(commit, 0, sizeof(*commit)); if ((ret = __db_mutex_setup(dbenv, &dblp->reginfo, &commit->mutex, MUTEX_SELF_BLOCK | MUTEX_NO_RLOCK)) != 0) { __db_shalloc_free(&dblp->reginfo, commit); return (ret); } MUTEX_LOCK(dbenv, &commit->mutex); } else SH_TAILQ_REMOVE( &lp->free_commits, commit, links, __db_commit); lp->ncommit++; /* * Flushes may be requested out of LSN order; be * sure we only move lp->t_lsn forward. */ if (log_compare(&lp->t_lsn, &flush_lsn) < 0) lp->t_lsn = flush_lsn; commit->lsn = flush_lsn; SH_TAILQ_INSERT_HEAD( &lp->commits, commit, links, __db_commit); R_UNLOCK(dbenv, &dblp->reginfo); /* Wait here for the in-progress flush to finish. */ MUTEX_LOCK(dbenv, &commit->mutex); R_LOCK(dbenv, &dblp->reginfo); lp->ncommit--; /* * Grab the flag before freeing the struct to see if * we need to flush the log to commit. If so, * use the maximal lsn for any committing thread. */ do_flush = F_ISSET(commit, DB_COMMIT_FLUSH); F_CLR(commit, DB_COMMIT_FLUSH); SH_TAILQ_INSERT_HEAD( &lp->free_commits, commit, links, __db_commit); if (do_flush) { lp->in_flush--; flush_lsn = lp->t_lsn; } else return (0); } /* * Protect flushing with its own mutex so we can release * the region lock except during file switches. */flush: MUTEX_LOCK(dbenv, flush_mutexp); /* * If the LSN is less than or equal to the last-sync'd LSN, we're done. * Note, the last-sync LSN saved in s_lsn is the LSN of the first byte * after the byte we absolutely know was written to disk, so the test * is <, not <=. */ if (flush_lsn.file < lp->s_lsn.file || (flush_lsn.file == lp->s_lsn.file && flush_lsn.offset < lp->s_lsn.offset)) { MUTEX_UNLOCK(dbenv, flush_mutexp); goto done; } /* * We may need to write the current buffer. We have to write the * current buffer if the flush LSN is greater than or equal to the * buffer's starting LSN. * * Otherwise, it's still possible that this thread may never have * written to this log file. Acquire a file descriptor if we don't * already have one. */ if (lp->b_off != 0 && log_compare(&flush_lsn, &lp->f_lsn) >= 0) { if ((ret = __log_write(dblp, dblp->bufp, (u_int32_t)lp->b_off)) != 0) { MUTEX_UNLOCK(dbenv, flush_mutexp); goto done; } lp->b_off = 0; } else if (dblp->lfhp == NULL || dblp->lfname != lp->lsn.file) if ((ret = __log_newfh(dblp, 0)) != 0) { MUTEX_UNLOCK(dbenv, flush_mutexp); goto done; } /* * We are going to flush, release the region. * First get the current state of the buffer since * another write may come in, but we may not flush it. */ b_off = lp->b_off; w_off = lp->w_off; f_lsn = lp->f_lsn; lp->in_flush++; if (release) R_UNLOCK(dbenv, &dblp->reginfo); /* Sync all writes to disk. */ if ((ret = __os_fsync(dbenv, dblp->lfhp)) != 0) { MUTEX_UNLOCK(dbenv, flush_mutexp); if (release) R_LOCK(dbenv, &dblp->reginfo); ret = __db_panic(dbenv, ret); return (ret); } /* * Set the last-synced LSN. * This value must be set to the LSN past the last complete * record that has been flushed. This is at least the first * lsn, f_lsn. If the buffer is empty, b_off == 0, then * we can move up to write point since the first lsn is not * set for the new buffer. */ lp->s_lsn = f_lsn; if (b_off == 0) lp->s_lsn.offset = w_off; MUTEX_UNLOCK(dbenv, flush_mutexp);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -