📄 mp_sync.c
字号:
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996-2002
 *	Sleepycat Software.  All rights reserved.
 */
#include "db_config.h"

#ifndef lint
static const char revid[] = "$Id: mp_sync.c,v 11.64 2002/08/25 16:00:27 bostic Exp $";
#endif /* not lint */

#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <stdlib.h>
#endif

#include "db_int.h"
#include "dbinc/db_shash.h"
#include "dbinc/mp.h"

/*
 * BH_TRACK --
 *	One entry per buffer selected for writing by __memp_sync_int; the
 *	fields are copied from the buffer header while its hash bucket is
 *	locked.
 */
typedef struct {
	DB_MPOOL_HASH *track_hp;	/* Hash bucket holding the buffer. */
	roff_t	  track_off;		/* Copy of the buffer's mf_offset. */
	db_pgno_t track_pgno;		/* Copy of the buffer's page number. */
} BH_TRACK;

static int __bhcmp __P((const void *, const void *));
static int __memp_close_flush_files __P((DB_ENV *, DB_MPOOL *));
static int __memp_sync_files __P((DB_ENV *, DB_MPOOL *));

/*
 * __memp_sync --
 *	Flush the cache (DB_SYNC_CACHE), optionally keyed to an LSN.
 *
 *	lsnp == NULL: the entire cache is flushed; no log subsystem is
 *	required.
 *
 *	lsnp != NULL: logging must be configured.  If the LSN recorded in
 *	the primary region is already at or beyond *lsnp, no flush is done
 *	and *lsnp is overwritten with the recorded LSN.  Otherwise the
 *	cache is flushed and, on success, the recorded LSN is moved up to
 *	*lsnp if it is still behind it.
 *
 *	Returns 0 on success or the error from __memp_sync_int.
 *	NOTE(review): PANIC_CHECK and ENV_REQUIRES_CONFIG are defined
 *	elsewhere and presumably return an error themselves -- confirm.
 *
 * PUBLIC: int __memp_sync __P((DB_ENV *, DB_LSN *));
 */
int
__memp_sync(dbenv, lsnp)
	DB_ENV *dbenv;
	DB_LSN *lsnp;
{
	DB_MPOOL *pool;
	MPOOL *region;
	int rc, satisfied;

	PANIC_CHECK(dbenv);
	ENV_REQUIRES_CONFIG(dbenv,
	    dbenv->mp_handle, "memp_sync", DB_INIT_MPOOL);

	/* A caller-supplied LSN is only meaningful when there is a log. */
	if (lsnp != NULL) {
		ENV_REQUIRES_CONFIG(dbenv,
		    dbenv->lg_handle, "memp_sync", DB_INIT_LOG);
	}

	pool = dbenv->mp_handle;
	region = pool->reginfo[0].primary;

	/*
	 * Nothing to do if the cache has already been flushed up to the
	 * requested LSN; report the LSN we actually reached.
	 */
	if (lsnp != NULL) {
		R_LOCK(dbenv, pool->reginfo);
		satisfied = log_compare(lsnp, &region->lsn) <= 0;
		if (satisfied)
			*lsnp = region->lsn;
		R_UNLOCK(dbenv, pool->reginfo);

		if (satisfied)
			return (0);
	}

	rc = __memp_sync_int(dbenv, NULL, 0, DB_SYNC_CACHE, NULL);
	if (rc != 0 || lsnp == NULL)
		return (rc);

	/* Record the new high-water mark unless it was already passed. */
	R_LOCK(dbenv, pool->reginfo);
	if (log_compare(lsnp, &region->lsn) > 0)
		region->lsn = *lsnp;
	R_UNLOCK(dbenv, pool->reginfo);

	return (0);
}

/*
 * __memp_fsync --
 *	Flush the dirty pages belonging to a single mpool file handle
 *	(DB_SYNC_FILE).
 *
 *	Returns 0 without doing any work when the handle is flagged
 *	MP_READONLY (no descriptor open for writing) or the underlying
 *	file is flagged MP_TEMP (a temporary); otherwise returns whatever
 *	__memp_sync_int returns.
 *
 * PUBLIC: int __memp_fsync __P((DB_MPOOLFILE *));
 */
int
__memp_fsync(dbmfp)
	DB_MPOOLFILE *dbmfp;
{
	DB_ENV *dbenv;

	dbenv = dbmfp->dbmp->dbenv;

	PANIC_CHECK(dbenv);

	/* Read-only handles and temporary files have nothing to flush. */
	if (F_ISSET(dbmfp, MP_READONLY) || F_ISSET(dbmfp->mfp, MP_TEMP))
		return (0);

	return (__memp_sync_int(dbenv, dbmfp, 0, DB_SYNC_FILE, NULL));
}

/*
 * __mp_xxx_fh --
 *	Hand back the handle's file descriptor, for DB 1.85 compatibility
 *	locking.
 *
 *	*fhp is always set to dbmfp->fhp.  If that descriptor is not yet
 *	flagged DB_FH_VALID, the file is synced so that the backing file
 *	gets created, and the result of that sync is returned.
 *
 * PUBLIC: int __mp_xxx_fh __P((DB_MPOOLFILE *, DB_FH **));
 */
int
__mp_xxx_fh(dbmfp, fhp)
	DB_MPOOLFILE *dbmfp;
	DB_FH **fhp;
{
	/*
	 * This is a deliberate layering violation that exists ONLY to
	 * support the DB 1.85 DB->fd call.
	 *
	 * The MP_READONLY and MP_TEMP tests made by memp_fsync(3) are
	 * intentionally not repeated here:
	 *
	 * MP_READONLY -- either a descriptor already exists (the database
	 * file was opened for reading), or the handle isn't read-only (the
	 * database was created, which requires write privileges).
	 *
	 * MP_TEMP -- we want to write to the backing file regardless, so
	 * that there is a file descriptor to return.
	 */
	*fhp = dbmfp->fhp;

	if (!F_ISSET(dbmfp->fhp, DB_FH_VALID))
		return (__memp_sync_int(
		    dbmfp->dbmp->dbenv, dbmfp, 0, DB_SYNC_FILE, NULL));

	return (0);
}

/*
 * __memp_sync_int --
 *	Mpool sync internal function.
* * PUBLIC: int __memp_sync_int * PUBLIC: __P((DB_ENV *, DB_MPOOLFILE *, int, db_sync_op, int *)); */int__memp_sync_int(dbenv, dbmfp, ar_max, op, wrotep) DB_ENV *dbenv; DB_MPOOLFILE *dbmfp; int ar_max, *wrotep; db_sync_op op;{ BH *bhp; BH_TRACK *bharray; DB_MPOOL *dbmp; DB_MPOOL_HASH *hp; DB_MUTEX *mutexp; MPOOL *c_mp, *mp; MPOOLFILE *mfp; u_int32_t n_cache; int ar_cnt, hb_lock, i, pass, remaining, ret, t_ret, wait_cnt, wrote; dbmp = dbenv->mp_handle; mp = dbmp->reginfo[0].primary; pass = wrote = 0; /* * If the caller does not specify how many pages assume one * per bucket. */ if (ar_max == 0) ar_max = mp->nreg * mp->htab_buckets; if ((ret = __os_malloc(dbenv, ar_max * sizeof(BH_TRACK), &bharray)) != 0) return (ret); /* * Walk each cache's list of buffers and mark all dirty buffers to be * written and all pinned buffers to be potentially written, depending * on our flags. */ for (ar_cnt = 0, n_cache = 0; n_cache < mp->nreg; ++n_cache) { c_mp = dbmp->reginfo[n_cache].primary; hp = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab); for (i = 0; i < c_mp->htab_buckets; i++, hp++) { /* * We can check for empty buckets before locking as we * only care if the pointer is zero or non-zero. We * can ignore empty buckets because we only need write * buffers that were dirty before we started. */ if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL) continue; MUTEX_LOCK(dbenv, &hp->hash_mutex); for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh); bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) { /* Always ignore unreferenced, clean pages. */ if (bhp->ref == 0 && !F_ISSET(bhp, BH_DIRTY)) continue; /* * Checkpoints have to wait on all pinned pages, * as pages may be marked dirty when returned to * the cache. * * File syncs only wait on pages both pinned and * dirty. (We don't care if pages are marked * dirty when returned to the cache, that means * there's another writing thread and flushing * the cache for this handle is meaningless.) 
*/ if (op == DB_SYNC_FILE && !F_ISSET(bhp, BH_DIRTY)) continue; mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset); /* * Ignore temporary files -- this means you * can't even flush temporary files by handle. * (Checkpoint doesn't require temporary files * be flushed and the underlying buffer write * write routine may not be able to write it * anyway.) */ if (F_ISSET(mfp, MP_TEMP)) continue; /* * If we're flushing a specific file, see if * this page is from that file. */ if (dbmfp != NULL && mfp != dbmfp->mfp) continue; /* * Ignore files that aren't involved in DB's * transactional operations during checkpoints. */ if (dbmfp == NULL && mfp->lsn_off == -1) continue; /* Track the buffer, we want it. */ bharray[ar_cnt].track_hp = hp; bharray[ar_cnt].track_pgno = bhp->pgno; bharray[ar_cnt].track_off = bhp->mf_offset; ar_cnt++; if (ar_cnt >= ar_max) { if ((ret = __os_realloc(dbenv, (ar_max * 2) * sizeof(BH_TRACK), &bharray)) != 0) break; ar_max *= 2; } } MUTEX_UNLOCK(dbenv, &hp->hash_mutex); if (ret != 0) goto err; } } /* If there no buffers to write, we're done. */ if (ar_cnt == 0) goto done; /* * Write the buffers in file/page order, trying to reduce seeks by the * filesystem and, when pages are smaller than filesystem block sizes, * reduce the actual number of writes. */ if (ar_cnt > 1) qsort(bharray, ar_cnt, sizeof(BH_TRACK), __bhcmp); /* * If we're trickling buffers, only write enough to reach the correct * percentage for this region. We may not write enough if the dirty * buffers have an unbalanced distribution among the regions, but that * seems unlikely. */ if (op == DB_SYNC_TRICKLE && ar_cnt > ar_max / (int)mp->nreg) ar_cnt = ar_max / (int)mp->nreg; /* * Flush the log. We have to ensure the log records reflecting the * changes on the database pages we're writing have already made it * to disk. 
We still have to check the log each time we write a page * (because pages we are about to write may be modified after we have * flushed the log), but in general this will at least avoid any I/O * on the log's part. */ if (LOGGING_ON(dbenv) && (ret = dbenv->log_flush(dbenv, NULL)) != 0) goto err;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -