📄 log_get.c
字号:
if (logc->bp_lsn.file != lsn->file) return (0); if (logc->bp_lsn.offset > lsn->offset) return (0); if (logc->bp_lsn.offset + logc->bp_rlen <= lsn->offset + hdr->size) return (0); /* * Read the record's header and check if the record is entirely held * in the buffer. If the record is not entirely held, get it again. * (The only advantage in having part of the record locally is that * we might avoid a system call because we already have the HDR in * memory.) * * If the header check fails for any reason, it must be because the * LSN is bogus. Fail hard. */ p = logc->bp + (lsn->offset - logc->bp_lsn.offset); memcpy(hdr, p, hdr->size); if (__log_c_hdrchk(logc, hdr, NULL)) return (DB_NOTFOUND); if (logc->bp_lsn.offset + logc->bp_rlen <= lsn->offset + hdr->len) return (0); *pp = p; /* Success. */ return (0);}/* * __log_c_inregion -- * Check to see if the requested record is in the region's buffer. */static int__log_c_inregion(logc, lsn, rlockp, last_lsn, hdr, pp) DB_LOGC *logc; DB_LSN *lsn, *last_lsn; RLOCK *rlockp; HDR *hdr; u_int8_t **pp;{ DB_ENV *dbenv; DB_LOG *dblp; LOG *lp; size_t len, nr; u_int32_t b_disk, b_region; int ret; u_int8_t *p; dbenv = logc->dbenv; dblp = dbenv->lg_handle; lp = ((DB_LOG *)logc->dbenv->lg_handle)->reginfo.primary; ret = 0; *pp = NULL; /* If we haven't yet acquired the log region lock, do so. */ if (*rlockp == L_NONE) { *rlockp = L_ACQUIRED; R_LOCK(dbenv, &dblp->reginfo); } /* * The routines to read from disk must avoid reading past the logical * end of the log, so pass that information back to it. * * Since they're reading directly from the disk, they must also avoid * reading past the offset we've written out. If the log was * truncated, it's possible that there are zeroes or garbage on * disk after this offset, and the logical end of the log can * come later than this point if the log buffer isn't empty. */ *last_lsn = lp->lsn; if (last_lsn->offset > lp->w_off) last_lsn->offset = lp->w_off; /* * Test to see if the requested LSN could be part of the region's * buffer. * * During recovery, we read the log files getting the information to * initialize the region. In that case, the region's lsn field will * not yet have been filled in, use only the disk. * * The record must not start at a byte offset after the region buffer's * end, since that means the request is for a record after the end of * the log. Do this test even if the region's buffer is empty -- after * recovery, the log files may continue past the declared end-of-log, * and the disk reading routine will incorrectly attempt to read the * remainder of the log. * * Otherwise, test to see if the region's buffer actually has what we * want: * * The buffer must have some useful content. * The record must be in the same file as the region's buffer and must * start at a byte offset equal to or greater than the region's buffer. */ if (IS_ZERO_LSN(lp->lsn)) return (0); if (lsn->file > lp->lsn.file || (lsn->file == lp->lsn.file && lsn->offset >= lp->lsn.offset)) return (DB_NOTFOUND); if (lp->b_off == 0) return (0); if (lsn->file < lp->f_lsn.file || lsn->offset < lp->f_lsn.offset) return (0); /* * The current contents of the cursor's buffer will be useless for a * future call -- trash it rather than try and make it look correct. */ ZERO_LSN(logc->bp_lsn); /* * If the requested LSN is greater than the region buffer's first * byte, we know the entire record is in the buffer. * * If the header check fails for any reason, it must be because the * LSN is bogus. Fail hard. */ if (lsn->offset > lp->f_lsn.offset) { p = dblp->bufp + (lsn->offset - lp->w_off); memcpy(hdr, p, hdr->size); if (__log_c_hdrchk(logc, hdr, NULL)) return (DB_NOTFOUND); if (logc->bp_size <= hdr->len) { len = ALIGN(hdr->len * 2, 128); if ((ret = __os_realloc(logc->dbenv, len, &logc->bp)) != 0) return (ret); logc->bp_size = (u_int32_t)len; } memcpy(logc->bp, p, hdr->len); *pp = logc->bp; return (0); } /* * There's a partial record, that is, the requested record starts * in a log file and finishes in the region buffer. We have to * find out how many bytes of the record are in the region buffer * so we can copy them out into the cursor buffer. First, check * to see if the requested record is the only record in the region * buffer, in which case we should copy the entire region buffer. * * Else, walk back through the region's buffer to find the first LSN * after the record that crosses the buffer boundary -- we can detect * that LSN, because its "prev" field will reference the record we * want. The bytes we need to copy from the region buffer are the * bytes up to the record we find. The bytes we'll need to allocate * to hold the log record are the bytes between the two offsets. */ b_disk = lp->w_off - lsn->offset; if (lp->b_off <= lp->len) b_region = (u_int32_t)lp->b_off; else for (p = dblp->bufp + (lp->b_off - lp->len);;) { memcpy(hdr, p, hdr->size); if (hdr->prev == lsn->offset) { b_region = (u_int32_t)(p - dblp->bufp); break; } p = dblp->bufp + (hdr->prev - lp->w_off); } /* * If we don't have enough room for the record, we have to allocate * space. We have to do it while holding the region lock, which is * truly annoying, but there's no way around it. This call is why * we allocate cursor buffer space when allocating the cursor instead * of waiting. */ if (logc->bp_size <= b_region + b_disk) { len = ALIGN((b_region + b_disk) * 2, 128); if ((ret = __os_realloc(logc->dbenv, len, &logc->bp)) != 0) return (ret); logc->bp_size = (u_int32_t)len; } /* Copy the region's bytes to the end of the cursor's buffer. */ p = (logc->bp + logc->bp_size) - b_region; memcpy(p, dblp->bufp, b_region); /* Release the region lock. */ if (*rlockp == L_ACQUIRED) { *rlockp = L_NONE; R_UNLOCK(dbenv, &dblp->reginfo); } /* * Read the rest of the information from disk. Neither short reads * or EOF are acceptable, the bytes we want had better be there. */ if (b_disk != 0) { p -= b_disk; nr = b_disk; if ((ret = __log_c_io( logc, lsn->file, lsn->offset, p, &nr, NULL)) != 0) return (ret); if (nr < b_disk) return (__log_c_shortread(logc, 0)); } /* Copy the header information into the caller's structure. */ memcpy(hdr, p, hdr->size); *pp = p; return (0);}/* * __log_c_ondisk -- * Read a record off disk. */static int__log_c_ondisk(logc, lsn, last_lsn, flags, hdr, pp, eofp) DB_LOGC *logc; DB_LSN *lsn, *last_lsn; int flags, *eofp; HDR *hdr; u_int8_t **pp;{ DB_ENV *dbenv; size_t len, nr; u_int32_t offset; int ret; dbenv = logc->dbenv; *eofp = 0; nr = hdr->size; if ((ret = __log_c_io(logc, lsn->file, lsn->offset, hdr, &nr, eofp)) != 0) return (ret); if (*eofp) return (0); /* If we read 0 bytes, assume we've hit EOF. */ if (nr == 0) { *eofp = 1; return (0); } /* Check the HDR. */ if ((ret = __log_c_hdrchk(logc, hdr, eofp)) != 0) return (ret); if (*eofp) return (0); /* Otherwise, we should have gotten the bytes we wanted. */ if (nr < hdr->size) return (__log_c_shortread(logc, 0)); /* * Regardless of how we return, the previous contents of the cursor's * buffer are useless -- trash it. */ ZERO_LSN(logc->bp_lsn); /* * Otherwise, we now (finally!) know how big the record is. (Maybe * we should have just stuck the length of the record into the LSN!?) * Make sure we have enough space. */ if (logc->bp_size <= hdr->len) { len = ALIGN(hdr->len * 2, 128); if ((ret = __os_realloc(dbenv, len, &logc->bp)) != 0) return (ret); logc->bp_size = (u_int32_t)len; } /* * If we're moving forward in the log file, read this record in at the * beginning of the buffer. Otherwise, read this record in at the end * of the buffer, making sure we don't try and read before the start * of the file. (We prefer positioning at the end because transaction * aborts use DB_SET to move backward through the log and we might get * lucky.) * * Read a buffer's worth, without reading past the logical EOF. The * last_lsn may be a zero LSN, but that's OK, the test works anyway. */ if (flags == DB_FIRST || flags == DB_NEXT) offset = lsn->offset; else if (lsn->offset + hdr->len < logc->bp_size) offset = 0; else offset = (lsn->offset + hdr->len) - logc->bp_size; nr = logc->bp_size; if (lsn->file == last_lsn->file && offset + nr >= last_lsn->offset) nr = last_lsn->offset - offset; if ((ret = __log_c_io(logc, lsn->file, offset, logc->bp, &nr, eofp)) != 0) return (ret); /* * We should have at least gotten the bytes up-to-and-including the * record we're reading. */ if (nr < (lsn->offset + hdr->len) - offset) return (__log_c_shortread(logc, 1)); /* Set up the return information. */ logc->bp_rlen = (u_int32_t)nr; logc->bp_lsn.file = lsn->file; logc->bp_lsn.offset = offset; *pp = logc->bp + (lsn->offset - offset); return (0);}/* * __log_c_hdrchk -- * * Check for corrupted HDRs before we use them to allocate memory or find * records. * * If the log files were pre-allocated, a zero-filled HDR structure is the * logical file end. However, we can see buffers filled with 0's during * recovery, too (because multiple log buffers were written asynchronously, * and one made it to disk before a different one that logically precedes * it in the log file. * * XXX * I think there's a potential pre-allocation recovery flaw here -- if we * fail to write a buffer at the end of a log file (by scheduling its * write asynchronously, and it never making it to disk), then succeed in * writing a log file block to a subsequent log file, I don't think we will * detect that the buffer of 0's should have marked the end of the log files * during recovery. I think we may need to always write some garbage after * each block write if we pre-allocate log files. (At the moment, we do not * pre-allocate, so this isn't currently an issue.) * * Check for impossibly large records. The malloc should fail later, but we * have customers that run mallocs that treat all allocation failures as fatal * errors. * * Note that none of this is necessarily something awful happening. We let * the application hand us any LSN they want, and it could be a pointer into * the middle of a log record, there's no way to tell. */static int__log_c_hdrchk(logc, hdr, eofp) DB_LOGC *logc; HDR *hdr; int *eofp;{ DB_ENV *dbenv; int ret; dbenv = logc->dbenv; /* Sanity check the log record's size. */ if (hdr->len <= hdr->size) goto err; /* * If the cursor's max-record value isn't yet set, it means we aren't * reading these records from a log file and no check is necessary. */ if (logc->bp_maxrec != 0 && hdr->len > logc->bp_maxrec) { /* * If we fail the check, there's the pathological case that * we're reading the last file, it's growing, and our initial * check information was wrong. Get it again, to be sure. */ if ((ret = __log_c_set_maxrec(logc, NULL)) != 0) { __db_err(dbenv, "DB_LOGC->get: %s", db_strerror(ret)); return (ret); } if (logc->bp_maxrec != 0 && hdr->len > logc->bp_maxrec) goto err; } if (eofp != NULL) { if (hdr->prev == 0 && hdr->chksum[0] == 0 && hdr->len == 0) { *eofp = 1; return (0); } *eofp = 0; } return (0);err: if (!F_ISSET(logc, DB_LOG_SILENT_ERR)) __db_err(dbenv, "DB_LOGC->get: invalid log record header"); return (EIO);}/* * __log_c_io -- * Read records from a log file. */static int__log_c_io(logc, fnum, offset, p, nrp, eofp) DB_LOGC *logc; u_int32_t fnum, offset; void *p; size_t *nrp; int *eofp;{ DB_ENV *dbenv; DB_LOG *dblp; int ret; char *np; dbenv = logc->dbenv; dblp = dbenv->lg_handle; /* * If we've switched files, discard the current file handle and acquire * a new one. */ if (F_ISSET(logc->c_fh, DB_FH_VALID) && logc->bp_lsn.file != fnum) if ((ret = __os_closehandle(dbenv, logc->c_fh)) != 0) return (ret); if (!F_ISSET(logc->c_fh, DB_FH_VALID)) { if ((ret = __log_name(dblp, fnum, &np, logc->c_fh, DB_OSO_RDONLY | DB_OSO_SEQ)) != 0) { /* * If we're allowed to return EOF, assume that's the * problem, set the EOF status flag and return 0. */ if (eofp != NULL) { *eofp = 1; ret = 0; } else if (!F_ISSET(logc, DB_LOG_SILENT_ERR)) __db_err(dbenv, "DB_LOGC->get: %s: %s", np, db_strerror(ret)); __os_free(dbenv, np); return (ret); } if ((ret = __log_c_set_maxrec(logc, np)) != 0) { __db_err(dbenv, "DB_LOGC->get: %s: %s", np, db_strerror(ret)); __os_free(dbenv, np); return (ret); } __os_free(dbenv, np); } /* Seek to the record's offset. */ if ((ret = __os_seek(dbenv, logc->c_fh, 0, 0, offset, 0, DB_OS_SEEK_SET)) != 0) { if (!F_ISSET(logc, DB_LOG_SILENT_ERR)) __db_err(dbenv, "DB_LOGC->get: seek: %s", db_strerror(ret)); return (ret); } /* Read the data. */ if ((ret = __os_read(dbenv, logc->c_fh, p, *nrp, nrp)) != 0) { if (!F_ISSET(logc, DB_LOG_SILENT_ERR)) __db_err(dbenv, "DB_LOGC->get: read: %s", db_strerror(ret)); return (ret); } return (0);}/* * __log_c_shortread -- * Read was short -- return a consistent error message and error. */static int__log_c_shortread(logc, silent) DB_LOGC *logc; int silent;{ if (!silent || !F_ISSET(logc, DB_LOG_SILENT_ERR)) __db_err(logc->dbenv, "DB_LOGC->get: short read"); return (EIO);}/* * __log_c_set_maxrec -- * Bound the maximum log record size in a log file. */static int__log_c_set_maxrec(logc, np) DB_LOGC *logc; char *np;{ DB_ENV *dbenv; DB_LOG *dblp; LOG *lp; u_int32_t mbytes, bytes; int ret; dbenv = logc->dbenv; dblp = dbenv->lg_handle; /* * We don't want to try and allocate huge chunks of memory because * applications with error-checking malloc's often consider that a * hard failure. If we're about to look at a corrupted record with * a bizarre size, we need to know before trying to allocate space * to hold it. We could read the persistent data at the beginning * of the file but that's hard -- we may have to decrypt it, checksum * it and so on. Stat the file instead. */ if ((ret = __os_ioinfo(dbenv, np, logc->c_fh, &mbytes, &bytes, NULL)) != 0) return (ret); logc->bp_maxrec = mbytes * MEGABYTE + bytes; /* * If reading from the log file currently being written, we could get * an incorrect size, that is, if the cursor was opened on the file * when it had only a few hundred bytes, and then the cursor used to * move forward in the file, after more log records were written, the * original stat value would be wrong. Use the maximum of the current * log file size and the size of the buffer -- that should represent * the max of any log record currently in the file. * * The log buffer size is set when the environment is opened and never * changed, we don't need a lock on it. */ lp = dblp->reginfo.primary; logc->bp_maxrec += lp->buffer_size; return (0);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -