📄 xlog.c
字号:
return (false); } /* Check CRCs of backup blocks, if any */ blk = (char *) XLogRecGetData(record) + len; for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) { if (!(record->xl_info & XLR_SET_BKP_BLOCK(i))) continue; INIT_CRC64(crc); COMP_CRC64(crc, blk + sizeof(BkpBlock), BLCKSZ); COMP_CRC64(crc, blk + sizeof(crc64), sizeof(BkpBlock) - sizeof(crc64)); FIN_CRC64(crc); memcpy((char *) &cbuf, blk, sizeof(crc64)); /* don't assume * alignment */ if (!EQ_CRC64(cbuf, crc)) { ereport(emode, (errmsg("incorrect checksum of backup block %d in record at %X/%X", i + 1, recptr.xlogid, recptr.xrecoff))); return (false); } blk += sizeof(BkpBlock) + BLCKSZ; } return (true);}/* * Attempt to read an XLOG record. * * If RecPtr is not NULL, try to read a record at that position. Otherwise * try to read a record just after the last one previously read. * * If no valid record is available, returns NULL, or fails if emode is PANIC. * (emode must be either PANIC or LOG.) * * buffer is a workspace at least _INTL_MAXLOGRECSZ bytes long. It is needed * to reassemble a record that crosses block boundaries. Note that on * successful return, the returned record pointer always points at buffer. */static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer){ XLogRecord *record; XLogRecPtr tmpRecPtr = EndRecPtr; uint32 len, total_len; uint32 targetPageOff; unsigned i; bool nextmode = false; if (readBuf == NULL) { /* * First time through, permanently allocate readBuf. We do it * this way, rather than just making a static array, for two * reasons: (1) no need to waste the storage in most * instantiations of the backend; (2) a static char array isn't * guaranteed to have any particular alignment, whereas malloc() * will provide MAXALIGN'd storage. 
*/ readBuf = (char *) malloc(BLCKSZ); Assert(readBuf != NULL); } if (RecPtr == NULL) { RecPtr = &tmpRecPtr; nextmode = true; /* fast case if next record is on same page */ if (nextRecord != NULL) { record = nextRecord; goto got_record; } /* align old recptr to next page */ if (tmpRecPtr.xrecoff % BLCKSZ != 0) tmpRecPtr.xrecoff += (BLCKSZ - tmpRecPtr.xrecoff % BLCKSZ); if (tmpRecPtr.xrecoff >= XLogFileSize) { (tmpRecPtr.xlogid)++; tmpRecPtr.xrecoff = 0; } tmpRecPtr.xrecoff += SizeOfXLogPHD; } else if (!XRecOffIsValid(RecPtr->xrecoff)) ereport(PANIC, (errmsg("invalid record offset at %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); if (readFile >= 0 && !XLByteInSeg(*RecPtr, readId, readSeg)) { close(readFile); readFile = -1; } XLByteToSeg(*RecPtr, readId, readSeg); if (readFile < 0) { readFile = XLogFileOpen(readId, readSeg, (emode == LOG)); if (readFile < 0) goto next_record_is_invalid; readOff = (uint32) (-1); /* force read to occur below */ } targetPageOff = ((RecPtr->xrecoff % XLogSegSize) / BLCKSZ) * BLCKSZ; if (readOff != targetPageOff) { readOff = targetPageOff; if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0) { ereport(emode, (errcode_for_file_access(), errmsg("could not seek in log file %u, segment %u to offset %u: %m", readId, readSeg, readOff))); goto next_record_is_invalid; } if (read(readFile, readBuf, BLCKSZ) != BLCKSZ) { ereport(emode, (errcode_for_file_access(), errmsg("could not read from log file %u, segment %u at offset %u: %m", readId, readSeg, readOff))); goto next_record_is_invalid; } if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode, nextmode)) goto next_record_is_invalid; } if ((((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) && RecPtr->xrecoff % BLCKSZ == SizeOfXLogPHD) { ereport(emode, (errmsg("contrecord is requested by %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; } record = (XLogRecord *) ((char *) readBuf + RecPtr->xrecoff % BLCKSZ);got_record:; /* * Currently, xl_len == 0 must be bad data, but 
that might not be true * forever. See note in XLogInsert. */ if (record->xl_len == 0) { ereport(emode, (errmsg("record with zero length at %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; } /* * Compute total length of record including any appended backup * blocks. */ total_len = SizeOfXLogRecord + record->xl_len; for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) { if (!(record->xl_info & XLR_SET_BKP_BLOCK(i))) continue; total_len += sizeof(BkpBlock) + BLCKSZ; } /* * Make sure it will fit in buffer (currently, it is mechanically * impossible for this test to fail, but it seems like a good idea * anyway). */ if (total_len > _INTL_MAXLOGRECSZ) { ereport(emode, (errmsg("record length %u at %X/%X too long", total_len, RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; } if (record->xl_rmid > RM_MAX_ID) { ereport(emode, (errmsg("invalid resource manager ID %u at %X/%X", record->xl_rmid, RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; } nextRecord = NULL; len = BLCKSZ - RecPtr->xrecoff % BLCKSZ; if (total_len > len) { /* Need to reassemble record */ XLogContRecord *contrecord; uint32 gotlen = len; memcpy(buffer, record, len); record = (XLogRecord *) buffer; buffer += len; for (;;) { readOff += BLCKSZ; if (readOff >= XLogSegSize) { close(readFile); readFile = -1; NextLogSeg(readId, readSeg); readFile = XLogFileOpen(readId, readSeg, (emode == LOG)); if (readFile < 0) goto next_record_is_invalid; readOff = 0; } if (read(readFile, readBuf, BLCKSZ) != BLCKSZ) { ereport(emode, (errcode_for_file_access(), errmsg("could not read from log file %u, segment %u, offset %u: %m", readId, readSeg, readOff))); goto next_record_is_invalid; } if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode, true)) goto next_record_is_invalid; if (!(((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD)) { ereport(emode, (errmsg("there is no contrecord flag in log file %u, segment %u, offset %u", readId, readSeg, readOff))); goto 
next_record_is_invalid; } contrecord = (XLogContRecord *) ((char *) readBuf + SizeOfXLogPHD); if (contrecord->xl_rem_len == 0 || total_len != (contrecord->xl_rem_len + gotlen)) { ereport(emode, (errmsg("invalid contrecord length %u in log file %u, segment %u, offset %u", contrecord->xl_rem_len, readId, readSeg, readOff))); goto next_record_is_invalid; } len = BLCKSZ - SizeOfXLogPHD - SizeOfXLogContRecord; if (contrecord->xl_rem_len > len) { memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord, len); gotlen += len; buffer += len; continue; } memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord, contrecord->xl_rem_len); break; } if (!RecordIsValid(record, *RecPtr, emode)) goto next_record_is_invalid; if (BLCKSZ - SizeOfXLogRecord >= SizeOfXLogPHD + SizeOfXLogContRecord + MAXALIGN(contrecord->xl_rem_len)) { nextRecord = (XLogRecord *) ((char *) contrecord + SizeOfXLogContRecord + MAXALIGN(contrecord->xl_rem_len)); } EndRecPtr.xlogid = readId; EndRecPtr.xrecoff = readSeg * XLogSegSize + readOff + SizeOfXLogPHD + SizeOfXLogContRecord + MAXALIGN(contrecord->xl_rem_len); ReadRecPtr = *RecPtr; return record; } /* Record does not cross a page boundary */ if (!RecordIsValid(record, *RecPtr, emode)) goto next_record_is_invalid; if (BLCKSZ - SizeOfXLogRecord >= RecPtr->xrecoff % BLCKSZ + MAXALIGN(total_len)) nextRecord = (XLogRecord *) ((char *) record + MAXALIGN(total_len)); EndRecPtr.xlogid = RecPtr->xlogid; EndRecPtr.xrecoff = RecPtr->xrecoff + MAXALIGN(total_len); ReadRecPtr = *RecPtr; memcpy(buffer, record, total_len); return (XLogRecord *) buffer;next_record_is_invalid:; close(readFile); readFile = -1; nextRecord = NULL; return NULL;}/* * Check whether the xlog header of a page just read in looks valid. * * This is just a convenience subroutine to avoid duplicated code in * ReadRecord. It's not intended for use from anywhere else. 
*/

/*
 * Arguments:
 *	hdr		 - header of the page that was just read
 *	emode	 - error level handed to ereport() for every complaint
 *	checkSUI - true to also apply the startup-ID (SUI) sequence check
 *
 * Returns true when every check passes, after remembering the page's SUI
 * in lastReadSUI.  Returns false after reporting the first check that
 * fails.  The expected page address and the message texts are built from
 * the file-level readId/readSeg/readOff values.
 */
static bool
ValidXLOGHeader(XLogPageHeader hdr, int emode, bool checkSUI)
{
	XLogRecPtr	expectedAddr;

	/* Address this page should carry, given the current read position. */
	expectedAddr.xlogid = readId;
	expectedAddr.xrecoff = readSeg * XLogSegSize + readOff;

	if (hdr->xlp_magic != XLOG_PAGE_MAGIC)
	{
		ereport(emode,
				(errmsg("invalid magic number %04X in log file %u, segment %u, offset %u",
						hdr->xlp_magic, readId, readSeg, readOff)));
	}
	else if ((hdr->xlp_info & ~XLP_ALL_FLAGS) != 0)
	{
		ereport(emode,
				(errmsg("invalid info bits %04X in log file %u, segment %u, offset %u",
						hdr->xlp_info, readId, readSeg, readOff)));
	}
	else if (!XLByteEQ(hdr->xlp_pageaddr, expectedAddr))
	{
		ereport(emode,
				(errmsg("unexpected pageaddr %X/%X in log file %u, segment %u, offset %u",
						hdr->xlp_pageaddr.xlogid, hdr->xlp_pageaddr.xrecoff,
						readId, readSeg, readOff)));
	}
	else if (checkSUI &&
			 (hdr->xlp_sui < lastReadSUI ||
			  hdr->xlp_sui > lastReadSUI + 512))
	{
		/*
		 * We disbelieve a SUI less than the previous page's SUI, or more
		 * than a few counts greater.  In theory as many as 512 shutdown
		 * checkpoint records could appear on a 32K-sized xlog page, so
		 * that's the most differential there could legitimately be.
		 *
		 * This test only makes sense when reading the next page in
		 * sequence, which is why the caller says whether to apply it.
		 */
		ereport(emode,
		/* translator: SUI = startup id */
				(errmsg("out-of-sequence SUI %u (after %u) in log file %u, segment %u, offset %u",
						hdr->xlp_sui, lastReadSUI,
						readId, readSeg, readOff)));
	}
	else
	{
		/* Page looks sane; its SUI becomes the baseline for the next one. */
		lastReadSUI = hdr->xlp_sui;
		return true;
	}

	return false;
}

/*
 * I/O routines for pg_control
 *
 * *ControlFile is a buffer in shared memory that holds an image of the
 * contents of pg_control.  WriteControlFile() initializes pg_control
 * given a preloaded buffer, ReadControlFile() loads the buffer from
 * the pg_control file (during postmaster or standalone-backend startup),
 * and UpdateControlFile() rewrites pg_control after we modify xlog state.
* * For simplicity, WriteControlFile() initializes the fields of pg_control * that are related to checking backend/database compatibility, and * ReadControlFile() verifies they are correct. We could split out the * I/O and compatibility-check functions, but there seems no need currently. */voidXLOGPathInit(void){ /* Init XLOG file paths */ snprintf(XLogDir, MAXPGPATH, "%s/pg_xlog", DataDir); snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir);}static voidWriteControlFile(void){ int fd; char buffer[BLCKSZ]; /* need not be aligned */ char *localeptr; /* * Initialize version and compatibility-check fields */ ControlFile->pg_control_version = PG_CONTROL_VERSION; ControlFile->catalog_version_no = CATALOG_VERSION_NO; ControlFile->blcksz = BLCKSZ; ControlFile->relseg_size = RELSEG_SIZE; ControlFile->nameDataLen = NAMEDATALEN; ControlFile->funcMaxArgs = FUNC_MAX_ARGS;#ifdef HAVE_INT64_TIMESTAMP ControlFile->enableIntTimes = TRUE;#else ControlFile->enableIntTimes = FALSE;#endif ControlFile->localeBuflen = LOCALE_NAME_BUFLEN; localeptr = setlocale(LC_COLLATE, NULL); if (!localeptr) ereport(PANIC, (errmsg("invalid LC_COLLATE setting"))); StrNCpy(ControlFile->lc_collate, localeptr, LOCALE_NAME_BUFLEN); localeptr = setlocale(LC_CTYPE, NULL); if (!localeptr) ereport(PANIC, (errmsg("invalid LC_CTYPE setting"))); StrNCpy(ControlFile->lc_ctype, localeptr, LOCALE_NAME_BUFLEN); /* Contents are protected with a CRC */ INIT_CRC64(ControlFile->crc); COMP_CRC64(ControlFile->crc, (char *) ControlFile + sizeof(crc64), sizeof(ControlFileData) - sizeof(crc64)); FIN_CRC64(ControlFile->crc); /* * We write out BLCKSZ bytes into pg_control, zero-padding the excess * over sizeof(ControlFileData). This reduces the odds of * premature-EOF errors when reading pg_control. We'll still fail * when we check the contents of the file, but hopefully with a more * specific error than "couldn't read pg_control". 
*/ if (sizeof(ControlFileData) > BLCKSZ) ereport(PANIC, (errmsg("sizeof(ControlFileData) is larger than BLCKSZ; fix either one"))); memset(buffer, 0, BLCKSZ); memcpy(buffer, ControlFile, sizeof(ControlFileData)); fd = BasicOpenFile(ControlFilePath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR); if (fd < 0) ereport(PANIC, (errcode_for_file_access(), errmsg("could not create control file
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -