📄 diffbin.cpp
字号:
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
//
// This source code is licensed under Microsoft Shared Source License
// Version 1.0 for Windows CE.
// For a copy of the license visit http://go.microsoft.com/fwlink/?LinkId=3223.
//
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Module Name:
diffbin.cpp
Abstract:
This program constructs a binary diff between two files. It has
been tailored to the NK image structure for the purpose of
cutting image download times, but the basic algorithm can be
applied to any two files.
Brian Wahlert (t-brianw) 8-Jul-1998
Environment:
Non-specific
Revision History:
8-Jul-1998 Brian Wahlert (t-brianw) Created
30-Jun-2001 Scott Shell (ScottSh) Ported from nkcompr
-------------------------------------------------------------------*/
#include "diffbin.h"
// Debugging Zones
DWORD g_dwZoneMask = 0x00000001;
HRESULT
IWriteFile(HANDLE hfOutput, LPBYTE pb, DWORD cb, DWORD *pdwBytesWritten)
/*---------------------------------------------------------------------------*\
*
\*---------------------------------------------------------------------------*/
{
HRESULT hr = NOERROR;
DWORD dwWritten = 0;
ASSERT(pdwBytesWritten);
CBR(WriteFile(hfOutput, pb, cb, &dwWritten, NULL));
CBRA(cb == dwWritten);
*pdwBytesWritten += dwWritten;
Error:
return hr;
} /* IWriteFile()
*/
HRESULT
WriteInt(HANDLE hfOutput, UINT32 i, UCHAR cBytes, DWORD *pdwBytesWritten)
/*---------------------------------------------------------------------------*\
* Write the low-order "cBytes" bytes of "i" to file hfOutput
\*---------------------------------------------------------------------------*/
{
return IWriteFile(hfOutput, (LPBYTE)&i, cBytes, pdwBytesWritten);
} /* WriteInt()
*/
#define VARINT_PAGE_ALIGN_BIT 0x40
#define VARINT_CONTINUATION_BIT 0x80
HRESULT
WriteVarInt(HANDLE hfOutput, UINT32 u, DWORD *pdwBytesWritten)
/*---------------------------------------------------------------------------*\
* Writes a variable length integer using the minimal number of bytes necessary
\*---------------------------------------------------------------------------*/
{
HRESULT hr = NOERROR;
BYTE b;
ASSERT(pdwBytesWritten);
do {
b = u & 0x7f;
if(u >>= 7) {
b |= VARINT_CONTINUATION_BIT;
}
if(hfOutput) {
CHR(IWriteFile(hfOutput, &b, 1, pdwBytesWritten));
} else {
*pdwBytesWritten++;
}
} while(u > 0);
Error:
return hr;
} /* WriteVarInt()
*/
HRESULT
WriteAddrVarInt(HANDLE hfOutput, UINT32 u, DWORD *pdwBytesWritten)
/*---------------------------------------------------------------------------*\
* Writes a section address
* A section address is special in that a very large number of them are page
* aligned, which means they end in 000. Spending an extra bit to represent
* that page alignment saves quite a few bytes;
\*---------------------------------------------------------------------------*/
{
HRESULT hr = NOERROR;
BYTE b = 0;
ASSERT(pdwBytesWritten);
// Check for page alignment
if((u & 0xFFF) == 0) {
// Page aligned!
u >>= 12;
b |= VARINT_PAGE_ALIGN_BIT;
}
b |= u & 0x3f;
if(u >>= 6) {
b |= VARINT_CONTINUATION_BIT;
}
if(hfOutput) {
CHR(IWriteFile(hfOutput, &b, 1, pdwBytesWritten));
} else {
*pdwBytesWritten++;
}
// If this is going to be longer than 1 byte, the rest of the bytes
// look just like a normal VarInt, so call that on the remainder.
if(u) {
CHR(WriteVarInt(hfOutput, u, pdwBytesWritten));
}
Error:
return hr;
} /* WriteVarInt()
*/
HRESULT
WriteDataToken(HANDLE hfOutput, UINT32 cWords, DWORD *pdwBytesWritten)
/*---------------------------------------------------------------------------*\
*
\*---------------------------------------------------------------------------*/
{
HRESULT hr = NOERROR;
UCHAR chWrite;
// pack "iType" and "cWords" into between one and
// four words
UCHAR cBytes = 0;
UINT32 i;
chWrite = TOKEN_DATA | (UCHAR)(cWords << 3);
if (cWords < (1 << 5)) {
cBytes = 0;
} else if (cWords < (1 << 13)) {
cBytes = 1;
} else if (cWords < (1 << 21)) {
cBytes = 2;
} else {
cBytes = 3;
}
chWrite |= cBytes << 1;
CHR(IWriteFile(hfOutput, &chWrite, 1, pdwBytesWritten));
for (i = 0; i < cBytes; ++i) {
chWrite = (UCHAR)(cWords >> (5 + 8 * i));
CHR(IWriteFile(hfOutput, &chWrite, 1, pdwBytesWritten));
}
Error:
return hr;
} /* WriteDataToken()
*/
HRESULT
WriteCopyToken(HANDLE hfOutput, UINT32 cWords, ADDRESS iOffset,
DWORD cComprRgns, DWORD *pdwBytesWritten)
/*---------------------------------------------------------------------------*\
*
\*---------------------------------------------------------------------------*/
{
HRESULT hr = NOERROR;
UCHAR chWrite;
UCHAR cBytes = 0;
UINT32 i;
chWrite = TOKEN_COPY;
chWrite |= ((iOffset.iOffset != NO_COMPRESS) << 1);
chWrite |= (UCHAR)(cWords << 4);
if (cWords < (1 << 4)) {
cBytes = 0;
} else if (cWords < (1 << 12)) {
cBytes = 1;
} else if (cWords < (1 << 20)) {
cBytes = 2;
} else {
cBytes = 3;
}
chWrite |= cBytes << 2;
CHR(IWriteFile(hfOutput, &chWrite, 1, pdwBytesWritten));
for (i = 0; i < cBytes; ++i) {
chWrite = (UCHAR)(cWords >> (4 + 8 * i));
CHR(IWriteFile(hfOutput, &chWrite, 1, pdwBytesWritten));
}
if (iOffset.iOffset == NO_COMPRESS) {
CHR(WriteInt(hfOutput, iOffset.iAddr, 3, pdwBytesWritten));
} else {
if (cComprRgns < (1 << 8)) {
CHR(WriteInt(hfOutput, iOffset.iAddr, 1, pdwBytesWritten));
} else if (cComprRgns < (1 << 16)) {
CHR(WriteInt(hfOutput, iOffset.iAddr, 2, pdwBytesWritten));
} else {
CHR(WriteInt(hfOutput, iOffset.iAddr, 3, pdwBytesWritten));
}
CHR(WriteInt(hfOutput, iOffset.iOffset, 3, pdwBytesWritten));
}
Error:
return hr;
} /* WriteCopyToken()
*/
HRESULT
ComputeRuns(CRunList *pRunList, UINT32 *pcRunLen, UINT32* pcOffset,
LPBYTE pbSection, UINT32 cLen)
/*---------------------------------------------------------------------------*\
* Here we go through all of the common substrings and condense that
* information down into a list of matching runs
\*---------------------------------------------------------------------------*/
{
HRESULT hr = NOERROR;
UINT32 iChar = 0;
BOOL fLastRunData = FALSE;
while (iChar < cLen) {
pcRunLen[iChar] = (pcRunLen[iChar] == 0) ? 1 : pcRunLen[iChar];
if (pcRunLen[iChar] >= HASH_BYTES) {
// If we have a long enough run, let's make it a copy
fLastRunData = FALSE;
CHR(pRunList->Insert(RUNTYPE_COPYTOKEN, pcRunLen[iChar], pcOffset[iChar]));
} else {
// Okay, this run is not long enough to make it a copy. Let's
// make a data run instead...
if (fLastRunData) {
// If the last run was a data run as well, let's just add on
// to the previous run
CHR(pRunList->AddToLastRunLength(pcRunLen[iChar]));
} else {
// The last run was a copy run, so we need to start a new data
// run here.
CHR(pRunList->Insert(RUNTYPE_DATATOKEN, pcRunLen[iChar], (DWORD)pbSection + iChar));
fLastRunData = TRUE;
}
}
iChar += pcRunLen[iChar];
}
Error:
return hr;
} /* ComputeRuns()
*/
HRESULT
HashOldImage(CImageData *pOldImg, CHashTable *pHT, CSecDiv *pSecDiv)
/*---------------------------------------------------------------------------*\
*
\*---------------------------------------------------------------------------*/
{
HRESULT hr = NOERROR;
UINT32 iSecDiv = 0;
LPBYTE pbOldImage = pOldImg->GetUncompressedDataPtr();
DWORD cbOldImage = pOldImg->GetUncompressedImageLength();
LPBYTE pbCurSecDiv = NULL;
int i;
RETAILMSG(ZONE_VERBOSE, (TEXT("\nHashing old image file\n")));
RETAILMSG(ZONE_PROGRESS, (TEXT("Bytes hashed, out of %u: "), cbOldImage));
CBRA(pbCurSecDiv = pSecDiv->GetSecDiv(iSecDiv));
for (i = 0; i <= cbOldImage - HASH_BYTES; ++i) {
if (i % 10000 == 0) {
RETAILMSG(ZONE_PROGRESS, (TEXT("\b\b\b\b\b\b\b\b%8u"), i));
}
if (pbCurSecDiv - (pbOldImage + i) < HASH_BYTES) {
if (pSecDiv->GetSecDiv(iSecDiv) == pbOldImage + i) {
iSecDiv++;
CBRA(pbCurSecDiv = pSecDiv->GetSecDiv(iSecDiv));
// Used to be: iSecDiv = (iSecDiv + 1) % cSecDiv;
// Don't see why we should ever loop around, hence why bother with the %?
// Just to handle the top boundary case? (now handled inside GetSecDiv)
}
} else {
pHT->Insert(pbOldImage + i, i);
}
}
RETAILMSG(ZONE_PROGRESS, (TEXT("\b\b\b\b\b\b\b\b%8u\n"), cbOldImage));
Error:
return hr;
} /* HashOldImage()
*/
HRESULT
ComputeSubstrings(CImageData *pOldImg, CImageData *pNewImg, CSecDiv *pSecDiv,
CRunList *pRunList)
/*---------------------------------------------------------------------------*\
* This is the heart of the program. We determine all substrings that imgOld
* and imgNew have in common. When we're done, "piOffset[i]" is the offset
* into imgOld of the beginning of the longest substring that matches imgNew
* beginning at offset i. "pcMatched[i]" is the length of that matching
* substring.
\*---------------------------------------------------------------------------*/
{
UINT32* piCurMatched;
UINT32* pcCurMatched;
UINT32 cCurMatched;
UINT32* piPrevMatched;
UINT32* pcPrevMatched;
UINT32 cPrevMatched;
UINT32 iPrevMatched;
UINT32 j;
UINT32 k;
UINT32* pcMatched;
UINT32* piPlace;
HRESULT hr = NOERROR;
UINT32 i;
DWORD dwSectionIndex;
DWORD dwSectionTokenLen = 0;
LPBYTE pbOldImage = pOldImg->GetUncompressedDataPtr();
LPBYTE pbNewImage = pNewImg->GetUncompressedDataPtr();
DWORD cbNewImage = pNewImg->GetUncompressedImageLength();
CRunList SectionRunList;
CHashTable HashTable;
// $REVIEW: This should be pOldImg, right?!?
CHR(SectionRunList.Initialize(pOldImg));
CHR(HashTable.Initialize(1 << LOG_NUM_BUCKETS));
CHR(HashOldImage(pOldImg, &HashTable, pSecDiv));
//
// Allocate and initialize data structures
//
CPR(piCurMatched = (UINT32*)LocalAlloc(LMEM_FIXED, HashTable.GetMaxBucketSize() * sizeof(UINT32)));
CPR(pcCurMatched = (UINT32*)LocalAlloc(LMEM_FIXED, HashTable.GetMaxBucketSize() * sizeof(UINT32)));
cCurMatched = 0;
CPR(piPrevMatched = (UINT32*)LocalAlloc(LMEM_FIXED, HashTable.GetMaxBucketSize() * sizeof(UINT32)));
CPR(pcPrevMatched = (UINT32*)LocalAlloc(LMEM_FIXED, HashTable.GetMaxBucketSize() * sizeof(UINT32)));
cPrevMatched = 0;
iPrevMatched = 0;
CPR(pcMatched = (UINT32*)LocalAlloc(LMEM_FIXED, pNewImg->GetMaxBytesUncompressed() * sizeof(UINT32)));
CPR(piPlace = (UINT32*)LocalAlloc(LMEM_FIXED, pNewImg->GetMaxBytesUncompressed() * sizeof(UINT32)));
RETAILMSG(ZONE_VERBOSE, (TEXT("\nComputing common substrings\n")));
RETAILMSG(ZONE_PROGRESS, (TEXT("Bytes finished, out of %u: "), cbNewImage));
for(dwSectionIndex = 0; dwSectionIndex < pNewImg->GetSectionCount(); dwSectionIndex++) {
CSectionData * pSection = pNewImg->GetSection(dwSectionIndex);
CBRA(pSection);
LPBYTE pbSection = pSection->GetUncompressedDataPtr();
UINT32 cSecLen = pSection->GetUncompressedSize();
CHR(pRunList->Insert(RUNTYPE_SECTIONHEADER, 0, (DWORD)pSection));
CHR(SectionRunList.Clear());
cCurMatched = 0;
cPrevMatched = 0;
iPrevMatched = 0;
if (cSecLen >= HASH_BYTES)
{
FillMemory(pcMatched, cSecLen * sizeof(UINT32), 0);
for (j = cSecLen - HASH_BYTES; j <= cSecLen - HASH_BYTES; j -= (HASH_BYTES / 2))
{
HashBucketExt hbe = HashTable.GetBucket(pbSection + j);
UINT32* pcTemp;
UINT32* piTemp;
for (i = hbe.phb->cCount - 1; i < hbe.phb->cCount; --i)
{
UINT32 iOffset = hbe.phb->piOffsets[i];
if (hbe.fOneChar || !memcmp(pbOldImage + iOffset, pbSection + j, HASH_BYTES))
{
piCurMatched[cCurMatched] = iOffset;
while ((iPrevMatched < cPrevMatched) && (piPrevMatched[iPrevMatched] > iOffset + HASH_BYTES / 2)) {
++iPrevMatched;
}
if ((iPrevMatched < cPrevMatched) && (piPrevMatched[iPrevMatched] == iOffset + HASH_BYTES / 2)) {
pcCurMatched[cCurMatched] = max(HASH_BYTES, pcPrevMatched[iPrevMatched] + HASH_BYTES / 2);
} else {
k = 0;
while ((k < HASH_BYTES / 2) &&
(HASH_BYTES + j + k < cSecLen) &&
(!pSecDiv->IsSecDiv(pbOldImage + iOffset + HASH_BYTES + k)) &&
(pbOldImage[iOffset + HASH_BYTES + k] == pbSection[HASH_BYTES + j + k])) {
++k;
}
pcCurMatched[cCurMatched] = HASH_BYTES + k;
}
if ((pcMatched[j] < HASH_BYTES / 2) || (pcCurMatched[cCurMatched] > pcMatched[j] - HASH_BYTES / 2))
{
k = 1;
while ((k < HASH_BYTES / 2) &&
(j >= k) &&
(!pSecDiv->IsSecDiv(pbOldImage + iOffset - k)) &&
(pbOldImage[iOffset - k] == pbSection[j - k]))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -