📄 diffbin.cpp

📁 WinCE5.0部分核心源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
//
// Copyright (c) Microsoft Corporation.  All rights reserved.
//
//
// This source code is licensed under Microsoft Shared Source License
// Version 1.0 for Windows CE.
// For a copy of the license visit http://go.microsoft.com/fwlink/?LinkId=3223.
//
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Module Name:

	diffbin.cpp

Abstract:

	This program constructs a binary diff between two files.  It has
	been tailored to the NK image structure for the purpose of
	cutting image download times, but the basic algorithm can be
	applied to any two files.



	Brian Wahlert (t-brianw) 8-Jul-1998

Environment:

	Non-specific

Revision History:

	 8-Jul-1998 Brian Wahlert (t-brianw)    Created
    30-Jun-2001 Scott Shell (ScottSh)       Ported from nkcompr    

-------------------------------------------------------------------*/
#include "diffbin.h"

// Debugging Zones
DWORD   g_dwZoneMask        = 0x00000001;

HRESULT
IWriteFile(HANDLE hfOutput, LPBYTE pb, DWORD cb, DWORD *pdwBytesWritten)
/*---------------------------------------------------------------------------*\
 *
\*---------------------------------------------------------------------------*/
{
    HRESULT         hr          = NOERROR;
    DWORD           dwWritten   = 0;

    ASSERT(pdwBytesWritten);

    CBR(WriteFile(hfOutput, pb, cb, &dwWritten, NULL));
    CBRA(cb == dwWritten);

    *pdwBytesWritten += dwWritten;
Error:
    return hr;

} /* IWriteFile()
   */

HRESULT 
WriteInt(HANDLE hfOutput, UINT32 i, UCHAR cBytes, DWORD *pdwBytesWritten)
/*---------------------------------------------------------------------------*\
 * Write the low-order "cBytes" bytes of "i" to file hfOutput
\*---------------------------------------------------------------------------*/
{
    return IWriteFile(hfOutput, (LPBYTE)&i, cBytes, pdwBytesWritten);

} /* WriteInt()
   */

#define VARINT_PAGE_ALIGN_BIT       0x40
#define VARINT_CONTINUATION_BIT     0x80

HRESULT
WriteVarInt(HANDLE hfOutput, UINT32 u, DWORD *pdwBytesWritten)
/*---------------------------------------------------------------------------*\
 * Writes a variable length integer using the minimal number of bytes necessary
\*---------------------------------------------------------------------------*/
{
    HRESULT     hr      = NOERROR;
    BYTE        b;

    ASSERT(pdwBytesWritten);

    do {
        b = u & 0x7f;
        if(u >>= 7) {
            b |= VARINT_CONTINUATION_BIT;
        }
        if(hfOutput) {
            CHR(IWriteFile(hfOutput, &b, 1, pdwBytesWritten));
        } else {
            *pdwBytesWritten++;
        }
    } while(u > 0);

Error:
    return hr;

} /* WriteVarInt()
   */

HRESULT
WriteAddrVarInt(HANDLE hfOutput, UINT32 u, DWORD *pdwBytesWritten)
/*---------------------------------------------------------------------------*\
 * Writes a section address
 * A section address is special in that a very large number of them are page
 * aligned, which means they end in 000.  Spending an extra bit to represent
 * that page alignment saves quite a few bytes;
\*---------------------------------------------------------------------------*/
{
    HRESULT     hr      = NOERROR;
    BYTE        b       = 0;

    ASSERT(pdwBytesWritten);

    // Check for page alignment
    if((u & 0xFFF) == 0) {
        // Page aligned!
        u >>= 12;
        b |= VARINT_PAGE_ALIGN_BIT;
    }

    b |= u & 0x3f;
    if(u >>= 6) {
        b |= VARINT_CONTINUATION_BIT;
    }
    if(hfOutput) {
        CHR(IWriteFile(hfOutput, &b, 1, pdwBytesWritten));
    } else {
        *pdwBytesWritten++;
    }

    // If this is going to be longer than 1 byte, the rest of the bytes
    // look just like a normal VarInt, so call that on the remainder.
    if(u) {
        CHR(WriteVarInt(hfOutput, u, pdwBytesWritten));
    }

Error:
    return hr;

} /* WriteVarInt()
   */

HRESULT
WriteDataToken(HANDLE hfOutput, UINT32 cWords, DWORD *pdwBytesWritten)
/*---------------------------------------------------------------------------*\
 *
\*---------------------------------------------------------------------------*/
{
    HRESULT         hr          = NOERROR;
    UCHAR           chWrite;

    // pack "iType" and "cWords" into between one and
	// four words
	UCHAR cBytes = 0;
	UINT32 i;
	chWrite = TOKEN_DATA | (UCHAR)(cWords << 3);
    if (cWords < (1 << 5)) {
		cBytes = 0;
    } else if (cWords < (1 << 13)) {
		cBytes = 1;
    } else if (cWords < (1 << 21)) {
		cBytes = 2;
    } else {
		cBytes = 3;
    }
	
    chWrite |= cBytes << 1;
    CHR(IWriteFile(hfOutput, &chWrite, 1, pdwBytesWritten));

    for (i = 0; i < cBytes; ++i) {
		chWrite = (UCHAR)(cWords >> (5 + 8 * i));
        CHR(IWriteFile(hfOutput, &chWrite, 1, pdwBytesWritten));
	}

Error:
    return hr;

} /* WriteDataToken()
   */

HRESULT
WriteCopyToken(HANDLE hfOutput, UINT32 cWords, ADDRESS iOffset, 
               DWORD cComprRgns, DWORD *pdwBytesWritten)
/*---------------------------------------------------------------------------*\
 *
\*---------------------------------------------------------------------------*/
{
    HRESULT         hr          = NOERROR;
    UCHAR           chWrite;
	UCHAR           cBytes = 0;
	UINT32          i;

	chWrite = TOKEN_COPY;
	chWrite |= ((iOffset.iOffset != NO_COMPRESS) << 1);
	chWrite |= (UCHAR)(cWords << 4);

    if (cWords < (1 << 4)) {
		cBytes = 0;
    } else if (cWords < (1 << 12)) {
		cBytes = 1;
    } else if (cWords < (1 << 20)) {
		cBytes = 2;
    } else {
		cBytes = 3;
    }
	chWrite |= cBytes << 2;
    CHR(IWriteFile(hfOutput, &chWrite, 1, pdwBytesWritten));

    for (i = 0; i < cBytes; ++i) {
		chWrite = (UCHAR)(cWords >> (4 + 8 * i));
        CHR(IWriteFile(hfOutput, &chWrite, 1, pdwBytesWritten));
	}

    if (iOffset.iOffset == NO_COMPRESS) {
        CHR(WriteInt(hfOutput, iOffset.iAddr, 3, pdwBytesWritten));

    } else {
        if (cComprRgns < (1 << 8)) {
            CHR(WriteInt(hfOutput, iOffset.iAddr, 1, pdwBytesWritten));
        } else if (cComprRgns < (1 << 16)) {
            CHR(WriteInt(hfOutput, iOffset.iAddr, 2, pdwBytesWritten));
        } else {
            CHR(WriteInt(hfOutput, iOffset.iAddr, 3, pdwBytesWritten));
        }
        CHR(WriteInt(hfOutput, iOffset.iOffset, 3, pdwBytesWritten));
	}

Error:
    return hr;

} /* WriteCopyToken()
   */

HRESULT
ComputeRuns(CRunList *pRunList, UINT32 *pcRunLen, UINT32* pcOffset, 
            LPBYTE pbSection, UINT32 cLen)
/*---------------------------------------------------------------------------*\
 * Here we go through all of the common substrings and condense that 
 * information down into a list of matching runs
\*---------------------------------------------------------------------------*/
{
    HRESULT     hr              = NOERROR;
	UINT32      iChar           = 0;
    BOOL        fLastRunData    = FALSE;

	while (iChar < cLen) {
		pcRunLen[iChar] = (pcRunLen[iChar] == 0) ? 1 : pcRunLen[iChar];

		if (pcRunLen[iChar] >= HASH_BYTES) {
            // If we have a long enough run, let's make it a copy
			fLastRunData = FALSE;
            CHR(pRunList->Insert(RUNTYPE_COPYTOKEN, pcRunLen[iChar], pcOffset[iChar]));

        } else {
            // Okay, this run is not long enough to make it a copy. Let's
            // make a data run instead... 
			if (fLastRunData) {
                // If the last run was a data run as well, let's just add on
                // to the previous run
                CHR(pRunList->AddToLastRunLength(pcRunLen[iChar]));
            } else {
                // The last run was a copy run, so we need to start a new data
                // run here.
                CHR(pRunList->Insert(RUNTYPE_DATATOKEN, pcRunLen[iChar], (DWORD)pbSection + iChar));
                fLastRunData = TRUE;
            }
		}
		iChar += pcRunLen[iChar];
	}

Error:
    return hr;

} /* ComputeRuns()
   */

HRESULT
HashOldImage(CImageData *pOldImg, CHashTable *pHT, CSecDiv *pSecDiv)
/*---------------------------------------------------------------------------*\
 *
\*---------------------------------------------------------------------------*/
{
    HRESULT             hr              = NOERROR;
	UINT32              iSecDiv         = 0;
    LPBYTE              pbOldImage      = pOldImg->GetUncompressedDataPtr();
    DWORD               cbOldImage      = pOldImg->GetUncompressedImageLength();
    LPBYTE              pbCurSecDiv     = NULL;
    int                 i;

	RETAILMSG(ZONE_VERBOSE, (TEXT("\nHashing old image file\n")));
	RETAILMSG(ZONE_PROGRESS, (TEXT("Bytes hashed, out of %u:         "), cbOldImage));

    CBRA(pbCurSecDiv = pSecDiv->GetSecDiv(iSecDiv));

	for (i = 0; i <= cbOldImage - HASH_BYTES; ++i) {
        if (i % 10000 == 0) {
			RETAILMSG(ZONE_PROGRESS, (TEXT("\b\b\b\b\b\b\b\b%8u"), i));
        }

		if (pbCurSecDiv - (pbOldImage + i) < HASH_BYTES) {
            if (pSecDiv->GetSecDiv(iSecDiv) == pbOldImage + i) {
                iSecDiv++;
                CBRA(pbCurSecDiv = pSecDiv->GetSecDiv(iSecDiv));

                // Used to be: iSecDiv = (iSecDiv + 1) % cSecDiv;
                // Don't see why we should ever loop around, hence why bother with the %?
                // Just to handle the top boundary case?  (now handled inside GetSecDiv)
            }
        } else {
			pHT->Insert(pbOldImage + i, i);
        }
	}

	RETAILMSG(ZONE_PROGRESS, (TEXT("\b\b\b\b\b\b\b\b%8u\n"), cbOldImage));

Error:
    return hr;

} /* HashOldImage()
   */

HRESULT
ComputeSubstrings(CImageData *pOldImg, CImageData *pNewImg,  CSecDiv *pSecDiv, 
                  CRunList *pRunList)
/*---------------------------------------------------------------------------*\
 * This is the heart of the program.  We determine all substrings that imgOld 
 * and imgNew have in common.  When we're done, "piOffset[i]" is the offset 
 * into imgOld of the beginning of the longest substring that matches imgNew 
 * beginning at offset i.  "pcMatched[i]" is the length of that matching 
 * substring.
\*---------------------------------------------------------------------------*/
{
	UINT32*             piCurMatched;
	UINT32*             pcCurMatched;
	UINT32              cCurMatched;
	UINT32*             piPrevMatched;
	UINT32*             pcPrevMatched;
	UINT32              cPrevMatched;
	UINT32              iPrevMatched;
    UINT32              j;
    UINT32              k;
	UINT32*             pcMatched;
	UINT32*             piPlace;

    HRESULT             hr                  = NOERROR;
	UINT32              i;
    DWORD               dwSectionIndex;
    DWORD               dwSectionTokenLen   = 0;
    LPBYTE              pbOldImage          = pOldImg->GetUncompressedDataPtr();
    LPBYTE              pbNewImage          = pNewImg->GetUncompressedDataPtr();
    DWORD               cbNewImage          = pNewImg->GetUncompressedImageLength();
    CRunList            SectionRunList;           
	CHashTable          HashTable;

    // $REVIEW: This should be pOldImg, right?!?
    CHR(SectionRunList.Initialize(pOldImg));

    CHR(HashTable.Initialize(1 << LOG_NUM_BUCKETS));

    CHR(HashOldImage(pOldImg, &HashTable, pSecDiv));

    //
    // Allocate and initialize data structures
    //
    
    CPR(piCurMatched = (UINT32*)LocalAlloc(LMEM_FIXED, HashTable.GetMaxBucketSize() * sizeof(UINT32)));
	CPR(pcCurMatched = (UINT32*)LocalAlloc(LMEM_FIXED, HashTable.GetMaxBucketSize() * sizeof(UINT32)));
	cCurMatched = 0;

	CPR(piPrevMatched = (UINT32*)LocalAlloc(LMEM_FIXED, HashTable.GetMaxBucketSize() * sizeof(UINT32)));
	CPR(pcPrevMatched = (UINT32*)LocalAlloc(LMEM_FIXED, HashTable.GetMaxBucketSize() * sizeof(UINT32)));
	cPrevMatched = 0;
	iPrevMatched = 0;

	CPR(pcMatched = (UINT32*)LocalAlloc(LMEM_FIXED, pNewImg->GetMaxBytesUncompressed() * sizeof(UINT32)));
	CPR(piPlace = (UINT32*)LocalAlloc(LMEM_FIXED, pNewImg->GetMaxBytesUncompressed() * sizeof(UINT32)));

	RETAILMSG(ZONE_VERBOSE, (TEXT("\nComputing common substrings\n")));
	RETAILMSG(ZONE_PROGRESS, (TEXT("Bytes finished, out of %u:         "), cbNewImage));

    for(dwSectionIndex = 0; dwSectionIndex < pNewImg->GetSectionCount(); dwSectionIndex++) {

        CSectionData *  pSection = pNewImg->GetSection(dwSectionIndex);
        CBRA(pSection);

        LPBYTE pbSection = pSection->GetUncompressedDataPtr();
        UINT32 cSecLen   = pSection->GetUncompressedSize();

        CHR(pRunList->Insert(RUNTYPE_SECTIONHEADER, 0, (DWORD)pSection));

        CHR(SectionRunList.Clear());
        
		cCurMatched = 0;
		cPrevMatched = 0;
		iPrevMatched = 0;

		if (cSecLen >= HASH_BYTES)
		{
			FillMemory(pcMatched, cSecLen * sizeof(UINT32), 0);
			for (j = cSecLen - HASH_BYTES; j <= cSecLen - HASH_BYTES; j -= (HASH_BYTES / 2))
			{
				HashBucketExt hbe = HashTable.GetBucket(pbSection + j);
				UINT32* pcTemp;
				UINT32* piTemp;
				for (i = hbe.phb->cCount - 1; i < hbe.phb->cCount; --i)
				{
					UINT32 iOffset = hbe.phb->piOffsets[i];
					if (hbe.fOneChar || !memcmp(pbOldImage + iOffset, pbSection + j, HASH_BYTES))
					{
						piCurMatched[cCurMatched] = iOffset;
                        while ((iPrevMatched < cPrevMatched) && (piPrevMatched[iPrevMatched] > iOffset + HASH_BYTES / 2)) {
							++iPrevMatched;
                        }
                        if ((iPrevMatched < cPrevMatched) && (piPrevMatched[iPrevMatched] == iOffset + HASH_BYTES / 2)) {
							pcCurMatched[cCurMatched] = max(HASH_BYTES, pcPrevMatched[iPrevMatched] + HASH_BYTES / 2);
                        } else {
							k = 0;
							while ((k < HASH_BYTES / 2) &&
								   (HASH_BYTES + j + k < cSecLen) &&
								   (!pSecDiv->IsSecDiv(pbOldImage + iOffset + HASH_BYTES + k)) &&
                                   (pbOldImage[iOffset + HASH_BYTES + k] == pbSection[HASH_BYTES + j + k])) {
								++k;
                            }
							pcCurMatched[cCurMatched] = HASH_BYTES + k;
						}
						if ((pcMatched[j] < HASH_BYTES / 2) || (pcCurMatched[cCurMatched] > pcMatched[j] - HASH_BYTES / 2))
						{
							k = 1;
							while ((k < HASH_BYTES / 2) &&
								   (j >= k) &&
								   (!pSecDiv->IsSecDiv(pbOldImage + iOffset - k)) &&
								   (pbOldImage[iOffset - k] == pbSection[j - k]))
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -