📄 storage.c
字号:
/* Compound Storage
*
* Implemented using the documentation of the LAOLA project at
* <URL:http://wwwwbs.cs.tu-berlin.de/~schwartz/pmh/index.html>
* (Thanks to Martin Schwartz <schwartz@cs.tu-berlin.de>)
*
* Copyright 1998 Marcus Meissner
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "config.h"
#include <assert.h>
#include <time.h>
#include <stdarg.h>
#include <string.h>
#include <sys/types.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#define NONAMELESSUNION
#define NONAMELESSSTRUCT
#include "windef.h"
#include "winbase.h"
#include "winternl.h"
#include "winerror.h"
#include "wine/winbase16.h"
#include "wownt32.h"
#include "wine/unicode.h"
#include "objbase.h"
#include "wine/debug.h"
#include "ifs.h"
WINE_DEFAULT_DEBUG_CHANNEL(ole);
WINE_DECLARE_DEBUG_CHANNEL(relay);
struct storage_header {
BYTE magic[8]; /* 00: magic */
BYTE unknown1[36]; /* 08: unknown */
DWORD num_of_bbd_blocks;/* 2C: length of big datablocks */
DWORD root_startblock;/* 30: root storage first big block */
DWORD unknown2[2]; /* 34: unknown */
DWORD sbd_startblock; /* 3C: small block depot first big block */
DWORD unknown3[3]; /* 40: unknown */
DWORD bbd_list[109]; /* 4C: big data block list (up to end of sector)*/
};
struct storage_pps_entry {
WCHAR pps_rawname[32];/* 00: \0 terminated widechar name */
WORD pps_sizeofname; /* 40: namelength in bytes */
BYTE pps_type; /* 42: flags, 1 storage/dir, 2 stream, 5 root */
BYTE pps_unknown0; /* 43: unknown */
DWORD pps_prev; /* 44: previous pps */
DWORD pps_next; /* 48: next pps */
DWORD pps_dir; /* 4C: directory pps */
GUID pps_guid; /* 50: class ID */
DWORD pps_unknown1; /* 60: unknown */
FILETIME pps_ft1; /* 64: filetime1 */
FILETIME pps_ft2; /* 70: filetime2 */
DWORD pps_sb; /* 74: data startblock */
DWORD pps_size; /* 78: datalength. (<0x1000)?small:big blocks*/
DWORD pps_unknown2; /* 7C: unknown */
};
#define STORAGE_CHAINENTRY_FAT 0xfffffffd
#define STORAGE_CHAINENTRY_ENDOFCHAIN 0xfffffffe
#define STORAGE_CHAINENTRY_FREE 0xffffffff
static const BYTE STORAGE_magic[8] ={0xd0,0xcf,0x11,0xe0,0xa1,0xb1,0x1a,0xe1};
#define BIGSIZE 512
#define SMALLSIZE 64
#define SMALLBLOCKS_PER_BIGBLOCK (BIGSIZE/SMALLSIZE)
#define READ_HEADER(str) STORAGE_get_big_block(str,-1,(LPBYTE)&sth);assert(!memcmp(STORAGE_magic,sth.magic,sizeof(STORAGE_magic)));
static IStorage16Vtbl stvt16;
static const IStorage16Vtbl *segstvt16 = NULL;
static IStream16Vtbl strvt16;
static const IStream16Vtbl *segstrvt16 = NULL;
/*ULONG WINAPI IStorage16_AddRef(LPSTORAGE16 this);*/
static void _create_istorage16(LPSTORAGE16 *stg);
static void _create_istream16(LPSTREAM16 *str);
#define IMPLEMENTED 1
/* The following is taken from the CorVu implementation of docfiles, and
* documents things about the file format that are not implemented here, and
* not documented by the LAOLA project. The CorVu implementation was posted
* to wine-devel in February 2004, and released under the LGPL at the same
* time. Because that implementation is in C++, it's not directly usable in
* Wine, but does have documentation value.
*
*
* #define DF_EXT_VTOC -4
* #define DF_VTOC_VTOC -3
* #define DF_VTOC_EOF -2
* #define DF_VTOC_FREE -1
* #define DF_NAMELEN 0x20 // Maximum entry name length - 31 characters plus
* // a NUL terminator
*
* #define DF_FT_STORAGE 1
* #define DF_FT_STREAM 2
* #define DF_FT_LOCKBYTES 3 // Not used -- How the bloody hell did I manage
* #define DF_FT_PROPERTY 4 // Not Used -- to figure these two out?
* #define DF_FT_ROOT 5
*
* #define DF_BLOCK_SIZE 0x200
* #define DF_VTOC_SIZE 0x80
* #define DF_DE_PER_BLOCK 4
* #define DF_STREAM_BLOCK_SIZE 0x40
*
* A DocFile is divided into blocks of 512 bytes.
* The first block contains the header.
*
* The file header contains The first 109 entries in the VTOC of VTOCs.
*
* Each block pointed to by a VTOC of VTOCs contains a VTOC, which
* includes block chains - just like FAT. This is a somewhat poor
* design for the following reasons:
*
* 1. FAT was a poor file system design to begin with, and
* has long been known to be horrendously inefficient
* for day to day operations.
*
* 2. The problem is compounded here, since the file
* level streams are generally *not* read sequentially.
* This means that a significant percentage of reads
* require seeking from the start of the chain.
*
* Data chains also contain an internal VTOC. The block size for
* the standard VTOC is 512. The block size for the internal VTOC
* is 64.
*
* Now, the 109 blocks in the VTOC of VTOCs allows for files of
* up to around 7MB. So what do you think happens if that's
* exceeded? Well, there's an entry in the header block which
* points to the first block used as additional storage for
* the VTOC of VTOCs.
*
* Now we can get up to around 15MB. Now, guess how the file
* format adds in another block to the VTOC of VTOCs. Come on,
* it's no big surprise. That's right - the last entry in each
* block extending the VTOC of VTOCs is, you guessed it, the
* block number of the next block containing an extension to
* the VTOC of VTOCs. The VTOC of VTOCs is chained!!!!
*
* So, to review:
*
* 1. If you are using a FAT file system, the location of
* your file's blocks is stored in chains.
*
* 2. At the abstract level, the file contains a VTOC of VTOCs,
* which is stored in the most inefficient possible format for
* random access - a chain (AKA list).
*
* 3. The VTOC of VTOCs contains descriptions of three file level
* streams:
*
* a. The Directory stream
* b. The Data stream
* c. The Data VTOC stream
*
* These are, of course, represented as chains.
*
* 4. The Data VTOC contains data describing the chains of blocks
* within the Data stream.
*
* That's right - we have a total of four levels of block chains!
*
* Now, is that complicated enough for you? No? OK, there's another
* complication. If an individual stream (ie. an IStream) reaches
* 4096 bytes in size, it gets moved from the Data Stream to
* a new file level stream. Now, if the stream then gets truncated
* back to less than 4096 bytes, it returns to the data stream.
*
* The effect of using this format can be seen very easily. Pick
* an arbitrary application with a grid data representation that
* can export to both Lotus 123 and Excel 5 or higher. Export
* a large file to Lotus 123 and time it. Export the same thing
* to Excel 5 and time that. The difference is the inefficiency
* of the Microsoft DocFile format.
*
*
* #define TOTAL_SIMPLE_VTOCS 109
*
* struct DocFile_Header
* {
* df_byte iMagic1; // 0xd0
* df_byte iMagic2; // 0xcf
* df_byte iMagic3; // 0x11
* df_byte iMagic4; // 0xe0 - Spells D0CF11E0, or DocFile
* df_byte iMagic5; // 161 (igi upside down)
* df_byte iMagic6; // 177 (lli upside down - see below
* df_byte iMagic7; // 26 (gz upside down)
* df_byte iMagic8; // 225 (szz upside down) - see below
* df_int4 aiUnknown1[4];
* df_int4 iVersion; // DocFile Version - 0x03003E
* df_int4 aiUnknown2[4];
* df_int4 nVTOCs; // Number of VTOCs
* df_int4 iFirstDirBlock; // First Directory Block
* df_int4 aiUnknown3[2];
* df_int4 iFirstDataVTOC; // First data VTOC block
* df_int4 iHasData; // 1 if there is data in the file - yes, this is important
* df_int4 iExtendedVTOC; // Extended VTOC location
* df_int4 iExtendedVTOCSize; // Size of extended VTOC (+1?)
* df_int4 aiVTOCofVTOCs[TOTAL_SIMPLE_VTOCS];
* };
*
* struct DocFile_VTOC
* {
* df_int4 aiBlocks[DF_VTOC_SIZE];
* };
*
*
* The meaning of the magic numbers
*
* 0xd0cf11e0 is DocFile with a zero on the end (sort of)
*
* If you key 177161 into a calculator, then turn the calculator
* upside down, you get igilli, which may be a reference to
* somebody's name, or to the Hebrew word for "angel".
*
* If you key 26225 into a calculator, then turn it upside down, you
* get szzgz. Microsoft has a tradition of creating nonsense words
* using the letters s, g, z and y. We think szzgz may be one of the
* Microsoft placeholder variables, along the lines of foo, bar and baz.
* Alternatively, it could be 22526, which would be gzszz.
*
*
* struct DocFile_DirEnt
* {
* df_char achEntryName[DF_NAMELEN]; // Entry Name
* df_int2 iNameLen; // Name length in bytes, including NUL terminator
* df_byte iFileType; // Entry type
* df_byte iColour; // 1 = Black, 0 = Red
* df_int4 iLeftSibling; // Next Left Sibling Entry - See below
* df_int4 iRightSibling; // Next Right Sibling Entry
* df_int4 iFirstChild; // First Child Entry
* df_byte achClassID[16]; // Class ID
* df_int4 iStateBits; // [GS]etStateBits value
* df_int4 iCreatedLow; // Low DWORD of creation time
* df_int4 iCreatedHigh; // High DWORD of creation time
* df_int4 iModifiedLow; // Low DWORD of modification time
* df_int4 iModifiedHigh; // High DWORD of modification time
* df_int4 iVTOCPosition; // VTOC Position
* df_int4 iFileSize; // Size of the stream
* df_int4 iZero; // We think this is part of the 64 bit stream size - must be 0
* };
*
* Siblings
* ========
*
* Siblings are stored in an obscure but incredibly elegant
* data structure called a red-black tree. This is generally
* defined as a 2-3-4 tree stored in a binary tree.
*
* A red-black tree can always be balanced very easily. The rules
* for a red-black tree are as follows:
*
* 1. The root node is always black.
* 2. The parent of a red node is always black.
*
* There is a Java demo of red-black trees at:
*
* http://langevin.usc.edu/BST/RedBlackTree-Example.html
*
* This demo is an excellent tool for learning how red-black
* trees work, without having to go through the process of
* learning how they were derived.
*
* Within the tree, elements are ordered by the length of the
* name and within that, ASCII order by name. This causes the
* apparently bizarre reordering you see when you use dfview.
*
* This is a somewhat bizarre choice. It suggests that the
* designer of the DocFile format was trying to optimise
* searching through the directory entries. However searching
* through directory entries is a relatively rare operation.
* Reading and seeking within a stream are much more common
* operations, especially within the file level streams, yet
* these use the horrendously inefficient FAT chains.
*
* This suggests that the designer was probably somebody
* fresh out of university, who had some basic knowledge of
* basic data structures, but little knowledge of anything
* more practical. It is bizarre to attempt to optimise
* directory searches while not using a more efficient file
* block locating system than FAT (seedling/sapling/tree
* would result in a massive improvement - in fact we have
* an alternative to DocFiles that we use internally that
* uses seedling/sapling/tree and *is* far more efficient).
*
* It is worth noting that the MS implementation of red-black
* trees is incorrect (I can tell you're surprised) and
* actually causes more operations to occur than are really
* needed. Fortunately the fact that our implementation is
* correct will not cause any problems - the MS implementation
* still appears to cause the tree to satisfy the rules, albeit
* a sequence of the same insertions in the different
* implementations may result in a different, and possibly
* deeper (but never shallower) tree.
*/
typedef struct {
HANDLE hf;
SEGPTR lockbytes;
} stream_access16;
/* --- IStorage16 implementation struct */
typedef struct
{
/* IUnknown fields */
const IStorage16Vtbl *lpVtbl;
LONG ref;
/* IStorage16 fields */
SEGPTR thisptr; /* pointer to this struct as segmented */
struct storage_pps_entry stde;
int ppsent;
stream_access16 str;
} IStorage16Impl;
/******************************************************************************
* STORAGE_get_big_block [Internal]
*
* Reading OLE compound storage
*/
static BOOL
STORAGE_get_big_block(stream_access16 *str,int n,BYTE *block)
{
DWORD result;
assert(n>=-1);
if (str->hf) {
if ((SetFilePointer( str->hf, (n+1)*BIGSIZE, NULL,
SEEK_SET ) == INVALID_SET_FILE_POINTER) && GetLastError())
{
WARN("(%p,%d,%p), seek failed (%d)\n",str->hf, n, block, GetLastError());
return FALSE;
}
if (!ReadFile( str->hf, block, BIGSIZE, &result, NULL ) || result != BIGSIZE)
{
WARN("(hf=%p, block size %d): read didn't read (%d)\n",str->hf,n,GetLastError());
return FALSE;
}
} else {
DWORD args[6];
HRESULT hres;
HANDLE16 hsig;
args[0] = (DWORD)str->lockbytes; /* iface */
args[1] = (n+1)*BIGSIZE;
args[2] = 0; /* ULARGE_INTEGER offset */
args[3] = WOWGlobalAllocLock16( 0, BIGSIZE, &hsig ); /* sig */
args[4] = BIGSIZE;
args[5] = 0;
if (!WOWCallback16Ex(
(DWORD)((const ILockBytes16Vtbl*)MapSL(
(SEGPTR)((LPLOCKBYTES16)MapSL(str->lockbytes))->lpVtbl)
)->ReadAt,
WCB16_PASCAL,
6*sizeof(DWORD),
(LPVOID)args,
(LPDWORD)&hres
)) {
ERR("CallTo16 ILockBytes16::ReadAt() failed, hres %x\n",hres);
return FALSE;
}
memcpy(block, MapSL(args[3]), BIGSIZE);
WOWGlobalUnlockFree16(args[3]);
}
return TRUE;
}
static BOOL
_ilockbytes16_writeat(SEGPTR lockbytes, DWORD offset, DWORD length, void *buffer) {
DWORD args[6];
HRESULT hres;
args[0] = (DWORD)lockbytes; /* iface */
args[1] = offset;
args[2] = 0; /* ULARGE_INTEGER offset */
args[3] = (DWORD)MapLS( buffer );
args[4] = length;
args[5] = 0;
/* THIS_ ULARGE_INTEGER ulOffset, const void *pv, ULONG cb, ULONG *pcbWritten); */
if (!WOWCallback16Ex(
(DWORD)((const ILockBytes16Vtbl*)MapSL(
(SEGPTR)((LPLOCKBYTES16)MapSL(lockbytes))->lpVtbl)
)->WriteAt,
WCB16_PASCAL,
6*sizeof(DWORD),
(LPVOID)args,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -