📄 excelparser.php
字号:
case 0x00fd:
if( $biff_ver < 8 ) break;
if( (ord($ws[$ptr+2])+256*ord($ws[$ptr+3])) < 0x0a )
return 6;
$row = ord($ws[$ptr+4])+256*ord($ws[$ptr+5]);
$col = ord($ws[$ptr+6])+256*ord($ws[$ptr+7]);
$xf = ord($ws[$ptr+8])+256*ord($ws[$ptr+9]);
$fonti = $this->xf['font'][$xf];
$font = &$this->fonts[$fonti];
$data['cell'][$row][$col]['type'] = 0;
$sst_ind = ExcelParserUtil::str2long(substr($ws,$ptr+10,4));
$data['cell'][$row][$col]['data'] = $sst_ind;
$data['cell'][$row][$col]['font'] = $fonti;
/* echo "FontRecord for sting at $row,$col<br>";
echo str_replace("\n","<br>\n", ExcelFont::toString($font,$fonti));*/
if( !isset($data['max_row']) ||
($data['max_row'] < $row) )
$data['max_row'] = $row;
if( !isset($data['max_col']) ||
($data['max_col'] < $col) )
$data['max_col'] = $col;
break;
// unknown, unsupported or unused opcode
default:
break;
}
$ptr += 4+256*ord($ws[$ptr+3])+ord($ws[$ptr+2]);
}
//$this->dbglog->debug("parse_worksheet() function returns ".var_export($data,true));
return $data;
}
// Parse workbook
//----------------
/**
 * Locate the workbook stream inside the compound file and parse its global
 * records: sheet list, number formats, fonts, XF records and the shared
 * string table. Every sheet entry found is handed to parse_worksheet().
 *
 * Results are stored on the object: worksheet[], format, fonts, xf[],
 * sst[] and biff_version.
 *
 * @param object $f_header Provider wrapping the 0x200-byte file header (read with getLong()).
 * @param object $dp       Provider for the file body (read with get() and ReadFromFat()).
 * @return mixed 0 on success, 6 if the file is corrupted, 7 if the workbook
 *               data was not found, 8 for an unsupported file version; a
 *               non-array result of parse_worksheet() is returned unchanged.
 */
function parse_workbook( $f_header, $dp ) {
    /*DBG*/ $this->dbglog->debug("parse_workbook() function");
    // Header fields: directory start block at 0x30, FAT block count at 0x2c.
    $root_entry_block = $f_header->getLong(0x30);
    $num_fat_blocks = $f_header->getLong(0x2c);
    /*TRC*/ $this->dbglog->trace("Header parsed");

    // Build the FAT: the header lists the FAT block numbers from offset 0x4c,
    // and every 0x200-byte FAT block contributes 0x80 four-byte entries.
    $this->fat = array();
    for( $i = 0; $i < $num_fat_blocks; $i++ ){
        /*TRC*/ $this->dbglog->trace("FOR LOOP iteration i =".$i);
        $fat_block = $f_header->getLong( 0x4c + 4 * $i );
        $fatbuf = $dp->get( $fat_block * 0x200, 0x200 );
        $fat = new DataProvider( $fatbuf, DP_STRING_SOURCE );
        if( $fat->getSize() < 0x200 ){
            // The condition text is logged literally; "$fat" must not be interpolated (it is an object).
            /*DBG*/ $this->dbglog->debug("parse_workbook() function found (strlen(\$fat) < 0x200) returns 6");
            return 6;
        }
        for( $j=0; $j<0x80; $j++ )
            $this->fat[] = $fat->getLong( $j * 4 );
        $fat->close();
        unset( $fat_block, $fatbuf, $fat );
    }
    /*DBG*/ $this->dbglog->dump( $this->fat, "\$fat" );
    if( count($this->fat) < $num_fat_blocks ) {
        // Logged literally; interpolating the array would trigger an array-to-string conversion.
        /*DBG*/ $this->dbglog->debug("parse_workbook() function found (count(\$this->fat) < \$num_fat_blocks) returns 6");
        return 6;
    }

    // Read the directory stream by following its block chain through the FAT.
    $chain = $this->get_blocks_chain($root_entry_block);
    $dir = new DataProvider( $dp->ReadFromFat( $chain ), DP_STRING_SOURCE );
    unset( $chain );

    // Small-block FAT and small-block data container. $sdp stays null when
    // the header marks the small-block FAT as absent.
    $this->sfat = array();
    $sdp = null;
    $small_block = $f_header->getLong( 0x3c );
    if( $small_block != 0xfeffffff ) {
        $root_entry_index = $this->find_stream( $dir, 'Root Entry');
        if( $root_entry_index < 0 ) {
            /*DBG*/ $this->dbglog->debug("parse_workbook() function dont found Root Entry returns 6");
            return 6;
        }
        // Directory entries are 0x80 bytes; the start block is read at +0x74.
        $sdc_start_block = $dir->getLong( $root_entry_index * 0x80 + 0x74 );
        $small_data_chain = $this->get_blocks_chain($sdc_start_block);
        // Presumably 8 small (0x40-byte) blocks per 0x200-byte block - TODO confirm.
        $this->max_sblocks = count($small_data_chain) * 8;
        $schain = $this->get_blocks_chain($small_block);
        for( $i = 0; $i < count( $schain ); $i++ ) {
            $sfatbuf = $dp->get( $schain[$i] * 0x200, 0x200 );
            $sfat = new DataProvider( $sfatbuf, DP_STRING_SOURCE );
            //$this->dbglog->dump( strlen($sfatbuf), "strlen(\$sftabuf)");
            //$this->dbglog->dump( $sfat, "\$sfat");
            if( $sfat->getSize() < 0x200 ) {
                // Logged literally; "$sfat" is an object and must not be interpolated.
                /*DBG*/ $this->dbglog->debug("parse_workbook() function found (strlen(\$sfat) < 0x200) returns 6");
                return 6;
            }
            for( $j=0; $j<0x80; $j++ )
                $this->sfat[] = $sfat->getLong( $j * 4 );
            $sfat->close();
            unset( $sfatbuf, $sfat );
        }
        unset( $schain );
        $sfcbuf = $dp->ReadFromFat( $small_data_chain );
        $sdp = new DataProvider( $sfcbuf, DP_STRING_SOURCE );
        unset( $sfcbuf, $small_data_chain );
    }

    // Find the workbook stream; 'Book' is tried when 'Workbook' is missing.
    $workbook_index = $this->find_stream( $dir, 'Workbook' );
    if( $workbook_index<0 ) {
        $workbook_index = $this->find_stream( $dir, 'Book' );
        if( $workbook_index<0 ){
            /*DBG*/ $this->dbglog->debug("parse_workbook() function workbook index not found returns 7");
            return 7;
        }
    }
    // Start block at +0x74 and stream length at +0x78 of the directory entry.
    $workbook_start_block = $dir->getLong( $workbook_index * 0x80 + 0x74 );
    $workbook_length = $dir->getLong( $workbook_index * 0x80 + 0x78 );
    $wb = '';
    if( $workbook_length > 0 ) {
        if( $workbook_length >= 0x1000 ) {
            // Streams of 0x1000 bytes or more are read through the regular FAT.
            $chain = $this->get_blocks_chain($workbook_start_block);
            $wb = $dp->ReadFromFat( $chain );
        } else {
            // Shorter streams are read from the small-block container. Without
            // that container the stream cannot be read: report corruption
            // instead of calling a method on null.
            if( $sdp === null ) {
                /*DBG*/ $this->dbglog->debug("parse_workbook() function small workbook stream without small block data returns 6");
                return 6;
            }
            $chain = $this->get_blocks_chain($workbook_start_block,true);
            $wb = $sdp->ReadFromFat( $chain, 0x40 );
            unset( $sdp );
        }
        $wb = substr($wb,0,$workbook_length);
        if( strlen($wb) != $workbook_length )
            return 6;
        unset( $chain );
    }
    // Unset fat arrays
    unset( $this->fat, $this->sfat );
    if( strlen($wb) <= 0 ) {
        /*DBG*/ $this->dbglog->debug("parse_workbook() function workbook found (strlen(\$wb) <= 0) returns 7");
        return 7;
    }
    if( strlen($wb) < 4 ) {
        /*DBG*/ $this->dbglog->debug("parse_workbook() function workbook found (strlen(\$wb) < 4) returns 6");
        return 6;
    }
    // parse workbook header
    // Bytes 2-3 of a record hold its little-endian data length.
    if( strlen($wb) < 256*ord($wb[3])+ord($wb[2]) ){
        // Logged literally so the raw workbook bytes are not written to the log.
        /*DBG*/ $this->dbglog->debug("parse_workbook() function workbook found (strlen(\$wb) < 256*ord(\$wb[3])+ord(\$wb[2])) < 4) returns 6");
        return 6;
    }
    // The first record must start with byte 0x09; its second byte is the version marker.
    if( ord($wb[0]) != 0x09 ){
        /*DBG*/ $this->dbglog->debug("parse_workbook() function workbook found (ord(\$wb[0]) != 0x09) returns 6");
        return 6;
    }
    $vers = ord($wb[1]);
    if( ($vers!=0) && ($vers!=2) && ($vers!=4) && ($vers!=8) ){
        return 8;
    }
    if( $vers!=8 )
        // Maps marker 0/2/4 to version 2/3/4; uses $vers (the value read above).
        $biff_ver = ($vers+4)/2;
    else {
        if( strlen($wb) < 12 ) return 6;
        // For marker 8 the version word at offset 4 decides between BIFF5/7 and BIFF8.
        switch( ord($wb[4])+256*ord($wb[5]) )
        {
            case 0x0500:
                if( ord($wb[0x0a])+256*ord($wb[0x0b]) < 1994 )
                    $biff_ver = 5;
                else {
                    switch(ord( $wb[8])+256*ord($wb[9]) ) {
                        case 2412:
                        case 3218:
                        case 3321:
                            $biff_ver = 5;
                            break;
                        default:
                            $biff_ver = 7;
                            break;
                    }
                }
                break;
            case 0x0600:
                $biff_ver = 8;
                break;
            default:
                return 8;
        }
    }
    // Anything older than version 5 is rejected as unsupported.
    if( $biff_ver < 5 ) return 8;

    // Reset all per-workbook state before walking the records.
    $ptr = 0;
    $this->worksheet['offset'] = array();
    $this->worksheet['options'] = array();
    $this->worksheet['unicode'] = array();
    $this->worksheet['name'] = array();
    $this->worksheet['data'] = array();
    $this->format = $this->populateFormat();
    $this->fonts = array();
    $this->fonts[0] = ExcelFont::basicFontRecord();
    $this->xf = array();
    $this->xf['format'] = array();
    $this->xf['font'] = array();
    $this->xf['type_prot'] = array();
    $this->xf['alignment'] = array();
    $this->xf['decoration'] = array();
    $xf_cnt=0;
    $this->sst['unicode'] = array();
    $this->sst['data'] = array();
    $opcode = 0;
    $sst_defined = false;
    $wblen = strlen($wb);

    // Walk the records until one starts with byte 0x0a or the buffer ends.
    // The bounds test comes first so $wb[$ptr] is never read past the end.
    while( ($ptr<$wblen) && (ord($wb[$ptr])!=0x0a) )
    {
        $oc = ord($wb[$ptr])+256*ord($wb[$ptr+1]);
        // Opcode 0x3c keeps the previous opcode, so the record is dispatched
        // to the handler of the record it continues.
        if( $oc != 0x3c )
            $opcode = $oc;
        switch ($opcode)
        {
            // Sheet entry: stream offset, options and name; the sheet is parsed immediately.
            case 0x0085:
                $ofs = ExcelParserUtil::str2long(substr($wb,$ptr+4,4));
                $this->worksheet['offset'][] = $ofs;
                $this->worksheet['options'][] = ord($wb[$ptr+8])+256*ord($wb[$ptr+9]);
                if( $biff_ver==8 ) {
                    $len = ord($wb[$ptr+10]);
                    // Bit 0 of the flag byte selects two-byte characters.
                    if( (ord($wb[$ptr+11]) & 1) > 0 ) {
                        $this->worksheet['unicode'][] = true;
                        $len = $len*2;
                    } else {
                        $this->worksheet['unicode'][] = false;
                    }
                    $this->worksheet['name'][] = substr($wb,$ptr+12,$len);
                } else {
                    $this->worksheet['unicode'][] = false;
                    $len = ord($wb[$ptr+10]);
                    $this->worksheet['name'][] = substr($wb,$ptr+11,$len);
                }
                $pws = $this->parse_worksheet(substr($wb,$ofs));
                if( is_array($pws) )
                    $this->worksheet['data'][] = $pws;
                else
                    // A non-array result is an error code; pass it to the caller.
                    return $pws;
                break;
            // Format
            case 0x041e:
                $fidx = ord($wb[$ptr+4])+256*ord($wb[$ptr+5]);
                // Indexes up to and including 0x31 are skipped here.
                if($fidx<0x31 ||$fidx==0x31 )
                    break;
                elseif($biff_ver>7)
                    $this->format[$fidx] = $this->getUnicodeString($wb,$ptr+6);
                break;
            // FONT 0x31
            case EXCEL_FONT_RID:
                $rec = ExcelFont::getFontRecord($wb,$ptr+4);
                $this->fonts[count($this->fonts)] = $rec;
                /*echo str_replace("\n","<br>\n",ExcelFont::toString($rec,count($this->fonts)-1));
                echo "FontRecord<br>" */;
                break;
            // XF
            case 0x00e0:
                $this->xf['font'][$xf_cnt] = ord($wb[$ptr+4])+256*ord($wb[$ptr+5]);
                $this->xf['format'][$xf_cnt] = ord($wb[$ptr+6])+256*ord($wb[$ptr+7]);
                $this->xf['type'][$xf_cnt] = "1";
                $this->xf['bitmask'][$xf_cnt] = "1";
                $xf_cnt++;
                break;
            // SST
            case 0x00fc:
                if( $biff_ver < 8 ) break;
                $sbuflen = ord($wb[$ptr+2]) + 256*ord($wb[$ptr+3]);
                if( $oc != 0x3c ) {
                    // First SST record: a second one is treated as corruption.
                    if( $sst_defined ) return 6;
                    $snum = ExcelParserUtil::str2long(substr($wb,$ptr+8,4));
                    $sptr = $ptr+12;
                    $sst_defined = true;
                } else {
                    // Continuation record: $rslen, $slen, $str, $char_bytes,
                    // $rt and $fesz carry over from the previous record.
                    if( $rslen > $slen ) {
                        // The last string was cut off; append the remainder.
                        $sptr = $ptr+4;
                        $rslen -= $slen;
                        $slen = $rslen;
                        if( (ord($wb[$sptr]) & 1) > 0 ) {
                            // The remainder is two-byte; widen what was read so far.
                            if( $char_bytes == 1 ) {
                                $sstr = '';
                                for( $i=0; $i<strlen($str); $i++ )
                                    $sstr .= $str[$i].chr(0);
                                $str = $sstr;
                                $char_bytes=2;
                            }
                            $schar_bytes = 2;
                        } else {
                            $schar_bytes = 1;
                        }
                        if( $sptr+$slen*$schar_bytes > $ptr+4+$sbuflen )
                            $slen = ($ptr+$sbuflen-$sptr+3)/$schar_bytes;
                        $sstr = substr($wb,$sptr+1,$slen*$schar_bytes);
                        // Widen a one-byte remainder when the string is already two-byte.
                        if( ($char_bytes == 2) && ($schar_bytes == 1) )
                        {
                            $sstr2 = '';
                            for( $i=0; $i<strlen($sstr); $i++ )
                                $sstr2 .= $sstr[$i].chr(0);
                            $sstr = $sstr2;
                        }
                        $str .= $sstr;
                        $sptr += $slen*$schar_bytes+1+4*$rt+$fesz;
                        if( $slen < $rslen ) {
                            // Still incomplete: another 0x3c record must follow directly.
                            if( ($sptr >= strlen($wb)) ||
                                ($sptr < $ptr+4+$sbuflen) ||
                                (ord($wb[$sptr]) != 0x3c) )
                            {
                                return 6;
                            }
                            break;
                        } else {
                            if( $char_bytes == 2 ) {
                                $this->sst['unicode'][] = true;
                            } else {
                                $this->sst['unicode'][] = false;
                            }
                            $this->sst['data'][] = $str;
                            $snum--;
                        }
                    } else {
                        $sptr = $ptr+4;
                        if( $sptr > $ptr ) $sptr += 4*$rt+$fesz;
                    }
                }
                // Read whole strings while data remains in this record.
                while( ($sptr < $ptr+4+$sbuflen) &&
                       ($sptr < strlen($wb)) &&
                       ($snum > 0) )
                {
                    $rslen = ord($wb[$sptr])+256*ord($wb[$sptr+1]);
                    $slen = $rslen;
                    if( (ord($wb[$sptr+2]) & 1) > 0 ) {
                        $char_bytes = 2;
                    } else {
                        $char_bytes = 1;
                    }
                    $rt = 0;
                    $fesz = 0;
                    // Flag bits 0x08 / 0x04 select the optional header fields;
                    // $slen is clamped to what fits in the current record.
                    switch (ord($wb[$sptr+2]) & 0x0c) {
                        // Rich-Text with Far-East
                        case 0x0c:
                            $rt = ord($wb[$sptr+3])+256*(ord($wb[$sptr+4]));
                            $fesz = ExcelParserUtil::str2long(substr($wb,$sptr+5,4));
                            if( $sptr+9+$slen*$char_bytes > $ptr+4+$sbuflen )
                                $slen = ($ptr+$sbuflen-$sptr-5)/$char_bytes;
                            $str = substr($wb,$sptr+9,$slen*$char_bytes);
                            $sptr += $slen*$char_bytes+9;
                            break;
                        // Rich-Text
                        case 8:
                            $rt = ord($wb[$sptr+3])+256*(ord($wb[$sptr+4]));
                            if( $sptr+5+$slen*$char_bytes > $ptr+4+$sbuflen )
                                $slen = ($ptr+$sbuflen-$sptr-1)/$char_bytes;
                            $str = substr($wb,$sptr+5,$slen*$char_bytes);
                            $sptr += $slen*$char_bytes+5;
                            break;
                        // Far-East
                        case 4:
                            $fesz = ExcelParserUtil::str2long(substr($wb,$sptr+3,4));
                            if( $sptr+7+$slen*$char_bytes > $ptr+4+$sbuflen )
                                $slen = ($ptr+$sbuflen-$sptr-3)/$char_bytes;
                            $str = substr($wb,$sptr+7,$slen*$char_bytes);
                            $sptr += $slen*$char_bytes+7;
                            break;
                        // Compressed or uncompressed unicode
                        case 0:
                            if( $sptr+3+$slen*$char_bytes > $ptr+4+$sbuflen )
                                $slen = ($ptr+$sbuflen-$sptr+1)/$char_bytes;
                            $str = substr($wb,$sptr+3,$slen*$char_bytes);
                            $sptr += $slen*$char_bytes+3;
                            break;
                    }
                    if( $slen < $rslen ) {
                        // String was cut off; it is only valid if a 0x3c record follows.
                        if( ($sptr >= strlen($wb)) ||
                            ($sptr < $ptr+4+$sbuflen) ||
                            (ord($wb[$sptr]) != 0x3c) ) return 6;
                    } else {
                        if( $char_bytes == 2 ) {
                            $this->sst['unicode'][] = true;
                        } else {
                            $this->sst['unicode'][] = false;
                        }
                        // Skip the trailing rich-text runs and far-east data.
                        $sptr += 4*$rt+$fesz;
                        $this->sst['data'][] = $str;
                        $snum--;
                    }
                } // while
                break;
        } // switch
        // !!! Optimization:
        // $this->wsb[] = substr($wb,$ptr,4+256*ord($wb[$ptr+3])+ord($wb[$ptr+2]));
        // Advance past the 4-byte record header plus the record data.
        $ptr += 4+256*ord($wb[$ptr+3])+ord($wb[$ptr+2]);
    } // while
    // !!! Optimization:
    // $this->workbook = $wb;
    $this->biff_version = $biff_ver;
    /*DBG*/ $this->dbglog->debug("parse_workbook() function returns 0");
    return 0;
}
// ParseFromString & ParseFromFile
//---------------------------------
//
// IN:
// string contents - File contents
// string filename - File name of an existing Excel file.
//
// OUT:
// 0 - success
// 1 - can't open file
// 2 - file too small to be an Excel file
// 3 - error reading header
// 4 - error reading file
// 5 - This is not an Excel file or file stored in < Excel 5.0
// 6 - file corrupted
// 7 - data not found
// 8 - Unsupported file version
/**
 * Parse an Excel file whose bytes are already in memory.
 *
 * @param string $contents File contents.
 * @return mixed Result of InitParser().
 */
function ParseFromString( $contents )
{
    $this->dbglog->info("ParseFromString() enter.");

    // Wrap the in-memory bytes in a string-backed provider, then run the
    // shared initialisation path.
    $provider = new DataProvider( $contents, DP_STRING_SOURCE );
    $this->dp = $provider;

    $status = $this->InitParser();
    return $status;
}
/**
 * Parse an Excel file read from disk.
 *
 * @param string $filename File name of an existing Excel file.
 * @return mixed Result of InitParser().
 */
function ParseFromFile( $filename )
{
    $this->dbglog->info("ParseFromFile() enter.");

    // Wrap the file in a file-backed provider, then run the shared
    // initialisation path.
    $provider = new DataProvider( $filename, DP_FILE_SOURCE );
    $this->dp = $provider;

    $status = $this->InitParser();
    return $status;
}
/**
 * Validate the data source held in $this->dp, check the compound-file
 * signature and hand the header plus body to parse_workbook().
 *
 * @return mixed 1 if the source is invalid, 2 if it is too small, 3 if the
 *               header cannot be read, 5 if the signature does not match,
 *               otherwise the result of parse_workbook().
 */
function InitParser()
{
    $this->dbglog->info("InitParser() enter.");

    // The provider must have opened its source successfully.
    if( !$this->dp->isValid() ) {
        $this->dbglog->error("InitParser() Failed to open file.");
        $this->dbglog->error("InitParser() function returns 1");
        return 1;
    }

    // The source has to be larger than the 0x200-byte header.
    if( $this->dp->getSize() <= 0x200 ) {
        $this->dbglog->error("InitParser() File too small to be an Excel file.");
        $this->dbglog->error("InitParser() function returns 2");
        return 2;
    }

    $this->max_blocks = $this->dp->getBlocks();

    // read file header
    $hdrbuf = $this->dp->get( 0, 0x200 );
    if( strlen( $hdrbuf ) < 0x200 ) {
        $this->dbglog->error("InitParser() Error reading header.");
        $this->dbglog->error("InitParser() function returns 3");
        return 3;
    }

    // check file header: the first eight bytes must match the signature
    $magic = array(0xd0,0xcf,0x11,0xe0,0xa1,0xb1,0x1a,0xe1);
    foreach( $magic as $pos => $expected ) {
        if( $expected != ord( $hdrbuf[$pos] ) ) {
            /*DBG*/ $this->dbglog->error("InitParser() function founds invalid header");
            /*DBG*/ $this->dbglog->error("InitParser() function returns 5");
            return 5;
        }
    }

    $f_header = new DataProvider( $hdrbuf, DP_STRING_SOURCE );
    unset( $hdrbuf, $magic, $pos, $expected );

    // Data blocks start right after the header.
    $this->dp->_baseOfs = 0x200;
    $rc = $this->parse_workbook( $f_header, $this->dp );

    // Release the header and the parser's temporary state.
    unset( $f_header );
    unset( $this->dp, $this->max_blocks, $this->max_sblocks );
    return $rc;
}
}
?>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -