⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 excelparser.php

📁 在线商店,可以在线购买,下定单,商品上架
💻 PHP
📖 第 1 页 / 共 2 页
字号:
		  case 0x00fd:
			if( $biff_ver < 8 ) break;
			if( (ord($ws[$ptr+2])+256*ord($ws[$ptr+3])) < 0x0a )
				return 6;
			$row = ord($ws[$ptr+4])+256*ord($ws[$ptr+5]);
			$col = ord($ws[$ptr+6])+256*ord($ws[$ptr+7]);
			$xf = ord($ws[$ptr+8])+256*ord($ws[$ptr+9]);
			$fonti = $this->xf['font'][$xf];
			$font = &$this->fonts[$fonti];

			$data['cell'][$row][$col]['type'] = 0;
			$sst_ind = ExcelParserUtil::str2long(substr($ws,$ptr+10,4));
			$data['cell'][$row][$col]['data'] = $sst_ind;
			$data['cell'][$row][$col]['font'] = $fonti;

/*            echo "FontRecord for sting at $row,$col<br>";
			echo str_replace("\n","<br>\n", ExcelFont::toString($font,$fonti));*/

			if( !isset($data['max_row']) ||
			    ($data['max_row'] < $row) )
				$data['max_row'] = $row;

			if( !isset($data['max_col']) ||
			    ($data['max_col'] < $col) )
				$data['max_col'] = $col;

			break;

		  // unknown, unsupported or unused opcode
		  default:
			break;
		 }

		 $ptr += 4+256*ord($ws[$ptr+3])+ord($ws[$ptr+2]);
		}
		//$this->dbglog->debug("parse_worksheet() function returns ".var_export($data,true));

		return $data;

	}

	// Parse workbook
	//----------------

	/**
	 * Locate the workbook stream inside the block-structured file and parse it.
	 *
	 * What the method does, in order:
	 *  1. Reads the FAT block list from the header and fills $this->fat.
	 *  2. Reads the directory through the block chain that starts at the
	 *     header's root entry block.
	 *  3. If the header declares a small-block chain, fills $this->sfat and
	 *     builds a provider over the small-data stream of 'Root Entry'.
	 *  4. Finds the 'Workbook' (or, failing that, 'Book') stream, loads it and
	 *     derives the BIFF version from its first record.
	 *  5. Walks the workbook records, collecting sheet descriptors (and the
	 *     parsed sheets), custom formats, fonts, XF records and the shared
	 *     string table into $this->worksheet, $this->format, $this->fonts,
	 *     $this->xf and $this->sst.
	 *
	 * @param object $f_header DataProvider over the 0x200-byte file header.
	 * @param object $dp       DataProvider over the file contents.
	 *
	 * @return int 0 on success, 6 if the file is corrupted, 7 if the workbook
	 *             data was not found, 8 for an unsupported file version. A
	 *             non-array result of parse_worksheet() is returned unchanged.
	 */
	function parse_workbook( $f_header, $dp ) {

/*DBG*/ $this->dbglog->debug("parse_workbook() function");

		$root_entry_block = $f_header->getLong(0x30);
		$num_fat_blocks = $f_header->getLong(0x2c);

/*TRC*/ $this->dbglog->trace("Header parsed");

		// Each FAT block is 0x200 bytes, i.e. 0x80 four-byte entries.
		$this->fat = array();
		for( $i = 0; $i < $num_fat_blocks; $i++ ){
/*TRC*/		$this->dbglog->trace("FOR LOOP iteration i =".$i);

			$fat_block = $f_header->getLong( 0x4c + 4 * $i );
			$fatbuf = $dp->get( $fat_block * 0x200, 0x200 );
			$fat = new DataProvider( $fatbuf, DP_STRING_SOURCE );

			if( $fat->getSize() < 0x200 ){
				// "\$fat" is escaped on purpose: the message describes the
				// failed condition; interpolating the provider object would
				// force an object-to-string conversion.
/*DBG*/    		$this->dbglog->debug("parse_workbook() function found (strlen(\$fat) < 0x200) returns 6");
				return 6;
			}

			for( $j=0; $j<0x80; $j++ )
				$this->fat[] = $fat->getLong( $j * 4 );

			$fat->close();
			unset( $fat_block, $fatbuf, $fat );
		}

/*DBG*/ $this->dbglog->dump( $this->fat, "\$fat" );

		if( count($this->fat) < $num_fat_blocks ) {
			// "\$this->fat" is escaped to avoid an array-to-string conversion.
/*DBG*/    	$this->dbglog->debug("parse_workbook() function found (count(\$this->fat) < $num_fat_blocks) returns 6");
			return 6;
		}

		// Directory stream.
		$chain = $this->get_blocks_chain($root_entry_block);
		$dir = new DataProvider( $dp->ReadFromFat( $chain ), DP_STRING_SOURCE );
		unset( $chain );

		// Small-block FAT and small-data stream; only present when the header
		// field at 0x3c is not the 0xfeffffff marker.
		$this->sfat = array();
		$small_block = $f_header->getLong( 0x3c );
		if( $small_block != 0xfeffffff ) {
			$root_entry_index = $this->find_stream( $dir, 'Root Entry');
			if( $root_entry_index < 0 ) {
/*DBG*/    		$this->dbglog->debug("parse_workbook() function dont found Root Entry returns 6");
				return 6;
			}

			// Directory entries are 0x80 bytes; 0x74 holds the start block.
			$sdc_start_block = $dir->getLong( $root_entry_index * 0x80 + 0x74 );
			$small_data_chain = $this->get_blocks_chain($sdc_start_block);
			$this->max_sblocks = count($small_data_chain) * 8;

			$schain = $this->get_blocks_chain($small_block);
			for( $i = 0; $i < count( $schain ); $i++ ) {

				$sfatbuf = $dp->get( $schain[$i] * 0x200, 0x200 );
				$sfat = new DataProvider( $sfatbuf, DP_STRING_SOURCE );

				//$this->dbglog->dump( strlen($sfatbuf), "strlen(\$sftabuf)");
				//$this->dbglog->dump( $sfat, "\$sfat");

				if( $sfat->getSize() < 0x200 ) {
					// "\$sfat" is escaped for the same reason as "\$fat" above.
/*DBG*/    	 		$this->dbglog->debug("parse_workbook() function found (strlen(\$sfat) < 0x200)  returns 6");
					return 6;
				}

				for( $j=0; $j<0x80; $j++ )
					$this->sfat[] = $sfat->getLong( $j * 4 );

				$sfat->close();
				unset( $sfatbuf, $sfat );
			}
			unset( $schain );

			$sfcbuf = $dp->ReadFromFat( $small_data_chain );
			$sdp = new DataProvider( $sfcbuf, DP_STRING_SOURCE );
			unset( $sfcbuf, $small_data_chain );
		}

		// The workbook stream is called 'Workbook' or 'Book'.
		$workbook_index = $this->find_stream( $dir, 'Workbook' );
		if( $workbook_index<0 ) {
			$workbook_index = $this->find_stream( $dir, 'Book' );
			if( $workbook_index<0 ){
/*DBG*/    	    $this->dbglog->debug("parse_workbook() function workbook index not found returns 7");
				return 7;
			}
		}

		$workbook_start_block = $dir->getLong( $workbook_index * 0x80 + 0x74 );
		$workbook_length = $dir->getLong( $workbook_index * 0x80 + 0x78 );
		$wb = '';

		if( $workbook_length > 0 ) {
			if( $workbook_length >= 0x1000 ) {
				// Streams of 0x1000 bytes or more are read through the main FAT.
				$chain = $this->get_blocks_chain($workbook_start_block);
				$wb = $dp->ReadFromFat( $chain );
			} else {
				// Shorter streams are read from the small-data provider. It
				// only exists when the header declared a small-block chain;
				// without it the stream cannot be read, so report corruption
				// instead of calling a method on an undefined variable.
				if( !isset($sdp) ) {
/*DBG*/    	    	$this->dbglog->debug("parse_workbook() function small data stream not found returns 6");
					return 6;
				}
				$chain = $this->get_blocks_chain($workbook_start_block,true);
				$wb = $sdp->ReadFromFat( $chain, 0x40 );
				unset( $sdp );
			}
			$wb = substr($wb,0,$workbook_length);
			if( strlen($wb) != $workbook_length )
				return 6;
			unset( $chain );
		}

		// Unset fat arrays
		unset( $this->fat, $this->sfat );

		if( strlen($wb) <= 0 ) {
/*DBG*/    $this->dbglog->debug("parse_workbook() function workbook found (strlen($wb) <= 0) returns 7");
		   return 7;
		}
		if( strlen($wb) <  4 ) {
/*DBG*/    $this->dbglog->debug("parse_workbook() function workbook found (strlen($wb) < 4) returns 6");
		    return 6;
		}

		// parse workbook header
		if( strlen($wb) < 256*ord($wb[3])+ord($wb[2]) ){
/*DBG*/ 	$this->dbglog->debug("parse_workbook() function workbook found (strlen($wb) < 256*ord($wb[3])+ord($wb[2])) < 4) returns 6");
			return 6;
		}

		if( ord($wb[0]) != 0x09 ){
/*DBG*/ 	$this->dbglog->debug("parse_workbook() function workbook found (ord($wb[0]) != 0x09) returns 6");
			return 6;
		}

		$vers = ord($wb[1]);
		if( ($vers!=0) && ($vers!=2) && ($vers!=4) && ($vers!=8) ){
			return 8;
		}
		if( $vers!=8 )
			// 0, 2 and 4 map to versions 2, 3 and 4, all of which are
			// rejected by the "< 5" check below.
			$biff_ver = ($vers+4)/2;
		else {
			if( strlen($wb) < 12 ) return 6;
			switch( ord($wb[4])+256*ord($wb[5]) )
			{
			case 0x0500:
				if( ord($wb[0x0a])+256*ord($wb[0x0b]) < 1994 )
					$biff_ver = 5;
				else {
					switch(ord( $wb[8])+256*ord($wb[9]) ) {
					 case 2412:
					 case 3218:
					 case 3321:
						$biff_ver = 5;
						break;
					 default:
						$biff_ver = 7;
						break;
					}
				}
				break;
			case 0x0600:
				$biff_ver = 8;
				break;
			default:
				return 8;
			}
		}

		if( $biff_ver < 5 ) return 8;

		$ptr = 0;
		$this->worksheet['offset'] = array();
		$this->worksheet['options'] = array();
		$this->worksheet['unicode'] = array();
		$this->worksheet['name'] = array();
		$this->worksheet['data'] = array();
		$this->format = $this->populateFormat();
		$this->fonts = array();
		$this->fonts[0] = ExcelFont::basicFontRecord();

		$this->xf = array();
		$this->xf['format'] = array();
		$this->xf['font'] = array();
		$this->xf['type_prot'] = array();
		$this->xf['alignment'] = array();
		$this->xf['decoration'] = array();

		$xf_cnt=0;

		$this->sst['unicode'] = array();
		$this->sst['data'] = array();

		$opcode = 0;
		$sst_defined = false;
		$wblen = strlen($wb);

		// Walk the records until a record whose first byte is 0x0a or the end
		// of the stream. The bounds check comes first so $wb[$ptr] is never
		// read past the end of the string.
		while( ($ptr<$wblen) && (ord($wb[$ptr])!=0x0a) )
		{
			// A 0x3c record keeps the opcode of the preceding record, so it is
			// dispatched to the same case as the record it follows.
			$oc = ord($wb[$ptr])+256*ord($wb[$ptr+1]);
			if( $oc != 0x3c )
				$opcode = $oc;
			switch ($opcode)
			{
			// Sheet descriptor: stream offset, option flags and sheet name
			case 0x0085:
				$ofs = ExcelParserUtil::str2long(substr($wb,$ptr+4,4));
				$this->worksheet['offset'][] = $ofs;
				$this->worksheet['options'][] = ord($wb[$ptr+8])+256*ord($wb[$ptr+9]);
				if( $biff_ver==8 ) {
					$len = ord($wb[$ptr+10]);
					if( (ord($wb[$ptr+11]) & 1) > 0 ) {
						$this->worksheet['unicode'][] = true;
						$len = $len*2;
					} else {
						$this->worksheet['unicode'][] = false;
					}
					$this->worksheet['name'][] = substr($wb,$ptr+12,$len);
				} else {
					$this->worksheet['unicode'][] = false;
					$len = ord($wb[$ptr+10]);
					$this->worksheet['name'][] = substr($wb,$ptr+11,$len);
				}

				// The sheet itself is parsed immediately; a non-array result
				// is an error code and aborts the whole parse.
				$pws = $this->parse_worksheet(substr($wb,$ofs));
				if( is_array($pws) )
					$this->worksheet['data'][] = $pws;
				else
					return $pws;
				break;

			// Format
			case 0x041e:
				$fidx = ord($wb[$ptr+4])+256*ord($wb[$ptr+5]);
				if($fidx<0x31 ||$fidx==0x31 )
					break;
				elseif($biff_ver>7)
					$this->format[$fidx] = $this->getUnicodeString($wb,$ptr+6);
				break;

			// FONT 0x31
			case EXCEL_FONT_RID:
				$rec = ExcelFont::getFontRecord($wb,$ptr+4);
				$this->fonts[count($this->fonts)] = $rec;
/*echo str_replace("\n","<br>\n",ExcelFont::toString($rec,count($this->fonts)-1));
echo "FontRecord<br>" */;
				break;

			// XF
			case 0x00e0:
				$this->xf['font'][$xf_cnt] = ord($wb[$ptr+4])+256*ord($wb[$ptr+5]);
				$this->xf['format'][$xf_cnt] = ord($wb[$ptr+6])+256*ord($wb[$ptr+7]);
				$this->xf['type'][$xf_cnt]  = "1";
				$this->xf['bitmask'][$xf_cnt] = "1";
				$xf_cnt++;
				break;

			// SST
			case 0x00fc:
				if( $biff_ver < 8 ) break;

				$sbuflen = ord($wb[$ptr+2]) + 256*ord($wb[$ptr+3]);

				if( $oc != 0x3c ) {
					// First SST record: read the string count and start
					// right after the 8-byte counters.
					if( $sst_defined ) return 6;
					$snum = ExcelParserUtil::str2long(substr($wb,$ptr+8,4));
					$sptr = $ptr+12;
					$sst_defined = true;
				} else {
					// 0x3c record following SST data. $rslen, $slen, $str,
					// $char_bytes, $rt and $fesz carry over from the string
					// that was being read when the previous record ended.
					if( $rslen > $slen ) {
						// The last string was cut off: read the remainder.
						$sptr = $ptr+4;
						$rslen -= $slen;
						$slen = $rslen;

						if( (ord($wb[$sptr]) & 1) > 0 ) {
							// Remainder is two bytes per char; widen what was
							// already collected if it was one byte per char.
							if( $char_bytes == 1 ) {
								$sstr = '';
								for( $i=0; $i<strlen($str); $i++ )
									$sstr .= $str[$i].chr(0);
								$str = $sstr;
								$char_bytes=2;
							}
							$schar_bytes = 2;
						} else {
							$schar_bytes = 1;
						}

						if( $sptr+$slen*$schar_bytes > $ptr+4+$sbuflen )
							$slen = ($ptr+$sbuflen-$sptr+3)/$schar_bytes;

						$sstr = substr($wb,$sptr+1,$slen*$schar_bytes);

						if( ($char_bytes == 2) && ($schar_bytes == 1) )
						{
							$sstr2 = '';
							for( $i=0; $i<strlen($sstr); $i++ )
								$sstr2 .= $sstr[$i].chr(0);
							$sstr = $sstr2;
						}
						$str .= $sstr;

						$sptr += $slen*$schar_bytes+1+4*$rt+$fesz;
						if( $slen < $rslen ) {
							// Still incomplete: the next record must be
							// another 0x3c record.
							if( ($sptr >= strlen($wb)) ||
							    ($sptr < $ptr+4+$sbuflen) ||
							    (ord($wb[$sptr]) != 0x3c) )
							{
							    return 6;
							}
							break;
						} else {
							if( $char_bytes == 2 ) {
								$this->sst['unicode'][] = true;
							} else {
								$this->sst['unicode'][] = false;
							}
							$this->sst['data'][] = $str;
							$snum--;
						}
					} else {
						$sptr = $ptr+4;
						if( $sptr > $ptr ) $sptr += 4*$rt+$fesz;
					}
				}

				// Read whole strings until the record, the stream or the
				// announced string count is exhausted.
				while(  ($sptr < $ptr+4+$sbuflen) &&
					($sptr < strlen($wb)) &&
					($snum > 0) )
				{
					 $rslen = ord($wb[$sptr])+256*ord($wb[$sptr+1]);
					 $slen = $rslen;

					 if( (ord($wb[$sptr+2]) & 1) > 0 ) {
						$char_bytes = 2;
					 } else {
						$char_bytes = 1;
					 }

					 $rt = 0;
					 $fesz = 0;
					 switch (ord($wb[$sptr+2]) & 0x0c) {
					  // Rich-Text with Far-East
					  case 0x0c:
						$rt = ord($wb[$sptr+3])+256*(ord($wb[$sptr+4]));
						$fesz = ExcelParserUtil::str2long(substr($wb,$sptr+5,4));
						if( $sptr+9+$slen*$char_bytes > $ptr+4+$sbuflen )
							$slen = ($ptr+$sbuflen-$sptr-5)/$char_bytes;
						$str = substr($wb,$sptr+9,$slen*$char_bytes);
						$sptr += $slen*$char_bytes+9;
						break;

					  // Rich-Text
					  case 8:
						$rt = ord($wb[$sptr+3])+256*(ord($wb[$sptr+4]));
						if( $sptr+5+$slen*$char_bytes > $ptr+4+$sbuflen )
							$slen = ($ptr+$sbuflen-$sptr-1)/$char_bytes;
						$str = substr($wb,$sptr+5,$slen*$char_bytes);
						$sptr += $slen*$char_bytes+5;
						break;

					  // Far-East
					  case 4:
						$fesz = ExcelParserUtil::str2long(substr($wb,$sptr+3,4));
						if( $sptr+7+$slen*$char_bytes > $ptr+4+$sbuflen )
							$slen = ($ptr+$sbuflen-$sptr-3)/$char_bytes;
						$str = substr($wb,$sptr+7,$slen*$char_bytes);
						$sptr += $slen*$char_bytes+7;
						break;

					  // Compressed or uncompressed unicode
					  case 0:
						if( $sptr+3+$slen*$char_bytes > $ptr+4+$sbuflen )
							$slen = ($ptr+$sbuflen-$sptr+1)/$char_bytes;
						$str = substr($wb,$sptr+3,$slen*$char_bytes);
						$sptr += $slen*$char_bytes+3;
						break;
					 }

					 if( $slen < $rslen ) {
						// String was truncated by the record boundary; it
						// must be followed by a 0x3c record.
						if( ($sptr >= strlen($wb)) ||
						    ($sptr < $ptr+4+$sbuflen) ||
						    (ord($wb[$sptr]) != 0x3c) ) return 6;
					 } else {
						if( $char_bytes == 2 ) {
							$this->sst['unicode'][] = true;
						} else {
							$this->sst['unicode'][] = false;
						}
						// Skip the trailing rich-text runs and far-east data.
						$sptr += 4*$rt+$fesz;
						$this->sst['data'][] = $str;
						$snum--;
					 }
				} // while
				break;
			} // switch

			// !!! Optimization:
			//  $this->wsb[] = substr($wb,$ptr,4+256*ord($wb[$ptr+3])+ord($wb[$ptr+2]));

			// Advance past the 4-byte record header plus the record body.
			$ptr += 4+256*ord($wb[$ptr+3])+ord($wb[$ptr+2]);
		} // while

		// !!! Optimization:
		//  $this->workbook = $wb;
		$this->biff_version = $biff_ver;
/*DBG*/ $this->dbglog->debug("parse_workbook() function returns 0");
		return 0;
	}

	// ParseFromString & ParseFromFile
	//---------------------------------
	//
	// IN:
	//	string contents - File contents
	//	string filename - File name of an existing Excel file.
	//
	// OUT:
	//	0 - success
	//	1 - can't open file
	//	2 - file too small to be an Excel file
	//	3 - error reading header
	//	4 - error reading file
	//	5 - This is not an Excel file or file stored in < Excel 5.0
	//	6 - file corrupted
	//	7 - data not found
	//	8 - Unsupported file version

	/**
	 * Parse an Excel file that is already held in memory.
	 *
	 * Wraps $contents in a string-backed DataProvider, stores it in
	 * $this->dp and hands over to InitParser().
	 *
	 * @param string $contents File contents.
	 * @return int Result code of InitParser() (0 on success).
	 */
	function ParseFromString( $contents )
	{
		$this->dbglog->info("ParseFromString() enter.");

		$provider = new DataProvider( $contents, DP_STRING_SOURCE );
		$this->dp = $provider;

		$status = $this->InitParser();
		return $status;
	}

	/**
	 * Parse an Excel file from disk.
	 *
	 * Wraps $filename in a file-backed DataProvider, stores it in
	 * $this->dp and hands over to InitParser().
	 *
	 * @param string $filename File name of an existing Excel file.
	 * @return int Result code of InitParser() (0 on success).
	 */
	function ParseFromFile( $filename )
	{
		$this->dbglog->info("ParseFromFile() enter.");

		$provider = new DataProvider( $filename, DP_FILE_SOURCE );
		$this->dp = $provider;

		$status = $this->InitParser();
		return $status;
	}

	/**
	 * Validate the data source held in $this->dp and start workbook parsing.
	 *
	 * Checks, in order: the provider is valid (else 1), it holds more than
	 * 0x200 bytes (else 2), the first 0x200 bytes can be read (else 3) and
	 * they start with the expected 8-byte signature (else 5). On success the
	 * header is passed to parse_workbook() and that method's result is
	 * returned. $this->dp, $this->max_blocks and $this->max_sblocks are
	 * released once parse_workbook() has returned.
	 *
	 * @return int 0 on success, otherwise one of the error codes above or
	 *             whatever parse_workbook() reports.
	 */
	function InitParser()
	{
		$this->dbglog->info("InitParser() enter.");

		// Source could not be opened
		if( !$this->dp->isValid() )
		{
			$this->dbglog->error("InitParser() Failed to open file.");
			$this->dbglog->error("InitParser() function returns 1");
			return 1;
		}

		// Nothing beyond the 0x200-byte header
		if( $this->dp->getSize() <= 0x200 )
		{
			$this->dbglog->error("InitParser() File too small to be an Excel file.");
			$this->dbglog->error("InitParser() function returns 2");
			return 2;
		}

		$this->max_blocks = $this->dp->getBlocks();

		// Fetch the file header
		$header = $this->dp->get( 0, 0x200 );
		if( strlen( $header ) < 0x200 )
		{
			$this->dbglog->error("InitParser() Error reading header.");
			$this->dbglog->error("InitParser() function returns 3");
			return 3;
		}

		// Compare the leading bytes against the expected signature
		$signature = array(0xd0,0xcf,0x11,0xe0,0xa1,0xb1,0x1a,0xe1);
		foreach( $signature as $pos => $expected )
		{
			if( ord( $header[$pos] ) != $expected )
			{
				$this->dbglog->error("InitParser() function founds invalid header");
				$this->dbglog->error("InitParser() function returns 5");
				return 5;
			}
		}

		$f_header = new DataProvider( $header, DP_STRING_SOURCE );
		unset( $header, $signature, $pos, $expected );

		// The header has been read above; from here on the provider's base
		// offset is set to 0x200.
		$this->dp->_baseOfs = 0x200;
		$result = $this->parse_workbook( $f_header, $this->dp );

		unset( $f_header );
		unset( $this->dp, $this->max_blocks, $this->max_sblocks );

		return $result;
	}
}

?>

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -