⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xlhtml.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
								free(ws_array[i]->c_array[(j*ws_array[i]->max_cols)+k]->ustr.str);							if (ws_array[i]->c_array[(j*ws_array[i]->max_cols)+k]->ustr.fmt_run)								free(ws_array[i]->c_array[(j*ws_array[i]->max_cols)+k]->ustr.fmt_run);							if (ws_array[i]->c_array[(j*ws_array[i]->max_cols)+k]->h_link.str)								free(ws_array[i]->c_array[(j*ws_array[i]->max_cols)+k]->h_link.str);							free(ws_array[i]->c_array[(j*ws_array[i]->max_cols)+k]);						}					}				}				free(ws_array[i]->c_array);			}			free(ws_array[i]);		}	}		for (i=0; i<(int)max_xformats; i++)	{		if (xf_array[i])			free(xf_array[i]);	}	if (numCustomColors)	{		for (i=0; i<numCustomColors; i++)			free(customColors[i]);		free(customColors);	}	if (default_font.str)		free(default_font.str);	if (author.str)		free(author.str);	if (title)		free(title);	if (lastUpdated)		free(lastUpdated);		return 0;}void scan_file(COLEDIRENT *cde, void *_info){	U32 count = 0;	U16 length=0, target=0, opcode=0, version=0;	U8 buf[16];	COLEFILE *cf;	COLERRNO err;	cf = cole_fopen_direntry(cde, &err);	if (cf == 0)	{	/* error abort processing */		cole_perror (PRGNAME, err);		return;	}	/* Read & process the file... 
*/	while (cole_fread(cf, buf, 1, &err))	{		if (count > 3)			main_line_processor(opcode, version, count-4, target, buf[0]);		else if (count == 0)		{	/* Init everything */			length = 0;			opcode = (U16)buf[0];			target = 80;	/* ficticious number */		}		else if (count == 1)			version = (U16)buf[0];		else if (count == 2)			length = (U16)buf[0];		else if (count == 3)		{			length |= (U16)(buf[0]<<8);			target = length;		}		if (count == (U32)(target+3))			count = 0;		else			count++;		if (MaxColExceeded || MaxRowExceeded || MaxWorksheetsExceeded)			break;	/* We're outta memory and therefore...done */	}	cole_fclose(cf, &err);	if (Ascii)	{		if (DumpPage)		{	/* Output the XLS Parameters */			int i;			printf("There are %d pages total.\n", sheet_count+1);			for (i=0; i<=sheet_count; i++)			{				printf("Page:%d Name:%s MaxRow:%ld MaxCol:%d\n", i,					ws_array[i]->ws_title.str ? (char *)ws_array[i]->ws_title.str : "(Unknown Page)",					ws_array[i]->biggest_row, ws_array[i]->biggest_col);			}		}		else if (Xtract)			OutputPartialTableAscii();	}	else	{		if (DumpPage)		{	/* Output the XLS Parameters */			int i;			output_header();			printf("<p>There are %d pages total.</p>\n", sheet_count+1);			for (i=0; i<=sheet_count; i++)			{				printf("<p>Page:%d Name:%s MaxRow:%ld MaxCol:%d</p>\n", i,					ws_array[i]->ws_title.str ? (char *)ws_array[i]->ws_title.str : "(Unknown Page)",					ws_array[i]->biggest_row, ws_array[i]->biggest_col);			}					output_footer();		}		else		{			if( OutputXML )				OutputTableXML();			else				OutputTableHTML();		}	}}void SetupExtraction(void){	if (Xtract)	{	/* Revise the page settings... 
*//*		printf("-%d %d %d %d %d<br>\n", xp, xr1, xr2, xc1, xc2); */		if ((xp >= first_sheet)&&(xp <= last_sheet)&&(xp <= sheet_count))		{			first_sheet = xp;			last_sheet = xp;			if (xr1 < 0)			{				xr1 = (S16)ws_array[xp]->first_row;				xr2 = (S16)ws_array[xp]->biggest_row;			}			else if ((xr1 >= ws_array[xp]->first_row)&&(xr1 <= ws_array[xp]->biggest_row)				&&(xr2 >= ws_array[xp]->first_row)&&(xr2 <= ws_array[xp]->biggest_row))			{				ws_array[xp]->first_row = xr1;				ws_array[xp]->biggest_row = xr2;				if (xc1 < 0)				{					xc1 = ws_array[xp]->first_col;					xc2 = ws_array[xp]->biggest_col;				}				else if((xc1 >= ws_array[xp]->first_col)&&(xc1 <= ws_array[xp]->biggest_col)					&&(xc2 >= ws_array[xp]->first_col)&&(xc2 <= ws_array[xp]->biggest_col))				{					ws_array[xp]->first_col = xc1;					ws_array[xp]->biggest_col = xc2;				}				else				{					if (Ascii)						fprintf(stderr, "Error - Col not in range during extraction"						    " (%d or %d not in [%d..%d])\n", xc1, xc2, ws_array[xp]->first_col, ws_array[xp]->biggest_col);					else					{						printf("Error - Col not in range during extraction.\n");												output_footer();					}					return;				}			}			else			{				if (Ascii)					fprintf(stderr, "Error - Row not in range during extraction"					    " (%d or %d not in [%ld..%ld])\n", xr1, xr2, ws_array[xp]->first_row, ws_array[xp]->biggest_row);				else				{					printf("Error - Row not in range during extraction.");					output_footer();				}				return;			}		}		else		{			if (Ascii)				fprintf(stderr, "Error - Page not in range during extraction.");			else			{				printf("Error - Page not in range during extraction.");				output_footer();			}			return;		}	}}/*!*******************************************************************	\param count	the absolute count in the record*	\param last	the size of the record*	\param bufidx	the index into the working buffer*	\param buflast	the expected length of the working 
buffer********************************************************************/void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data){	U16 cont_opcode = 0;		/* If first pass, reset stuff. */	if (count == 0)	{		if (opcode != 0x3C)	/* continue command *//*		{			printf("\n* * * * * * CONTINUE * * * * * * * * *\n\n");		}		else */		{	/* Normal path... */			last_opcode = opcode;			bufidx = 0;			buflast = 0;			cont_str_array = 0;			memset(working_buffer, 0, WBUFF_SIZE);		}	}	if (opcode == 0x3C)	{		opcode = last_opcode;		cont_opcode = 1;	}	/* Abort processing if too big. Next opcode will reset everything. */	if (bufidx >= WBUFF_SIZE)	{		/*printf("OC:%02X C:%04X I:%04X BL:%04X cch:%04X gr:%04X\n", opcode, count, bufidx, buflast, cch, grbit); */		/*abort(); */		return;	}	/* no chart processing for now. */	if (version == 0x0010)		return;	switch (opcode)	{		case 0x09:	/* BOF */			working_buffer[bufidx++] = data;			if (bufidx == last)			{				if (file_version == 0)				{	/* File version info can be gathered here...					 
*    4 = Excel version 4					 * 1280 = Excel version 5					 * 0500 = Excel 95					 * 1536 = Excel 97 */					if (version == 8)						file_version = getShort(&working_buffer[0]);					else						file_version = version;					if (file_version == EXCEL95)					{						use_colors = 0;						HARD_MAX_ROWS = HARD_MAX_ROWS_95;					}/*					printf("Biff:%X\n", file_version); */				}				sheet_count++;				if (sheet_count >= (int)max_worksheets)					add_more_worksheet_ptrs();			}			break;		case 0x01:	/* Blank */			working_buffer[bufidx++] = data;			if (bufidx == last)			{				U16 r, c, f;				r = getShort(&working_buffer[0]);				c = getShort(&working_buffer[2]);				if (version == 2)					f = getShort(&working_buffer[4]);				else					f = 0;				add_wb_array(r, c, f, opcode, (U16)0, (U8 *)0, 0, (U16)0, 0);			}			break;		case 0x02:	/* Integer */			working_buffer[bufidx++] = data;					if (bufidx == last)			{				U16 r, c, i, f;				char temp[32];				r = getShort(&working_buffer[0]);				c = getShort(&working_buffer[2]);				if (version == 2)				{					f = getShort(&working_buffer[4]);					i = getShort(&working_buffer[7]);					sprintf(temp, "%d", i);				}				else				{					f = 0;					Unsupported++;					strcpy(temp, OutputXML ? 
"<Unsupported/>INT" : "****INT");				}				add_wb_array(r, c, f, opcode, (U16)0, (U8 *)temp, (U16)strlen(temp), 0, NULL);			}					break;		case 0x03:	/* Number - Float */			working_buffer[bufidx++] = data;			if (bufidx == last)			{				U16 r, c, f;				F64 d;				char temp[64];				r = getShort(&working_buffer[0]);				c = getShort(&working_buffer[2]);				if (version == 2)				{					f = getShort(&working_buffer[4]);					getDouble(&working_buffer[6], &d);					sprintf(temp, "%.15g", d);				}				else				{	/* Who knows what the future looks like */					f = 0;					Unsupported = 1;					sprintf(temp, "****FPv:%d", version);				}				add_wb_array(r, c, f, opcode, (U16)0, (U8 *)temp, (U16)strlen(temp), 0, 0);			}			break;		case 0xD6:	/* RString */			working_buffer[bufidx++] = data;					if ((bufidx == 8)&&(buflast == 0))				buflast = 8 + getShort(&working_buffer[6]);			if (buflast)			{				if (bufidx == buflast)				{					U16 r, c, l, f;					r = getShort(&working_buffer[0]);					c = getShort(&working_buffer[2]);					f = getShort(&working_buffer[4]);					l = getShort(&working_buffer[6]);					working_buffer[8+l] = 0;					add_wb_array(r, c, f, opcode, (U16)0, &working_buffer[8],							(U16)strlen((char *)&working_buffer[8]), 0, 0);				}			}			break;		case 0x04:	/* Label - UNI */			working_buffer[bufidx++] = data;			if (file_version == EXCEL95)			{				if (bufidx == last)				{					U16 r, c, f;					r = getShort(&working_buffer[0]);					c = getShort(&working_buffer[2]);					f = getShort(&working_buffer[4]);					working_buffer[bufidx] = 0;					add_wb_array(r, c, f, opcode, (U16)0, &working_buffer[8],							(U16)strlen((char *)&working_buffer[8]), 0, 0);				}			}			else if (file_version == EXCEL97)			{    /* Remember, bufidx is 1 more than it should be */				if ((bufidx == 8)&&(buflast == 0))				{	/* buflast = working_buffer[7]; */					cch = getShort(&working_buffer[6]);					buflast = cch + 9;				}				if (bufidx == 9)				{					if (working_buffer[8] == 1)						buflast = (cch << 1) + 9;				}				
if (buflast)				{					if (bufidx == buflast)					{						U16 r, c, f;						U16 len;						r = getShort(&working_buffer[0]);						c = getShort(&working_buffer[2]);						if (version == 2)							f = getShort(&working_buffer[4]);						else	/* Unknown version */							f = 0;						working_buffer[bufidx] = 0;						len = (U16)strlen((char *)&working_buffer[8]);						if (working_buffer[8] == 1)						{							UnicodeStrings = 2;							add_wb_array(r, c, f, opcode, (U16)2, &working_buffer[9], (U16)(cch << 1), 0, 0);						}						else							add_wb_array(r, c, f, opcode, (U16)0, &working_buffer[8], len, 0, 0);					}				}			}			break;		case 0x05:	/* Boolerr */			working_buffer[bufidx++] = data;			if (bufidx == last)			{				U16 r, c, f;				char temp[16];				r = getShort(&working_buffer[0]);				c = getShort(&working_buffer[2]);				if (version == 2)				{					f = getShort(&working_buffer[4]);					decodeBoolErr(working_buffer[6], working_buffer[7], temp);					add_wb_array(r, c, f, opcode, (U16)0, (U8 *)temp, (U16)strlen(temp), 0, 0);				}				else				{					f = 0;					Unsupported = 1;					strcpy(temp, "****Bool");					add_wb_array(r, c, f, opcode, (U16)0, (U8 *)temp, (U16)strlen(temp), 0, 0);				}			}			break;			/************			*	This function has 2 entry points. 1 is the mainline FC opcode.			*	In this event there are several bytes that setup the type of			*	strings that will follow. Then there is the continue entry			*	point which is immediate - e.g location 0.			*************/		case 0xFC:	/* Packed String Array A.K.A. 
SST Shared String Table...UNI */			if ((count > 7)||(cont_opcode == 1)) /* Skip the 1st 8 locations they are bs */			{/*				if ((count == 0)&&(data == 0)&&(buflast))	*/				if ((count == 0)&&(cont_opcode == 1)&&(buflast))				{/*					printf("Adjusting...\n"); *//*					printf("I:%04X BL:%04X\n", bufidx, buflast); */					cont_str_array = 1;					cont_grbit = data;					return;				}				working_buffer[bufidx] = data;				bufidx++;				if((cont_str_array)&&(grbit & 0x01)&& !(cont_grbit & 0x01))				{	/* ASCII -> unicode */					working_buffer[bufidx] = 0;					bufidx++;				}				if (buflast == 0)	/* Header processor */				{					if (bufidx == 0x03)  /* After 3 locations we have length */					{				    /* and type of chars... */						cch = getShort(&working_buffer[0]);						grbit = working_buffer[2];						if (grbit < 0x04)	/* Normal run */						{							nonascii = 0;							bufidx = 0;							crun = 0;							extrst = 0;							buflast = cch << (grbit & 0x01);							/* special case for empty strings */							if (!cch && !buflast)								add_str_array(0, (U8 *)0, 0, 0, 0);							else								memset(working_buffer, 0, WBUFF_SIZE);						}					}					else if (bufidx == 0x05)					{						if ((grbit & 0x0C) == 0x08)	/* Rich string only */						{							nonascii = 0;							bufidx = 0;							crun = getShort(&working_buffer[3]);							extrst = 0;							buflast = (cch << (grbit & 0x01)) + (crun*4);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -