⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xlhtml.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
/*							printf("rtbuflast:%X cch%X grbit:%X extrst:%X crun:%X last:%X\n",										buflast, cch, grbit, extrst, crun, last);							printf("%02X %02X %02X %02X %02X %02X\n",							working_buffer[0], working_buffer[1], working_buffer[2],							working_buffer[3], working_buffer[4], working_buffer[5]); */							memset(working_buffer, 0, WBUFF_SIZE);						}					}					else if (bufidx == 0x07)					{						if ((grbit & 0x0C) == 0x04)	/* Extended string only */						{							nonascii = 0;							bufidx = 0;							crun = 0;							extrst = getLong(&working_buffer[3]);							buflast = (cch << (grbit & 0x01)) + extrst;/*							printf("esbuflast:%X cch%X grbit:%X extrst:%X last:%X\n",										buflast, cch, grbit, extrst, last);							printf("%02X %02X %02X %02X %02X %02X\n",							working_buffer[0], working_buffer[1], working_buffer[2],							working_buffer[3], working_buffer[4], working_buffer[5]); */							memset(working_buffer, 0, WBUFF_SIZE);						}					}					else if (bufidx == 0x09)					{						if ((grbit & 0x0C) == 0x0C)						{							/* Rich String + Extended String **/							nonascii = 0;							bufidx = 0;							crun = getShort(&working_buffer[3]);							extrst = getLong(&working_buffer[5]);							buflast = (cch << (grbit & 0x01)) + extrst + (crun*4);/*							printf("xrtbuflast:%X cch%X grbit:%X extrst:%X crun:%X last:%X\n",										buflast, cch, grbit, extrst, crun, last);							printf("%02X %02X %02X %02X %02X %02X\n",							working_buffer[0], working_buffer[1], working_buffer[2],							working_buffer[3], working_buffer[4], working_buffer[5]); */							memset(working_buffer, 0, WBUFF_SIZE);						}					}/*					printf("*%02X ", data); */				}				else	/* payload processor */				{/*					if (cont_opcode == 1)						printf(" %02X", data); */					if (data > 127)						nonascii = 1;					if (bufidx == buflast)					{						U8 uni;						U16 len = (U16)(cch << (grbit & 0x01));/*						int i;	*/						if (grbit & 01)						{							uni = 2;							UnicodeStrings = 2;						}						else							
uni = nonascii;						working_buffer[bufidx] = 0;/*  						fprintf(stderr,":buflast-"); *//*                                                  { int i; *//*  						for (i=0; i<buflast; i++) *//*                                                    putchar(working_buffer[i]); *//*  						fprintf(stderr,"\nNext String:%d\n", next_string); *//*                                                  } */						if (crun)							add_str_array(uni, working_buffer, len, working_buffer+len, crun);						else							add_str_array(uni, working_buffer, len, 0, 0);						if (uni > UnicodeStrings)	/* Try to "upgrade" charset */							UnicodeStrings = uni;						bufidx = 0;						buflast = 0;						cch = 0;						cont_str_array = 0;						memset(working_buffer, 0, WBUFF_SIZE);					}				}			}			break;		case 0xFD:	/* String Array Index A.K.A. LABELSST */			working_buffer[count] = data;			if (count == (last - 1))			{				U32 i;				U16 r, c, f;				/* This is byte reversed... */				r = getShort(&working_buffer[0]);				c = getShort(&working_buffer[2]);				f = getShort(&working_buffer[4]);				i = getLong(&working_buffer[6]);				if (i < next_string)				{/*					printf("String used:%d\n", (int)i); */					if (str_array[i])					{						if (str_array[i]->str)							add_wb_array(								r, c, f, opcode,								str_array[i]->uni, str_array[i]->str,								str_array[i]->len, str_array[i]->crun_cnt, str_array[i]->fmt_run);					}					else	/* Error, so just set it empty */						add_wb_array( r, c, f, opcode,								(U16)0, (U8 *)"String Table Error", 18, 0, 0);				}				else					MaxStringsExceeded = 1;			}			break;		case 0x31:	/* Font */			working_buffer[bufidx++] = data;			if (bufidx > 14) /* Address 14 has length in unicode chars */			{				if ((file_version == EXCEL95)&&(bufidx == last))				{	/* Microsoft doesn't stick to their documentation. Excel 97 is supposed					   to be 0x0231...but its not. Have to use file_version to separate them. 
*/					unsigned int i;					U16 size, attr, c_idx, b, su;					U8 u;					size = getShort(&working_buffer[0]);					attr = getShort(&working_buffer[2]);					c_idx = getShort(&working_buffer[4]);					b = getShort(&working_buffer[6]);					su = getShort(&working_buffer[8]);					u = working_buffer[10];					buflast = working_buffer[14];					for (i=0; i<buflast; i++)						working_buffer[i] = working_buffer[i+15];					working_buffer[buflast] = 0;/*					printf("S:%04X A:%04X C:%04X B:%04X SU:%04X U:%02X\n",							size, attr,c_idx,b,su,u);					printf("f:%s\n", working_buffer); */					add_font(size, attr, c_idx, b, su, u, 0, &working_buffer[0], 0);				}				else if ((file_version == EXCEL97)&&(bufidx == last))				{	/* Microsoft doesn't stick to their documentation. Excel 97 is supposed					   to be 0x0231...but its not. Have to use file_version to separate them. */					unsigned int i;					U16 len;					U16 size, attr, c_idx, b, su;					U8 u, uni=0;					size = getShort(&working_buffer[0]);					attr = getShort(&working_buffer[2]);					c_idx = getShort(&working_buffer[4]);					b = getShort(&working_buffer[6]);					su = getShort(&working_buffer[8]);					u = working_buffer[10];					buflast = working_buffer[14];					for (i=0; i<(buflast-2); i++)					{	/* This looks at the 2nd byte to see if its unicode... 
*/						if (working_buffer[(i<<1)+17] != 0)							uni = 2;					}					if (uni == 2)						len = (U16)(buflast<<1);					else						len = (U16)buflast;					if (uni == 0)					{							for (i=0; i<len; i++)						{							working_buffer[i] = working_buffer[(i<<1)+16];							if ((working_buffer[i] > 0x0080U) && (uni == 0))								uni = 1;						}					}					else					{						for (i=0; i<len; i++)							working_buffer[i] = working_buffer[i+16];					}					working_buffer[len] = 0;/*					printf("S:%04X A:%04X C:%04X B:%04X SU:%04X U:%02X\n",							size, attr,c_idx,b,su,u);					printf("BL:%d L:%d Uni:%d\n", buflast, len, uni);					printf("%X %X %X %X\n", working_buffer[15], working_buffer[16], working_buffer[17], working_buffer[18]);					printf("f:%s\n", working_buffer); */					add_font(size, attr, c_idx, b, su, u, uni, &working_buffer[0], len);				}			}			break;		case 0x14:	/* Header */			break;		case 0x15:	/* Footer */			break;		case 0x06:	/* Formula */			working_buffer[bufidx++] = data;			if (bufidx == last)			{				U16 r, c, f;				U8 calc_val[64];				r = getShort(&working_buffer[0]);				c = getShort(&working_buffer[2]);				f = getShort(&working_buffer[4]);				if ((working_buffer[12] == 0xFF)&&(working_buffer[13] == 0xFF))				{	/* Formula evaluates to Bool, Err, or String */					if (working_buffer[6] == 1)				/* Boolean */					{						decodeBoolErr(working_buffer[8], 0, (char *)calc_val);						opcode = 0x0105;					}					else if (working_buffer[6] == 2)			/* Err */					{						decodeBoolErr(working_buffer[8], 1, (char *)calc_val);						opcode = 0x0105;					}					else					{							/* String UNI */						str_formula_row = r;						str_formula_col = c;						str_formula_format = f;						break;					}				}				else				{	/* Otherwise...this is a number */					F64 n;					getDouble(&working_buffer[6], &n);					sprintf((char *)calc_val, "%.15g", n);					opcode = 0x0103;	/* To fix up OutputCellFormatted... 
*/				}				add_wb_array(r, c, f, opcode, (U16)0, calc_val, (U16)strlen((char *)calc_val), 0, 0);			}			break;		case 0x07:	/* String Formula Results */			working_buffer[bufidx++] = data;			if (bufidx == last)			{				U8 *str;				U8 uni = 0;				U16 len = getShort(&working_buffer[0]);				if (len > (last-3))					len = (U16)(last-3);				if (file_version == EXCEL97)				{					/* Check for unicode. Terminate the buffer at 2x len						since unicode is 2bytes per char. Then see if						strlen is short...upperbyte is usually 0 in						western chararcter sets. */					int t = len << 1;					if ((t+3) < WBUFF_SIZE)						working_buffer[t+3] = 0;					else						working_buffer[len+3] = 0;					if ((len+3) < last)					{						uni = 2;						len = (U16)t;					}					str = &working_buffer[3];				}				else if (file_version == EXCEL95)				{					str = &working_buffer[2];					working_buffer[len+2] = 0;				}				else				{					if (OutputXML)						str = (U8*)"<NotImplemented/>String Formula";					else						str = (U8*)"***String Formula";					len = (U16)strlen((char*)str);					NotImplemented++;				}				add_wb_array(str_formula_row, str_formula_col, str_formula_format, opcode, uni, str, len, 0, 0);			}			break;		case 0x5C:	/* Author's name A.K.A. 
WRITEACCESS */			working_buffer[bufidx++] = data;			if ((bufidx == last)&&(author.str == 0))			{				if (file_version == EXCEL97)				{					author.len = getShort(&working_buffer[0]);					if ((int)working_buffer[2] & 0x01)					{						author.len *= (U16)2;						author.uni = 2;					}					else						author.uni = 0;					if (author.len > (last-2))						author.len = (U16)(last-2);					author.str = (U8 *)malloc(author.len+1);					if (author.str)					{						memcpy(author.str, &working_buffer[3], author.len);						author.str[author.len] = 0;					}				}				else if (file_version == EXCEL95)				{					author.len = working_buffer[0];					author.str = (U8 *)malloc(author.len+1);					if (author.str)					{						memcpy(author.str, &working_buffer[1], author.len);						author.str[author.len] = 0;					}					author.uni = 0;				}			}			break;		case 0x08:	/* Row Data */			/* There's actually some other interesting things			   here that we're not collecting. For now, we'll			   Just get the dimensions of the sheet. */			working_buffer[bufidx++] = data;			if (bufidx == last)			{				/* question...what is the actual limit?					This can go as high as 64K. Is this really OK? */				U16 i, r, fc, lc, d, xf;				r = getShort(&working_buffer[0]);				fc = getShort(&working_buffer[2]);				lc = (U16)(getShort(&working_buffer[4]) - (U16)1);				d = getShort(&working_buffer[12]);				xf = getShort(&working_buffer[14]);				if (ws_array[sheet_count] == 0)					if (ws_init(sheet_count))						return;				if (r > ws_array[sheet_count]->biggest_row)				{					if (r < ws_array[sheet_count]->max_rows)						ws_array[sheet_count]->biggest_row = r;					else					{	/* Resize the array... 
*/						if (MaxRowExceeded == 0)						{							int diff = (r/ROWS_INCR) + 1;							if(resize_c_array(ws_array[sheet_count], ROWS_INCR*diff, 0))							{								ws_array[sheet_count]->biggest_row = ws_array[sheet_count]->max_rows - 1;								MaxRowExceeded = 1;								return;							}							else								ws_array[sheet_count]->biggest_row = r;						}						else							return;					}				}				if (lc > ws_array[sheet_count]->biggest_col)				{					if (lc < ws_array[sheet_count]->max_cols)						ws_array[sheet_count]->biggest_col = lc;					else					{	/* Resize array... */						if (MaxColExceeded == 0)						{							int diff = (lc/COLS_INCR) + 1;							if (resize_c_array(ws_array[sheet_count], 0, (U16)(COLS_INCR*diff)))							{								ws_array[sheet_count]->biggest_col = (S16)(ws_array[sheet_count]->max_cols - 1);								MaxColExceeded = 1;								lc = ws_array[sheet_count]->max_cols;							}							else								ws_array[sheet_count]->biggest_col = lc;						}						else							lc = ws_array[sheet_count]->max_cols;					}				}				if ((fc < ws_array[sheet_count]->max_cols)&&(d & 0x0080))	/* fGhostDirty flag */				{					for (i=fc; i<lc; i++)					{	/* Set the default attr... */						update_cell_xf(r, i, xf);					}				}			}			break;		case 0x22:	/* 1904 Flag - MacIntosh Dates or PC Dates */			working_buffer[bufidx++] = data;			if (bufidx == 2)				DatesR1904 = getShort(&working_buffer[0]);			break;		case 0x085:	/* BoundSheet */			working_buffer[bufidx++] = data;			if (bufidx == last)			{	/* This is based on Office 97 info... */				if ((working_buffer[4] & 0x0F) == 0)				{	/* Worksheet as opposed to chart, etc */					U16 len;					U8 uni=0;					if (file_version == EXCEL97)					{						len = (U16)working_buffer[6];		/* FIXME: Check this !!! 
Was GetShort */						if (working_buffer[7] & 0x01)						{							uni = 2;							len = (U16)(len<<1);						}						if (len != 0)						{							working_buffer[8 + len + 1] = 0;							add_ws_title(uni, &working_buffer[8], len);						}					}					else					{						len = working_buffer[6];						if (len != 0)						{							working_buffer[7 + len + 1] = 0;							add_ws_title(uni, &working_buffer[7], len);						}					}				}			}			break;		case 0x7E:	/* RK Number */			working_buffer[bufidx++] = data;			if (bufidx == last)			{	/* This is based on Office 97 info... */				U16 r, c, f;				U32 t;				S32 n, n2;		/* Must be signed long !!! */				F64 d;				char temp[64];				r = getShort(&working_buffer[0]);				c = getShort(&working_buffer[2]);				f = getShort(&working_buffer[4]);				n = getLong(&working_buffer[6]);				t = n & 0x03;				n2 = n>>2;				switch (t)				{					case 0:						RKtoDouble(n2, &d);						sprintf(temp, "%.15g", d);						break;					case 1:						RKtoDouble(n2, &d);						sprintf(temp, "%.15g", d / 100.0);						break;					case 2:						sprintf(temp, "%ld", (S32)n2);						break;					default:						d = (F64) n2;						sprintf(temp, "%.15g", d / 100.0 );						break;				}				add_wb_array(r, c, f, opcode, (U16)0, (U8 *)temp, (U16)strlen(temp), 0, 0);			}			break;		case 0xBC:		/* Shared Formula's *//*			working_buffer[bufidx++] = data;			if (bufidx == last)			{				int fr, lr, fc, lc, i, j;				fr = getShort(&working_buffer[0]);				lr = getShort(&working_buffer[2]);				fc = working_buffer[4];				lc = working_buffer[5];				for (i=fr; i<=lr; i++)				{					for (j=fc; j<=lc; j++)						add_wb_array(i, j, (U16)0, opcode, 0, "***SHRFORMULA", 13);				}				NotImplemented = 1;			}	*/			break;		case 0x21:		/* Arrays */			working_buffer[bufidx++] = data;			if (bufidx == last)			{				U16 fr, lr, fc, lc, i, j;				fr = getShort(&working_buffer[0]);				lr = getShort(&working_buffer[2]);				fc = working_buffer[4];				lc = working_buffer[5];				for (i=fr; i<=lr; i++)				{					for (j=fc; j<=lc; j++)						
add_wb_array(i, j, (U16)0, opcode, 0, (U8 *)"***Array", 8, 0, 0);				}				NotImplemented = 1;			}			break;		case 0xBD:		/* MULRK */			working_buffer[bufidx++] = data;			if (bufidx == last)			{				U16 r, fc, lc;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -