⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xlhtml.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
/*! \file xlhtml.c
    \brief converts excel files to Html

   xlhtml generates HTML, XML, csv and tab-delimited versions of Excel
   spreadsheets.
*/

/*
   Copyright 2002  Charles N Wyble  <jackshck@yahoo.com>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published  by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
 */

#include "tuneable.h"
#include "xlhtml.h"

/* The section of the Excel Stream where the workbooks are kept.
   main() asks cole_locate_filename() for entry 0 first and falls back to
   entry 1 if that lookup fails. */
static char SectionName[2][12] =
{
	"/Workbook",		/*!< Excel 97 & 2000 */
	"/Book"			/*!< Everything else ? */
};

int numCustomColors = 0;
U8 **customColors = 0;

/* Color table; each entry is either a six-digit hex RGB string or a named
   color.  The commented-out hex string next to a named color is the value
   that name replaced.  The trailing hex markers give the index of the entry
   they follow. */
char colorTab[MAX_COLORS][8] =
{
	"000000",	/* FIXME: Need to find these first 8 colors! */
	"FFFFFF",
	"FFFFFF",
	"FFFFFF",
	"FFFFFF",
	"FFFFFF",
	"FFFFFF",
	"FFFFFF",
	"FFFFFF",	/*0x08 - This one's Black, too ??? */
	"FFFFFF", /* This one's normal */
	"red",	/*	"FF0000", */
	"lime",	/*	"00FF00", */
	"blue",	/*	"0000FF", */
	"FFFF00",
	"FF00FF",
	"aqua",	/*	"00FFFF", */
	"800000",	/* 0x10 */
	"green",	/*	"008000", */
	"navy",	/*	"000080", */
	"808000",
	"800080",
	"teal",	/*	"008080", */
	"C0C0C0",
	"gray",	/*	"808080", */
	"9999FF",	/* 0x18 */
	"993366",
	"FFFFCC",
	"CCFFFF",
	"660066",
	"FF8080",
	"0066CC",
	"CCCCFF",
	"000080",	/* 0x20 */
	"FF00FF",
	"FFFF00",
	"00FFFF",
	"800080",
	"800000",
	"008080",
	"0000FF",
	"00CCFF",	/* 0x28 */
	"CCFFFF",
	"CCFFCC",
	"FFFF99",
	"99CCFF",
	"FF99CC",
	"CC99FF",
	"FFCC99",
	"3366FF",	/* 0x30 */
	"33CCCC",
	"99CC00",
	"FFCC00",
	"FF9900",
	"FF6600",
	"666699",
	"969696",
	"003366",	/* 0x38 */
	"339966",
	"003300",
	"333300",
	"993300",
	"993366",
	"333399",
	"333333",
	"FFFFFF"	/* 0x40 */
};

int DatesR1904 = 0;	/*!< Flag that the dates are based on the Macintosh date system */

/* FIXME: Support major languages here...not just English */
/* Rows are 5 bytes wide because "June" and "July" are spelled with four
   letters and still need room for the terminating NUL. */
const char month_abbr[12][5] = {	"Jan", "Feb", "Mar", "Apr", "May", "June",
					"July", "Aug", "Sep", "Oct", "Nov", "Dec" };

/* Function Prototypes */

/* These functions are in support.c */
extern void print_version(void);
extern void display_usage(void);
extern void do_cr(void);
extern void OutputTableHTML(void);
extern S32 getLong(U8 *);
extern U16 getShort(U8 *);
extern void getDouble(U8 *, F64 *);
extern int null_string(U8 *);
extern void FracToTime(U8 *, int *, int *, int *, int *);
extern void NumToDate(long, int *, int *, int *);
extern void RKtoDouble(S32, F64 *);

/* This function is in xml.c */
extern void OutputTableXML(void);

/* This function is in ascii.c */
void OutputPartialTableAscii(void);

/* These functions are in html.c */
extern void output_start_html_attr(html_attr *h, unsigned int, int);
extern void output_end_html_attr(html_attr *h);
extern void output_footer(void);
extern void output_header(void);

/* Callback that main() hands to cole_locate_filename(). */
COLE_LOCATE_ACTION_FUNC scan_file;
void main_line_processor(U16, U16, U32, U16, U8);
void SetupExtraction(void);
void decodeBoolErr(U16, U16, char *);
int IsCellNumeric(cell *);
int IsCellSafe(cell *);
int IsCellFormula(cell *);
void output_cell(cell *, int);
void output_formatted_data(uni_string *, U16, int, int);
void PrintFloatComma(char *, int, F64);
void print_as_fraction(F64, int);
void trim_sheet_edges(unsigned int);
void update_default_font(unsigned int);
void incr_f_cnt(uni_string *);
int get_default_font(void);
void update_default_alignment(unsigned int, int);
void OutputString(uni_string *);
void OutputCharCorrected(U8);
void update_crun_info(U16 *loc, U16 *fnt_idx, U16 crun_cnt, U8 *fmt_run);
void put_utf8(U16);
void print_utf8(U16);
void uni_string_clear(uni_string *);
int uni_string_comp(uni_string *, uni_string *);
void html_flag_init(html_attr *h);
void output_start_font_attribute(html_attr *h, U16 fnt_idx);

/* The array update functions */
int ws_init(int);
int add_more_worksheet_ptrs(void);
int resize_c_array(work_sheet *, U32, U16);
void add_wb_array(U16, U16, U16, U16, U8, U8 *, U16, U16, U8 *);
void update_cell_xf(U16, U16, U16);
void update_cell_hyperlink(U16 r, U16 c, U8 *hyperlink, int len, U16 type);
void add_str_array(U8, U8 *, U16, U8 *, U8);
void add_font(U16, U16, U16, U16, U16, U8, U16, U8 *, U16);
void add_ws_title(U16, U8 *, U16);
void add_xf_array(U16 fnt_idx, U16 fmt_idx, U16 gen, U16 align,
	U16 indent, U16 b_style, U16 b_l_color, U32  b_t_color, U16 cell_color);

/* Global data */
char filename[256];		/*!< Input file; main() copies the last command line argument here */
int file_version = 0;
U32 next_string=0;
unsigned int next_font=0, next_ws_title=0, next_xf=0;
U8 working_buffer[WBUFF_SIZE];
unsigned int bufidx, buflast;	/*!< Needed for working buffer */
U8 grbit=0;			/*!< Needed by the SST Opcode FC */
U16 crun=0, cch=0;               /*!< Needed by the SST Opcode FC */
U32 extrst=0;			/*!< Needed by the SST Opcode FC */
U16 nonascii = 0;		/*!< Needed by the SST Opcode FC */
int sheet_count=-2;		/*!< Number of worksheets found */
/* NOTE(review): -1 stored in a U16 presumably yields 0xFFFF as a
   "no opcode yet" marker - confirm against the U16 typedef. */
U16 last_opcode = -1;		/*!< Used for the continue command */
unsigned int cont_grbit=0, cont_str_array=0;
uni_string default_font;		/*!< Font for table */
int default_fontsize = 3;	/*!< Default font size for table */
char *default_alignment = 0;	/*!< Alignment for table */
int first_sheet = 0;		/*!< First worksheet to display */
int last_sheet = WORKSHEETS_INCR-1;	/*!< The last worksheet to display */
S16 xp=0, xr1=-1, xr2=-1, xc1=-1, xc2=-1; /*!< Extraction info: page, row range, column range (set by -xp:, -xr:, -xc:) */
int currency_symbol = '$';		/*!< What to use for currency */
U16 str_formula_row = 0;		/*!< Data holders for string formulas */
U16 str_formula_col = 0;         /*!< Data holders for string formulas */
U16 str_formula_format = 0;	/*!< Data holders for string formulas */

/* Limits */
unsigned int max_fonts = FONTS_INCR;
unsigned int max_xformats = XFORMATS_INCR;
unsigned long max_strings = STRINGS_INCR;
unsigned int max_worksheets = WORKSHEETS_INCR;

/* Global arrays */
xf_attr **xf_array;
work_sheet **ws_array;
uni_string **str_array;
font_attr **font_array;
fnt_cnt *f_cnt;
int fnt_size_cnt[7];		/*!< Html has only 7 sizes... */
uni_string author;
char *title = 0;
char *lastUpdated = 0;

/* Command Line flags */
int use_colors = 1;		/*!< Whether or not to use colors in output (cleared by -nc and -bi) */
int aggressive = 0;		/*!< Aggressive html optimization (-a); main() clears it for XML output */
int formula_warnings = 1;	/*!< Whether formula warnings are enabled; -fw clears it to suppress them */
int center_tables = 0;		/*!< Whether or not to center justify tables or leave it left */
int trim_edges = 0;		/*!< Whether or not to trim the edges of columns or rows; main() clears it when extracting */
char *default_text_color = "000000";	/*!< Overridden by -tc */
char *default_background_color="FFFFFF";	/*!< Overridden by -bc */
char *default_image=NULL;	/*!< Point to background image */
int Ascii = 0;			/*!< Whether or not to output ascii instead of html; main() clears it unless DumpPage or Xtract is set */
int Csv = 0;			/*!< Whether or not to output csv instead of html */
int OutputXML = 0;		/*!< Output as xml */
int DumpPage = 0;		/*!< Dump page count & max cols & rows */
int Xtract = 0;			/*!< Extract a range on a page.
*/int MultiByte = 0;		/*!< Output as multibyte */int NoHeaders = 0;		/*!< Dont output html header *//* Some Global Flags */int notAccurate = 0;	/*!< Flag used to indicate that stale data was used */int NoFormat = 0;	/*!< Flag used to indicated unimplemented format */int NotImplemented = 0;	/*!< Flag to print unimplemented cell type message */int Unsupported = 0;	/*!< Flag to print unsupported cell type message */int MaxPalExceeded = 0;int MaxXFExceeded = 0;int MaxFormatsExceeded = 0;int MaxColExceeded = 0;int MaxRowExceeded = 0;int MaxWorksheetsExceeded = 0;int MaxStringsExceeded = 0;int MaxFontsExceeded = 0;int UnicodeStrings = 0;	/*!< 0==ASCII, 1==windows-1252, 2==uft-8 */int CodePage = 0;			/*!< Micosoft CodePage as specified in the Excel file. */int main (int argc, char **argv){	int i, f_ptr = 0;	U16 k;	U32 j;	COLEFS * cfs;	COLERRNO colerrno;	if (argc < 2)	{		printf("Incorrect usage. Try xlhtml --help for more information\n"); 		exit(0);	}	else	{		strncpy(filename, argv[argc-1], 252);		filename[252] = 0;		for (i=1; i<(argc-1); i++)		{			if (strcmp(argv[i], "-nc") == 0)				use_colors = 0;			else if(strcmp(argv[i], "-xml") == 0 )				OutputXML = 1;			else if (strcmp(argv[i], "-asc") == 0)				Ascii = 1;			else if (strcmp(argv[i], "--ascii") == 0)				Ascii = 1;			else if (strcmp(argv[i], "-csv") == 0)			{				Ascii = 1;				Csv = 1;						}			else if (strcmp(argv[i], "-a") == 0)				aggressive = 1;			else if (strcmp(argv[i], "-fw") == 0)				formula_warnings = 0;			else if (strcmp(argv[i], "-c") == 0)				center_tables = 1;			else if (strcmp(argv[i], "-dp") == 0)				DumpPage = 1;			else if (strcmp(argv[i], "-m") == 0)				MultiByte = 1;			else if (strncmp(argv[i], "-tc", 3) == 0)			{				default_text_color = &argv[i][3];				if (strlen(default_text_color) != 6)					display_usage();			}			else if (strncmp(argv[i], "-bc", 3) == 0)			{				default_background_color = &argv[i][3];				if (strlen(default_background_color) != 6)					display_usage();			}			else if (strncmp(argv[i], 
"-bi", 3) == 0)			{				default_image = &argv[i][3];				use_colors = 0;			}			else if (strncmp(argv[i], "-te", 3) == 0)				trim_edges = 1;			else if (strcmp(argv[i], "-v") == 0)				print_version();			else if(strcmp(argv[i], "-nh") == 0 )				NoHeaders = 1;			else if (strncmp(argv[i], "-xc:", 4) == 0)			{				int d1, d2;				if (sscanf(argv[i] + 4, "%d-%d", &d1, &d2) != 2)				{					fprintf(stderr, "column range %s not valid, expected -xc:FIRST-LAST\n", argv[i] + 4);					display_usage();				}				xc1 = (S16)d1;				xc2 = (S16)d2;				Xtract = 1;				if (xc1 > xc2)				{					fprintf(stderr, "last column must be >= the first\n");					exit(1);				}			}			else if (strncmp(argv[i], "-xp:", 4) == 0)			{				Xtract = 1;				xp = (S16)atoi(&(argv[i][4]));				if (xp < 0)				{					fprintf(stderr, "Negative numbers are illegal.\n");					exit(1);				}			}			else if (strncmp(argv[i], "-xr:", 4) == 0)			{				char *ptr, *buf;				Xtract = 1;				buf = strdup(argv[i]);				ptr = strrchr(buf, '-');				xr2 = (S16)atoi(ptr+1);				*ptr = 0;				ptr = strchr(buf, ':');				xr1 = (S16)atoi(ptr+1);				free(buf);				if (xr1 > xr2)				{					fprintf(stderr, "row's 2nd digit must be >= the first\n");					exit(1);				}			}			else				display_usage();		}		if (strcmp(filename, "-v") == 0)		{			print_version();		}		if (strcmp(filename, "--version") == 0)		{			print_version();		}		if (strcmp(filename, "--help") == 0)			display_usage();		if (strcmp(filename, "-?") == 0)			display_usage();	}	if (Ascii)	{	/* Disable it if DumpPage or Xtract isn't used... */		if (!(DumpPage||Xtract))			Ascii = 0;	}	if (Xtract)		trim_edges = 0;		/* No trimming when extracting... */	if (OutputXML)		aggressive = 0;	/* Init arrays... 
*/	ws_array = (work_sheet **)malloc(max_worksheets * sizeof(work_sheet *));	for (i=0; i<(int)max_worksheets; i++)		ws_array[i] = 0;	str_array = (uni_string **)malloc(max_strings*sizeof(uni_string *));	for (i=0; i<(int)max_strings; i++)		str_array[i] = 0;	font_array = (font_attr **)malloc(max_fonts * sizeof(font_attr *));	f_cnt = (fnt_cnt *)malloc(max_fonts * sizeof(fnt_cnt));	for (i=0; i<(int)max_fonts; i++)	{	/* I assume these won't fail since we are just starting up... */		font_array[i] = 0;		f_cnt[i].name = 0;	}	xf_array = (xf_attr **)malloc(max_xformats * sizeof(xf_attr *));	for (i=0; i<(int)max_xformats; i++)		xf_array[i] = 0;	uni_string_clear(&author);	uni_string_clear(&default_font);	umask(GLOBAL_UMASK);#if defined( __WIN32__ ) || defined( __BORLANDC__ )	{		char *ptr = strchr(filename, ':');		if (ptr)		{			int len;			char new_drive[MAXPATH];			fnsplit(filename, new_drive, 0, 0, 0);			if (new_drive[0] >= 'a')				setdisk(new_drive[0] - 'a');			else				setdisk(new_drive[0] - 'A');			ptr++;	/* Get past the colon */			len = strlen(ptr);			memmove(filename, ptr, len);			filename[len] = 0;		}	}#endif	/* If successful, this calls scan_file to extract the work book... */	cfs = cole_mount(filename, &colerrno);	if (cfs == NULL)	{		cole_perror (NULL, colerrno);		exit(1);	}	while (cole_locate_filename (cfs, SectionName[f_ptr], NULL, scan_file, &colerrno))	{		if (f_ptr)		{	/* Two strikes...we're out! 
*/			cole_perror (PRGNAME, colerrno);			if (colerrno == COLE_EFILENOTFOUND)				fprintf(stderr, "Section: Workbook\n");			break;		}		else			f_ptr++;	}	if (cole_umount (cfs, &colerrno))	{		cole_perror (PRGNAME, colerrno);		exit(1);	}	for (i=0; i<max_strings; i++)	{		if (str_array[i])		{			if (str_array[i]->str)				free(str_array[i]->str);			free(str_array[i]);		}	}		for (i=0; i<(int)max_fonts; i++)	{		if (font_array[i])		{			if (font_array[i]->name.str)				free(font_array[i]->name.str);			free(font_array[i]);			if (f_cnt[i].name)			{				if (f_cnt[i].name->str)					free(f_cnt[i].name->str);				free(f_cnt[i].name);			}		}	}			for (i=0; i<(int)max_worksheets; i++)	{		if (ws_array[i])		{			if (ws_array[i]->ws_title.str)				free(ws_array[i]->ws_title.str);			if (ws_array[i]->c_array)			{				for (j=0; j<ws_array[i]->max_rows; j++)				{					for (k=0; k<ws_array[i]->max_cols; k++)					{						if (ws_array[i]->c_array[(j*ws_array[i]->max_cols)+k])						{							if (ws_array[i]->c_array[(j*ws_array[i]->max_cols)+k]->ustr.str)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -