📄 xlhtml.c~
字号:
/*! \file xlhtml.c \brief converts excel files to Html xlhtml generates HTML, XML, csv and tab-delimitted versions of Excel spreadsheets.*//* Copyright 2002 Charles N Wyble <jackshck@yahoo.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */#if !(defined( __BORLANDC__ ) || defined( __WIN32__ ))#include "config.h" /* Created by ./configure script */#include "support.h" /* Needs to be before internal.h */#include "internal.h" /* Needs to be before cole */#include "cole.h"#else#include "config.h.in" /* Created by ./configure script */#include "support.h" /* Needs to be before internal.h */#include "internal.h" /* Needs to be before cole */#include "cole.h.in"#include <io.h> /* for umask */#include <dir.h>#endif#include <stdlib.h> /* For atof(), calloc() */#include <string.h> /* For string functions */#include <math.h> /* For fabs() */#include <ctype.h> /* For isprint() */#include <errno.h>/**************************** The next couple of lines are "tunable". 
They are the amount that
* various things get incremented by when we need more.
****************************/
#define XFORMATS_INCR 64 /*!< Increments to allocate extended formats */
#define FONTS_INCR 32 /*!< Increments to allocate fonts */
#define WORKSHEETS_INCR 4 /*!< Increments to allocate worksheet pages */
#define COLS_INCR (U16)24 /*!< Increments to allocate Columns per Worksheet page */
#define ROWS_INCR (U32)128 /*!< Increments to allocate Rows per Worksheet page */
#define STRINGS_INCR 256UL /*!< Increments to allocate the string array - */
/* Used by packed string array Opcode: 0xFC */
#define HARD_MAX_ROWS_97 0x7FFE /*!< Used in add_wb_array to prevent OOM */
#define HARD_MAX_ROWS_95 0x3FFF /*!< Used in add_wb_array to prevent OOM */
#define HARD_MAX_COLS 256 /*!< Used in add_wb_array to prevent OOM */
/* Row limit actually in force; starts at the Excel 97 value. Not declared
 * static, unlike the rest of the file-scope data.
 * NOTE(review): presumably lowered to HARD_MAX_ROWS_95 for older files - confirm. */
U16 HARD_MAX_ROWS = HARD_MAX_ROWS_97;
/************************************
* Don't change anything below here...
*************************************/
/* VERSION is only defined here for the Win32/Borland build; the other build
 * includes config.h instead (see the include block at the top of the file). */
#if defined( __WIN32__ ) || defined( __BORLANDC__ )
#define VERSION "0.4.9.1"
#endif
#define PRGNAME "xlhtml"
#define WBUFF_SIZE 8240 /*!< The working buffer. SB 522+10+4(header) bytes minimum = 536 */
#define MAX_COLORS 65 /*!< This is the size of the built-in color table */
#define EXCEL95 0x500 /*!< This is the file stamp for biff7 - Excel 5 & 95 */
#define EXCEL97 0x600 /*!< This is the file stamp for biff8 - Excel 97 & 2000 */
#if !(defined( __WIN32__ ) || defined( __BORLANDC__ ))
#include <sys/stat.h>
#define GLOBAL_UMASK (S_IXUSR|S_IWGRP|S_IRGRP|S_IXGRP|S_IWOTH|S_IROTH|S_IXOTH)
#else
#define GLOBAL_UMASK (2)
#endif

static char SectionName[2][12] = /* The section of the Excel Stream where the workbooks are kept */
{
    "/Workbook", /*!< Excel 97 & 2000 */
    "/Book" /*!< Everything else ? */
};

typedef struct /*!< This encapsulates the Unicode String */
{
    U8 uni; /*!< Unicode String: 0==ASCII/8859-1, 1==windows-1252, 2==utf-8 */
    U8 *str; /*!< Characters of string */
    U16 len; /*!< Length of string */
    U8 *fmt_run; /*!< formatting run, short pairs: offset, index to font */
    U8 crun_cnt; /*!< The count of format runs */
}uni_string;

typedef struct /*!< This is everything we need for a cell */
{
    U16 xfmt; /*!< The high bit will tell us which version 0 =< 2; 1 == 2+ */
    U16 type; /*!< This will record the record type that generated the cell */
    U16 spanned; /*!< If 1 don't output */
    uni_string ustr; /*!< The cell's displayed contents */
    U16 rowspan; /*!< rows to span */
    U16 colspan; /*!< columns to span */
    uni_string h_link; /*!< If a hyperlinked cell, this is the link */
}cell;

/* NOTE(review): the fields of work_sheet carry no original documentation.
 * The names suggest row/column bounds plus allocated capacity, and c_array
 * is a pointer-to-pointer of cell - confirm against the code that fills them. */
typedef struct /*!< This encapsulates some information about each worksheet */
{
    U32 first_row;
    S32 biggest_row;
    U32 max_rows;
    U16 first_col;
    S16 biggest_col;
    U16 max_cols;
    uni_string ws_title;
    cell **c_array;
    U16 spanned;
}work_sheet;

typedef struct /*!< This is everything we need to know about fonts */
{
    U16 size;
    U16 attr;
    U16 c_idx;
    U16 bold;
    U16 super;
    U8 underline;
    uni_string name;
}font_attr;

/* Pairs a font name with a counter.
 * NOTE(review): presumably a usage tally maintained by incr_f_cnt() - confirm. */
typedef struct
{
    uni_string *name;
    U16 cnt;
}fnt_cnt;

/* The field names and types below mirror, in order, the named parameters of
 * the add_xf_array() prototype further down. */
typedef struct /*!< This covers the Extended Format records */
{
    U16 fnt_idx;
    U16 fmt_idx;
    U16 gen;
    U16 align;
    U16 indent;
    U16 b_style;
    U16 b_l_color;
    U32 b_t_color;
    U16 cell_color;
}xf_attr;

typedef struct /*!< HTML Attribute */
{
    int fflag; /*!< Font Flag */
    int bflag; /*!< Bold Flag */
    int iflag; /*!< Italic Flag */
    int sflag; /*!< Strike thru flag */
    int uflag; /*!< Underline flag */
    int sbflag; /*!< Subscript */
    int spflag; /*!< Superscript */
}html_attr;

static int numCustomColors = 0;
static U8 **customColors = 0;

/* Built-in color table: MAX_COLORS (65) entries, each a color name or a
 * six-digit hex string. The hex markers in the comments are table indices.
 * NOTE(review): the 0x20 marker sits on the entry at index 0x21; every other
 * marker matches its entry's index. */
static char colorTab[MAX_COLORS][8] =
{
    "000000", /* FIXME: Need to find these first 8 colors! */
    "FFFFFF",
    "FFFFFF",
    "FFFFFF",
    "FFFFFF",
    "FFFFFF",
    "FFFFFF",
    "FFFFFF",
    "FFFFFF", /*0x08 - This one's Black, too ??? */
    "FFFFFF", /* This one's normal */
    "red", /* "FF0000", */
    "lime", /* "00FF00", */
    "blue", /* "0000FF", */
    "FFFF00",
    "FF00FF",
    "aqua", /* "00FFFF", */
    "800000", /* 0x10 */
    "green", /* "008000", */
    "navy", /* "000080", */
    "808000",
    "800080",
    "teal", /* "008080", */
    "C0C0C0",
    "gray", /* "808080", */
    "9999FF", /* 0x18 */
    "993366",
    "FFFFCC",
    "CCFFFF",
    "660066",
    "FF8080",
    "0066CC",
    "CCCCFF",
    "000080",
    "FF00FF", /* 0x20 */
    "FFFF00",
    "00FFFF",
    "800080",
    "800000",
    "008080",
    "0000FF",
    "00CCFF", /* 0x28 */
    "CCFFFF",
    "CCFFCC",
    "FFFF99",
    "99CCFF",
    "FF99CC",
    "CC99FF",
    "FFCC99",
    "3366FF", /* 0x30 */
    "33CCCC",
    "99CC00",
    "FFCC00",
    "FF9900",
    "FF6600",
    "666699",
    "969696",
    "003366", /* 0x38 */
    "339966",
    "003300",
    "333300",
    "993300",
    "993366",
    "333399",
    "333333",
    "FFFFFF" /* 0x40 */
};

/* FIXME: Support major languages here...not just English */
/* "June" and "July" are four letters, so each row needs 5 bytes including the NUL. */
static const char month_abbr[12][5] = { "Jan", "Feb", "Mar", "Apr", "May", "June", "July", "Aug", "Sep", "Oct", "Nov", "Dec" };
/* Two 12-entry tables that differ only in the second entry (28 vs 29).
 * NOTE(review): presumably days per month for normal and leap years - confirm. */
static const int ndays[]={31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
static const int ldays[]={31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};

/**************************************************************************
* Functions and data are declared static to prevent name collisions.
***************************************************************************/
/* Function Prototypes */
static void print_version(void);
/* NOTE(review): COLE_LOCATE_ACTION_FUNC is not defined in this view;
 * presumably a function type supplied by the cole headers - confirm. */
COLE_LOCATE_ACTION_FUNC scan_file;
static void OutputPartialTableAscii(void);
static void OutputTableHTML(void);
static void OutputTableXML(void);
static void do_cr(void);
static void main_line_processor(U16, U16, U32, U16, U8);
static void SetupExtraction(void);
static void output_header(void);
static void output_footer(void);
static U16 getShort(U8 *);
static S32 getLong(U8 *);
static void getDouble(U8 *, F64 *);
static void RKtoDouble(S32, F64 *);
static void decodeBoolErr(U16, U16, char *);
static int IsCellNumeric(cell *);
static int IsCellSafe(cell *);
static int IsCellFormula(cell *);
static void output_cell(cell *, int);
static void output_formatted_data(uni_string *, U16, int, int);
static void PrintFloatComma(char *, int, F64);
static void print_as_fraction(F64, int);
static void NumToDate(long, int *, int *, int *);
static void FracToTime(U8 *, int *, int *, int *, int *);
static void trim_sheet_edges(unsigned int);
static void update_default_font(unsigned int);
static void incr_f_cnt(uni_string *);
static int get_default_font(void);
static void update_default_alignment(unsigned int, int);
static int null_string(U8 *);
static void OutputString(uni_string *);
static void OutputCharCorrected(U8);
static void update_crun_info(U16 *loc, U16 *fnt_idx, U16 crun_cnt, U8 *fmt_run);
static void put_utf8(U16);
static void print_utf8(U16);
static void uni_string_clear(uni_string *);
static int uni_string_comp(uni_string *, uni_string *);
static void output_start_html_attr(html_attr *h, unsigned int, int);
static void output_end_html_attr(html_attr *h);
static void html_flag_init(html_attr *h);
static void output_start_font_attribute(html_attr *h, U16 fnt_idx);

/* The array update functions */
static int ws_init(int);
static int add_more_worksheet_ptrs(void);
static int resize_c_array(work_sheet *, U32, U16);
static void add_wb_array(U16, U16, U16, U16, U8, U8 *, U16, U16, U8 *);
static void update_cell_xf(U16, U16, U16);
static void update_cell_hyperlink(U16 r, U16 c, U8 *hyperlink, int len, U16 type);
static void add_str_array(U8, U8 *, U16, U8 *, U8);
static void add_font(U16, U16, U16, U16, U16, U8, U16, U8 *, U16);
static void add_ws_title(U16, U8 *, U16);
static void add_xf_array(U16 fnt_idx, U16 fmt_idx, U16 gen, U16 align, U16 indent, U16 b_style, U16 b_l_color, U32 b_t_color, U16 cell_color);

/* Global data */
static char filename[128]; /* main() copies at most 124 characters of the last argument into this */
static int file_version = 0;
static U32 next_string=0;
static unsigned int next_font=0, next_ws_title=0, next_xf=0;
static U8 working_buffer[WBUFF_SIZE];
static unsigned int bufidx, buflast; /*!< Needed for working buffer */
static U8 grbit=0; /*!< Needed by the SST Opcode FC */
static U16 crun=0, cch=0; /*!< Needed by the SST Opcode FC */
static U32 extrst=0; /*!< Needed by the SST Opcode FC */
static U16 nonascii = 0; /*!< Needed by the SST Opcode FC */
static int sheet_count=-2; /*!< Number of worksheets found */
/* NOTE(review): -1 stored in a U16; assuming U16 is an unsigned 16-bit type
 * (defined outside this view) this becomes 0xFFFF - confirm. */
static U16 last_opcode = -1; /*!< Used for the continue command */
static unsigned int cont_grbit=0, cont_str_array=0;
static uni_string default_font; /*!< Font for table */
static int default_fontsize = 3; /*!< Default font size for table */
static char *default_alignment = 0; /*!< Alignment for table */
static int first_sheet = 0; /*!< First worksheet to display */
static int last_sheet = WORKSHEETS_INCR-1; /*!< The last worksheet to display */
static S16 xp=0, xr1=-1, xr2=-1, xc1=-1, xc2=-1; /*!< Extraction info... */
static int currency_symbol = '$'; /*!< What to use for currency */
static U16 str_formula_row = 0; /*!< Data holders for string formulas */
static U16 str_formula_col = 0; /*!< Data holders for string formulas */
static U16 str_formula_format = 0; /*!< Data holders for string formulas */

/* Limits */
/* Each limit starts at the matching *_INCR value defined above. */
static unsigned int max_fonts = FONTS_INCR;
static unsigned int max_xformats = XFORMATS_INCR;
static unsigned long max_strings = STRINGS_INCR;
static unsigned int max_worksheets = WORKSHEETS_INCR;

/* Global arrays */
static xf_attr **xf_array;
static work_sheet **ws_array;
static uni_string **str_array;
static font_attr **font_array;
static fnt_cnt *f_cnt;
static int fnt_size_cnt[7]; /*!< Html has only 7 sizes... */
static uni_string author;
static char *title = 0;
static char *lastUpdated = 0;

/* Command Line flags */
static int use_colors = 1; /*!< Whether or not to use colors in output */
static int aggressive = 0; /*!< Aggressive html optimization */
static int formula_warnings = 1; /*!< Whether or not to suppress formula warnings */
static int center_tables = 0; /*!< Whether or not to center justify tables or leave it left */
static int trim_edges = 0; /*!< Whether or not to trim the edges of columns or rows */
static char *default_text_color = "000000";
static char *default_background_color="FFFFFF";
static char *default_image=NULL; /*!< Point to background image */
static int Ascii = 0; /*!< Whether or not to output ascii instead of html */
static int Csv = 0; /*!< Whether or not to output csv instead of html */
static int OutputXML = 0; /*!< Output as xml */
static int DumpPage = 0; /*!< Dump page count & max cols & rows */
static int Xtract = 0; /*!< Extract a range on a page. */
static int MultiByte = 0; /*!< Output as multibyte */
static int NoHeaders = 0; /*!< Don't output html header */

/* Some Global Flags */
static int notAccurate = 0; /*!< Flag used to indicate that stale data was used */
static int NoFormat = 0; /*!< Flag used to indicate unimplemented format */
static int NotImplemented = 0; /*!< Flag to print unimplemented cell type message */
static int Unsupported = 0; /*!< Flag to print unsupported cell type message */
static int DatesR1904 = 0; /*!< Flag that the dates are based on Macintosh Dates system */
static int MaxPalExceeded = 0;
static int MaxXFExceeded = 0;
static int MaxFormatsExceeded = 0;
static int MaxColExceeded = 0;
static int MaxRowExceeded = 0;
static int MaxWorksheetsExceeded = 0;
static int MaxStringsExceeded = 0;
static int MaxFontsExceeded = 0;
static int UnicodeStrings = 0; /*!< 0==ASCII, 1==windows-1252, 2==utf-8 */
static int CodePage = 0; /*!< Microsoft CodePage as specified in the Excel file.
*/static void display_usage(void){fprintf(stderr, "\nxlhtml %s converts excel files (.xls) to Html.\n" "Copyright (c) 1999-2001, Charles Wyble. Released under GPL.\n""Usage: "PRGNAME" [-xp:# -xc:#-# -xr:#-# -bc###### -bi???????? -tc######] <FILE>\n" "\t-a: aggressive html optimization\n" "\t-asc ascii output for -dp & -x? options\n" "\t-csv comma separated value output for -dp & -x? options\n" "\t-xml XML output\n" "\t-bc: Set default background color - default white\n" "\t-bi: Set background image path\n" "\t-c: Center justify tables\n" "\t-dp: Dumps page count and max rows & colums per page\n" "\t-v: Prints program version number\n" "\t-fw: Suppress formula warnings\n" "\t-m: No encoding for multibyte\n" "\t-nc: No Colors - black & white\n" "\t-nh: No Html Headers\n" "\t-tc: Set default text color - default black\n" "\t-te: Trims empty rows & columns at the edges of a worksheet\n" "\t-xc: Columns (separated by a dash) for extraction (zero based)\n" "\t-xp: Page extracted (zero based)\n" "\t-xr: Rows (separated by a dash) to be extracted (zero based)\n", VERSION); fprintf(stderr, "\nReport bugs to jackshck@yahoo.com\n"); exit (1);}int main (int argc, char **argv){ int i, f_ptr = 0; U16 k; U32 j; COLEFS * cfs; COLERRNO colerrno; if (argc < 2) { printf("Incorrect usage. 
Try "PRGNAME" --help for more information\n"); exit(0); } else { strncpy(filename, argv[argc-1], 124); filename[124] = 0; for (i=1; i<(argc-1); i++) { if (strcmp(argv[i], "-nc") == 0) use_colors = 0; else if(strcmp(argv[i], "-xml") == 0 ) OutputXML = 1; else if (strcmp(argv[i], "-asc") == 0) Ascii = 1; else if (strcmp(argv[i], "--ascii") == 0) Ascii = 1; else if (strcmp(argv[i], "-csv") == 0) { Ascii = 1; Csv = 1; } else if (strcmp(argv[i], "-a") == 0) aggressive = 1; else if (strcmp(argv[i], "-fw") == 0) formula_warnings = 0; else if (strcmp(argv[i], "-c") == 0) center_tables = 1; else if (strcmp(argv[i], "-dp") == 0) DumpPage = 1; else if (strcmp(argv[i], "-m") == 0) MultiByte = 1; else if (strncmp(argv[i], "-tc", 3) == 0) { default_text_color = &argv[i][3]; if (strlen(default_text_color) != 6) display_usage(); } else if (strncmp(argv[i], "-bc", 3) == 0) { default_background_color = &argv[i][3]; if (strlen(default_background_color) != 6) display_usage(); } else if (strncmp(argv[i], "-bi", 3) == 0) { default_image = &argv[i][3]; use_colors = 0; } else if (strncmp(argv[i], "-te", 3) == 0) trim_edges = 1; else if (strcmp(argv[i], "-v") == 0) print_version(); else if(strcmp(argv[i], "-nh") == 0 ) NoHeaders = 1; else if (strncmp(argv[i], "-xc:", 4) == 0)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -