⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 substmap.c

📁 harvest是一个下载html网页的机器人
💻 C
字号:
#include <string.h>#include <ctype.h>#include <stdlib.h>#include "catdoc.h"SUBSTMAP spec_chars, replacements;char *map_path=CHARSETPATH;void map_insert(SUBSTMAP map, int uc, const char *s) ;int longest_sequence=6;/* six is longest character sequence which can be                          generated by catdoc internally*/int isstop(char  c, char stop) {  if (stop=='\n') {     return isspace(c);  } else {     return c==stop;  }}/************************************************************************//* Reads substitution map file.                                         *//************************************************************************/SUBSTMAP read_substmap(char* filename) {    FILE *f;    SUBSTMAP map=calloc(sizeof(char **),256);    char *path, line[1024], *p, *q;    char s[256];    char stopchar;    int escaped, lineno=0, i;    unsigned int uc;            path=find_file(filename,add_exe_path(map_path));    if (!path) {       free(map);       return(NULL);    }       if (!map) {        fprintf(stderr,"Insufficient memory\n");        exit(1);    }    f=fopen(path,"rb");    if (!f) {        perror("catdoc");        return NULL;    }    if (input_buffer)      setvbuf(f,input_buffer,_IOFBF,FILE_BUFFER);    while (!feof(f)) {	if (!fgets(line,1024,f)) continue;	lineno++;	/* parse line */        /* skip leading space */        for(p=line;*p && isspace(*p);p++);        /* if #, it is comment */	if (!*p ||#ifdef  __MSDOS__	*p==0x1A || /* DOS have strange habit of using ^Z as eof */#endif	*p=='#') continue;        /* read hexadecimal code */        uc = strtol(p,&p,16);        if (!isspace(*p)|| uc<0 || uc>0xfffd) {            fprintf(stderr,"Error parsing %s(%d)\n",path,lineno);            continue;        }        /* skip space between  code and sequence */        for(;*p && isspace(*p);p++);        if (!p) continue;        switch (*p) {            case '\'':            case '"':                  stopchar=*p;                  break;            case '(':             
   stopchar=')';                break;            case '[':                stopchar=']';                break;            case '{':                stopchar='}';                break;            default:                p--;                stopchar='\n';        }        p++;        q=p;        escaped=0;	while (*q && (!isstop(*q,stopchar) || escaped)) {                if (escaped) {                  escaped=0;                } else {                  escaped= (*q=='\\');                }               q++;           }	if (*q!=stopchar && !(isspace(*q) && stopchar=='\n')) {            fprintf(stderr,"Error parsing %s(%d): unterminated sequence\n",                    path,lineno);            continue;        }        /* HERE SHOULD BE BACKSLASH ESCAPE PROCESSING !!!*/        *q=0;        for (q=s,i=0;*p && i<256;q++,i++) {            if (*p!='\\') {                *q=*p++;	    } else {               switch (*(++p)) {               case 'n': *q='\n'; break;               case 'r': *q='\r'; break;               case 't': *q='\t'; break;               case 'b': *q='\b'; break;               case '\"': *q='\"'; break;               case '\'': *q='\''; break;               case '0': *q=strtol(p,&p,8); p--; break;               case '\\':               default:                     *q=*p;               }               p++;            }	}	*q=0;	if (i>longest_sequence)	   longest_sequence=i;	map_insert(map,uc,s);    }    fclose(f);    free(path);    /* Debug output    {long int i,j;char ***p; char **q;    for (i=0,p=map;i<255;i++,p++) {      if (*p) {	for (j=0,q=*p;j<255;j++,q++) {	 if (*q) {	   printf("%04x ",(i<<8)|j);	   fputc('\"',stdout);	   fputs(*q,stdout);	   fputc('\"',stdout);	   fputc('\n',stdout);	 }       }      }    }    }    */    return map; }/*************************************************************************//* searches for string in the substituton map. 
Returns NULL if not found *//*************************************************************************/char * map_subst ( SUBSTMAP map, int uc) {    char **p=map[(unsigned)uc >>8];    if (!p) return NULL;    return p[uc & 0xff];}/*************************************************************************//*  inserts string + unicode code into map                               *//*************************************************************************/void map_insert(SUBSTMAP map, int uc, const char *s) {  SUBSTMAP p=map+((unsigned)uc>>8);    if (!*p) {    *p= calloc(sizeof(char*),256);    if (!*p) { fprintf(stderr,"Insufficient memory\n");               exit(1);             }  }  (*p)[uc & 0xff] = strdup(s);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -