⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 entgen.c

📁 harvest是一个下载html网页的机器人
💻 C
字号:
/* entgen.c -

   Implement entgen() which generates a list of filenames from a struct fpi.

     Written by James Clark (jjc@jclark.com).
*/

#include "config.h"

#ifdef HAVE_ACCESS

#ifdef HAVE_UNISTD_H
#include <unistd.h>		/* For R_OK. */
#endif /* HAVE_UNISTD_H */

#ifndef R_OK
#define R_OK 4
#endif /* not R_OK */

#endif /* HAVE_ACCESS */

#include "sgmlaux.h"

/* Environment variable that contains path. */
#ifndef PATH_ENV_VAR
#define PATH_ENV_VAR "SGML_PATH"
#endif

/* Default search path.  See field() for interpretation of %*. */
#ifndef DEFAULT_PATH
#define DEFAULT_PATH "/usr/local/lib/sgml/%O/%C/%T:%N.%X:%N.%D"
#endif

/* Character separating the filename templates in the search path. */
#ifndef PATH_FILE_SEP
#define PATH_FILE_SEP ':'
#endif

/* Character separating the filenames in a system identifier. */
#ifndef SYSID_FILE_SEP
#define SYSID_FILE_SEP ':'
#endif

/* This says: change space to underscore, slash to percent. */

#ifndef MIN_DAT_SUBS_FROM
#define MIN_DAT_SUBS_FROM " /"
#endif

#ifndef MIN_DAT_SUBS_TO
#define MIN_DAT_SUBS_TO "_%"
#endif

static int field P((struct fpi *, int, char *));
static int mindatcpy P((char *, char *, int, int));
static int testopen P((char *));
static UNIV sysidgen P((char *));
static UNIV catsysidgen P((const char *, const char *));
static const char *xbasename P((const char *));

/* Search path; set on the first call to entgen(). */
static char *path = 0;

/* Non-zero if searching should be performed when a system identifier
is specified. */
static int sysidsrch = 0;

/* Substituted for %V when the version part is empty. */
#define EMPTY_VERSION "default"

/* Names substituted for %C; indexed by f->fpic - 1 in field(). */
static char *classes[] = {
     "capacity",
     "charset",
     "notation",
     "syntax",
     "document",
     "dtd",
     "elements",
     "entities",
     "lpd",
     "nonsgml",
     "shortref",
     "subdoc",
     "text"
     };

/* This is mainly for compatibility with arcsgml. */
/* Extensions substituted for %X: three groups of five (system, public,
   display version), indexed within a group by f->fpistore - 1. */
static char *genext[] = {
     "nsd",  /* Non-SGML data entity. */
     "gml",  /* GML document or text entity. */
     "spe",  /* System parameter entity. */
     "dtd",  /* Document type definition. */
     "lpd",  /* Link process definition. */
     "pns",  /* Public non-SGML data entity. */
     "pge",  /* Public general entity. */
     "ppe",  /* Public parameter entity. */
     "pdt",  /* Public document type definition. */
     "plp",  /* Public link process definition. */
     "vns",  /* Display version non-SGML data entity. */
     "vge",  /* Display version general entity. */
     "vpe",  /* Display version parameter entity. */
     "vdt",  /* Display version document type definition.*/
     "vlp",  /* Display version link process definition.*/
};

/* Extensions substituted for %Y; indexed by f->fpistore, with 0 used
   for a SUBDOC (see field()). */
static char *ext[] = {
     "sgml",			/* SGML subdocument */
     "data",			/* Data */
     "text",			/* General text */
     "parm",			/* Parameter entity */
     "dtd",			/* Document type definition */
     "lpd",			/* Link process definition */
};

/* Catalog consulted by entgen(); set by entginit(). */
static CATALOG catalog;

/* Remember the catalog from the switches structure for later use by
   entgen(). */

VOID entginit(swp)
struct switches *swp;
{
     catalog = swp->catalog;
}

/* Like memcpy, but substitute, fold to lower case (if fold is
non-zero) and null terminate.  This is used both for minimum data and
for names. If p is NULL, do nothing. Return len.

Each character of q found in MIN_DAT_SUBS_FROM is replaced by the
character at the same position in MIN_DAT_SUBS_TO; if MIN_DAT_SUBS_TO
has no character at that position, the character is dropped.  The
return value is always len, even when characters were dropped. */

static int mindatcpy(p, q, len, fold)
char *p, *q;
int len;
int fold;
{
     static char subsfrom[] = MIN_DAT_SUBS_FROM;
     static char substo[] = MIN_DAT_SUBS_TO;
     int n;

     if (!p)
          return len;
     for (n = len; --n >= 0; q++) {
          char *r = strchr(subsfrom, *q);
          if (!r) {
               if (fold && ISASCII(*q) && isupper((UNCH)*q))
                    *p++ = tolower((UNCH)*q);
               else
                    *p++ = *q;
          }
          else {
               int i = r - subsfrom;
               if (i < sizeof(substo) - 1)
                    *p++ = substo[i];
          }
     }
     *p = '\0';
     return len;
}

/* Return length of field.  Copy into buf if non-NULL.

c is the character following '%' in a path template.  Return -1 if the
field is not applicable to f or c is not a recognized field character.
The test fields A, I, R and U return 0 (true) or -1 (false) and never
write to buf.  The S field copies into buf without null terminating
it; entgen() terminates the filename itself. */

static int field(f, c, buf)
struct fpi *f;
int c;
char *buf;
{
     int n;

     switch (c) {
     case '%':
          if (buf) {
               buf[0] = '%';
               buf[1] = '\0';
          }
          return 1;
     case 'N':			/* the entity, document or dcn name */
          return mindatcpy(buf, (char *)f->fpinm, ustrlen(f->fpinm),
                    (f->fpistore != 1 && f->fpistore != 2 && f->fpistore != 3
                     ? NAMECASE
                     : ENTCASE));
     case 'D':			/* dcn name */
          if (f->fpistore != 1) /* not a external data entity */
               return -1;
          if (f->fpinedcn == 0) /* it's a SUBDOC */
               return -1;
          return mindatcpy(buf, (char *)f->fpinedcn, ustrlen(f->fpinedcn),
                           NAMECASE);
     case 'X':
          /* This is for compatibility with arcsgml */
          if (f->fpistore < 1 || f->fpistore > 5)
               return -1;
          /* Select the group (0 = no public identifier, 5 = public,
             10 = public with fpiversw > 0), then the entry within it. */
          n = (f->fpipubis != 0)*(f->fpiversw > 0 ? 2 : 1)*5+f->fpistore - 1;
          if (buf)
               strcpy(buf, genext[n]);
          return strlen(genext[n]);
     case 'Y':			/* tYpe */
          n = f->fpistore;
          if (n < 1 || n > 5)
               return -1;
          if (n == 1 && f->fpinedcn == 0) /* it's a SUBDOC */
               n = 0;
          if (buf)
               strcpy(buf, ext[n]);
          return strlen(ext[n]);
     case 'P':			/* public identifier */
          if (!f->fpipubis)
               return -1;
          return mindatcpy(buf, (char *)f->fpipubis, ustrlen(f->fpipubis), 0);
     case 'S':			/* system identifier */
          if (!f->fpisysis)
               return -1;
          else {
               UNCH *p;
               n = 0;
               /* Drop RSCHAR, turn RECHAR into newline. */
               for (p = f->fpisysis; *p; p++)
                    if (*p != RSCHAR) {
                         if (buf)
                              buf[n] = *p == RECHAR ? '\n' : *p;
                         n++;
                    }
               return n;
          }
     }
     /* Other fields need a formal public identifier. */
     /* return -1 if the formal public identifier was invalid or missing. */
     if (f->fpiversw < 0 || !f->fpipubis)
          return -1;

     switch (c) {
     case 'A':			/* Is it available? */
          return f->fpitt == '+' ? 0 : -1;
     case 'I':			/* Is it ISO? */
          return f->fpiot == '!' ? 0 : -1;
     case 'R':			/* Is it registered? */
          return f->fpiot == '+' ? 0 : -1;
     case 'U':			/* Is it unregistered? */
          return f->fpiot == '-' ? 0 : -1;
     case 'L':			/* public text language */
          if (f->fpic == FPICHARS)
               return -1;
          /* it's entered in all upper case letters */
          return mindatcpy(buf, (char *)f->fpipubis + f->fpil, f->fpill, 1);
     case 'O':			/* owner identifier */
          return mindatcpy(buf, (char *)f->fpipubis + f->fpio, f->fpiol, 0);
     case 'C':			/* public text class */
          n = f->fpic - 1;
          if (n < 0 || n >= sizeof(classes)/sizeof(classes[0]))
               return -1;
          if (buf)
               strcpy(buf, classes[n]);
          return strlen(classes[n]);
     case 'T':			/* text description */
          return mindatcpy(buf, (char *)f->fpipubis + f->fpit, f->fpitl, 0);
     case 'V':
          if (f->fpic < FPICMINV)	/* class doesn't have version */
               return -1;
          if (f->fpiversw > 0)		/* no version */
               return -1;
          if (f->fpivl == 0) {		/* empty version: */
                                        /* use device-independent version*/
               if (buf)
                    strcpy(buf, EMPTY_VERSION);
               return strlen(EMPTY_VERSION);
          }
          return mindatcpy(buf, (char *)f->fpipubis + f->fpiv, f->fpivl, 0);
     case 'E':	              /* public text designating (escape) sequence */
          if (f->fpic != FPICHARS)
               return -1;
          return mindatcpy(buf, (char *)f->fpipubis + f->fpil, f->fpill, 0);
     default:
          break;
     }
     return -1;
}

/* Return non-zero if pathname can be opened for reading.  Uses
   access() when HAVE_ACCESS is defined, otherwise tries to fopen() the
   file. */

static int testopen(pathname)
char *pathname;
{
#ifdef HAVE_ACCESS
     return access(pathname, R_OK) >= 0;
#else /* not HAVE_ACCESS */
     FILE *fp;
     fp = fopen(pathname, "r");
     if (!fp)
          return 0;
     fclose(fp);
     return 1;
#endif /* not HAVE_ACCESS */
}

/* Return a pointer to an dynamically-allocated buffer that contains
   the names of the files containing this entity, with each filename
   terminated by a '\0', and with the list of filenames terminated by
   another '\0'.

   The sources are tried in this order: the system identifier alone
   (when the path does not use %S), the catalog, a system identifier
   that contains SYSID_FILE_SEP or equals STDINNAME, and finally each
   template in the search path.  Return 0 if nothing is found. */

UNIV entgen(f)
struct fpi *f;
{
     char *qname;
     char *file;
     enum catalog_decl_type dtype;
     char *subst = 0;
     const char *sysid;
     const char *catfile;

     assert(f->fpistore != 6);	/* Mustn't call entgen for a notation. */
     if (!path) {
          char *p;
          char c;
          path = getenv(PATH_ENV_VAR);
          if (!path)
               path = DEFAULT_PATH;
          p = path;

          /* Only search for system identifiers if path uses %S. */
          while ((c = *p++) != '\0')
               if (c == '%') {
                    if (*p == 'S') {
                         sysidsrch = 1;
                         break;
                    }
                    /* Skip the field character so that "%%S" is not
                       mistaken for a %S field. */
                    if (*p != '\0' && *p != PATH_FILE_SEP)
                         p++;
               }
     }
     if (f->fpisysis && !sysidsrch)
          return sysidgen((char *)f->fpisysis);
     qname = (char *)f->fpinm;

     switch (f->fpistore) {
     case 3:
          /* NOTE(review): presumably the character stored just before
             the name belongs to the catalog key for this entity type -
             confirm against the code that fills in fpinm. */
          qname--;		/* hack */
          /* fall through */
     case 1:
     case 2:
          dtype = CATALOG_ENTITY_DECL;
          if (ENTCASE)
               subst = getsubst();
          break;
     case 4:
          dtype = CATALOG_DOCTYPE_DECL;
          if (NAMECASE)
               subst = getsubst();
          break;
     default:
          dtype = CATALOG_NO_DECL;
     }

     if (catalog_lookup_entity(catalog,
                               (char *)f->fpipubis,
                               qname,
                               dtype,
                               (char *)subst,
                               &sysid,
                               &catfile))
          return catsysidgen(sysid, catfile);
     if (f->fpisysis
         && (strchr((char *)f->fpisysis, SYSID_FILE_SEP)
             || strcmp((char *)f->fpisysis, STDINNAME) == 0))
          return sysidgen((char *)f->fpisysis);

     file = path;

     for (;;) {
          char *p;
          int len = 0;
          char *fileend = strchr(file, PATH_FILE_SEP);
          if (!fileend)
               fileend = strchr(file, '\0');

          /* Check that all substitutions are non-null, and calculate
             the resulting total length of the filename. */
          for (p = file; p < fileend; p++)
               if (*p == '%') {
                    int n;
                    /* Set len to -1 if a substitution is invalid. */
                    if (++p >= fileend) {
                         len = -1;
                         break;
                    }
                    n = field(f, *p, (char *)0);
                    if (n < 0) {
                         len = -1;
                         break;
                    }
                    len += n;
               }
               else
                    len++;

          if (len > 0) {
               /* We've got a valid non-empty filename. */
               char *s;
               char *buf;

               /* Two extra bytes: one to terminate the filename and
                  one to terminate the list. */
               s = buf = (char *)rmalloc(len + 2);
               for (p = file; p < fileend; p++)
                    if (*p == '%')
                         s += field(f, *++p, s);
                    else
                         *s++ = *p;
               *s++ = '\0';
               if (testopen(buf)) {
                    /* Terminate the array of filenames. */
                    *s++ = '\0';
                    return buf;
               }
               /* NOTE(review): sysidgen() and catsysidgen() release
                  rmalloc'd buffers with frem(); confirm that free() is
                  equivalent here. */
               free((UNIV)buf);
          }
          if (*fileend == '\0')
               break;
          file = ++fileend;
     }
     return 0;
}

/* Handle a system identifier without searching.

   Split s at SYSID_FILE_SEP into a list of filenames in the format
   returned by entgen(), dropping RSCHAR, turning RECHAR into newline
   and skipping empty filenames.  Return 0 if there are no filenames. */

static
UNIV sysidgen(s)
char *s;
{
     char *buf, *p;

     buf = (char *)rmalloc(strlen(s) + 2);

     for (p = buf; *s; s++) {
          if (*s == SYSID_FILE_SEP) {
               if (p > buf && p[-1] != '\0')
                    *p++ = '\0';
          }
          else if (*s == RECHAR)
               *p++ = '\n';
          else if (*s != RSCHAR)
               *p++ = *s;
     }
     /* Terminate this filename. */
     if (p > buf && p[-1] != '\0')
          *p++ = '\0';
     if (p == buf) {
          /* No filenames. */
          frem((UNIV)buf);
          return 0;
     }
     /* Terminate the list. */
     *p++ = '\0';
     return buf;
}

/* Handle a system id in a catalog entry file.

   Split s at SYSID_FILE_SEP into a list of filenames in the format
   returned by entgen().  Each filename for which FILE_IS_RELATIVE is
   true is prefixed with the directory part of catfile, that is
   everything before the position returned by xbasename().  Return 0 if
   there are no filenames. */

static
UNIV catsysidgen(s, catfile)
const char *s;
const char *catfile;
{
     const char *p;
     char *bufp;
     char *buf;
     int nrelative = 0;
     int catdirlen = 0;

     /* Count the relative filenames to size the buffer; this may
        overcount, which only makes the buffer larger than needed. */
     if (FILE_IS_RELATIVE(s))
          nrelative++;
     for (p = s; *p; p++)
          if (*p == SYSID_FILE_SEP
              && FILE_IS_RELATIVE(p + 1))
               nrelative++;
     if (nrelative) {
          const char *base = xbasename(catfile);
          catdirlen = base - catfile;
     }
     /* p now points at the terminator of s, so p - s is its length. */
     buf = (char *)rmalloc(p - s + 2 + nrelative*catdirlen);
     bufp = buf;
     for (;;) {
          if (!*s)
               break;
          if (*s != SYSID_FILE_SEP && FILE_IS_RELATIVE(s)) {
               memcpy(bufp, catfile, catdirlen);
               bufp += catdirlen;
          }
          for (;;) {
               if (*s == SYSID_FILE_SEP) {
                    s++;
                    break;
               }
               *bufp++ = *s++;
               if (*s == '\0')
                    break;
          }
          if (bufp > buf && bufp[-1] != '\0')
               *bufp++ = '\0';
     }
     if (bufp == buf) {
          frem((UNIV)buf);
          return 0;
     }
     *bufp++ = '\0';
     return buf;
}

/* Return a pointer to the part of s that follows the last character
   that occurs in DIR_BASE_SEP, or s itself if there is no such
   character.

   The scan goes all the way down to s[0], so that for a name like
   "/catalog" the result points just past the leading separator and the
   directory part is "/" rather than empty. */

static
const char *xbasename(s)
const char *s;
{
     const char *p = s;

     while (*p)
          p++;
     /* Walk backwards looking at p[-1]; this never forms a pointer
        before the start of s. */
     while (p > s) {
          if (strchr(DIR_BASE_SEP, p[-1]))
               return p;
          p--;
     }
     return s;
}

/*
Local Variables:
c-indent-level: 5
c-continued-statement-offset: 5
c-brace-offset: -5
c-argdecl-indent: 0
c-label-offset: -5
End:
*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -