⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ckuusx.c

📁 KERMIT工具 这在办公室下载不了,很多人都下载不到.
💻 C
📖 第 1 页 / 共 5 页
字号:
   non-Unicode cases.*/intscanfile(name,flag,nscanfile) char * name; int * flag, nscanfile; {    FILE * fp;				/* File pointer */    unsigned char buf[SCANFILEBUF];	/* File data buffer for analysis */    int x, val = -1, count = 0;		/* Workers */    int rc = -1;			/* Return code */    int pv = -1;			/* Pattern-match value */    int eof = 0;			/* Flag for file EOF encountered */    int bytes = 0;			/* Total byte count */#ifdef UNICODE    unsigned int c0, c1;		/* First 2 file bytes (for BOM) */#endif /* UNICODE */    extern int pipesend, filepeek;    register int i;			/* Loop control */    int readsize = 0;			/* How much to read */    int eightbit = 0;			/* Number of bytes with 8th bit on */    int c0controls = 0;			/* C0 non-text control-char counter */    int c0noniso = 0;			/* C0 non-ISO control-char counter */    int c1controls = 0;			/* C1 control-character counter */    unsigned int c;			/* Current character */    int runmax = 0;			/* Longest run of 0 bytes */    int runzero = 0;			/* Run of 0 bytes */    int pctzero = 0;			/* Percentage of 0 bytes */    int txtcz = 0;#ifdef CK_CTRLZ    extern int eofmethod;#endif /* CK_CTRLZ */#ifdef UNICODE    int notutf8 = 0;			/* Nonzero if definitely not UTF-8 */    int utf8state = 0;			/* UTF-8 recognizer state */    int oddzero = 0;			/* Number of 0 bytes in odd postions */    int evenzero = 0;			/* and in even positions */    int lfnul = 0;			/* Number of <LF><NUL> sequences */    int crlf = 0;			/* Number of <CRLF> sequences */#else    int notutf8 = 1;#endif /* UNICODE */#ifdef COMMENT#ifdef EVENMAX    int oddrun = 0, oddmax = 0, oddbyte = 0, oddmaxbyte = 0;    int evenrun = 0, evenmax = 0, evenbyte = 0, evenmaxbyte = 0;#endif /* EVENMAX */#endif /* COMMENT */#ifndef NOXFER    if (pipesend || calibrate || sndarray) /* Only for real files */      return(-1);#endif /* NOXFER */    debug(F111,"scanfile",name,nscanfile);#ifdef PATTERNS    if (!filepeek) {	pv = matchname(name,1,-1);	if (pv < 0)	  rc = -1;	else	  rc = (pv == 
1) ? FT_BIN : FT_TEXT;	debug(F111,"scanfile !filepeek result",name,rc);	return(rc);    }#endif /* PATTERNS */#ifdef VMS/* We don't scan in VMS where text files have various record formats in  *//* which record headers contain seemingly non-text bytes.  So the best   *//* we can do in VMS is tell whether the file is text or binary, period.  */    {	int b, x;	b = binary;			/* Save current binary setting */	if (zopeni(ZIFILE,name) > 0) {	/* In VMS this sets binary */	    x = binary;			/* Get result */	    zclose(ZIFILE);		/* Close the file */	    binary = b;			/* Restore previous binary setting */	    rc = x ? FT_BIN : FT_TEXT;	    val = 0;	    goto xscanfile;	}    }#endif /* VMS */    eof = 0;				/* End-of-file reached indicator */#ifdef OS2    fp = fopen(name, "rb");		/* Open the file in binary mode */#else    fp = fopen(name, "r");#endif /* OS2 */    if (!fp)				/* Failed? */      return(-1);    while (1) {				/* One or more gulps from file */	if (eof) {			/* EOF from last time? */	    debug(F111,"scanfile at EOF",name,bytes);	    if (runzero > runmax)	      runmax = runzero;	    break;	}	if (nscanfile < 0) {		/* Reading whole file */	    readsize = SCANFILEBUF;	} else {			/* Reading first nscanfilee bytes */	    readsize = nscanfile - bytes;	    if (readsize < 1)	      break;	    if (readsize > SCANFILEBUF)	      readsize = SCANFILEBUF;	}	debug(F101,"scanfile readsize","",readsize);	count = fread(buf,1,readsize,fp); /* Read a buffer */	if (count == EOF || count == 0) {	    debug(F111,"scanfile EOF",name,count);	    break;	}	debug(F111,"scanfile buffer ok",name,count);	if (bytes == 0 && count > 8) {	    /* PDF files can look like text in the beginning. */	    if (!ckstrcmp((char *)buf,"%PDF-1.",7,1)) {		if (isdigit(buf[7])) {		    if (buf[8] == '\015' ||			count > 9 && buf[8] == SP && buf[9] == '\015') {#ifdef DEBUG			buf[8] = NUL;			debug(F110,"scanfile PDF",buf,0);#endif /* DEBUG */			binary = 1;	/* But they are binary. 
*/			break;		    }		}	    } else if (!ckstrcmp((char *)buf,"%!PS-Ado",8,1)) {		/* Ditto for PostScript */#ifdef DEBUG		int i;		for (i = 8; i < count; i++) {		    if (buf[i] < '!') {			buf[i] = NUL;			break;		    }		}		debug(F110,"scanfile PostScript",buf,0);#endif /* DEBUG */		binary = 1;		break;#ifndef NOPCLSCAN	    } else if (!ckstrcmp((char *)buf,") HP-PCL",8,1)) {		/* HP PCL printer language */#ifdef DEBUG		int i;		for (i = 8; i < count; i++) {		    if (buf[i] < '!') {			buf[i] = NUL;			break;		    }		}		debug(F110,"scanfile PCL",buf,0);#endif /* DEBUG */		binary = 1;		break;	    } #endif /* NOPCLSCAN */#ifndef NOPJLSCAN	      else if (buf[0] == '\033' && (buf[1] == 'E' || buf[1] == '%')) {		/* Ditto for PJL Job printer header */#ifdef DEBUG		int i;		for (i = 2; i < count; i++) {		    if (buf[i] < '!') {			buf[i] = NUL;			break;		    }		}		debug(F110,"scanfile PJL Job printer header",buf,0);#endif /* DEBUG */		binary = 1;		break;#endif /* NOPJLSCAN */	    }	}#ifdef UNICODE	if (bytes == 0 && count > 1) {	    int incl_cnt = 0;	    /* First look for BOM */	    c0 = (unsigned)((unsigned)buf[0]&0xFF); /* First file byte */	    c1 = (unsigned)((unsigned)buf[1]&0xFF); /* Second byte */	    if (c0 == 0xFE && c1 == 0xFF) {	/* UCS-2 BE */		rc = FT_UCS2;		val = 0;		debug(F111,"scanfile UCS2 BOM BE",ckitoa(val),rc);		incl_cnt++;	    } else if (c0 == 0xFF && c1 == 0xFE) { /* UCS-2 LE */		rc = FT_UCS2;		val = 1;		debug(F111,"scanfile UCS2 BOM LE",ckitoa(val),rc);		incl_cnt++;	    } else if (count > 2) if (c0 == 0xEF && c1 == 0xBB &&		       (unsigned)((unsigned)buf[2]&0xFF) == 0xBF) {		rc = FT_UTF8;		debug(F111,"scanfile UTF8 BOM",ckitoa(val),rc);		incl_cnt++;	    }	    if (incl_cnt) {		/* Have BOM */		bytes += count;		goto xscanfile;	    }	}#endif /* UNICODE */	bytes += count;			/* Count bytes read */	eof = feof(fp);			/* Flag for at EOF  */	for (i = 0; i < count; i++) {	/* For each byte... 
*/	    c = (unsigned)buf[i];	/* For ease of reference */	    if (!c) {			/* Zero byte? */#ifdef EVENMAX		if (i&1)		/* In odd position */		  oddzero++;		else		  evenzero++;		/* In even position */#endif /* EVENMAX */		runzero++;	    } else {			/* Not a zero byte */		if (runzero > runmax)		  runmax = runzero;		if (runmax > 2)		/* That's all we need to be certain */		  break;		/* it's a binary file. */		runzero = 0;	    }#ifdef COMMENT#ifdef EVENMAX/* This is to catch UCS-2 with a non-ASCII, non-Latin-1 repertoire  */	    if (i > 1) {	      /* Look for runs of alternating chars */		if (i&1) {		    if (c == buf[i-2]) { /* In odd positions */			oddrun++;			oddbyte = c;		    } else {			oddmax = oddrun;			oddmaxbyte = oddbyte;		    }		} else {		/* and even positions */		    if (c == buf[i-2]) {			evenrun++;			evenbyte = c;		    } else {			evenmax = evenrun;			evenmaxbyte = evenbyte;		    }		}	    }#endif /* EVENMAX */#endif /* COMMENT */	    if ((c & 0x80) == 0) {	/* We have a 7-bit byte */#ifdef UNICODE		if (i > 0 && c == 10) { /* Linefeed */		    if (buf[i-1] == 0) lfnul++; /* Preceded by NUL */		    else if (buf[i-1] == 13) crlf++; /* or by CR... */		}#endif /* UNICODE */		if (c < ' ') {		/* Check for CO controls */		    if (c != LF && c != CR && c != HT && c != FF) {			c0controls++;			if (c != ESC && c != SO && c != SI)			  c0noniso++;		    }		    if ((c == '\032')	/* Ctrl-Z */#ifdef COMMENT			&& eof && (i >= count - 2)#endif /* COMMENT */			) {			c0controls--;			c0noniso--;#ifdef CK_CTRLZ			if (eofmethod == XYEOF_Z && txtcz == 0) {			    if (c0controls == 0) /* All text prior to Ctrl-Z */			      txtcz = 1;			}#endif /* CK_CTRLZ */		    }		}#ifdef UNICODE		if (!notutf8 && utf8state) { /* In UTF-8 sequence? 
*/		    utf8state = 0;		    debug(F000,"scanfile","7-bit byte in UTF8 sequence",c);		    notutf8++;		/* Then it's not UTF-8 */		    continue;		}#endif /* UNICODE */	    } else {			/* We have an 8-bit byte */		eightbit++;		/* Count it */		if (c >= 0x80 && c < 0xA0) /* Check for C1 controls */		  c1controls++;#ifdef UNICODE		if (!notutf8) {		/* If it might still be UTF8... */		    switch (utf8state) { /* Enter the UTF-8 state machine */		      case 0:		 /* First byte... */			if ((c & 0xE0) == 0xC0) { /* Tells number of */			    utf8state = 1;        /* subsequent bytes */			} else if ((c & 0xF0) == 0xE0) {			    utf8state = 2;			} else if ((c & 0xF8) == 0xF0) {			    utf8state = 3;			} else {			    notutf8++;			}			break;		      case 1:		/* Subsequent byte */		      case 2:		      case 3:			if ((c & 0xC0) != 0x80) { /* Must start with 10 */			    debug(F000,"scanfile",				  "bad byte in UTF8 sequence",c);			    notutf8++;			    break;			}			utf8state--;	/* Good, one less in this sequence */			break;		      default:		/* Shouldn't happen */			debug(F111,"scanfile","bad UTF8 state",utf8state);			notutf8++;		    }		}#endif /* UNICODE */	    }	}    }    fclose(fp);				/* Close the file */    debug(F101,"scanfile bytes","",bytes);    if (bytes == 0)			/* If nothing was read */      return(-1);			/* we're done. */#ifdef EVENMAX    /* In case we had a run that never broke... 
*/#ifdef COMMENT    if (oddmax == 0) {	oddmax = oddrun;	oddmaxbyte = oddbyte;    }    if (evenmax == 0) {	evenmax = evenrun;	evenmaxbyte = evenbyte;    }#endif /* COMMENT */    if (runmax == 0) {	runmax = runzero;    }#endif /* EVENMAX */#ifdef UNICODE    if (bytes > 100)			/* Bytes is not 0 */      pctzero = (evenzero + oddzero) / (bytes / 100);    else      pctzero = ((evenzero + oddzero) * 100) / bytes;#endif /* UNICODE */#ifdef DEBUG    if (deblog) {			/* If debugging, dump statistics */	debug(F101,"scanfile c0controls ","",c0controls);	debug(F101,"scanfile c0noniso   ","",c0noniso);	debug(F101,"scanfile c1controls ","",c1controls);	debug(F101,"scanfile eightbit   ","",eightbit);#ifdef UNICODE	debug(F101,"scanfile crlf       ","",crlf);	debug(F101,"scanfile lfnul      ","",lfnul);	debug(F101,"scanfile notutf8    ","",notutf8);	debug(F101,"scanfile evenzero   ","",evenzero);	debug(F101,"scanfile oddzero    ","",oddzero);	debug(F101,"scanfile even/odd   ","",(evenzero / (oddzero + 1)));	debug(F101,"scanfile odd/even   ","",(oddzero / (evenzero + 1)));	debug(F101,"scanfile pctzero    ","",pctzero);#endif /* UNICODE */#ifdef COMMENT#ifdef EVENMAX	debug(F101,"scanfile oddmax     ","",oddmax);	debug(F101,"scanfile oddmaxbyte ","",oddmaxbyte);	debug(F101,"scanfile evenmax    ","",evenmax);	debug(F101,"scanfile evenmaxbyte","",evenmaxbyte);#endif /* EVENMAX */#endif /* COMMENT */	debug(F101,"scanfile runmax     ","",runmax);    }#endif /* DEBUG */#ifdef UNICODE    x = eightbit ? bytes / 20 : bytes / 4; /* For UCS-2... */    if (runmax > 2) {			/* File has run of more than 2 NULs */	debug(F100,"scanfile BIN runmax","",0);	rc = FT_BIN;			/* so it can't be any kind of text. 
*/	goto xscanfile;    } else if (rc == FT_UCS2 || (rc == FT_UTF8 && runmax == 0)) {	goto xscanfile;			/* File starts with a BOM */    } else if (eightbit > 0 && !notutf8) { /* File has 8-bit data */	if (runmax > 0) {		   /* and runs of NULs */	    debug(F100,"scanfile BIN (nnUTF8) runmax","",0);	    rc = FT_BIN;		   /* UTF-8 doesn't have NULs */	} else {			   /* No NULs */	    debug(F100,"scanfile UTF8 (nnUTF8 + runmax == 0)","",0);	    rc = FT_UTF8;		   /* and not not UTF-8, so is UTF-8 */	}	goto xscanfile;    }/*  For UCS-2 detection, see if the text contains lines delimited by  ASCII controls and containing spaces, ASCII digits, or other ASCII  characters, thus forcing the presence of a certain percentage of zero bytes.  For this purpose require 20% zero bytes, with at least six times as many  in even (odd) positions as in odd (even) positions.*/    if ((evenzero >= x && oddzero == 0) ||	((((evenzero / (oddzero + 1)) > 6) && (pctzero > 20)) &&	(crlf == 0) &&	(lfnul > 1))	) {	    debug(F100,"scanfile UCS2 noBOM BE (even/oddzero)","",0);	rc = FT_UCS2;	val = 0;    } else if ((evenzero == 0 && oddzero >= x) ||	       ((((oddzero / (evenzero + 1)) > 6) && (pctzero > 20)) &&	       (crlf == 0) &&	       (lfnul > 1))	       ) {	debug(F100,"scanfile UCS2 noBOM LE (even/oddzero)","",0);	rc = FT_UCS2;	val = 1;#ifdef COMMENT#ifdef EVENMAX/*  If the tests above fail, we still might have UCS-2 if there are significant  runs of identical bytes in alternating positions, but only if it also has  unusual C0 controls (otherwise we'd pick up hex files here).  
NOTE: We  don't actually do this -- EVENMAX is not defined (see comments above at  first occurrence of EVENMAX).*/    } else if (c0noniso && evenmax > bytes / 4) {	debug(F100,"scanfile UCS2 BE (evenmax)","",0);	rc = FT_UCS2;	val = 0;    } else if (c0noniso && oddmax > bytes / 4) {	debug(F100,"scanfile UCS2 LE (evenmax)","",0);	rc = FT_UCS2;	val = 1;#endif /* EVENMAX */#endif /* COMMENT */    }/*  It seems to be UCS-2 but let's be more certain since there is no BOM...  If the number of 7- and 8-bit characters is approximately equal, it might  be a compressed file.  In this case we decide based on the name.*/    if (rc == FT_UCS2) {	if (eightbit > 0) {	    int j, k;	    j = (c1controls * 100) / (c0controls + 1);	    debug(F101,"scanfile c1/c0      ","",j);	    k = (bytes * 100) / eightbit;	    debug(F101,"scanfile pct 8bit   ","",k);	    if (k > 40 && k < 60 && j > 60) {		if (ckmatch("{*.Z,*.gz,*.zip,*.ZIP}",name,1,1)) {		    debug(F110,"scanfile 8-bit BIN compressed",name,0);		    rc = FT_BIN;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -