📄 file.c
字号:
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#include <mach.h>

/*
 * file - determine type of file
 */

/*
 * Assemble a 32-bit value from the four bytes at p, least significant
 * byte first.  The top byte is widened before shifting so that a byte
 * >= 0x80 is never shifted into the sign bit of an int.
 */
#define LENDIAN(p)	((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((ulong)(p)[3]<<24))

uchar	buf[6001];	/* leading sample of the file, NUL-terminated by filetype */
short	cfreq[140];	/* histogram, indexed by ASCII code or by a C* class below */
short	wfreq[50];	/* dictionary hits, indexed by word class */
int	nbuf;		/* number of valid bytes in buf */
Dir*	mbuf;		/* stat information for the file being examined */
int	fd;		/* descriptor of the file being examined */
char	*fname;		/* name of the file being examined */
char	*slash;		/* rightmost '/' in the name, or 0 */

/*
 * The first group are word classes: values for dict[].class and
 * indices into wfreq.  The group starting at Clatin are character
 * classes: indices into cfreq placed above the 7-bit ASCII range so
 * that plain ASCII characters can index cfreq by their own code.
 */
enum
{
	Cword,
	Fword,
	Aword,
	Alword,
	Lword,
	I1,
	I2,
	I3,
	Clatin	= 128,
	Cbinary,
	Cnull,
	Ceascii,
	Cutf,
};

/*
 * Words whose presence hints at a source language.
 *
 * wordfreq() looks entries up by binary search using strcmp, so this
 * table MUST stay sorted in strcmp (byte value) order: upper case
 * sorts before lower case.  "bio" used to sit after "extern", which
 * made the search step over "extern" and never find "bio".
 */
struct
{
	char*	word;
	int	class;
} dict[] =
{
	"PATH",		Lword,
	"TEXT",		Aword,
	"adt",		Alword,
	"aggr",		Alword,
	"alef",		Alword,
	"array",	Lword,
	"bio",		I2,
	"block",	Fword,
	"chan",		Alword,
	"char",		Cword,
	"common",	Fword,
	"con",		Lword,
	"data",		Fword,
	"dimension",	Fword,
	"double",	Cword,
	"extern",	Cword,
	"float",	Cword,
	"fn",		Lword,
	"function",	Fword,
	"h",		I3,
	"implement",	Lword,
	"import",	Lword,
	"include",	I1,
	"int",		Cword,
	"integer",	Fword,
	"iota",		Lword,
	"libc",		I2,
	"long",		Cword,
	"module",	Lword,
	"real",		Fword,
	"ref",		Lword,
	"register",	Cword,
	"self",		Lword,
	"short",	Cword,
	"static",	Cword,
	"stdio",	I2,
	"struct",	Cword,
	"subroutine",	Fword,
	"u",		I2,
	"void",		Cword,
};

/* codes for 'mode' field in language structure */
enum
{
	Normal	= 0,
	First,		/* first entry for language spanning several ranges */
	Multi,		/* later entries for the same language */
	Shared,		/* codes used in several languages */
};

/*
 * Rune ranges attributed to a script or language.  bump_utf_count()
 * binary-searches this table on low/high, so entries must stay in
 * ascending, non-overlapping order.  The entry with a nil name ends
 * the table.
 */
struct
{
	int	mode;		/* see enum above */
	int	count;
	int	low;
	int	high;
	char	*name;
} language[] =
{
	Normal,	0,	0x0100,	0x01FF,	"Extended Latin",
	Normal,	0,	0x0370,	0x03FF,	"Greek",
	Normal,	0,	0x0400,	0x04FF,	"Cyrillic",
	Normal,	0,	0x0530,	0x058F,	"Armenian",
	Normal,	0,	0x0590,	0x05FF,	"Hebrew",
	Normal,	0,	0x0600,	0x06FF,	"Arabic",
	Normal,	0,	0x0900,	0x097F,	"Devanagari",
	Normal,	0,	0x0980,	0x09FF,	"Bengali",
	Normal,	0,	0x0A00,	0x0A7F,	"Gurmukhi",
	Normal,	0,	0x0A80,	0x0AFF,	"Gujarati",
	Normal,	0,	0x0B00,	0x0B7F,	"Oriya",
	Normal,	0,	0x0B80,	0x0BFF,	"Tamil",
	Normal,	0,	0x0C00,	0x0C7F,	"Telugu",
	Normal,	0,	0x0C80,	0x0CFF,	"Kannada",
	Normal,	0,	0x0D00,	0x0D7F,	"Malayalam",
	Normal,	0,	0x0E00,	0x0E7F,	"Thai",
	Normal,	0,	0x0E80,	0x0EFF,	"Lao",
	Normal,	0,	0x1000,	0x105F,	"Tibetan",
	Normal,	0,	0x10A0,	0x10FF,	"Georgian",
	Normal,	0,	0x3040,	0x30FF,	"Japanese",
	Normal,	0,	0x3100,	0x312F,	"Chinese",
	First,	0,	0x3130,	0x318F,	"Korean",
	Multi,	0,	0x3400,	0x3D2F,	"Korean",
	Shared,	0,	0x4e00,	0x9fff,	"CJK",
	Normal,	0,	0,	0,	0,		/* terminal entry */
};

/* gross classification of the sample, set by filetype() */
enum
{
	Fascii,		/* printable ascii */
	Flatin,		/* latin 1 */
	Futf,		/* UTF character set */
	Fbinary,	/* binary */
	Feascii,	/* ASCII with control chars */
	Fnull,		/* NULL in file */
} guess;

void	bump_utf_count(Rune);
int	cistrncmp(char*, char*, int);
void	filetype(int);
int	getfontnum(uchar*, uchar**);
int	isas(void);
int	isc(void);
int	iscint(void);
int	isenglish(void);
int	ishp(void);
int	ishtml(void);
int	isrfc822(void);
int	ismbox(void);
int	islimbo(void);
int	ismung(void);
int	isp9bit(void);
int	isp9font(void);
int	isrtf(void);
int	ismsdos(void);
int	iself(void);
int	istring(void);
int	isoffstr(void);
int	iff(void);
int	long0(void);
int	longoff(void);
int	istar(void);
int	isface(void);
int	isexec(void);
int	p9bitnum(uchar*);
int	p9subfont(uchar*);
void	print_utf(void);
void	type(char*, int);
int	utf_count(void);
void	wordfreq(void);

/*
 * Classifiers tried in order by filetype(); the first one that
 * returns non-zero has already printed the answer and ends the
 * search.  The list is terminated by a nil entry.
 */
int	(*call[])(void) =
{
	long0,		/* recognizable by first 4 bytes */
	istring,	/* recognizable by first string */
	iself,		/* ELF (foreign) executable */
	isexec,		/* native executables */
	iff,		/* interchange file format (strings) */
	longoff,	/* recognizable by 4 bytes at some offset */
	isoffstr,	/* recognizable by string at some offset */
	isrfc822,	/* email file */
	ismbox,		/* mail box */
	istar,		/* recognizable by tar checksum */
	ishtml,		/* html keywords */
	iscint,		/* compiler/assembler intermediate */
	islimbo,	/* limbo source */
	isc,		/* c & alef compiler key words */
	isas,		/* assembler key words */
	ismung,		/* entropy compressed/encrypted */
	isp9font,	/* plan 9 font */
	isp9bit,	/* plan 9 image (as from /dev/window) */
	isenglish,	/* char frequency English */
	isrtf,		/* rich text format */
	ismsdos,	/* msdos exe (virus file attachment) */
	isface,		/* ascii face file */
	0
};

int mime;	/* set by -m: print MIME types instead of descriptions */

#define OCTET	"application/octet-stream\n"
#define PLAIN	"text/plain\n"

/*
 * Parse the -m flag, then classify each named file, or standard
 * input when no file is named.  The longest name, measured in runes,
 * is passed to type() so that the descriptions line up in a column.
 */
void
main(int argc, char *argv[])
{
	int i, j, maxlen;
	char *cp;
	Rune r;

	ARGBEGIN{
	case 'm':
		mime = 1;
		break;
	default:
		fprint(2, "usage: file [-m] [file...]\n");
		exits("usage");
	}ARGEND;

	/* names are only printed when not in mime mode or when there are several */
	maxlen = 0;
	if(mime == 0 || argc > 1){
		for(i = 0; i < argc; i++) {
			for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
				;
			if(j > maxlen)
				maxlen = j;
		}
	}
	if (argc <= 0) {
		if(!mime)
			print ("stdin: ");
		filetype(0);
	}
	else {
		for(i = 0; i < argc; i++)
			type(argv[i], maxlen);
	}
	exits(0);
}

/*
 * Open file and classify it.  When nlen > 0 the name is printed
 * first, followed by a colon and enough blanks to pad it to nlen
 * runes plus one.  Sets the globals slash, fname and fd.
 */
void
type(char *file, int nlen)
{
	Rune r;
	int i;
	char *p;

	if(nlen > 0){
		slash = 0;
		for (i = 0, p = file; *p; i++) {
			if (*p == '/')			/* find rightmost slash */
				slash = p;
			p += chartorune(&r, p);		/* count runes */
		}
		print("%s:%*s",file, nlen-i+1, "");
	}
	fname = file;
	if ((fd = open(file, OREAD)) < 0) {
		print("cannot open\n");
		return;
	}
	filetype(fd);
	close(fd);
}

/*
 * Unicode 4.0 4-byte runes.
*/typedef int Rune1;enum { UTFmax1 = 4,};intfullrune1(char *p, int n){ int c; if(n >= 1) { c = *(uchar*)p; if(c < 0x80) return 1; if(n >= 2 && c < 0xE0) return 1; if(n >= 3 && c < 0xF0) return 1; if(n >= 4) return 1; } return 0;}intchartorune1(Rune1 *rune, char *str){ int c, c1, c2, c3, n; Rune r; c = *(uchar*)str; if(c < 0xF0){ r = 0; n = chartorune(&r, str); *rune = r; return n; } c &= ~0xF0; c1 = *(uchar*)(str+1) & ~0x80; c2 = *(uchar*)(str+2) & ~0x80; c3 = *(uchar*)(str+3) & ~0x80; n = (c<<18) | (c1<<12) | (c2<<6) | c3; if(n < 0x10000 || n > 0x10FFFF){ *rune = Runeerror; return 1; } *rune = n; return 4;}voidfiletype(int fd){ Rune1 r; int i, f, n; char *p, *eob; free(mbuf); mbuf = dirfstat(fd); if(mbuf == nil){ print("cannot stat: %r\n"); return; } if(mbuf->mode & DMDIR) { print(mime ? "text/directory\n" : "directory\n"); return; } if(mbuf->type != 'M' && mbuf->type != '|') { print(mime ? OCTET : "special file #%c/%s\n", mbuf->type, mbuf->name); return; } nbuf = read(fd, buf, sizeof(buf)-1); if(nbuf < 0) { print("cannot read\n"); return; } if(nbuf == 0) { print(mime ? 
PLAIN : "empty file\n"); return; } buf[nbuf] = 0; /* * build histogram table */ memset(cfreq, 0, sizeof(cfreq)); for (i = 0; language[i].name; i++) language[i].count = 0; eob = (char *)buf+nbuf; for(n = 0, p = (char *)buf; p < eob; n++) { if (!fullrune1(p, eob-p) && eob-p < UTFmax1) break; p += chartorune1(&r, p); if (r == 0) f = Cnull; else if (r <= 0x7f) { if (!isprint(r) && !isspace(r)) f = Ceascii; /* ASCII control char */ else f = r; } else if (r == 0x80) { bump_utf_count(r); f = Cutf; } else if (r < 0xA0) f = Cbinary; /* Invalid Runes */ else if (r <= 0xff) f = Clatin; /* Latin 1 */ else { bump_utf_count(r); f = Cutf; /* UTF extension */ } cfreq[f]++; /* ASCII chars peg directly */ } /* * gross classify */ if (cfreq[Cbinary]) guess = Fbinary; else if (cfreq[Cutf]) guess = Futf; else if (cfreq[Clatin]) guess = Flatin; else if (cfreq[Ceascii]) guess = Feascii; else if (cfreq[Cnull]) guess = Fbinary; else guess = Fascii; /* * lookup dictionary words */ memset(wfreq, 0, sizeof(wfreq)); if(guess == Fascii || guess == Flatin || guess == Futf) wordfreq(); /* * call individual classify routines */ for(i=0; call[i]; i++) if((*call[i])()) return; /* * if all else fails, * print out gross classification */ if (nbuf < 100 && !mime) print(mime ? PLAIN : "short "); if (guess == Fascii) print(mime ? PLAIN : "Ascii\n"); else if (guess == Feascii) print(mime ? PLAIN : "extended ascii\n"); else if (guess == Flatin) print(mime ? PLAIN : "latin ascii\n"); else if (guess == Futf && utf_count() < 4) print_utf(); else print(mime ? 
OCTET : "binary\n");}voidbump_utf_count(Rune r){ int low, high, mid; high = sizeof(language)/sizeof(language[0])-1; for (low = 0; low < high;) { mid = (low+high)/2; if (r >= language[mid].low) { if (r <= language[mid].high) { language[mid].count++; break; } else low = mid+1; } else high = mid; }}intutf_count(void){ int i, count; count = 0; for (i = 0; language[i].name; i++) if (language[i].count > 0) switch (language[i].mode) { case Normal: case First: count++; break; default: break; } return count;}intchkascii(void){ int i; for (i = 'a'; i < 'z'; i++) if (cfreq[i]) return 1; for (i = 'A'; i < 'Z'; i++) if (cfreq[i]) return 1; return 0;}intfind_first(char *name){ int i; for (i = 0; language[i].name != 0; i++) if (language[i].mode == First && strcmp(language[i].name, name) == 0) return i; return -1;}voidprint_utf(void){ int i, printed, j; if(mime){ print(PLAIN); return; } if (chkascii()) { printed = 1; print("Ascii"); } else printed = 0; for (i = 0; language[i].name; i++) if (language[i].count) { switch(language[i].mode) { case Multi: j = find_first(language[i].name); if (j < 0) break; if (language[j].count > 0) break; /* Fall through */ case Normal: case First: if (printed) print(" & "); else printed = 1; print("%s", language[i].name); break; case Shared: default: break; } } if(!printed) print("UTF"); print(" text\n");}voidwordfreq(void){ int low, high, mid, r; uchar *p, *p2, c; p = buf; for(;;) { while (p < buf+nbuf && !isalpha(*p)) p++; if (p >= buf+nbuf) return; p2 = p; while(p < buf+nbuf && isalpha(*p)) p++; c = *p; *p = 0; high = sizeof(dict)/sizeof(dict[0]); for(low = 0;low < high;) { mid = (low+high)/2; r = strcmp(dict[mid].word, (char*)p2); if(r == 0) { wfreq[dict[mid].class]++; break; } if(r < 0) low = mid+1; else high = mid; } *p++ = c; }}typedef struct Filemagic Filemagic;struct Filemagic { ulong x; ulong mask; char *desc; char *mime;};/* * integers in this table must be as seen on a little-endian machine * when read from a file. 
*/Filemagic long0tab[] = { 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file\n", OCTET, /* "pac1" */ 0x31636170, 0xFFFFFFFF, "pac3 audio file\n", OCTET, /* "pXc2 */ 0x32630070, 0xFFFF00FF, "pac4 audio file\n", OCTET, 0xBA010000, 0xFFFFFFFF, "mpeg system stream\n", OCTET, 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET, 0x04034B50, 0xFFFFFFFF, "zip archive\n", "application/zip", 070707, 0xFFFF, "cpio archive\n", OCTET, 0x2F7, 0xFFFF, "tex dvi\n", "application/dvi", 0xfaff, 0xfeff, "mp3 audio\n", "audio/mpeg", 0xfeff0000, 0xffffffff, "utf-32be\n", "text/plain charset=utf-32be", 0xfffe, 0xffffffff, "utf-32le\n", "text/plain charset=utf-32le", 0xfeff, 0xffff, "utf-16be\n", "text/plain charset=utf-16be", 0xfffe, 0xffff, "utf-16le\n", "text/plain charset=utf-16le", /* * venti & fossil magic numbers are stored big-endian on disk, * thus the numbers appear reversed in this table. */ 0xad4e5cd1, 0xFFFFFFFF, "venti arena\n", OCTET,};intfilemagic(Filemagic *tab, int ntab, ulong x){ int i; for(i=0; i<ntab; i++) if((x&tab[i].mask) == tab[i].x){ print(mime ? tab[i].mime : tab[i].desc); return 1; } return 0;}intlong0(void){ return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));}typedef struct Fileoffmag Fileoffmag;struct Fileoffmag { ulong off; Filemagic;};/* * integers in this table must be as seen on a little-endian machine * when read from a file. */Fileoffmag longofftab[] = { /* * venti & fossil magic numbers are stored big-endian on disk, * thus the numbers appear reversed in this table. */ 256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition\n", OCTET, 256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section\n", OCTET, 128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer\n", OCTET,};intfileoffmagic(Fileoffmag *tab, int ntab){ int i; ulong x; Fileoffmag *tp; uchar buf[sizeof(long)]; for(i=0; i<ntab; i++) { tp = tab + i; seek(fd, tp->off, 0); if (read(fd, buf, sizeof buf) != sizeof buf) continue; x = LENDIAN(buf); if((x&tp->mask) == tp->x){ print(mime? 
tp->mime: tp->desc); return 1; } } return 0;}intlongoff(void){ return fileoffmagic(longofftab, nelem(longofftab));}intisexec(void){ Fhdr f; seek(fd, 0, 0); /* reposition to start of file */ if(crackhdr(fd, &f)) { print(mime ? OCTET : "%s\n", f.name); return 1; } return 0;}/* from tar.c */enum { NAMSIZ = 100, TBLOCK = 512 };union hblock{ char dummy[TBLOCK]; struct header { char name[NAMSIZ]; char mode[8]; char uid[8]; char gid[8]; char size[12]; char mtime[12]; char chksum[8]; char linkflag; char linkname[NAMSIZ]; /* rest are defined by POSIX's ustar format; see p1003.2b */ char magic[6]; /* "ustar" */ char version[2]; char uname[32]; char gname[32]; char devmajor[8]; char devminor[8]; char prefix[155]; /* if non-null, path = prefix "/" name */ } dbuf;};intchecksum(union hblock *hp){ int i; char *cp; struct header *hdr = &hp->dbuf; for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++) *cp = ' '; i = 0; for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++) i += *cp & 0xff; return i;}intistar(void){ int chksum; char tblock[TBLOCK]; union hblock *hp = (union hblock *)tblock; struct header *hdr = &hp->dbuf; seek(fd, 0, 0); /* reposition to start of file */ if (readn(fd, tblock, sizeof tblock) != sizeof tblock) return 0; chksum = strtol(hdr->chksum, 0, 8); if (hdr->name[0] != '\0' && checksum(hp) == chksum) { if (strcmp(hdr->magic, "ustar") == 0) print(mime? "application/x-ustar\n": "posix tar archive\n"); else print(mime? "application/x-tar\n": "tar archive\n"); return 1; } return 0;}/* * initial words to classify file */struct FILE_STRING{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -