📄 htwais.c
字号:
/*	WorldWideWeb - Wide Area Information Server Access	HTWAIS.c
**	==================================================
**
**	This module allows a WWW server or client to read data from a
**	remote WAIS server, and provide that data to a WWW client in
**	hypertext form.  Source files, once retrieved, are stored and used
**	to provide information about the index when that is accessed.
**
**	Authors
**		BK	Brewster Kahle, Thinking Machines, <Brewster@think.com>
**		TBL	Tim Berners-Lee, CERN <timbl@info.cern.ch>
**		FM	Foteos Macrides, WFEB <macrides@sci.wfeb.edu>
**
**	History
**	   Sep 91	TBL adapted shell-ui.c (BK) with HTRetrieve.c from WWW.
**	   Feb 91	TBL Generated HTML cleaned up a bit (quotes, escaping)
**			    Refers to lists of sources.
**	   Mar 93	TBL Lib 2.0 compatible module made.
**	   May 94	FM  Added DIRECT_WAIS support for VMS.
**
**	Bugs
**		Uses C stream i/o to read and write sockets, which won't work
**		on VMS TCP systems.
**
**		Should cache connections.
**
**		ANSI C only as written
**
**	Bugs fixed
**		NT Nathan Torkington (Nathan.Torkington@vuw.ac.nz)
**
**	WAIS comments:
**
**	1.	Separate directories for different system's .o would help
**	2.	Document ids are rather long!
**
**	WWW Address mapping convention:
**
**	/servername/database/type/length/document-id
**
**	/servername/database?word+word+word
*/
/* WIDE AREA INFORMATION SERVER SOFTWARE:
   No guarantees or restrictions.  See the readme file for the full standard
   disclaimer.
Brewster@think.com*/

#include <HTUtils.h>
#include <HTParse.h>
#include <HTAccess.h> /* We implement a protocol */
#include <HTML.h> /* The object we will generate */
#include <HTWSRC.h>
#include <HTTCP.h>
#include <HTCJK.h>
#include <HTAlert.h>

/* From WAIS
** ---------
*/
#ifdef VMS
#include <HTVMS_WaisUI.h>
#include <HTVMS_WaisProt.h>
#else
#include <ui.h>
#endif /* VMS */

#define MAX_MESSAGE_LEN 100000
#define CHARS_PER_PAGE 10000 /* number of chars retrieved in each request */

#define WAISSEARCH_DATE "Fri Jul 19 1991"

/* FROM WWW
** --------
*/
#include <LYUtils.h>
#include <LYLeaks.h>

#define DIRECTORY "/cnidr.org:210/directory-of-servers"
/* #define DIRECTORY "/quake.think.com:210/directory-of-servers" */

#define BIG 1024 /* identifier size limit @@@@@ */
                 /* (size of the static buffer in WWW_from_WAIS) */
#define BUFFER_SIZE 4096 /* Arbitrary size for efficiency */

#define HEX_ESCAPE '%' /* Introduces a two-digit hex escape in a WWW id */

PRIVATE BOOL as_gate; /* Client is using us as gateway */

/* Scratch buffer; display_search_response formats text into it with sprintf. */
PRIVATE char line[2048]; /* For building strings to display */ /* Must be able to take id */

/*
** Shorthands for calling through the output object's method table.
** Every one of them expects a variable named `target' to be in scope.
*/
#define PUTC(c) (*target->isa->put_character)(target, c)
#define PUTS(s) (*target->isa->put_string)(target, s)
#define START(e) (*target->isa->start_element)(target, e, 0, 0, -1, 0)
#define END(e) (*target->isa->end_element)(target, e, 0)
/* Like END(e), but only for elements whose DTD entry is not SGML_EMPTY. */
/* NOTE(review): expands to a bare `if', so take care next to an `else'. */
#define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \
                        (*target->isa->end_element)(target, e, 0)
#define FREE_TARGET (*target->isa->_free)(target)

/*
** Only the leading `isa' member of each object is declared here; that is
** all the macros above dereference.
*/
struct _HTStructured { CONST HTStructuredClass * isa; /* ... */};
struct _HTStream { CONST HTStreamClass * isa; /* ... */};

/* ------------------------------------------------------------------------ */
/* ---------------- Local copy of connect_to_server calls ----------------- */
/* ------------------------------------------------------------------------ */
/* Returns 1 on success, 0 on fail, -1 on interrupt.
*/PRIVATE int fd_mosaic_connect_to_server ARGS3( char *, host_name, long, port, long *, fd){ char *dummy = NULL; int status; int result; HTSprintf0(&dummy, "%s//%s:%d/", STR_WAIS_URL, host_name, port); status = HTDoConnect (dummy, "WAIS", 210, (int *)fd); if (status == HT_INTERRUPTED) { result = -1; } else if (status < 0) { result = 0; } else { result = 1; } FREE(dummy); return result;}/* Returns 1 on success, 0 on fail, -1 on interrupt. */#ifdef VMSPRIVATE int mosaic_connect_to_server ARGS3( char *, host_name, long, port, long *, fdp)#elsePRIVATE int mosaic_connect_to_server ARGS3( char *, host_name, long, port, FILE **, fp)#endif /* VMS */{#ifndef VMS FILE* file;#endif /* VMS */ long fd; int rv; rv = fd_mosaic_connect_to_server (host_name, port, &fd); if (rv == 0) { HTAlert (gettext("Could not connect to WAIS server.")); return 0; } else if (rv == -1) { HTAlert (CONNECTION_INTERRUPTED); return -1; }#ifndef VMS if ((file = fdopen(fd,"r+")) == NULL) { HTAlert (gettext("Could not open WAIS connection for reading.")); return 0; } *fp = file;#else *fdp = fd;#endif /* VMS */ return 1;}/* ------------------------------------------------------------------------ *//* ------------------------------------------------------------------------ *//* showDiags*//* modified from Jonny G's version in ui/question.c */PRIVATE void showDiags ARGS2( HTStream *, target, diagnosticRecord **, d){ long i; for (i = 0; d[i] != NULL; i++) { if (d[i]->ADDINFO != NULL) { PUTS(gettext("Diagnostic code is ")); PUTS(d[i]->DIAG); PUTC(' '); PUTS(d[i]->ADDINFO); PUTC('\n'); } }}/* Matrix of allowed characters in filenames** -----------------------------------------*/PRIVATE BOOL acceptable[256];PRIVATE BOOL acceptable_inited = NO;PRIVATE void init_acceptable NOARGS{ unsigned int i; char * good = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$"; for(i=0; i<256; i++) acceptable[i] = NO; for(;*good; good++) acceptable[(unsigned int)*good] = YES; acceptable_inited = YES;}/* 
Transform file identifier into WWW address** ------------------------------------------****** On exit,** returns nil if error** pointer to malloced string (must be freed) if ok*/PRIVATE char * WWW_from_archie ARGS1( char *, file){ char * end; char * result; char * colon; for(end=file; *end > ' '; end++); /* assumes ASCII encoding*/ result = (char *)malloc(10 + (end-file)); if (!result) return result; /* Malloc error */ strcpy(result, "file://"); strncat(result, file, end-file); colon = strchr(result+7, ':'); /* Expect colon after host */ if (colon) { for(; colon[0]; colon[0]=colon[1], colon++); /* move down */ } return result;} /* WWW_from_archie *//* Transform document identifier into URL** --------------------------------------**** Bugs: A static buffer of finite size is used!** The format of the docid MUST be good!**** On exit,** returns nil if error** pointer to malloced string (must be freed) if ok*/PRIVATE char hex [17] = "0123456789ABCDEF";PRIVATE char * WWW_from_WAIS ARGS1( any *, docid){ static char buf[BIG]; char * q = buf; char * p = (docid->bytes); char * result = NULL; int i, l; if (TRACE) { char *p; fprintf(tfp, "WAIS id (%d bytes) is ", (int)docid->size); for (p = docid->bytes; p < docid->bytes+docid->size; p++) { if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */ fprintf(tfp, "%c", *p); else fprintf(tfp, "<%x>", (unsigned)*p); } fprintf(tfp, "\n"); } for (p = docid->bytes; (p < docid->bytes+docid->size) && (q < &buf[BIG]);) { CTRACE((tfp, " Record type %d, length %d\n", p[0], p[1])); if (*p > 10) { CTRACE((tfp, "Eh? 
DOCID record type of %d!\n", *p)); return 0; } { /* Bug fix -- allow any byte value 15 Apr 93 */ unsigned int i = (unsigned) *p++; if (i > 99) { *q++ = (i/100) + '0'; i = i % 100; } if (i > 9) { *q++ = (i/10) + '0'; i = i % 10; } *q++ = i + '0'; /* Record type */ } *q++ = '='; /* Separate */ l = *p++; /* Length */ for (i = 0; i < l; i++, p++){ if (!acceptable[*p]) { *q++ = HEX_ESCAPE; /* Means hex coming */ *q++ = hex[(*p) >> 4]; *q++ = hex[(*p) & 15]; } else *q++ = *p; } *q++= ';'; /* Terminate field */ } *q++ = 0; /* Terminate string */ CTRACE((tfp, "WWW form of id: %s\n", buf)); StrAllocCopy(result, buf); return result;} /* WWW_from_WAIS *//* Transform URL into WAIS document identifier** -------------------------------------------**** On entry,** docname points to valid name produced originally by** WWW_from_WAIS** On exit,** docid->size is valid** docid->bytes is malloced and must later be freed.*/PRIVATE any * WAIS_from_WWW ARGS2( any *, docid, char *, docname){ char *z; /* Output pointer */ char *sor; /* Start of record - points to size field. */ char *p; /* Input pointer */ char *q; /* Poisition of "=" */ char *s; /* Position of semicolon */ int n; /* size */ CTRACE((tfp, "WWW id (to become WAIS id): %s\n", docname)); for (n = 0, p = docname; *p; p++) { /* Count sizes of strings */ n++; if (*p == ';') n--; /* Not converted */ else if (*p == HEX_ESCAPE) n = n-2; /* Save two bytes */ docid->size = n; } if (!(docid->bytes = (char *) malloc(docid->size))) /* result record */ outofmem(__FILE__, "WAIS_from_WWW"); z = docid->bytes; for (p = docname; *p; ) { /* Convert of strings */ /* Record type */ *z = 0; /* Initialize record type */ while (*p >= '0' && *p <= '9') { *z = *z*10 + (*p++ - '0'); /* Decode decimal record type */ } z++; if (*p != '=') return 0; q = p;/* *z++ = *p++ - '0'; q = strchr(p , '='); if (!q) return 0;*/ s = strchr(q, ';'); /* (Check only) */ if (!s) return 0; /* Bad! 
No ';'; */ sor = z; /* Remember where the size field was */ z++; /* Skip record size for now */ for (p = q+1; *p != ';';) { if (*p == HEX_ESCAPE) { char c; unsigned int b; p++; c = *p++; b = from_hex(c); c = *p++; if (!c) break; /* Odd number of chars! */ *z++ = (b<<4) + from_hex(c); } else { *z++ = *p++; /* Record */ } } *sor = (z-sor-1); /* Fill in size -- not counting size itself */ p++; /* After semicolon: start of next record */ } if (TRACE) { char *p; fprintf(tfp, "WAIS id (%d bytes) is ", (int)docid->size); for (p = docid->bytes; p < docid->bytes+docid->size; p++) { if ((*p >= ' ') && (*p<= '~')) /* Assume ASCII! */ fprintf(tfp, "%c", *p); else fprintf(tfp, "<%x>", (unsigned)*p); } fprintf(tfp, "\n"); } return docid; /* Ok */} /* WAIS_from_WWW *//* Send a plain text record to the client output_text_record()** --------------------------------------*/PRIVATE void output_text_record ARGS4( HTStream *, target, WAISDocumentText *, record, boolean, quote_string_quotes, boolean, binary){ long count; /* printf(" Text\n"); print_any(" DocumentID: ", record->DocumentID); printf(" VersionNumber: %d\n", record->VersionNumber); */ if (binary) { (*target->isa->put_block)(target, record->DocumentText->bytes, record->DocumentText->size); return; } for (count = 0; count < record->DocumentText->size; count++){ long ch = (unsigned char)record->DocumentText->bytes[count]; if (ch == 27) { /* What is this in for? 
Tim */ /* then we have an escape code */ /* if the next letter is '(' or ')', then ignore two letters */ if ('(' == record->DocumentText->bytes[count + 1] || ')' == record->DocumentText->bytes[count + 1]) count += 1; /* it is a term marker */ else count += 4; /* it is a paragraph marker */ } else if (ch == '\n' || ch == '\r') { PUTC('\n'); } else if (HTCJK != NOCJK || ch == '\t' || isprint(ch)){ PUTC(ch); } }} /* output text record *//* Format A Search response for the client display_search_response** ---------------------------------------*//* modified from tracy shen's version in wutil.c * displays either a text record or a set of headlines. */PRIVATE void display_search_response ARGS4( HTStructured *, target, SearchResponseAPDU *, response, char *, database, char *, keywords){ WAISSearchResponse *info; long i, k; BOOL archie = strstr(database, "archie")!=0; /* Special handling */ CTRACE((tfp, "HTWAIS: Displaying search response\n")); PUTS(gettext("Index ")); START(HTML_EM); PUTS(database); END(HTML_EM); sprintf(line, gettext(" contains the following %d item%s relevant to \""), (int)(response->NumberOfRecordsReturned), response->NumberOfRecordsReturned ==1 ? "" : "s"); PUTS(line); START(HTML_EM); PUTS(keywords); END(HTML_EM); PUTS("\".\n"); PUTS(gettext("The first figure after each entry is its relative score, ")); PUTS(gettext("the second is the number of lines in the item.")); START(HTML_BR); START(HTML_BR); PUTC('\n'); START(HTML_OL); if (response->DatabaseDiagnosticRecords != 0) { info = (WAISSearchResponse *)response->DatabaseDiagnosticRecords; i =0; if (info->Diagnostics != NULL) showDiags((HTStream*)target, info->Diagnostics); if (info->DocHeaders != 0) { for (k = 0; info->DocHeaders[k] != 0; k++ ) { WAISDocumentHeader* head = info->DocHeaders[k]; char * headline = trim_junk(head->Headline); any * docid = head->DocumentID; char * docname; /* printable version of docid */ i++; /* ** Make a printable string out of the document id. 
*/ CTRACE((tfp, "HTWAIS: %2ld: Score: %4ld, lines:%4ld '%s'\n", i, (long int)(info->DocHeaders[k]->Score), (long int)(info->DocHeaders[k]->Lines), headline)); START(HTML_LI); if (archie) { char * www_name = WWW_from_archie(headline); if (www_name) { HTStartAnchor(target, NULL, www_name); PUTS(headline); END(HTML_A); FREE(www_name); } else { PUTS(headline); PUTS(gettext(" (bad file name)")); } } else { /* Not archie */ docname = WWW_from_WAIS(docid); if (docname) { if ((head->Types) && (!strcmp(head->Types[0], "URL"))) { HTStartAnchor(target, NULL, headline); } else{ char * dbname = HTEscape(database, URL_XPALPHAS); char * w3_address = NULL; HTSprintf0(&w3_address, "/%s/%s/%d/%s", dbname, head->Types ? head->Types[0] : "TEXT", (int)(head->DocumentLength), docname); HTStartAnchor(target, NULL, w3_address); FREE(w3_address);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -