📄 htwais.c
字号:
/*                                                                   HTWAIS.c
**      WORLDWIDEWEB - WIDE AREA INFORMATION SERVER ACCESS
**
**      (c) COPYRIGHT MIT 1995.
**      Please first read the full copyright statement in the file COPYRIGH.
**      @(#) $Id: HTWAIS.c,v 2.58 1999/02/22 22:10:12 frystyk Exp $
**
**      This module allows a WWW server or client to read data from a
**      remote WAIS server, and provide that data to a WWW client in
**      hypertext form. Source files, once retrieved, are stored and used
**      to provide information about the index when that is accessed.
**
**      Authors
**              BK      Brewster Kahle, Thinking Machines, <Brewster@think.com>
**              TBL     Tim Berners-Lee, CERN <timbl@w3.org>
**
**      Contributors
**              QL      QingLong, Yggdrasil Inc., <qinglong@Yggdrasil.com>
**
**      History
**         Sep 91       TBL adapted shell-ui.c (BK) with HTRetrieve.c from WWW.
**         Feb 91       TBL Generated HTML cleaned up a bit (quotes, escaping)
**                          Refers to lists of sources.
**         Mar 93       TBL Lib 2.0 compatible module made.
**         May 95       CHJ modified for freeWAIS-0.5
**         Jun 97       QL  modified for w3c-libwww-5.0a.
**         Mar 98       QL  modified for w3c-libwww-5.1i.
**
**      Bugs
**              Uses C stream i/o to read and write sockets, which won't work
**              on VMS TCP systems.
**
**              Should cache connections.
**
**              ANSI C only as written
**
**      Bugs fixed
**              NT Nathan Torkington (Nathan.Torkington@vuw.ac.nz)
**
**      WAIS comments:
**
**              1.      Separate directories for different system's .o would help
**              2.      Document ids are rather long!
**
**      WWW Address mapping convention:
**
**              /servername/database/type/length/document-id
**
**              /servername/database?word+word+word
*/

/* WIDE AREA INFORMATION SERVER SOFTWARE:
   No guarantees or restrictions.  See the readme file for the full standard
   disclaimer.

   Brewster@think.com
*/

/* Library include files */
#include "wwwsys.h"
#include "WWWUtil.h"
#include "WWWCore.h"
#include "WWWHTML.h"
#include "HTReqMan.h"

#ifndef HT_DIRECT_WAIS

/*
**  Stub used when the library is built without HT_DIRECT_WAIS:
**  every request fails with HT_ERROR.
*/
PUBLIC int HTLoadWAIS (SOCKET soc, HTRequest* request)
{
    return HT_ERROR;
}

#else

#ifdef HAVE_WAIS_WAIS_H
#include "wais/wais.h"
#else
#ifdef HAVE_WAIS_H
#include "wais.h"
#else
#ifdef WAIS_INCLUDE
#include WAIS_INCLUDE
#else
#include "wais/wais.h"
#endif
#endif
#endif

#define DIRECTORY "/cnidr.org:210/directory-of-servers"

#define BIG 1024        /* identifier size limit  @@@@@ */

/*      From WAIS
**      ---------
*/
#undef MAX_MESSAGE_LEN
#define MAX_MESSAGE_LEN 100000

#undef CHARS_PER_PAGE
#define CHARS_PER_PAGE 10000    /* number of chars retrieved in each request */

#undef WAISSEARCH_DATE
#define WAISSEARCH_DATE "Fri Jul 19 1991"

/*      FROM WWW
**      --------
*/
#define BUFFER_SIZE 4096        /* Arbitrary size for efficiency */
#define LINE_BUFFER_SIZE 2048
#define HEX_ESCAPE '%'

extern FILE * logfile;          /* Log file output */

PRIVATE int HTMaxWAISLines = 200;       /* Max number of entries from a search */

/* Hypertext object building machinery */
#define PUTC(c) (*target->isa->put_character)(target, c)
#define PUTS(s) (*target->isa->put_string)(target, s)
#define START(e) (*target->isa->start_element)(target, e, 0, 0)
#define END(e) (*target->isa->end_element)(target, e)
#define FREE_TARGET (*target->isa->_free)(target)

/*
 * Type definitions and global variables etc. local to this module
 */

/* Final states have negative value */
typedef enum _HTWAISState
{
    HTWAIS_ERROR            = -2,
    HTWAIS_OK               = -1,
    HTWAIS_BEGIN            = 0,
    HTWAIS_PARSING_URL      = 1,
    HTWAIS_NEED_CONNECTION  = 2,
    HTWAIS_NEED_REQUEST     = 3,
    HTWAIS_NEED_RESPONSE    = 4,
    HTWAIS_PARSING_RESPONSE = 5,
    HTWAIS_FETCH_DOCUMENT   = 6,
    HTWAIS_CLEANUP          = 7
} HTWAISState;

/*
 * This is the context structure for this module
 */
typedef struct _wais_info
{
    BOOL        as_gate;            /* Client is using us as gateway */
    HTWAISState state;              /* Current State */
    int         result;             /* Result to report to the after filter */
    HTNet*      net;                /* Net object */
    FILE*       connection;
    char*       names;              /* Copy of arg to be hacked up */
    char*       basetitle;
    char*       wais_database;      /* name of current database */
    char*       www_database;       /* Same name escaped */
    char*       request_message;    /* arbitrary message limit */
    char*       response_message;   /* arbitrary message limit */
} wais_info;

struct _HTStream
{
    const HTStreamClass*    isa;
    HTStream*               target;
    HTRequest*              request;
    wais_info*              wais;
    int                     status;
    /* ... */
};

struct _HTInputStream
{
    const HTInputStreamClass *  isa;
};

struct _HTStructured
{
    const HTStructuredClass *   isa;
    /* ... */
};

/* ------------------------------------------------------------------------- */
/*                            Auxiliary Functions                            */
/* ------------------------------------------------------------------------- */

/*      HTshowDiags
**      -----------
**      Modified from Jonny G's version in ui/question.c
**
**      Writes one line per diagnostic record that carries additional
**      information to the target stream.
**
**      On entry,
**              target  stream that receives the text
**              d       NULL-terminated array of diagnostic records;
**                      a NULL array is treated as empty
*/
void HTshowDiags (HTStream * target, diagnosticRecord ** d)
{
    long i;

    if (d == NULL) return;

    for (i = 0; d[i] != NULL; i++) {
        if (d[i]->ADDINFO != NULL) {
            PUTS("Diagnostic code is ");
            PUTS(d[i]->DIAG);
            PUTC(' ');
            PUTS(d[i]->ADDINFO);
            PUTC('\n');
        }
    }
}

/*      Matrix of allowed characters in filenames
**      -----------------------------------------
**
**      acceptable[c] is YES for the bytes that WWW_from_WAIS copies
**      through unescaped; everything else is hex escaped.
*/
PRIVATE BOOL acceptable[256];
PRIVATE BOOL acceptable_inited = NO;

/*
**  Fill in the acceptable[] table. Only the first call does any work.
*/
PRIVATE void init_acceptable (void)
{
    unsigned int i;
    const char * good =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";

    if (acceptable_inited == YES) return;

    for (i = 0; i < 256; i++)
        acceptable[i] = NO;
    for (; *good; good++)
        acceptable[(unsigned char) *good] = YES;
    acceptable_inited = YES;
}

/*      Transform file identifier into WWW address
**      ------------------------------------------
**
**      The identifier is taken up to the first byte that is not greater
**      than ' ', prefixed with "file://", and the first ':' after the
**      prefix (expected to follow the host name) is removed.
**
**      On exit,
**              returns         nil if error (including a nil argument)
**                              pointer to malloced string (must be freed) if ok
*/
char * WWW_from_archie (char * file)
{
    char * end;
    char * result;
    char * colon;

    if (file == NULL) return NULL;

    for (end = file; *end > ' '; end++)         /* assumes ASCII encoding */
        ;

    /* "file://" (7) + identifier + NUL fits in 10 + length */
    if ((result = (char *) HT_MALLOC(10 + (end-file))) == NULL)
        HT_OUTOFMEM("result ");
    if (!result) return result;                 /* Malloc error */

    strcpy(result, "file://");
    strncat(result, file, end-file);

    colon = strchr(result+7, ':');              /* Expect colon after host */
    if (colon) {
        /* Move the tail, including its NUL, down over the colon */
        memmove(colon, colon + 1, strlen(colon));
    }
    return result;
} /* WWW_from_archie */

/*      Transform document identifier into URL
**      --------------------------------------
**
**      Each record of the WAIS id (type byte, one- or two-byte length,
**      payload) is written as "<type>=<payload>;" where payload bytes that
**      are not in acceptable[] are escaped as HEX_ESCAPE plus two hex digits.
**
**      The encoded form is limited to BIG-1 characters. The id comes from
**      the remote server, so every read is checked against docid->size and
**      every write against the buffer; a truncated record or an id whose
**      encoding does not fit is rejected instead of overrunning memory.
**
**      On exit,
**              returns         nil if error (nil, malformed or oversized id)
**                              pointer to malloced string (must be freed) if ok
*/
PRIVATE char hex [17] = "0123456789ABCDEF";

PRIVATE char * WWW_from_WAIS (any * docid)
{
    unsigned char buf[BIG];
    unsigned char * const limit = buf + BIG - 1;   /* last slot is for NUL */
    unsigned char * q = buf;
    char num[10];
    char * p;
    char * end;
    char * result;
    size_t numlen;
    int i, l;

    if (docid == NULL || (docid->bytes == NULL && docid->size != 0))
        return NULL;
    p = docid->bytes;
    end = p ? p + docid->size : p;

#ifdef HTDEBUG
    if (PROT_TRACE) {
        char *t;
        HTTRACE(PROT_TRACE, "HTLoadWAIS.. id (%d bytes) is " _ (int)docid->size);
        for (t = docid->bytes; t < end; t++) {
            if ((*t >= ' ') && (*t <= '~')) {   /* Assume ASCII! */
                HTTRACE(PROT_TRACE, "%c" _ *t);
            } else {
                HTTRACE(PROT_TRACE, "<%x>" _ (unsigned)*t);
            }
        }
        HTTRACE(PROT_TRACE, "\n");
    }
#endif /* HTDEBUG */

    while (p < end) {
        /* A record needs at least a type byte and a length byte */
        if (end - p < 2) goto bad_id;

        HTTRACE(PROT_TRACE, "............ Record type %d, length %d\n" _ (unsigned char) p[0] _ (unsigned char) p[1]);

        /* Record type in decimal; a char prints in at most 4 characters */
        sprintf(num, "%d", (int)*p);
        numlen = strlen(num);
        if ((size_t)(limit - q) < numlen + 1) goto bad_id;
        memcpy(q, num, numlen);
        q += numlen;
        p++;

        *q++ = '=';                             /* Separate */

        /* Length: one byte, or two bytes when the high bit is set */
        l = (int)((unsigned char)*p);
        p++;
        if (l > 127) {
            if (p >= end) goto bad_id;
            l = (l - 128) * 128;
            l = l + (int)((unsigned char)*p);
            p++;
        }
        if (end - p < l) goto bad_id;           /* payload runs past the id */

        for (i = 0; i < l; i++, p++) {
            unsigned char c = (unsigned char)*p;
            if (!acceptable[c]) {
                if (limit - q < 3) goto bad_id;
                *q++ = HEX_ESCAPE;
                *q++ = hex[c >> 4];
                *q++ = hex[c & 15];
            } else {
                if (limit - q < 1) goto bad_id;
                *q++ = c;
            }
        }

        if (limit - q < 1) goto bad_id;
        *q++ = ';';                             /* Terminate field */
    }
    *q = 0;                                     /* Terminate string */

    HTTRACE(PROT_TRACE, "HTLoadWAIS.. WWW form of id: %s\n" _ buf);

    if ((result = (char *) HT_MALLOC((q - buf) + 1)) == NULL)
        HT_OUTOFMEM("WWW_from_WAIS");
    if (!result) return NULL;                   /* Malloc error */
    strcpy(result, (char *) buf);
    return result;

bad_id:
    HTTRACE(PROT_TRACE, "HTLoadWAIS.. Bad or oversized document id\n");
    return NULL;
} /* WWW_from_WAIS */

/*      Transform URL into WAIS document identifier
**      -------------------------------------------
**
**      On entry,
**              docname         points to valid name produced originally by
**                              WWW_from_WAIS
**      On exit,
**              docid->size     is valid
**              docid->bytes    is malloced and must later be freed.
*/
PRIVATE any * WAIS_from_WWW (any * docid, char * docname)
{
    char *z;            /* Output pointer */
    char *sor;          /* Start of record - points to size field. */
    char *p;            /* Input pointer */
    char *q;            /* Poisition of "=" */
    char *s;            /* Position of semicolon */
    int n;              /* size */

    HTTRACE(PROT_TRACE, "HTLoadWAIS.. WWW id (to become WAIS id): %s\n" _ docname);
    for(n=0, p = docname; *p; p++) {    /* Count sizes of strings */
        n++;
        if (*p == ';') n--;             /* Not converted */
        else if (*p == HEX_ESCAPE) n=n-2;       /* Save two bytes */
        docid->size = n;
    }

    /* result record */
    if ((docid->bytes = (char *) HT_MALLOC(docid->size+32)) == NULL)
        HT_OUTOFMEM("docid->bytes");
    z = docid->bytes;

    for(p = docname; *p; ) {
        q = strchr(p, '=');
        if (!q) return 0;
        *q = '\0';
        *z++ = atoi(p);
        *q = '=';
        s = strchr(q, ';');     /* (Check only) */
        if (!s) return 0;       /* Bad! No ';'; */
        sor = z;                /* Remember where the size field was */
        z++;                    /* Skip record size for now */

        {
            int len;
            int tmp;
            for(p=q+1; *p!=';' ; ) {
                if (*p == HEX_ESCAPE) {
                    char c;
                    unsigned int b;
                    p++;
                    c = *p++;
                    b = HTAsciiHexToChar(c);
                    c = *p++;
                    if (!c) break;      /* Odd number of chars! */
                    *z++ = (b<<4) + HTAsciiHexToChar(c);
                } else {
                    *z++ = *p++;        /* Record */
                }
            }
            len = (z-sor-1);

            z = sor;
            if (len > 127) {
                tmp = (len / 128);
                len = len - (tmp * 128);
                tmp = tmp + 128;
                *z++ = (char)tmp;
                *z = (char)len;
            } else {
                *z = (char)len;
            }
            z++;
        }

        for(p=q+1; *p!=';' ; ) {
            if (*p == HEX_ESCAPE) {
                char c;
                unsigned int b;
                p++;
                c = *p++;
                b = HTAsciiHexToChar(c);
                c = *p++;
                if (!c) break;  /* Odd number of chars! */
                *z++ = (b<<4) + HTAsciiHexToChar(c);
            } else {
                *z++ = *p++;    /* Record */
            }
        }
        p++;                    /* After semicolon: start of next record */
    }

#ifdef OLD_CODE
    for(p = docname; *p; ) {    /* Convert of strings */
                                /* Record type */

        *z = 0;                 /* Initialize record type */
        while (*p >= '0' && *p <= '9') {
            *z = *z*10 + (*p++ - '0');  /* Decode decimal record type */
        }
        z++;
        if (*p != '=') return 0;
        q = p;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -