📄 apitest.cpp

📁 These are precompiled demonstration programs of the Onix toolkit. They do not include the Onix tool
💻 CPP
📖 第 1 页 / 共 3 页
字号:
12 3 下一页

/*

  This is a simple test program for Onix that should be able to be run unmodified on most
  platforms.  It includes a simple text parser and a "choose a number" style interface.
  Almost all of the interface calls map directly to the Onix API, which should give
  you a good feel as to how the API is used and can be used for testing purposes.
  
	The text parser is designed to take an ASCII text file and parse it into records and
	words.  The default record delimiter is an ASCII (decimal) 16.  For test files, you
	can get text files at http://www.gutenberg.net.  Putting an ASCII (decimal) 16 between
	paragraphs and saving the files to a plain text file is all that is needed to prepare
	them for use with this test program.
	
	  Some simple calling sequences are as follows:
	  
		
		  CREATE AN INDEX, INDEX A TEXT FILE, CLOSE INDEX
		  
			1)	Create Index
			2) Word Mode
			Distributed Index: n
			Store Text Data : y
			Store Record ID: n
			2)	Open Index
			<FileName>
			3)	Start Indexing Session
			4)	Index Text File
			<FileName>
			Store Text Data: y
			Store Record ID: n
			3)	End Indexing Session
			2)	Close Index.
			
			  
				OPEN AN INDEX, CONDUCT A SEARCH, PRINT OUT THE FIRST FOUR RECORD NUMBERS,  CLOSE INDEX
				
				  2)	Open the index
				  <FileName>
				  5)	Start Retrieval Session
				  7)	Perform Query
				  Position To Store Results (1-50) : 1
				  Query: cat & dog
				  11)	Current Hit
				  Vector To Look at: 1
				  12)	Next Hit
				  Vector To Look at: 1
				  12)	Next Hit
				  Vector To Look at: 1
				  12)	Next Hit
				  Vector To Look at: 1
				  5)	End retrieval Session
				  2)	Close Index.
				  
					
					  
						OPEN THE INDEX, AND RETRIEVE THE TEXT ASSOCIATED WITH A RECORD.
						
						  2)	Open the index
						  <FileName>
						  5)	Start Retrieval Session
						  21)	Get the record text
						  Record of text to retrieve: 21
						  22)	Get more record text (Perform this until finished.) 		
						  5)	End Retrieval Session
						  2)	Close the Index
						  
							
							  
								OPEN THE INDEX AND DELETE A RECORD
								
								  2)	Open the index
								  <FileName>
								  5)	Start Retrieval Session
								  14)	Delete Record
								  Record Number: 65004
								  5)	End Retrieval Session
								  2)	Close Index.
								  
									
*/

// The following is just a define I use so I can keep all my test and
// demo code in my project.  I simply comment and uncomment the 
// __RUN__NOW__ define to select which demo to run.  I often have 
// several demos that I run and this really makes life easier.

//#define __RUN__NOW__
#ifdef	__RUN__NOW__


#include "onixapi.h"
#include <stdio.h>
#include <string.h>
#include <time.h>


// PASSCODES
// ---------
// Licence passcodes for the Onix toolkit.
// NOTE(review): the code that consumes these is not in this part of the
// file; presumably they are handed to the Onix API when an index is
// created or opened -- confirm against the call sites.

#define PASSCODE1 0xdf83ffae    // Change these to the passcodes you were given as
#define PASSCODE2 0x4e782363    // part of your evaluation or the final codes.

/*
This table tells us which characters are letters and belong to a word and which characters 
are whitespace or need to be ignored.  It is easy to modify this table to change which characters
are part of a word or not.	This can be done by changing the values of the #defines below and by
modifying the table directly.  Note, the table has 10 characters per row making it easy to find
any given character's entry.

  NOTE: The current table uses the character with the decimal value of 16 as a record delimiter
  (the entries for decimal 42 and 156 are also marked INCREMENTREC).
  This can be changed by moving (or removing) the INCREMENTREC #define or changing its value.
*/

// Character classes: the values stored in character_type[] below.
#define 	CHARACTER		(1)	// byte is part of a word
#define 	WHITESPACE		(2)	// byte separates words
#define 	NUMBER			(3)	// byte is a decimal digit ('0'-'9' in the table)
#define 	INCREMENTREC	(4)	// byte ends the current record

// Parser states used by scan().
#define 	EAT_WHITESPACE	(1)	// between words, skipping separators
#define 	EAT_CHARACTERS	(2)	// inside a word, accumulating characters


// Maps every byte value (0-255) to one of the character classes defined
// above.  Entry 0 sits on its own line and every following row holds ten
// entries; the trailing comment on each row gives the byte values it covers.
// Three entries are INCREMENTREC: 16, 42 ('*') and 156.
// NOTE(review): the surrounding comments only mention 16 as the record
// delimiter -- confirm that 42 and 156 are intentional.
unsigned char character_type[256]=
{
		  WHITESPACE, // 0
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 1-10
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,INCREMENTREC,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 11-20 (16 = record delimiter)
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 21-30
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 31-40
			  WHITESPACE,INCREMENTREC,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,NUMBER,NUMBER,NUMBER, // 41-50 (42 = '*', 48-50 = '0'-'2')
			  NUMBER,NUMBER,NUMBER,NUMBER,NUMBER,NUMBER,NUMBER,WHITESPACE,WHITESPACE,WHITESPACE, // 51-60 (51-57 = '3'-'9')
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER, // 61-70 (65-70 = 'A'-'F')
			  CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER, // 71-80 ('G'-'P')
			  CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER, // 81-90 ('Q'-'Z')
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,CHARACTER,CHARACTER,CHARACTER,CHARACTER, // 91-100 (97-100 = 'a'-'d')
			  CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER, // 101-110 ('e'-'n')
			  CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER, // 111-120 ('o'-'x')
			  CHARACTER,CHARACTER,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 121-130 (121-122 = 'y','z')
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 131-140
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 141-150
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,INCREMENTREC,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 151-160 (156 = INCREMENTREC)
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 161-170
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 171-180
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 181-190
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 191-200
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 201-210
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 211-220
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 221-230
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 231-240
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE, // 241-250
			  WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE // 251-255
};


/* 
The character_convert array is used to quickly convert upper case characters to 
lower case. The array must be initialized by a call to init_character_convert() 
before it is usable.
*/

char character_convert[256];


/*
This function initializes the character_convert array.  First it fills the
whole table (all 256 entries) so that every index maps to its own character,
e.g. character_convert['a'] == 'a'.  Then it overwrites the entries for the
ASCII upper case letters 'A'-'Z' so that they map to their lower case
counterparts, e.g. character_convert['A'] == 'a'.

The identity pass previously stopped at index 128, which left entries
129-255 at zero instead of mapping them to themselves.
*/

void init_character_convert()
{
	int index;

	/* Identity mapping for every possible byte value. */
	for (index = 0; index < 256; index++)
		character_convert[index] = (char)index;

	/* Fold ASCII upper case onto lower case ('a' - 'A' == 32). */
	for (index = 'A'; index <= 'Z'; index++)
		character_convert[index] = (char)(index + ('a' - 'A'));
}

/*
This is the word structure that is used to pass words into the indexing engine.
The only entries in the structure that currently need to be filled out are the word
itself (in the Word buffer) and the Length entry.  scan() reuses this single
global for every word it hands to ixIndexWord().
*/
WordTypeT Word;



/*
This is the scratch buffer which is used to build up the text for
the record data if it is going to be stored.  StoreText is our flag
telling us if we are storing text or not.

ScratchBuff holds 16284 elements.  scan() passes its contents to
ixStoreRecordData() each time 4096 bytes have accumulated and again at
every record delimiter, then starts filling from the beginning again.

NOTE(review): this comment used to say that only the first 16K of a
record is stored.  With the 4096-byte flushing in scan() that limit does
not appear to be reached -- confirm.  16284 may also be a typo for 16384.

BuffOffset is meant to track the current position in the scratch buffer.
scan() declares a local variable of the same name that shadows this
global, so scan() itself never reads or writes the global.
*/
size_t		BuffOffset = 0;
UCharT		ScratchBuff[16284];  
BooleanT	StoreText = BooleanFalse;

/* 
Record ID Stuff....
These are the flags and buffers for storing the record IDs.
The record IDs here are generated inside the scan() function
and are quite simple minded.  The record ID is simply the record
number with "RecID" prepended.  I.e., the record ID for record 
number 35 is "RecID35".  (The ID scan() writes for a final record that
is not closed by a delimiter is formatted differently: the record
number followed by '*'.)

  Record IDs can either be fixed or variable length.  These test
  routines allow either to be defined.

  RecordIDBuff            - buffer scan() formats each record ID into.
  RecordIDLength          - length passed to ixProcessRecordID() when
                            FixedLengthRecordIDFlag is non-zero.
  FixedLengthRecordIDFlag - non-zero: pass RecordIDLength as the ID length;
                            zero: pass strlen() of the formatted ID.
  StoreRecordIDFlag       - non-zero: scan() generates and submits a record
                            ID for each record.
  IndexRecordIDsFlag      - forwarded unchanged to ixProcessRecordID().
*/
UCharT		RecordIDBuff[80];
ULongT		RecordIDLength = 0;
ULongT		FixedLengthRecordIDFlag = 0;
ULongT		StoreRecordIDFlag = 0;
BooleanT	IndexRecordIDsFlag = BooleanFalse;

// The current record number.  Use for testing or debugging.
// scan() increments it once for every record delimiter it encounters.
unsigned long CurrentRecord;


/*
This is the main function used to parse a file.  This is a simple
parser which considers a word to be a string of characters boardered
by non-characters.	

  Two tables are used for the indexing process.  The first is the character_type
  array which defines the type each byte read in falls into (character,
  whitespace, record delimiter, number, etc.)  The second is the character_convert
  array which is used to transform upper case characters to lower case characters.
  
	scan() also stores the record text if chosen and also can store a record ID if
	desired.
	
	  As you might notice, the parser simply switches state between characters, whitespace,
	  and record delimiters.	While a simple parser, it is extremly fast.
	  
*/

void scan(FILE *Infile, OnixIndexingEngineT Indexer)
{
	char	ch;
	int 	place = 0;
	size_t	BuffOffset = 0;
	int 	state = EAT_WHITESPACE;
	unsigned long RecordNum = 1, WordNum = 1;
	StatusCodeT Status;
		  
		  
	while (EOF != (ch=getc(Infile)) )	 {
		
		if(StoreText == BooleanTrue && BuffOffset < 16284) {
			ScratchBuff[BuffOffset] = ch;
			BuffOffset ++;
			if(BuffOffset == 4096) {
				ixStoreRecordData(Indexer,ScratchBuff,BuffOffset,&Status);
				BuffOffset = 0;
			}
		}
		
Re_do:	  
		switch (state)
		{
		case EAT_CHARACTERS: 
			switch (character_type[ch])  {
			case CHARACTER: 	Word.Word[place++]=character_convert[ch];
				break;
				
			case WHITESPACE:	state=EAT_WHITESPACE;
				Word.Word[place]=0;
				Word.Length=place;
				ixIndexWord(Indexer,&Word,&Status);
				WordNum++;
				
				place=0;
				goto Re_do;
				
			case INCREMENTREC:	state=EAT_WHITESPACE;
				Word.Word[place]=0;
				Word.Length=place;
				ixIndexWord(Indexer,&Word,&Status);
				if(StoreText == BooleanTrue) {
					ixStoreRecordData(Indexer,ScratchBuff,BuffOffset,&Status);
				}
				
				if(StoreRecordIDFlag) {
					sprintf((char *)RecordIDBuff,"RecID%ld",RecordNum,&Status);
					if(FixedLengthRecordIDFlag) {
						ixProcessRecordID(Indexer,RecordIDBuff,RecordIDLength,BooleanTrue, IndexRecordIDsFlag,&Status);
					}
					else {
						ixProcessRecordID(Indexer,RecordIDBuff,strlen((const char *)RecordIDBuff),BooleanTrue, IndexRecordIDsFlag, &Status);
					}
				}
				BuffOffset = 0;
				ixIncrementRecord(Indexer,&Word,&Status);
				RecordNum ++;
				WordNum = 1;
				place=0;
				CurrentRecord++;
				break;
												default: break;
			};
			break;
			
			case EAT_WHITESPACE: 
				switch (character_type[ch])  {
				case CHARACTER: state=EAT_CHARACTERS;
					place=0;
					goto Re_do;
					
				case WHITESPACE:break;
					
				case INCREMENTREC:state=EAT_WHITESPACE;
					place=0;
					if(StoreText == BooleanTrue) {
						ixStoreRecordData(Indexer,ScratchBuff,BuffOffset,&Status);
					}
					if(StoreRecordIDFlag) {
						sprintf((char *)RecordIDBuff,"RecID%ld",RecordNum);
						if(FixedLengthRecordIDFlag) {
							ixProcessRecordID(Indexer,RecordIDBuff,RecordIDLength,BooleanTrue, IndexRecordIDsFlag, &Status);
						}
						else {
							ixProcessRecordID(Indexer,RecordIDBuff,strlen((const char *)RecordIDBuff),BooleanTrue, IndexRecordIDsFlag, &Status);
						}
					}
					BuffOffset = 0;
					ixIncrementRecord(Indexer,&Word,&Status);
					CurrentRecord++;
					RecordNum ++;
					WordNum = 1;
					break;
				default: break;
				};
				break;
		};	// end of switch (state)
	}  // end of while
		  
	
	   /*
	   Store any last data that we may have not already 
	   stored before we bail.
	*/
	if(StoreText == BooleanTrue) {
		if(BuffOffset > 0) {
			ixStoreRecordData(Indexer,ScratchBuff,BuffOffset,&Status);
		}
	}
	
	// If we have anything that we still need to store or whatnot for the
	// final record, be sure we add it here.
	if(StoreRecordIDFlag == BooleanTrue && WordNum > 1) {
		sprintf((char *)RecordIDBuff,"%ld*",RecordNum);
		if(FixedLengthRecordIDFlag) {
			ixProcessRecordID(Indexer,RecordIDBuff,RecordIDLength,BooleanTrue, IndexRecordIDsFlag, &Status);
			printf("\nRecord ID :%s",RecordIDBuff);
		}
		else {
			ixProcessRecordID(Indexer,RecordIDBuff,strlen((const char *)RecordIDBuff),BooleanTrue, IndexRecordIDsFlag, &Status);
			printf("\nRecord ID :%s",RecordIDBuff);
		}
	}
	
} // End of function


  /*
  These are the sizes of the different buffers that are used during the indexing
  process.  Onix is extremely fast even when supplied with very small buffers;
  even so, we recommend that the three buffers be 60K in size.
  */
  
#define BUFFER1SIZE 60000
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -