📄 apitest.cpp
字号:
/*
This is a simple test program for Onix that should be able to be run unmodified on most
platforms. It includes a simple text parser and a "choose a number" style interface.
Almost all of the interface calls are mapped directly to the Onix API which should give
you a good feel as to how the API is used and can be used for testing purposes.
The text parser is designed to take an ASCII text file and parse it into records and
words. The default record delimiter is an ASCII (decimal) 16. For test files, you
can get text files at http://www.gutenberg.net. Putting an ASCII (decimal) 16 between
paragraphs and saving the files to a plain text file is all that is needed to prepare
them for use with this test program.
Some simple calling sequences are as follows:
CREATE AN INDEX, INDEX A TEXT FILE, CLOSE INDEX
1) Create Index
2) Word Mode
Distributed Index: n
Store Text Data : y
Store Record ID: n
2) Open Index
<FileName>
3) Start Indexing Session
4) Index Text File
<FileName>
Store Text Data: y
Store Record ID: n
3) End Indexing Session
2) Close Index.
OPEN AN INDEX, CONDUCT A SEARCH, PRINT OUT THE FIRST FOUR RECORD NUMBERS, CLOSE INDEX
2) Open the index
<FileName>
5) Start Retrieval Session
7) Perform Query
Position To Store Results (1-50) : 1
Query: cat & dog
11) Current Hit
Vector To Look at: 1
12) Next Hit
Vector To Look at: 1
12) Next Hit
Vector To Look at: 1
12) Next Hit
Vector To Look at: 1
5) End retrieval Session
2) Close Index.
OPEN THE INDEX, AND RETRIEVE THE TEXT ASSOCIATED WITH A RECORD.
2) Open the index
<FileName>
5) Start Retrieval Session
21) Get the record text
Record of text to retrieve: 21
22) Get more record text (Perform this until finished.)
5) End Retrieval Session
2) Close the Index
OPEN THE INDEX AND DELETE A RECORD
2) Open the index
<FileName>
5) Start Retrieval Session
14) Delete Record
Record Number: 65004
5) End Retrieval Session
2) Close Index.
*/
// The following is just a define I use so I can keep all my test and
// demo code in my project. I simply comment and uncomment the
// __RUN__NOW__ define to select which demo to run. I often have
// several demos that I run and this really makes life easier.
//#define __RUN__NOW__
#ifdef __RUN__NOW__
#include "onixapi.h"
#include <stdio.h>
#include <string.h>
#include <time.h>
// PASSCODES
// ---------
#define PASSCODE1 0xdf83ffae // Change these to the passcodes you were given as
#define PASSCODE2 0x4e782363 // part of your evaluation or the final codes.
/*
This table tells us which characters are letters and belong to a word and which characters
are whitespace or need to be ignored. It is easy to modify this table to change which characters
are part of a word or not. This can be done by changing the values of the #defines below and by
modifying the table directly. Note, after the single entry for character 0 on the first row, the
table has 10 characters per row, making it easy to find any given character's entry.
NOTE: The current table uses the character with the decimal value of 16 as a record delimiter.
This can be changed by moving (or removing) the INCREMENTREC #define or changing its value.
*/
/* Character classes: these are the values stored in the character_type[] table. */
#define CHARACTER (1)       /* byte is part of a word */
#define WHITESPACE (2)      /* byte separates words */
#define NUMBER (3)          /* digit; scan() has no case for this class, so such bytes are skipped */
#define INCREMENTREC (4)    /* byte ends the current record */
/* Parser states used by scan(). */
#define EAT_WHITESPACE (1)  /* between words */
#define EAT_CHARACTERS (2)  /* inside a word */
/*
Lookup table giving the class (CHARACTER, WHITESPACE, NUMBER or INCREMENTREC)
of each of the 256 possible byte values; scan() indexes it with every byte it
reads. The first row holds entry 0 only, every following row holds ten
entries, and the last row holds entries 251-255. The index range of each row
is noted above it.
Entries 16, 42 and 156 are INCREMENTREC. Entries 48-57 are NUMBER. Entries
65-90 and 97-122 are CHARACTER. Everything else is WHITESPACE.
NOTE(review): in ASCII, 42 is '*'; the comment preceding the #defines only
mentions 16 as a record delimiter - confirm that 42 and 156 are intended.
*/
unsigned char character_type[256]=
{
/* 0 */
WHITESPACE,
/* 1-10 */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 11-20 (16 = INCREMENTREC) */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,INCREMENTREC,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 21-30 */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 31-40 */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 41-50 (42 = INCREMENTREC, NUMBER starts at 48) */
WHITESPACE,INCREMENTREC,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,NUMBER,NUMBER,NUMBER,
/* 51-60 (NUMBER ends at 57) */
NUMBER,NUMBER,NUMBER,NUMBER,NUMBER,NUMBER,NUMBER,WHITESPACE,WHITESPACE,WHITESPACE,
/* 61-70 (CHARACTER starts at 65) */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,
/* 71-80 */
CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,
/* 81-90 */
CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,
/* 91-100 (CHARACTER resumes at 97) */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,CHARACTER,CHARACTER,CHARACTER,CHARACTER,
/* 101-110 */
CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,
/* 111-120 */
CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,CHARACTER,
/* 121-130 (CHARACTER ends at 122) */
CHARACTER,CHARACTER,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 131-140 */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 141-150 */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 151-160 (156 = INCREMENTREC) */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,INCREMENTREC,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 161-170 */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 171-180 */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 181-190 */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 191-200 */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 201-210 */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 211-220 */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 221-230 */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 231-240 */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 241-250 */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,
/* 251-255 */
WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE,WHITESPACE
};
/*
The character_convert array is used to quickly convert upper case characters to
lower case. The array must be initialized by a call to init_character_convert()
before it is usable.
*/
char character_convert[256];
/*
Initializes the character_convert array.

First every one of the 256 entries is set to its own index, so that
character_convert['a'] == 'a' and bytes above 127 also map to themselves.
Then the entries for the upper case letters 'A'..'Z' are overwritten with the
corresponding lower case letters, so that character_convert['A'] == 'a'.

Takes no parameters and returns nothing; it only writes character_convert.
Calling it more than once is harmless, since every entry is simply rewritten.
*/
void init_character_convert(void)
{
    int code;

    /* Identity mapping for the whole table. The loop must cover all 256
       entries; stopping early leaves the upper entries at 0. */
    for (code = 0; code < 256; code++) {
        character_convert[code] = (char)code;
    }

    /* Fold upper case onto lower case (ASCII: 'A' is 65, 'a' is 97). */
    for (code = 'A'; code <= 'Z'; code++) {
        character_convert[code] = (char)(code - 'A' + 'a');
    }
}
/*
This is the word structure that is used to pass words into the indexing engine.
The only entries in the structure that currently need to be filled out is the word
itself (in the Word buffer) and the Length entry.
*/
// scan() writes each parsed word into Word.Word, sets Word.Length, and then
// passes &Word to ixIndexWord(); it also passes &Word to ixIncrementRecord().
WordTypeT Word;
/*
This is the scratch buffer which is used to build up the text for
the record data if it is going to be stored. This buffer assumes
that the record is under 16K in size. If it is larger than this,
then we will only store the first 16K. The Buff offset keeps track of
where we are in the scratch buffer. The StoreText is our flag telling
us if we are storing text or not.
*/
// NOTE(review): scan() tracks its buffer position in a local variable of its
// own, so nothing in the visible code uses this global - confirm whether any
// other code does.
size_t BuffOffset = 0;
// NOTE(review): 16284 elements, not 16384 (16K) as the comment above suggests -
// possibly a typo; confirm before changing.
UCharT ScratchBuff[16284];
// When BooleanTrue, scan() copies the bytes it reads into ScratchBuff and
// passes them to ixStoreRecordData().
BooleanT StoreText = BooleanFalse;
/*
Record ID Stuff....
These are the flags and buffers for storing the record IDs.
The record IDs here are generated inside the scan() function
and are quite simple minded. The record ID is simply the record
number with "RecID" prepended, i.e., the record ID for record
number 35 is "RecID35"
Record IDs can either be fixed or variable length. These test
routines allow either to be defined.
*/
// Receives the NUL-terminated record ID text that scan() formats with sprintf().
UCharT RecordIDBuff[80];
// Length that scan() passes to ixProcessRecordID() when FixedLengthRecordIDFlag is nonzero.
ULongT RecordIDLength = 0;
// Nonzero: scan() passes RecordIDLength as the ID length. Zero: it passes strlen() of the ID text.
ULongT FixedLengthRecordIDFlag = 0;
// Tested by scan() to decide whether to generate and process record IDs at all.
ULongT StoreRecordIDFlag = 0;
// Forwarded unchanged by scan() to ixProcessRecordID().
BooleanT IndexRecordIDsFlag = BooleanFalse;
// The current record number. Use for testing or debugging.
// scan() increments this once for every record delimiter it reads; the
// visible code never resets it.
unsigned long CurrentRecord;
/*
This is the main function used to parse a file. It is a simple parser which
considers a word to be a run of CHARACTER bytes bordered by bytes of another
class.

Two tables drive the parser. character_type[] gives the class of each byte
read (character, whitespace, record delimiter, number). character_convert[]
folds upper case to lower case and must have been set up with
init_character_convert() before scan() is called.

Per byte read:
  CHARACTER     - appended (lower-cased) to the global Word.
  WHITESPACE    - ends the word being collected, which is passed to
                  ixIndexWord().
  INCREMENTREC  - ends the word being collected, then ends the record: the
                  buffered record text is stored (if StoreText is set), a
                  record ID is processed (if StoreRecordIDFlag is set) and
                  ixIncrementRecord() is called.
  anything else - skipped; it neither extends nor ends the current word.

When StoreText is set, every byte read (delimiters included) is copied into
ScratchBuff, which is handed to ixStoreRecordData() each time it reaches
SCAN_TEXT_FLUSH_SIZE bytes, at each record delimiter, and once more at end of
file if anything is left.

At end of file a word that is still being collected is indexed, and, if record
IDs are being stored and the last record contained at least one word, a record
ID is processed for it and printed. ixIncrementRecord() is not called for that
last, undelimited record.

Parameters:
  Infile  - stream to read from; it is read to EOF and is not closed here.
  Indexer - indexing engine handle passed through to every ix*() call.

The status written by the ix*() calls is not inspected, and scan() returns
nothing, so indexing failures are not reported to the caller.

NOTE(review): nothing here limits a word to the capacity of Word.Word, whose
size is not visible from this file - a very long run of letters may overrun
it. Confirm against the WordTypeT declaration.
*/

/* Number of buffered text bytes at which the record text is flushed. */
#define SCAN_TEXT_FLUSH_SIZE 4096

/* Terminates the word collected in the global Word and hands it to the indexer. */
static void scan_index_word(OnixIndexingEngineT Indexer, int Length)
{
    StatusCodeT Status;

    Word.Word[Length] = 0;
    Word.Length = Length;
    ixIndexWord(Indexer, &Word, &Status);
}

/* Formats the ID "RecID<RecordNum>" into RecordIDBuff and passes it to the indexer. */
static void scan_process_record_id(OnixIndexingEngineT Indexer, unsigned long RecordNum)
{
    StatusCodeT Status;

    /* "RecID" plus the digits of an unsigned long always fits in the
       80-byte RecordIDBuff, so plain sprintf() cannot overflow here. */
    sprintf((char *)RecordIDBuff, "RecID%lu", RecordNum);

    if (FixedLengthRecordIDFlag) {
        ixProcessRecordID(Indexer, RecordIDBuff, RecordIDLength,
                          BooleanTrue, IndexRecordIDsFlag, &Status);
    }
    else {
        ixProcessRecordID(Indexer, RecordIDBuff, strlen((const char *)RecordIDBuff),
                          BooleanTrue, IndexRecordIDsFlag, &Status);
    }
}

/* Closes the current record: stores its buffered text, processes its ID, advances the indexer. */
static void scan_end_record(OnixIndexingEngineT Indexer, size_t TextLength, unsigned long RecordNum)
{
    StatusCodeT Status;

    if (StoreText == BooleanTrue) {
        ixStoreRecordData(Indexer, ScratchBuff, TextLength, &Status);
    }
    if (StoreRecordIDFlag) {
        scan_process_record_id(Indexer, RecordNum);
    }
    ixIncrementRecord(Indexer, &Word, &Status);
    CurrentRecord++;
}

void scan(FILE *Infile, OnixIndexingEngineT Indexer)
{
    /* Must be int, not char: getc() returns either EOF or the byte as an
       unsigned char value. A char cannot tell EOF from byte 0xFF, and a
       signed char would index the lookup tables with a negative value. */
    int ch;
    int place = 0;              /* number of bytes collected in Word.Word */
    size_t TextLength = 0;      /* number of bytes buffered in ScratchBuff */
    int state = EAT_WHITESPACE;
    unsigned long RecordNum = 1, WordNum = 1;
    StatusCodeT Status;

    while ((ch = getc(Infile)) != EOF) {
        /* Buffer the raw byte for record-text storage, flushing in chunks. */
        if (StoreText == BooleanTrue
            && TextLength < sizeof(ScratchBuff) / sizeof(ScratchBuff[0])) {
            ScratchBuff[TextLength++] = (UCharT)ch;
            if (TextLength == SCAN_TEXT_FLUSH_SIZE) {
                ixStoreRecordData(Indexer, ScratchBuff, TextLength, &Status);
                TextLength = 0;
            }
        }

        switch (character_type[ch]) {
        case CHARACTER:
            if (state == EAT_WHITESPACE) {
                /* First letter of a new word. */
                state = EAT_CHARACTERS;
                place = 0;
            }
            Word.Word[place++] = character_convert[ch];
            break;

        case WHITESPACE:
            if (state == EAT_CHARACTERS) {
                scan_index_word(Indexer, place);
                WordNum++;
                place = 0;
                state = EAT_WHITESPACE;
            }
            break;

        case INCREMENTREC:
            if (state == EAT_CHARACTERS) {
                /* The delimiter also terminates the word in progress. */
                scan_index_word(Indexer, place);
                state = EAT_WHITESPACE;
            }
            place = 0;
            scan_end_record(Indexer, TextLength, RecordNum);
            TextLength = 0;
            RecordNum++;
            WordNum = 1;
            break;

        default:
            /* Any other class (NUMBER in the current table) is skipped. */
            break;
        }
    }

    /* A file that ends in the middle of a word (no trailing whitespace or
       delimiter) would otherwise lose that last word. */
    if (state == EAT_CHARACTERS && place > 0) {
        scan_index_word(Indexer, place);
        WordNum++;
    }

    /* Store any last data that we may have not already stored before we bail. */
    if (StoreText == BooleanTrue && TextLength > 0) {
        ixStoreRecordData(Indexer, ScratchBuff, TextLength, &Status);
    }

    /* The final record has no delimiter after it; if it holds any words,
       give it a record ID in the same "RecID<n>" form as every other record. */
    if (StoreRecordIDFlag && WordNum > 1) {
        scan_process_record_id(Indexer, RecordNum);
        printf("\nRecord ID :%s", RecordIDBuff);
    }
} // End of function
/*
These are the sizes of the different buffers that are used during the indexing
process. Onix is extremely fast even when supplied with very small buffers.
In fact, we recommend that the three buffers be 60K in size.
*/
#define BUFFER1SIZE 60000
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -