📄 demo1.cpp
字号:
//----------------------------------------------------------------------
// Onix Toolkit Demo
//----------------------------------------------------------------------
//
// This is a small demonstration of the Onix Full Text Indexing and
// Retrieval Toolkit. It utilizes only a few of the many features of
// the Onix toolkit but does allow you to get a feel for the speed and
// flexibility that Onix provides.
//
// The program has a few basic features.
//
// Select Index: Allows you to select a new index (which is created)
// or open up an existing index.
//
// Index Files: Allows you to select a directory and index all text
// files in that directory (.txt extensions). It also allows
// you to index individual files.
//
// Search Index: Allows you to make queries of the index and display
// the returned records.
//
// One thing to be aware of is that each indexing session is not finished
// until all the files indexed are *merged* into the main index. This
// is not done until after you quit indexing new files.
// The following is just a define I use so I can keep all my test and
// demo code in my project. I simply comment and uncomment the
// __RUN__NOW__ define to select which demo to run. I often have
// several demos that I run and this really makes life easier.
//#define __RUN__NOW__
#ifdef __RUN__NOW__
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <windows.h>
#include "onixapi.h"
// PASSCODES
// ---------
#define PASSCODE1 0xdf83ffae // Change these to the passcodes you were given as
#define PASSCODE2 0x4e782363 // part of your evaluation or the final codes.
// GLOBALS
// -------
// Onix handles and the shared status value used by the functions below.
OnixIndexingEngineT theIndexingSession; // Assigned from ixStartIndexingSession in prepare_indexer
StatusCodeT theStatus; // Written by the Onix calls; negative values are treated as errors
OnixIndexManagerT theIndexManager; // Manager handle passed to the ix* index calls
WordTypeT WordS; // NOTE(review): not used in the visible part of this file
ULongT Record; // NOTE(review): presumably the current record number - confirm
// Flags
UCharT Record_Break1; // First character that marks a record break
UCharT Record_Break2; // Second character that marks a record break
UCharT Record_Break3; // Third character that marks a record break
UCharT Store_Text; // Boolean - store text in index?
UCharT New_Index; // Boolean - create new index?
UCharT Verbose_Mode; // Boolean - print diagnostics?
UCharT Recurse; // Are we recursing a directory?
UCharT isFirst; // Is this the first file indexed?
// Fixed-size string buffers (255 bytes each, including the terminator).
UCharT IndexPath[255]; // Path to the index to be opened or
// created.
UCharT FilePath[255]; // Path to file(s) to index
UCharT Query[255]; // Boolean Query we are searching for
UCharT RQuery[255]; // Ranked Query we are searching for
char Choice[255]; // Temp string we store menu results in
int pick = 0; // Temp int we store menu results in. We
// frequently have to set this to -1 so
// we don't escape our loop. (See main_menu)
int State = 0; // Flag to tell us what "state" we were in.
// The possible states are the following (values stored in State):
#define inNothing 0
#define inQuery 1
#define inIndexing 2
//------------------------------------------------------------------------------------
// General Utility Functions
//------------------------------------------------------------------------------------
// StripControls
// -------------
//
// Cleans up a line of user input in place:
//   1. Trailing spaces and control characters (anything <= ASCII 32) are
//      removed by truncating the string.
//   2. Any control character (< ASCII 32) that remains inside the string
//      is replaced with a space.
//
// Bytes are examined as unsigned char so that values >= 128 (for example
// accented letters in a path) are never mistaken for control characters on
// compilers where plain char is signed. A NULL pointer is ignored.
void StripControls( char *string )
{
    size_t len;
    size_t i;

    if ( string == NULL )
        return;

    // Remove *trailing* blanks/controls by overwriting them with 0. Tracking
    // a length rather than a pointer means we never form an address before
    // the start of the buffer, even for an empty or all-blank string.
    len = strlen( string );
    while ( ( len > 0 ) && ( (unsigned char) string[len - 1] <= 32 ) ) {
        len--;
        string[len] = 0;
    }

    // Convert the remaining embedded control characters to spaces.
    for ( i = 0; i < len; i++ ) {
        if ( (unsigned char) string[i] < 32 ) {
            string[i] = ' ';
        }
    }
}
// LowerString
// -----------
//
// Since not all compilers have strlwr, we wrote this function. It converts a
// string to lowercase in place and returns the same pointer so the call can
// be nested in an expression. We use it to normalize all the words we index;
// in some cases you may not wish to do this.
//
// Each byte is passed to tolower() as an unsigned char: the <ctype.h>
// functions are only defined for values representable as unsigned char (or
// EOF), so passing a negative plain char would be undefined behaviour.
// tolower() already leaves non-uppercase characters unchanged, so no
// separate isupper() test is needed. A NULL pointer is returned unchanged.
char *LowerString( char *string )
{
    char *loc;

    if ( string == NULL )
        return NULL;

    for ( loc = string; *loc != 0; loc++ ) {
        *loc = (char) tolower( (unsigned char) *loc );
    }
    return string;
}
// CompareExt_EndsWith
// -------------------
//
// Private helper for CompareExt: returns 1 when the string (of length slen)
// ends with ext, 0 otherwise. A NULL or empty ext never matches.
static int CompareExt_EndsWith( const char *string, size_t slen, const char *ext )
{
    size_t elen;

    if ( ext == NULL )
        return 0;

    elen = strlen( ext );
    if ( ( elen == 0 ) || ( elen > slen ) )
        return 0;

    return ( memcmp( string + ( slen - elen ), ext, elen ) == 0 ) ? 1 : 0;
}

// CompareExt
// ----------
//
// Compares the given extension(s) with the end of a given string. Pass NULL
// for the extensions you don't want to use. You can check up to three
// extensions simultaneously.
//
// Returns true (1) if the string ends with any of the supplied extensions
// (a string that is exactly equal to an extension also counts) and false (0)
// otherwise. The comparison is case sensitive. A NULL string, a NULL or
// empty extension, and a string shorter than the extension all yield 0;
// every path through the function returns a value.
int CompareExt( char *string, char *ext1, char *ext2, char *ext3 )
{
    size_t slen;

    if ( string == NULL )
        return 0;

    // Measure the string once and reuse the length for all three checks.
    slen = strlen( string );

    if ( CompareExt_EndsWith( string, slen, ext1 ) )
        return 1;
    if ( CompareExt_EndsWith( string, slen, ext2 ) )
        return 1;
    if ( CompareExt_EndsWith( string, slen, ext3 ) )
        return 1;

    // None of the supplied extensions matched.
    return 0;
}
// isdir
// -----
//
// Reports whether a find result is a real sub-directory: returns 1 when the
// entry has the directory attribute and is neither "." nor "..", otherwise
// returns 0. This relies on the Win32 find data structure, so a Unix port
// needs its own version of this function.
int isdir( WIN32_FIND_DATA &filedata )
{
    // Not a directory at all.
    if ( ( filedata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ) == 0 )
        return 0;

    // The current-directory entry is skipped.
    if ( lstrcmp( filedata.cFileName, __TEXT(".") ) == 0 )
        return 0;

    // The parent-directory entry is skipped.
    if ( lstrcmp( filedata.cFileName, __TEXT("..") ) == 0 )
        return 0;

    return 1;
}
// isignored
// ---------
//
// This tells whether a file is one of the special directories ("." or
// ".."). We need this if we are traversing sub-directories. If you are
// porting this demo to Unix you'll need to modify this function.
//
// Returns 1 when the entry name is "." or "..", and 0 otherwise. The two
// name tests must be joined with OR: a name can never equal both strings at
// once, so joining them with AND would make the function always return 0.
//
// Right now we don't use this as we added the CompareExt which handles
// this automatically. However we originally wrote it to index all files
// regardless of extension. If you wish to do this replace calls to
// CompareExt with this.
int isignored( WIN32_FIND_DATA &filedata )
{
    return ( ( lstrcmp( filedata.cFileName, __TEXT(".") ) == 0 ) ||
             ( lstrcmp( filedata.cFileName, __TEXT("..") ) == 0 ) );
}
//------------------------------------------------------------------------------------
// Onix Initialization Functions
//------------------------------------------------------------------------------------
// set_globals
// -----------
//
// Resets the demo's global settings to their defaults. Onix can be used in
// quite a few ways; since this demo is an introduction we pick the settings
// here, and they can be changed in this one place.
void set_globals()
{
    // Menu scratch values: no choice typed yet, and pick is -1 (not 0).
    pick = -1;
    Choice[0] = '\0';

    // Path and query buffers start out as empty strings so there is no
    // default path or query. Some functions check whether these strings
    // are empty, so clearing them explicitly is the safe thing to do.
    RQuery[0] = '\0';
    Query[0] = '\0';
    FilePath[0] = '\0';
    IndexPath[0] = '\0';

    // Record break characters: these determine where one record ends and
    // the next begins inside a document. The sample text marks breaks
    // explicitly with ASCII 16; line feed and carriage return are included
    // because most documents use those. Change them as you wish (the
    // function indexfile is where they are consumed).
    Record_Break3 = '\r';
    Record_Break2 = '\n';
    Record_Break1 = 16;

    // Indexing behaviour: create a new index, store the document text in
    // the index itself (bigger and slower, but located records are much
    // easier to display), and print diagnostics.
    Verbose_Mode = true;
    Store_Text = true;
    New_Index = true;
}
// prepare_indexer
// ---------------
//
// Sets up the indexing session and opens the index named by the global
// IndexPath. When the global New_Index is set, the index is created first
// and then opened; otherwise the existing index is simply opened.
//
// We used globals simply because it made for a somewhat more general
// solution. Normally one wants to pass arguments and avoid globals as much
// as possible. However for this demo we conceived of it as acting on a few
// "states" that are constantly displayed. (See the description of globals
// above.)
//
// Reads:   theStatus, theIndexManager, IndexPath, New_Index
// Writes:  theStatus, theIndexingSession, isFirst, IndexPath (cleaned up)
// Returns: 0 on success, -1 on any failure (after printing the status code).
int prepare_indexer()
{
    OnixIndexCreationParamsT IndexCreationParams;
    IndexModeT IndexingMode;

    // After making any Onix call *always* check the Status state. This first
    // test looks at whatever status was left by the caller's earlier calls.
    if ( theStatus < 0 ) {
        printf( "\nError Creating Index Manager: %d \n", theStatus);
        return -1;
    }

    // There may be carriage returns and so forth in the name of the index,
    // so we start by cleaning that up.
    StripControls( (char*) IndexPath );

    // Check to see if we are creating a new index rather than simply
    // adding files to an existing index.
    if ( New_Index ) {
        // The first thing we do is create the parameter object that
        // describes the index we are about to create.
        IndexCreationParams = ixCreateIndexCreationParams( &theStatus);

        // Do not configure or use the parameter object if it could not be
        // created.
        if ( theStatus < 0 ) {
            printf( "\nError Creating Index Creation Params: %d \n", theStatus);
            return -1;
        }

        IndexingMode = (IndexModeT) 2;
        ixSetIndexCreationParams( IndexCreationParams, ixSetIndexMode, &IndexingMode);
        ixSetIndexCreationParams( IndexCreationParams, ixSetSinglePointIndex, NULL);
        ixSetIndexCreationParams( IndexCreationParams, ixSetVariableLengthRecordInfo, NULL);
        ixSetIndexCreationParams( IndexCreationParams, ixSet32BitRecordInfoOffset, NULL );
        ixSetIndexCreationParams( IndexCreationParams, ixSetIndexFileName, IndexPath );
        ixSetIndexCreationParams( IndexCreationParams, ixSetRankingMethodTwo, NULL );

        ixCreateIndexEx( theIndexManager, IndexCreationParams, &theStatus );

        // The parameter object is released before the status is examined so
        // it is freed on both the success and the failure path.
        ixDeleteIndexCreationParams( IndexCreationParams );
        if ( theStatus < 0 ) {
            printf( "\nError Creating Index: %d \n", theStatus);
            return -1;
        }
    }

    // Open the index
    ixOpenIndex( theIndexManager, (char*) IndexPath, &theStatus );
    if ( theStatus < 0 ) {
        printf( "\nError Opening Index: %d \n", theStatus);
        return -1;
    }

    // Initially we start with an indexing session. We have another
    // function (prepare_retrieval) that does much the same as this
    // function, but with a retrieval session.
    // NOTE(review): if starting the session fails, the index opened above is
    // left open; confirm whether callers are expected to close it.
    theIndexingSession = ixStartIndexingSession( theIndexManager, &theStatus);
    if ( theStatus < 0 ) {
        printf( "\nError Starting Index Session: %d \n", theStatus);
        return -1;
    }

    // We have selected a new index and thus a new indexing session.
    // So we set a flag that tells us that the next file indexed is
    // the first file to be indexed. See indexfile for more info.
    isFirst = true;
    return 0;
}
// close_indexer
// -------------
//
// Closes down the indexing objects. Note that even though we may have added
// data to the index, the new index information isn't fully merged with the
// index until ixEndIndexingSession is called. For this reason we have a
// separate function called "SpeedTest" designed to give you a more complete
// view of the indexing speed.
int close_indexer()
{
// Now that we've finished indexing the file, we close things down
// Note that progress is for threaded apps that wish to display a progress
// bar during the merging process done at this time.
size_t progress = 0;
ixEndIndexingSession( theIndexManager, theIndexingSession, &progress, 100, &theStatus);
if ( theStatus < 0 ) {
printf( "\nError Ending Index Session: %d \n", theStatus);
}
ixCloseIndex( theIndexManager, &theStatus);
if ( theStatus < 0 ) {
printf( "\nError Closing Index: %d \n", theStatus);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -