// indexcmd.cpp
//----------------------------------------------------------------------
// Index Command Demo
//----------------------------------------------------------------------
//
// This is a demonstration of the Onix Full Text Indexer and Retrieval
// Toolkit. It demonstrates how to write a DOS command that can be
// called to index files. We've tried to keep it relatively simple.
// A few of the features we added only work in a DOS/Windows
// environment. Feel free to modify this program to meet your own
// needs. It is mainly here to demonstrate how you might integrate
// Onix into your own projects and is not meant as a full featured
// command line program. While it is designed to be run from the
// command line, it will also work in most IDEs in a self-running
// mode. (See the function test_code for setting up the self-running
// options)
//
// PLATFORM DEPENDENCIES
//
// We originally wrote this as a demo for windows, but added in
// posix functionality so that it should work with all unixes and
// most platforms with posix compliance. To set it up so that it
// works with standard windows calls define _WINDOWS_VERSION_.
// The only real place for dependencies is in the pathnames and
// in scandir().
//
//
//
// The calling features of the command are as follows:
//
// index [-n] [-v] [-t] [-r] [-b #] [-a #] [-c #] [help] indexname filelist
//
// -n (optional) create new index
// -v (optional) verbose mode (displays diagnostic info)
// -t (optional) store text in index
// -r (optional) recurse subdirectories
// -b # (optional) character to break on
// -a # (optional) second character to break on
// -c # (optional) third character to break on
//
// help print help
// indexname the name of the index to be created or opened
// filelist a list of file names to be indexed
//
//
// HISTORY
// -------
//
// 06.23.00 CG Initial version as a test program.
// 02.15.01 CG Modified to be a command line demo program.
// 06.21.01 CG Cleaned up some code and made directory scanning cross
// platform.
// 11.01.01 CG Cleaned up further and unified the various versions
// hanging around.
//--------------------------------------------------------------------
// The following is just a define I use so I can keep all my test and
// demo programs in one project. I simply comment and uncomment the
// __RUN__NOW__ define to select which demo to run. I have several
// automated test programs that I use to check for various errors and
// this really speeds things up.
//#define __RUN__NOW__
#ifdef __RUN__NOW__
#define _WINDOWS_VERSION_
#ifdef _WINDOWS_VERSION_
#include <windows.h>
#else
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#endif
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "onixapi.h"
// PASSCODES
// ---------
#define PASSCODE1 0xdf83ffae // Change these to the passcodes you were given as
#define PASSCODE2 0x4e782363 // part of your evaluation or the final codes.
// GLOBALS
// -------
// Shared state for the whole program. The Onix handles are set up by
// prepare_indexer() and released by close_indexer().
OnixIndexingEngineT theIndexingSession; // Indexing session handle returned by ixStartIndexingSession
OnixIndexManagerT theIndexManager; // Index manager handle returned by ixCreateIndexManager
StatusCodeT theStatus; // Status written by each Onix call; the code in this file
// treats any negative value as an error
WordTypeT WordS; // Word structure filled in by indexword() and passed to Onix
ULongT Record; // The record number we are indexing (not referenced in the code visible here)
// Flags
UCharT Record_Break1; // First character that ends a record
UCharT Record_Break2; // Second character that ends a record
UCharT Record_Break3; // Third character that ends a record
UCharT Store_Text; // Boolean - store the record text in the index (otherwise the file name is stored)
UCharT New_Index; // Boolean - create a new index before opening it?
UCharT Verbose_Mode; // Boolean - print diagnostics?
UCharT Recurse; // Boolean - recurse subdirectories? (not referenced in the code visible here)
UCharT IndexPath[255]; // Path to the index to be opened or
// created.
UCharT *FilePath[255]; // Array of pointers to the path(s) of the file(s) to index
int first_file = 1; // Nonzero until indexfile() has been called once. indexfile() uses it to
// skip the record increment for the first file, so no empty record is created.
// prepare_indexer
// ---------------
//
// Creates the Onix index manager, optionally creates a new index (when the
// New_Index flag is set), opens the index named by IndexPath and starts an
// indexing session. The resulting handles are left in the globals
// theIndexManager and theIndexingSession.
//
// Every Onix call reports through theStatus; a negative status is treated as
// an error, a message is printed and the function gives up.
//
// Returns 0 if successful and -1 if there was an error.
//
// NOTE(review): on a failure after the index manager has been created, the
// manager is not released here - confirm whether callers are expected to
// clean up.
int prepare_indexer()
{
    OnixIndexCreationParamsT IndexCreationParams;
    IndexModeT IndexingMode;

    // PASSCODE1 / PASSCODE2 must be set to the passcodes you were given.
    // Placeholder values will be flagged as an error by this call. If you
    // don't know the passcodes, please call Lextek at 801-375-8332.
    theIndexManager = ixCreateIndexManager(PASSCODE1, PASSCODE2, &theStatus);
    if ( theStatus < 0 ) {
        printf( "\nError Creating Index Manager: %d \n", theStatus);
        return -1;
    }

    // Only build a fresh index when asked to; otherwise we simply add files
    // to the existing index opened below.
    if ( New_Index ) {
        IndexCreationParams = ixCreateIndexCreationParams( &theStatus);
        // Don't configure or use a parameter object that failed to be created.
        if ( theStatus < 0 ) {
            printf( "\nError Creating Index: %d \n", theStatus);
            return -1;
        }

        // Mode value 2 is carried over unchanged from the demo; its meaning
        // is defined by the Onix headers - TODO confirm.
        IndexingMode = (IndexModeT) 2;
        ixSetIndexCreationParams( IndexCreationParams, ixSetIndexMode, &IndexingMode);
        ixSetIndexCreationParams( IndexCreationParams, ixSetSinglePointIndex, NULL);
        ixSetIndexCreationParams( IndexCreationParams, ixSetVariableLengthRecordInfo, NULL);
        ixSetIndexCreationParams( IndexCreationParams, ixSet32BitRecordInfoOffset, NULL );
        ixSetIndexCreationParams( IndexCreationParams, ixSetIndexFileName, IndexPath );
        ixSetIndexCreationParams( IndexCreationParams, ixSetRankingMethodTwo, NULL );

        ixCreateIndexEx( theIndexManager, IndexCreationParams, &theStatus );
        // The parameter object is released before the status is examined so
        // that it is freed on both the success and the failure path. This
        // relies on ixDeleteIndexCreationParams not touching theStatus (it is
        // not passed a status pointer).
        ixDeleteIndexCreationParams( IndexCreationParams );
        if ( theStatus < 0 ) {
            printf( "\nError Creating Index: %d \n", theStatus);
            return -1;
        }
    }

    // Open the index
    ixOpenIndex( theIndexManager, (char*) IndexPath, &theStatus );
    if ( theStatus < 0 ) {
        printf( "\nError Opening Index: %d \n", theStatus);
        return -1;
    }

    // Start the session that all later indexing calls go through
    theIndexingSession = ixStartIndexingSession( theIndexManager, &theStatus);
    if ( theStatus < 0 ) {
        printf( "\nError Starting Index Session: %d \n", theStatus);
        return -1;
    }

    return 0;
}
// close_indexer
// -------------
//
// Shuts indexing down: ends the indexing session (which merges the indexes),
// closes the index and deletes the index manager. All three steps are always
// attempted, even if an earlier one fails, so that as much as possible gets
// released.
//
// Returns 0 if every step succeeded and -1 if any step reported a negative
// status (an error message is printed for each failing step).
int close_indexer()
{
    int result = 0;

    // The progress variable is for multithreaded programs that display a
    // progress bar during the merging of the indexes done by
    // ixEndIndexingSession. We don't read it here.
    size_t progress = 0;

    printf("\nMerging indexes\n");

    // NOTE(review): the meaning of the literal 100 is defined by the Onix
    // API - presumably the progress scale; confirm against onixapi.h.
    ixEndIndexingSession( theIndexManager, theIndexingSession, &progress, 100, &theStatus);
    if ( theStatus < 0 ) {
        printf( "\nError Ending Index Session: %d \n", theStatus);
        result = -1;
    }

    ixCloseIndex( theIndexManager, &theStatus);
    if ( theStatus < 0 ) {
        printf( "\nError Closing Index: %d \n", theStatus);
        result = -1;
    }

    ixDeleteIndexManager(theIndexManager, &theStatus);
    if ( theStatus < 0 ) {
        printf( "\nError Destroying Index Manager: %d \n", theStatus);
        result = -1;
    }

    return result;
}
// lowerword
// ---------
//
// Converts a null terminated string to lower case, in place. Needed because
// not all platforms define strlwr the same way, so we roll our own.
//
// word - the string to convert; a NULL pointer is accepted and ignored.
//
// Always returns 0.
int lowerword(char *word)
{
    char *loc;

    if ( word == NULL )
        return 0;

    for ( loc = word; *loc != '\0'; loc++ ) {
        // The <ctype.h> functions are only defined for values representable
        // as unsigned char (or EOF). A plain char can be negative for bytes
        // above 0x7f, so convert before the call. tolower() already leaves
        // anything that isn't an upper case letter unchanged.
        *loc = (char) tolower( (unsigned char) *loc );
    }
    return 0;
}
// indexword
// ---------
//
// Adds one null terminated word to the index. The word is lower cased first
// (in place - the caller's buffer is modified) so that a search for "bird"
// finds both "Bird" and "bird". To support mixed case searching you would
// have to change this, e.g. by indexing both the original and the lower
// cased form, perhaps with a special prefix character on the mixed case one.
//
// An empty word is silently skipped and counts as success.
//
// Returns 0 if successful and -1 if Onix reported a problem.
//
// NOTE(review): the word is copied into WordS.Word with strcpy and no length
// check; callers must keep words shorter than that buffer - confirm its size
// in onixapi.h.
int indexword(char *word)
{
    lowerword( word );

    if ( word[0] != '\0' ) {
        // Fill in the shared word structure: the text and its length. The
        // other fields of the structure aren't needed here and are left
        // alone.
        strcpy( (char *) WordS.Word, word );
        WordS.Length = strlen(word);

        ixIndexWord(theIndexingSession, &WordS, &theStatus);
        if ( theStatus < 0 ) {
            return -1;
        }
    }

    return 0;
}
// indexfile_is_break
// ------------------
//
// Private helper for indexfile. Returns nonzero when c equals one of the
// three configured record break characters. EOF never matches because the
// break characters are unsigned.
static int indexfile_is_break( int c )
{
    return ( c == Record_Break1 ) || ( c == Record_Break2 ) || ( c == Record_Break3 );
}

// indexfile_store_record
// ----------------------
//
// Private helper for indexfile. Stores the data for the current record:
// the record text when Store_Text is set, otherwise just the file name.
// Returns 0 if successful and -1 if Onix reported an error.
static int indexfile_store_record( char *record, size_t record_len, char *filename )
{
    if ( Store_Text )
        ixStoreRecordData( theIndexingSession, (UCharT *) record, record_len, &theStatus);
    else
        ixStoreRecordData( theIndexingSession, (UCharT *) filename, strlen( filename ), &theStatus);

    return ( theStatus < 0 ) ? -1 : 0;
}

// indexfile
// ---------
//
// Given a file path it opens the file and parses it, indexing every
// alphanumeric word with indexword(). A record ends whenever one of the
// Record_Break characters is read or the record buffer is full. For each
// record we store either the record text prefixed with "<file name> : "
// (Store_Text set) or just the file name.
//
// The global first_file flag says whether this is the first file being
// indexed. If it isn't, the record is incremented before the file is read
// so that the file starts with a new record. For the first file no
// increment is done, because we don't want to create a record with no
// information in it.
//
// Returns 0 if successful and -1 if unsuccessful (NULL or over-long file
// name, file can't be opened, or any Onix call reports an error). The file
// is closed on every path.
int indexfile(char *thefilename)
{
    enum { RECORD_BUF_SIZE = 100000, MAX_WORD_CHARS = 250 };

    FILE *tFile;
    int c;
    int result = -1;
    int word_chars;
    long record_chars = 0;      // characters added to the current record so far
    char the_word[255], *wp;
    char the_record[RECORD_BUF_SIZE], *rp;
    long recnum = 0;
    long max_char;
    size_t prefix_len;          // length of the "<file name> : " prefix

    if ( thefilename == NULL )
        return -1;

    if ( ! first_file ) {
        WordS.Word[0] = '\0';   // empty out the word
        WordS.Length = 0;
        // Not the first file, so move on to a fresh record. The most common
        // error people have when first using the Onix toolkit is calling
        // ixIncrementRecord too many or too few times. This leads to Status
        // returning -48.
        ixIncrementRecord( theIndexingSession, (WordTypeT *) &WordS, &theStatus );
        if ( theStatus < 0 )
            return -1;
    }
    else {
        first_file = 0;         // no longer first file
    }

    // Open the file
    tFile = fopen( thefilename, "r");
    if ( Verbose_Mode )
        printf("\nIndexing %s", thefilename);
    if ( tFile == NULL ) {
        printf("\n\nCouldn't open text file for indexing.\n");
        printf("File: %s",thefilename);
        return -1;
    }

    // The record buffer starts with "<file name> : ". Make sure that prefix,
    // at least one character of text and the terminator all fit.
    prefix_len = strlen( thefilename ) + 3;
    if ( prefix_len + 2 >= (size_t) RECORD_BUF_SIZE ) {
        printf("\n\nFile name too long for indexing.\n");
        goto done;
    }
    strcpy( the_record, thefilename );
    strcpy( the_record + prefix_len - 3, " : ");
    max_char = (long) RECORD_BUF_SIZE - (long) prefix_len - 2;

    // Invariant from here on: rp == the_record + prefix_len + record_chars,
    // and nothing is written once record_chars reaches max_char.
    rp = the_record + prefix_len;

    c = fgetc( tFile );
    while ( c != EOF ) {
        wp = the_word;
        word_chars = 0;

        // read in the word
        while ( ( c != EOF ) && isalnum( c ) && ( word_chars < MAX_WORD_CHARS ) && ( record_chars < max_char ) ) {
            word_chars++;
            record_chars++;
            *wp++ = (char) c;
            *rp++ = (char) c;
            c = fgetc( tFile );
        }
        // we have to null terminate the word
        *wp = '\0';

        // make sure we actually have a word with at least 1 character
        if ( the_word[0] != '\0' ) {
            if ( indexword( the_word ) != 0 ) {
                // if there is an error, quit
                printf("Error indexing word.\n");
                goto done;
            }
        }

        // Check for record boundary. A full buffer also ends the record;
        // the test must be >= because the copy loops stop exactly at
        // max_char, and without a flush here the outer loop would never
        // consume another character.
        if ( indexfile_is_break( c ) || ( record_chars >= max_char ) ) {
            // The Word structure passed to Increment Record is designed for
            // advanced functionality but isn't necessary for the vast
            // majority of uses. We zero out our word, just to be safe.
            WordS.Word[0] = '\0';
            WordS.Length = 0;

            // The record may end in several break characters (i.e. a \n\r
            // pair). Add these into the stored record.
            while ( indexfile_is_break( c ) && ( record_chars < max_char ) ) {
                *rp++ = (char) c;
                record_chars++;
                c = fgetc( tFile );
            }
            *rp = '\0';

            if ( record_chars > 0 ) {
                if ( indexfile_store_record( the_record, (size_t) ( rp - the_record ), thefilename ) != 0 )
                    goto done;      // if there is an error, quit

                // Count the record (for displaying how many records were in
                // the file) and start the next one right after the prefix.
                record_chars = 0;
                recnum++;
                rp = the_record + prefix_len;

                // We've finished with this record so increment to the next
                ixIncrementRecord( theIndexingSession, (WordTypeT *) &WordS, &theStatus );
                if ( theStatus < 0 )
                    goto done;      // if there is an error, quit
            }
        }

        // read non alnum characters (bounded like every other copy loop)
        while ( ( c != EOF ) && !isalnum( c ) && !indexfile_is_break( c ) && ( record_chars < max_char ) ) {
            *rp++ = (char) c;
            record_chars++;
            c = fgetc( tFile );
        }
    } // while there are still characters
    *rp = '\0';

    // At this point we still have the last bit of record data. Since many
    // files simply end with an EOF and not a record break character we have
    // to store our record.
    if ( indexfile_store_record( the_record, (size_t) ( rp - the_record ), thefilename ) != 0 )
        goto done;
    recnum++;

    if ( Verbose_Mode )
        printf("...%ld records\n", recnum);

    result = 0;

done:
    // clean things up and return
    fclose( tFile );
    return result;
}
// CompareExt
// ----------
//
// Compares the given extension(s) with a given string. Pass null in for the
// extensions you don't want to use. You can check up to three extensions
// simultaneously. Returns true (1) for success and false (0) for failure.
int CompareExt( char *string, char *ext1, char *ext2, char *ext3 )
{
char *sloc, *loc1, *loc2, *loc3;
if ( string == NULL )
return 0;
sloc = string + strlen(string) -1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -