⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 indexcmd.cpp

📁 These are precompiled demonstration programs of the Onix toolkit. They do not include the Onix tool
💻 CPP
📖 第 1 页 / 共 2 页
字号:
//----------------------------------------------------------------------
// Index Command Demo
//----------------------------------------------------------------------
//
// This is a demonstration of the Onix Full Text Indexer and Retrieval
// Toolkit.  It demonstrates how to write a dos command that can be 
// called to index files.  We've tried to keep it relatively simple.
// A few of the features we added only work in a Dos/Windows 
// environment.  Feel free to modify this program to meet your own 
// needs.  It is mainly here to demonstrate how you might integrate
// Onix into your own projects and is not meant as a full featured
// command line program.  While it is designed to be run from the 
// command line, it will also work in most IDEs in a self-running
// mode.  (See the function test_code for setting up the self-running
// options)
//
// PLATFORM DEPENDENCIES
//
// We originally wrote this as a demo for windows, but added in 
// posix functionality so that it should work with all unixes and 
// most platforms with posix compliance.  To set it up so that it
// works with standard windows calls define _WINDOWS_VERSION_.
// The only real place for dependencies is in the pathnames and
// in scandir().  
//
//
//
// The calling features of the command are as follows:
//
// index [-n] [-v] [-t] [-r] [-b #] [-a #] [-c #] [help] indexname filelist
//
// -n			(optional) create new index 
// -v			(optional) verbose mode (displays diagnostic info)
// -t			(optional) store text in index
// -r			(optional) recurse subdirectories
// -b #			(optional) character to break on
// -a #			(optional) second character to break on
// -c #			(optional) third character to break on
//
// help			print help
// indexname	the name of the index to be created or opened
// filelist     a list of file names to be indexed
//
//
// HISTORY
// -------
//
// 06.23.00	CG  Initial version as a test program.
// 02.15.01 CG  Modified to be a command line demo program.
// 06.21.01 CG  Cleaned up some code and made directory scanning cross
//              platform.
// 11.01.01 CG  Cleaned up further and unified the various versions 
//              hanging around.


//--------------------------------------------------------------------

// The following is just a define I use so I can keep all my test and
// demo programs in one project.  I simply comment and uncomment the 
// __RUN__NOW__ define to select which demo to run.  I have several
// automated test programs that I use to check for various errors and
// this really speeds things up.


//#define __RUN__NOW__
#ifdef  __RUN__NOW__


#define _WINDOWS_VERSION_

#ifdef _WINDOWS_VERSION_
#include <windows.h>
#else
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#endif

#include <stdio.h>
#include <ctype.h>
#include <string.h>


#include "onixapi.h"

// PASSCODES
// ---------

#define PASSCODE1 0xdf83ffae    // Change these to the passcodes you were given as
#define PASSCODE2 0x4e782363    // part of your evaluation or the final codes.

// GLOBALS
// -------

OnixIndexingEngineT	theIndexingSession;	// Indexing session; assigned from ixStartIndexingSession
										// in prepare_indexer
OnixIndexManagerT	theIndexManager;	// Index manager; assigned from ixCreateIndexManager
										// in prepare_indexer

StatusCodeT			theStatus;			// Status written by each Onix call.  The code in this
										// file treats a negative value as an error.

WordTypeT			WordS;				// Scratch word structure passed to ixIndexWord and
										// ixIncrementRecord

ULongT				Record;				// The record number we are indexing
										// NOTE(review): not referenced in the visible part of
										// the file - confirm it is still used.

// Flags
UCharT				Record_Break1;		// First character that ends a record
UCharT				Record_Break2;		// Second character that ends a record
UCharT				Record_Break3;		// Third character that ends a record
										// (presumably set from -b, -a and -c; the argument
										// parsing is not in this part of the file)

UCharT				Store_Text;			// Boolean - store the record text in the index?
										// (otherwise indexfile stores only the file name)
UCharT				New_Index;			// Boolean - create new index?
UCharT				Verbose_Mode;		// Boolean - print diagnostics?
UCharT				Recurse;			// Boolean - recurse subdirectories?

UCharT				IndexPath[255];		// Path to the index to be opened or
										// created.

UCharT				*FilePath[255];		// Array of pointers to the path(s) of the file(s)
										// to index

int					first_file = 1;		// Flag for first file - we need this so that we don't try and
										// index an empty record.  Cleared by indexfile once the first
										// file has been seen.

// prepare_indexer
// ---------------
//
// Sets up the indexing objects and opens the index:
//
//   1. creates the index manager (theIndexManager),
//   2. if the New_Index flag is set, creates a new index at IndexPath,
//   3. opens the index at IndexPath,
//   4. starts an indexing session (theIndexingSession).
//
// Every Onix call reports through the global theStatus; a negative value is
// treated as an error.  On the first error a message naming the failing step
// is printed and the function stops.
//
// Returns 0 if successful and -1 if there was an error.

int prepare_indexer()
{
	OnixIndexCreationParamsT	IndexCreationParams;
	IndexModeT					IndexingMode;

	// Put your current passcodes in PASSCODE1 / PASSCODE2.  With the wrong
	// passcodes this call will flag an error.  If you don't know the
	// passcodes, please call us here at Lextek at 801-375-8332

	theIndexManager = ixCreateIndexManager(PASSCODE1, PASSCODE2, &theStatus); 

	if ( theStatus < 0 ) {
		printf( "\nError Creating Index Manager: %d \n", theStatus);
		return -1;
	}

	// Check to see if we are creating a new index rather than simply
	// adding files to an existing index.

	if ( New_Index ) {

		// Build the parameter object that describes the index to create.

		IndexCreationParams = ixCreateIndexCreationParams( &theStatus);

		// Don't configure or use a parameter object that could not be created.

		if ( theStatus < 0 ) {
			printf( "\nError Creating Index Creation Params: %d \n", theStatus);
			return -1;
		}

		// NOTE(review): the meaning of mode 2 is defined by the Onix API and
		// is not visible in this file - confirm against the Onix headers.
		IndexingMode = (IndexModeT) 2;

		ixSetIndexCreationParams( IndexCreationParams, ixSetIndexMode, &IndexingMode);
		ixSetIndexCreationParams( IndexCreationParams, ixSetSinglePointIndex, NULL);
		ixSetIndexCreationParams( IndexCreationParams, ixSetVariableLengthRecordInfo, NULL);
		ixSetIndexCreationParams( IndexCreationParams, ixSet32BitRecordInfoOffset, NULL );
		ixSetIndexCreationParams( IndexCreationParams, ixSetIndexFileName, IndexPath );
		ixSetIndexCreationParams( IndexCreationParams, ixSetRankingMethodTwo, NULL );
		
		ixCreateIndexEx( theIndexManager, IndexCreationParams, &theStatus );

		// The parameter object is released before theStatus is examined, so
		// it is freed whether or not the index creation succeeded.
		ixDeleteIndexCreationParams( IndexCreationParams );

		if ( theStatus < 0 ) {
			printf( "\nError Creating Index: %d \n", theStatus);
			return -1;
		}
	}

	// Open the index

	ixOpenIndex( theIndexManager, (char*) IndexPath, &theStatus );

	if ( theStatus < 0 ) {

		printf( "\nError Opening Index: %d \n", theStatus);
		return -1;
	}

	// Start the session that all later indexing calls will use

	theIndexingSession = ixStartIndexingSession( theIndexManager, &theStatus);

	if ( theStatus < 0 ) {
		printf( "\nError Starting Index Session: %d \n", theStatus);
		return -1;
	}

	return 0;
}



// close_indexer
// -------------
//
// Closes down the indexing objects: ends the indexing session, closes the
// index and deletes the index manager.  All three steps are always
// attempted, even if an earlier one fails, so that as much as possible is
// shut down.  A message is printed for each step that reports a negative
// status.
//
// Returns 0 if every step succeeded and -1 if any step reported a problem.

int close_indexer()
{
	// Note that the progress variable is for multithreaded programs that display
	// a progress bar during the merging of the indexes done by
	// ixEndIndexingSession.  This demo never reads it.

	size_t	progress = 0;
	int		result = 0;

	printf("\nMerging indexes\n");

	// Now that we've finished indexing the files, we close things down 

	// NOTE(review): the meaning of the literal 100 is defined by the Onix API
	// (presumably the progress scale) - confirm against the Onix headers.
	ixEndIndexingSession( theIndexManager, theIndexingSession, &progress, 100, &theStatus);

	if ( theStatus < 0 ) {
		printf( "\nError Ending Index Session: %d \n", theStatus);
		result = -1;
	}

	ixCloseIndex( theIndexManager, &theStatus);

	if ( theStatus < 0 ) {
		printf( "\nError Closing Index: %d \n", theStatus);
		result = -1;
	}

	ixDeleteIndexManager(theIndexManager, &theStatus);

	if ( theStatus < 0 ) {
		printf( "\nError Destroying Index Manager: %d \n", theStatus);
		result = -1;
	}

	return result;
}

// lowerword
// ---------
//
// Converts a null terminated word to lowercase in place.  Basically needed
// because not all platforms define strlwr the same way.  Rather than trying
// to find out the function call for every system I just rolled my own.
//
// word   the string to convert; a NULL pointer is accepted and ignored.
//
// Always returns 0.

int lowerword(char *word)
{
	char *loc;

	if ( word == NULL )
		return 0;

	for ( loc = word; *loc != '\0'; loc++ ) {

		// The <ctype.h> functions require a value representable as an
		// unsigned char (or EOF).  Plain char may be signed, so bytes above
		// 0x7F must be converted first or the call is undefined behaviour.
		// tolower leaves anything that is not an upper case letter unchanged,
		// so no separate isupper test is needed.

		*loc = (char) tolower( (unsigned char) *loc );
	}

	return 0;
}


// indexword
// ---------
//
// Lower-cases a null terminated word in place and hands it to Onix for
// indexing.  Lower-casing means a search for "bird" finds both "Bird" and
// "bird"; to support case sensitive searches you would have to change this
// (for example by indexing both the original and the lower-cased form, with
// a special character prepended to one of them).
//
// An empty word is silently skipped.
//
// Returns 0 if successful (or if there was nothing to index) and -1 if
// Onix reported a negative status.

int indexword(char *word)
{
	int outcome = 0;

	lowerword( word );

	if ( *word != '\0' ) {

		// Fill in the shared word structure.  Only the text and its length
		// are set; the structure's other fields are left as they are.

		strcpy( (char *) WordS.Word, word );
		WordS.Length = strlen( word );

		ixIndexWord( theIndexingSession, &WordS, &theStatus );

		if ( theStatus < 0 ) {
			outcome = -1;
		}
	}

	return outcome;
}

// indexfile_is_break
// ------------------
//
// Returns nonzero when c equals one of the three configured record break
// characters (Record_Break1, Record_Break2, Record_Break3).

static int indexfile_is_break( int c )
{
	return ( c == Record_Break1 ) || ( c == Record_Break2 ) || ( c == Record_Break3 );
}

// indexfile_store_record
// ----------------------
//
// Stores the data that goes with the current record: the accumulated record
// text when Store_Text is set, otherwise just the file name.
// Returns 0 if successful and -1 if Onix reported a negative status.

static int indexfile_store_record( char *thefilename, char *record, size_t record_len )
{
	if ( Store_Text == true )
		ixStoreRecordData( theIndexingSession, (UCharT *) record, record_len, &theStatus);
	else
		ixStoreRecordData( theIndexingSession, (UCharT *) thefilename, strlen( thefilename ), &theStatus);

	return ( theStatus < 0 ) ? -1 : 0;
}

// indexfile
// ---------
//
// Given a file path it opens the file and parses it, indexing every run of
// alphanumeric characters as a word.  A new record is started whenever one
// of the record break characters is read or the record buffer fills up.
// Each stored record begins with "<filename> :  " followed by the text read
// for that record.
//
// The global first_file says whether this is the first file being indexed.
// If it isn't, the current record is incremented before indexing so that
// the file starts with a new record.  The reason for this is that we don't
// want to create a record with no information in it.  For the same reason
// the file is opened before the record is incremented: a file that can't be
// opened does not use up a record.
//
// The text left over when the end of the file is reached is always stored
// as a final record, because many files end without a record break.
//
// thefilename   path of the file to index
//
// It returns 0 if successful and -1 if unsuccessful (NULL or over-long file
// name, file can't be opened, or Onix reported an error).  The file is
// closed on every path.

int indexfile(char *thefilename) 
{
	enum {
		RECORD_BUF_SIZE = 100000,	// size of the record text buffer
		MAX_WORD_CHARS  = 250,		// longest word passed to indexword
		HEADER_SEP_LEN  = 4			// length of the " :  " separator
	};

	FILE	*tFile;
	int		c;
	int		word_chars;
	long	record_chars = 0;
	char	the_word[MAX_WORD_CHARS + 1], *wp;
	char	the_record[RECORD_BUF_SIZE], *rp;
	char	*record_text;			// first byte after the "<filename> :  " header
	long	recnum = 0;
	long	max_char;
	size_t	name_len;

	if ( thefilename == NULL )
		return -1;

	// The header plus at least one character of text and the terminating
	// null have to fit in the record buffer.

	name_len = strlen( thefilename );

	if ( name_len + HEADER_SEP_LEN + 3 > (size_t) RECORD_BUF_SIZE ) {
		printf("\n\nFile name is too long to index.\n");
		return -1;
	}

	// Open the file

	tFile = fopen( thefilename, "r");

	if ( Verbose_Mode )
		printf("\nIndexing %s", thefilename);

	if ( tFile == NULL ) {
		printf("\n\nCouldn't open text file for indexing.\n");
		printf("File: %s",thefilename);
		return -1;
	}

	// If not the first file, increment the record number

	if ( ! first_file ) {

		WordS.Word[0] = '\0';  // empty out the word
		WordS.Length = 0;

		// We have to do this so that we always have the record counter setup
		// correctly.  The most common error people have when first using the
		// Onix toolkit is calling ixIncrementRecord too many or too few times.
		// This leads to Status returning -48.
		//
		// NOTE(review): the status of this call has never been checked here;
		// that is kept as it was.  Confirm whether it should be.

		ixIncrementRecord( theIndexingSession, (WordTypeT *) &WordS, &theStatus );
	}
	else {
		first_file = 0;		// no longer first file
	}

	// We store the file name at the beginning of the record

	memcpy( the_record, thefilename, name_len );
	memcpy( the_record + name_len, " :  ", HEADER_SEP_LEN + 1 );	// copies the null too

	record_text = the_record + name_len + HEADER_SEP_LEN;
	max_char = (long) RECORD_BUF_SIZE - (long) ( name_len + HEADER_SEP_LEN ) - 2;

	// We iterate through the text file, isolating words and 
	// creating a new record every time a Record_Break character
	// is found.

	rp = record_text;
	c = fgetc( tFile );

	while ( c != EOF ) {
		wp = the_word;
		word_chars = 0;

		// read in the word
		while ( ( c != EOF ) && isalnum( c ) && ( word_chars < MAX_WORD_CHARS ) && ( record_chars < max_char ) ) {

			word_chars++;
			record_chars++;
			*wp++ = (char) c;
			*rp++ = (char) c;

			c = fgetc( tFile );

		} // is a alphanum character

		// we have to null terminate the word
		*wp = '\0';

		// make sure we actually have a word (at least one character)
		if ( the_word[0] != '\0' ) {

			// index the word
			if ( indexword( the_word ) != 0 ) {

				// if there is an error, quit
				printf("Error indexing word.\n");
				fclose( tFile );
				return -1;
			}

		} // have word

		// Check for record boundary.  A full buffer (>=) also ends the
		// record; testing only for "greater than" would never fire when the
		// buffer fills up in the middle of a word and the loop would spin
		// forever.

		if ( indexfile_is_break( c ) || ( record_chars >= max_char ) ) {

			// The Word structure passed to Increment Record is 
			// designed for advanced functionality but isn't 
			// necessary for the vast majority of uses.  We'll
			// zero out our word, just to be safe.  This isn't 
			// really necessary though.

			WordS.Word[0] = '\0';  // empty out the word
			WordS.Length = 0;

			// The record may contain multiple record break characters (i.e. \n\r pair)
			// Add these into the stored record

			while ( indexfile_is_break( c ) && ( record_chars < max_char ) ) {
				*rp++ = (char) c;
				record_chars++;
				c = fgetc( tFile );
			}

			*rp = '\0';

			if ( record_chars > 0 ) {

				// We can store any data with the record that we wish.  In the case of this
				// demo we either store the path to the file or else the text of the record
				// itself.  

				if ( indexfile_store_record( thefilename, the_record, (size_t) ( rp - the_record ) ) != 0 ) {
					fclose( tFile );
					return -1;	// if there is an error, quit
				}

				// Count the record (for displaying how many records were in the
				// file)

				record_chars = 0;
				recnum++;

				// Prepare to start a new record directly after the header

				rp = record_text;

				// We've finished with this record so increment to the next record
				ixIncrementRecord( theIndexingSession, (WordTypeT *) &WordS, &theStatus );

				if ( theStatus < 0 ) {
					// if there is an error, quit
					fclose( tFile );
					return -1;
				}

			}

		} // check for record break

		// read non alnum characters, never writing past the record buffer

		while ( ( c != EOF ) && !isalnum( c ) && !indexfile_is_break( c ) && ( record_chars < max_char ) ) {
			*rp++ = (char) c;
			record_chars++;
			c = fgetc( tFile );
		}

	} // while there are still characters

	*rp = '\0';

	// At this point we still have the last bit of record data.  Since many files simply end with
	// an EOF and not a record break character we have to store our record.

	if ( indexfile_store_record( thefilename, the_record, (size_t) ( rp - the_record ) ) != 0 ) {
		fclose( tFile );
		return -1;
	}

	recnum++;

	if ( Verbose_Mode )
		printf("...%ld records\n", recnum);

	// clean things up and return
	fclose (tFile);

	return 0;

}



// CompareExt
// ----------
//
// Compares the given extension(s) with a given string.  Pass null in for the
// extensions you don't want to use.  You can check up to three extensions
// simultaneously.  Returns true (1) for success and false (0) for failure.

int CompareExt( char *string, char *ext1, char *ext2, char *ext3 )
{
	char *sloc, *loc1, *loc2, *loc3;

	if ( string == NULL )
		return 0;

	sloc = string + strlen(string) -1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -