📄 demo1.cpp
字号:
// Onix.
ixDeleteResultVector( Results, &theStatus);
if ( theStatus < 0 ) {
printf("\nError deleting query vectors");
return -1;
}
}
// We convert our state from retrieval to indexing again
close_retrieval();
prepare_indexer();
return 0;
break;
case 'n':
case 'N':
case 'r':
case 'R':
case 'C':
case 'c':
// Since we are about to start a new search, free the memory
// from the old search
if ( Results != NULL ) {
ixDeleteResultVector( Results, &theStatus);
}
if ( theStatus < 0 ) {
printf("\nError deleting query vectors");
return -1;
}
// Get a query and its result. We pass in the first character
// of Choice as a way of telling the kind of query we wish to
// make. A hack, but it is a simple way of passing this info
Results = get_query( Choice[0] );
// Get stats about the number of records and hits for the query
ixNumHits( Results, &hits, &records, &words, &theStatus);
if ( theStatus < 0 ) {
printf("\nError in Query\n");
return -1;
}
break;
default:
int temp = atoi(Choice); // convert choice to a number
if ( ( temp <= 0 ) || ( temp > hits) ) {
printf("\nThat is not a valid hit number.\n");
break;
}
process_hit( Results, temp );
break;
}
}
}
//------------------------------------------------------------------------------------
// Indexing Functions
//------------------------------------------------------------------------------------
// indexword
// ---------
//
// Adds one null terminated word to the index. The word is lower-cased
// in place first (the caller's buffer is modified), so that a search
// for "bird" matches both "Bird" and "bird". To support case sensitive
// searching you would index the original form as well as the lower
// cased one.
//
// Returns 0 on success (an empty word is silently ignored) and -1 if
// the indexing call leaves a negative status in theStatus.
int indexword(char *word)
{
    // Fold the word to lower case before anything else
    LowerString( word );

    // An empty string has nothing to index; that is not an error
    if ( *word == '\0' )
        return 0;

    // Fill in the shared word structure: the text and its length.
    // The structure's remaining fields are not needed here and are
    // left as they are.
    strcpy( (char *) WordS.Word, word );
    WordS.Length = strlen( word );

    // Hand the word to the indexer and translate its status into
    // our 0 / -1 convention
    ixIndexWord( theIndexingSession, &WordS, &theStatus );
    return ( theStatus < 0 ) ? -1 : 0;
}
// indexfile
// ---------
//
// Given a file path it opens the file and parses it, indexing the file.
// Words are runs of alphanumeric characters; only words of at least three
// characters are passed to indexword(). A new record is started whenever
// one of the Record_Break characters is read, or when the record buffer
// is nearly full.
//
// Whether this is the first file of the session is taken from the global
// flag isFirst (cleared here on success). If this isn't the first file
// the current record is incremented up front, so that the file starts
// on a new record instead of sharing the previous file's last record.
//
// Returns 0 on success, or -1 if the file can't be opened or if indexing
// a word, storing a record or incrementing the record reports an error.
// The file is closed on every path once it has been opened.
int indexfile(char *thefilename)
{
    FILE *tFile;                    // the File we are indexing
    int c;                          // current character being read
    int count;                      // a count of characters in the word
    const int word_size = 255;      // size of the word buffer (incl. terminator)
    char the_word[word_size], *wp;  // string for the current word
    const int rec_size = 100000;    // size of the record buffer (auto
                                    // breaks before this limit)
    char the_record[rec_size], *rp; // string for the current record
    long rec = 0;                   // record #

    // We clean up the file name, just in case it has carriage returns
    StripControls( thefilename );

    // Open up the file
    tFile = fopen( thefilename, "r");
    if ( tFile == NULL ) {
        printf("Couldn't open text file for indexing.\n");
        printf("File: %s",thefilename);
        return -1;
    }

    // Our verbose mode just gives interactive information. However since
    // we are displaying text, it *really* slows the indexing down
    if ( Verbose_Mode ) {
        printf("\nIndexing %s", thefilename);
    }

    // If this isn't the first file indexed in this session we have to
    // increment the record, otherwise the last record of the previous
    // file would be the same as this file's first record.
    if ( !isFirst ) {
        WordS.Word[0] = '\0'; // empty out the word
        WordS.Length = 0;
        ixIncrementRecord( theIndexingSession, (WordTypeT *) &WordS,
                           &theStatus );
    }

    // We iterate through the text file, isolating words and creating a new
    // record every time a Record_Break character is found. (See set_globals
    // for where those break characters are set.)
    rp = the_record;
    c = fgetc( tFile );
    while ( c != EOF ) {
        wp = the_word;
        count = 0;

        // Read in the word. One byte of the_word is kept free for the
        // terminator, and we never write past the last byte of the_record
        // (which is kept free for the record's terminator). A word that
        // hits either limit is simply split and continues next time round.
        while ( isalnum( c ) && ( count < word_size - 1 )
                && ( ( rp - the_record ) < rec_size - 1 ) ) {
            count++;
            *wp++ = (char) c;
            *rp++ = (char) c;
            c = fgetc( tFile );
        } // is a alphanum character

        // we have to null terminate the word
        *wp = '\0';

        // make sure we actually have a word with at least 3 letters
        if ( count >= 3 ) {
            // index the word
            if ( indexword( the_word ) != 0 ) {
                // if there is an error, quit
                printf("Error indexing word.\n");
                fclose( tFile );
                return -1;
            }
        } // have word

        // Check for record boundary or if we've exceeded our record size
        if (( c == Record_Break1 ) || ( c == Record_Break2 ) || ( c == Record_Break3 )
            || ( ( rp - the_record + 5) >= rec_size ) ) {
            // The Word structure passed to Increment Record is
            // designed for advanced functionality but isn't
            // necessary for the vast majority of uses. We'll
            // zero out our word, just to be safe.
            WordS.Word[0] = '\0'; // empty out the word
            WordS.Length = 0;

            // The record may be followed by several record break
            // characters (i.e. a \n\r pair). They are skipped here and
            // are not stored in the record.
            while (( c == Record_Break1 ) || ( c == Record_Break2 ) ||
                   ( c == Record_Break3 ) ) {
                c = fgetc( tFile );
            }
            *rp = '\0'; // terminate the record string
                        // (makes debugging easier)
            rec++;

            // Store either the record text or, if text storage is off,
            // the file name as the record's data
            if ( Store_Text == true ) {
                ixStoreRecordData( theIndexingSession, (UCharT *) the_record,
                                   rp-the_record, &theStatus);
            }
            else {
                ixStoreRecordData( theIndexingSession, (UCharT *) thefilename,
                                   strlen( thefilename) , &theStatus);
            }
            rp = the_record; // reset our record location to the beginning of the string
            if ( theStatus < 0 ) {
                // if there is an error, quit (closing the file first)
                printf("\nError storing text: %d\n", theStatus);
                fclose( tFile );
                return -1;
            }

            // We've finished with this record so increment to the next record
            ixIncrementRecord( theIndexingSession, (WordTypeT *) &WordS, &theStatus );
            if ( theStatus < 0 ) {
                // if there is an error, quit (closing the file first)
                printf("\nError incrementing record %d\n", theStatus);
                fclose( tFile );
                return -1;
            }
        } // if record break

        // Read non alphanum characters into the record. We stop when the
        // record buffer is full; the size check above then closes the
        // record on the next pass and the remaining characters go into
        // the following record.
        while ( !isalnum(c) && ( c != EOF ) && ( c != Record_Break1 ) &&
                ( c != Record_Break2 ) && ( c != Record_Break3 ) &&
                ( ( rp - the_record ) < rec_size - 1 ) ) {
            *rp++ = (char) c;
            c = fgetc( tFile );
        }
    } // while there are still characters
    *rp = '\0';

    // At this point we still have the last bit of record data. Since many files
    // simply end with an EOF and not a record break character we have to store
    // our record. The status of this final store is not checked here.
    if ( Store_Text == true ) {
        ixStoreRecordData( theIndexingSession, (UCharT *) the_record, rp-the_record,
                           &theStatus);
    }
    else {
        // With text storage off we store the file path instead
        ixStoreRecordData( theIndexingSession, (UCharT *) thefilename,
                           strlen( thefilename) , &theStatus);
    }
    rec++;

    // clean things up and return
    fclose (tFile);
    if ( Verbose_Mode ) {
        printf("...%ld records ", rec ); // rec is a long
    }
    isFirst = false; // no longer the first file (This is reset the next time
                     // we enter the index files menu)
    return 0;
}
// scandir
// -------
//
// This scans a directory, indexing all the files in it. If the global
// flag Recurse is set then it recursively indexes sub-directories.
// It uses some Windows functions to work - this is based on the MVC
// compiler and may need to be modified slightly for other compilers and
// platforms.
int scandir( char *dir)
{
WIN32_FIND_DATA filedata; // file data structure
HANDLE filehandle; // search handle
int filesleft;
int dirlen;
char tempfile[255], passfile[255];
// Clean up the directory name
StripControls( dir );
dirlen = strlen( dir );
if ( dirlen >= 254 ) {
// string too long
printf("We can't handle paths that are that long.\n\n");
return -1;
}
// Check to see if we have the trailing "\"
// Add it if you need to
if ( dir[ dirlen -1 ] != '\\' ) {
strcpy ( dir + dirlen, "\\" );
}
// We have a directory, so get a list of all the files ending
// in .txt in this directory. To index more files than
// simply .txt extensions modify this.
strcpy(tempfile, dir); // the path including the file name
strcpy(passfile, dir); // the path to the directory
// Create a path with wildcard - modify this for being more
// specific in your indexing.
strcpy( tempfile + strlen( tempfile ), "*");
filehandle = FindFirstFile ( tempfile, &filedata);
if (filehandle == INVALID_HANDLE_VALUE ) {
printf("\nFile error\n");
FindClose ( filehandle );
return -1;
}
strcpy( passfile + strlen(dir), filedata.cFileName );
// See if it is a directory or a file
if ( isdir( filedata ) ) {
if ( Recurse == true ) {
scandir( passfile ); // recurse on this subdirectory
}
}
else {
// Note that you can change the extensions we check
if ( CompareExt( passfile, ".txt", NULL, NULL )) {
indexfile( passfile );
}
}
// Iterate through all the other files.
filesleft = FindNextFile( filehandle, &filedata );
while ( filesleft ) {
// Create a path by appending the file's name to the base path
strcpy( passfile + strlen(dir), filedata.cFileName );
if ( isdir( filedata ) ) {
if ( Recurse == true ) {
scandir( passfile ); // recurse on this subdirectory
}
}
else {
if ( CompareExt( passfile, ".txt", NULL, NULL )) {
indexfile( passfile );
}
}
filesleft = FindNextFile( filehandle, &filedata );
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -