⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 demo1.cpp

📁 These are precompiled demonstration programs of the Onix toolkit. They do not include the Onix tool
💻 CPP
📖 第 1 页 / 共 4 页
字号:
				// Onix.

				ixDeleteResultVector( Results, &theStatus);

				if ( theStatus < 0 ) {
					printf("\nError deleting query vectors");
					return -1;
				}
			}
			
			// We convert our state from retrieval to indexing again

			close_retrieval();
			prepare_indexer();
			return 0;
			break;

		case 'n':
		case 'N':
		case 'r':
		case 'R':
		case 'C':
		case 'c':
			
			// Since we are about to start a new search, free the memory
			// from the old search

			if ( Results != NULL ) {
				ixDeleteResultVector( Results, &theStatus);
			}

			if ( theStatus < 0 ) {
				printf("\nError deleting query vectors");
				return -1;
			}

			// Get a query and it's result.  We pass in the first character
			// of Choice as a way of telling the kind of query we wish to
			// make.  A hack, but it is a simple way of passing this info

			Results = get_query( Choice[0] );	

			// Get stats about the number of records and hits for the query
			ixNumHits( Results, &hits, &records, &words, &theStatus);

			if ( theStatus < 0 ) {
				printf("\nError in Query\n");
				return -1;
			}

			break;

		default:

			int temp = atoi(Choice);	// convert choice to a number

			if ( ( temp <= 0 ) || ( temp > hits) ) {
				printf("\nThat is not a valid hit number.\n");
				break;
			}

			process_hit( Results, temp );
			break;

		}
	}

}



//------------------------------------------------------------------------------------
// Indexing Functions
//------------------------------------------------------------------------------------




// indexword
// ---------
//
// Adds one null terminated word to the index.  The word is lower cased
// in place first, so a search for "bird" matches both "Bird" and "bird".
// To support case sensitive searching you would also index the word in
// its original form (the author generally indexes both forms).
//
// Returns 0 when the word was indexed or was empty (nothing to do), and
// -1 when ixIndexWord leaves a negative value in theStatus.
//
// NOTE(review): the copy into WordS.Word is unbounded; it relies on the
// caller limiting the word length to what WordS.Word can hold - confirm 
// against the WordS declaration.

int indexword(char *word)
{
	LowerString( word );			// fold to lower case (modifies caller's buffer)

	// Nothing to index for an empty word - treat it as success.

	if ( *word == '\0' )
		return 0;

	// Fill in the two fields of the word structure that are needed here:
	// the text and its length.  The structure's remaining fields are
	// left untouched.

	strcpy( (char *) WordS.Word, word );
	WordS.Length = strlen(word);

	// Hand the word to the indexing session; the outcome comes back 
	// through theStatus.

	ixIndexWord(theIndexingSession, &WordS, &theStatus);

	return ( theStatus < 0 ) ? -1 : 0;

}

// indexfile
// ---------
//
// Given a file path it opens the file and parses it, indexing every 
// alphanumeric word of at least 3 characters with indexword().  A new 
// record is started whenever one of the Record_Break characters is found,
// or when the record buffer is about to fill up.
//
// The global 'isFirst' tells us whether this is the first file indexed in
// this session.  If it isn't, the current record is incremented before 
// parsing so that the file starts on a new record.  isFirst is set to 
// false when the file has been indexed successfully.
//
// thefilename is passed through StripControls() before use (presumably
// cleaned in place - confirm against StripControls).
//
// Returns 0 on success, or -1 if the file can't be opened or if indexing 
// a word, storing a record or incrementing the record reports an error.
// The file is closed on every return path after a successful open.

int indexfile(char *thefilename) 
{
	FILE	*tFile;						// the File we are indexing
	int		c;							// current character being read

	int		count;						// a count of characters in the current word

	const	int	word_size = 255;		// largest word size we allow

	char	the_word[word_size + 1], *wp;	// string for the current word
											// (+1 leaves room for the terminator)

	const	int rec_size = 100000;		// largest record size we allow (auto
										// breaks past this limit)

	const	int rec_margin = 5;			// slack kept free at the end of the record

	char	the_record[rec_size], *rp;	// string for the current record

	long	rec = 0;					// record #

	// We clean up the file name, just in case it has carriage returns 

	StripControls( thefilename );

	// Open up the file

	tFile = fopen( thefilename, "r");

	if ( tFile == NULL ) {
		printf("Couldn't open text file for indexing.\n");
		printf("File: %s",thefilename);
		return -1;
	}

	// Our verbose mode just gives interactive information.  However since
	// we are displaying text, it *really* slows the indexing down

	if ( Verbose_Mode ) {
		printf("\nIndexing %s", thefilename);
	}

	// We keep track of whether we are the first file indexed in 
	// this session or not. We do this since if we are indexing 
	// further files we still have to increment the record, otherwise
	// we are putting the last record of the previous file as the
	// same as the next file's first record.

	if ( !isFirst ) {

		WordS.Word[0] = '\0';  // empty out the word
		WordS.Length = 0;

		// If this isn't the first file we need to increment the record.
		// (The resulting status is not checked here.)

		ixIncrementRecord( theIndexingSession, (WordTypeT *) &WordS, 
					&theStatus );

	}

	// We iterate through the text file, isolating words and creating a new 
	// record every time a Record_Break character is found.  (See set_globals
	// for where those break characters are set.)

	rp = the_record;
	c = fgetc( tFile );

	while ( c != EOF ) {
		wp = the_word;
		count = 0;

		// Read in the word.  We stop at word_size characters so that the
		// terminator always fits in the_word, and we stop when the record 
		// buffer is nearly full so that the_record is never overrun (the 
		// record is then broken below and the rest of the word is picked
		// up on the next pass).

		while ( ( c != EOF ) && isalnum( c ) && ( count < word_size ) 
				&& ( ( rp - the_record + rec_margin ) < rec_size ) ) {	
		
			count++;	
			*wp++ = (char) c; 	
			*rp++ = (char) c;
			
			c = fgetc( tFile );
			
		} // is a alphanum character

		// we have to null terminate the word
		*wp = '\0';

		// make sure we actually have a word with at least 3 letters
		if ( count >= 3 ) {

			// index the word
			if ( indexword( the_word ) != 0 ) {

				// if there is an error, quit
				printf("Error indexing word.\n");
				fclose( tFile );
				return -1;
			}

		} // have word

		// Check for record boundary or if we've exceeded our record size

		if (( c == Record_Break1 ) || ( c == Record_Break2 ) || ( c == Record_Break3 ) 
				|| ( ( rp - the_record + rec_margin ) >= rec_size ) )  {

			// The Word structure passed to Increment Record is 
			// designed for advanced functionality but isn't 
			// necessary for the vast majority of uses.  We'll
			// zero out our word, just to be safe.  This isn't 
			// really necessary though.

			WordS.Word[0] = '\0';  // empty out the word
			WordS.Length = 0;

			// The record may contain multiple record break characters (i.e. \n\r pair)
			// I don't add these to the record, but you may wish to.  If you do want
			// them stored, simply remove the comment around *rp++ = c.
			//
			// NOTE(review): the '\n' written here is overwritten by the terminator
			// a few lines below (rp is not advanced), so no newline actually ends up
			// in the stored record.  Kept as is to preserve the stored data.

			*rp = '\n';

			while (( c == Record_Break1 ) || ( c == Record_Break2 ) || 
				   ( c == Record_Break3 ) ) {

				// *rp++ = c;
				c = fgetc( tFile );
			}

			*rp = '\0';		// terminate the record string 
							// (not technically needed by Onix, but makes debugging easier)

			rec++;

			// Store either the record text or, if text isn't being stored,
			// the path of the file as the record's data.

			if ( Store_Text == true ) {
				ixStoreRecordData( theIndexingSession, (UCharT *) the_record,  
					rp-the_record, &theStatus);
			}
			else {
				ixStoreRecordData( theIndexingSession, (UCharT *) thefilename, 
					strlen( thefilename) , &theStatus);
			}

			rp = the_record;	// reset our record location to the beginning of the string

			if ( theStatus < 0 ) {
				// if there is an error, quit
				printf("\nError storing text: %d\n", theStatus);
				fclose( tFile );
				return -1;
			}	

			// We've finished with this record so increment to the next record
			ixIncrementRecord( theIndexingSession, (WordTypeT *) &WordS, &theStatus );

			if ( theStatus < 0 ) {
				// if there is an error, quit
				printf("\nError incrementing record %d\n", theStatus);
				fclose( tFile );
				return -1;
			}	

		} // if record break

		// Read non alphanum characters.  We also stop when the record buffer
		// is nearly full; the size check above then breaks the record on the
		// next pass through the loop.

		while ( ( c != EOF ) && !isalnum(c) && ( c != Record_Break1 ) && 
			( c != Record_Break2 ) && ( c != Record_Break3 ) &&
			( ( rp - the_record + rec_margin ) < rec_size ) ) {

			*rp++ = (char) c;

			c = fgetc( tFile );
		}

	} // while there are still characters

	*rp = '\0';

	// At this point we still have the last bit of record data.  Since many files 
	// simply end with an EOF and not a record break character we have to store 
	// our record.  (The resulting status is not checked here.)

	if ( Store_Text == true ) {
		ixStoreRecordData( theIndexingSession, (UCharT *) the_record, rp-the_record, 
			&theStatus);
	}
	else {
		// Text isn't being stored, so we store the file path as the
		// record's data instead.

		ixStoreRecordData( theIndexingSession, (UCharT *) thefilename, 
			strlen( thefilename) , &theStatus);
	}

	rec++;

	// clean things up and return

	fclose (tFile);

	if ( Verbose_Mode ) {
		printf("...%ld records ", rec );	// rec is a long
	}

	isFirst = false;	// no longer the first file  (This is reset the next time
						// we enter the index files menu)
	return 0;

}




// scandir
// -------
//
// This scans a directory, indexing all the files in it.  If the global
// flag Recurse is set then it recursively indexes sub-directories.
// It uses some Windows functions to work - this is based on the MVC 
// compiler and may need to be modified slightly for other compilers and
// platforms.

int scandir( char *dir)
{
	WIN32_FIND_DATA filedata;		// file data structure
	HANDLE			filehandle;		// search handle
	int				filesleft;

	int				dirlen;

	char			tempfile[255], passfile[255];

	// Clean up the directory name

	StripControls( dir );

	dirlen = strlen( dir );

	if ( dirlen >= 254 ) {
		// string too long
		printf("We can't handle paths that are that long.\n\n");
		return -1;
	}

	// Check to see if we have the trailing "\" 
	// Add it if you need to

	if ( dir[ dirlen -1 ] != '\\' ) {
		strcpy ( dir + dirlen, "\\" );
	}

	// We have a directory, so get a list of all the files ending 
	// in .txt in this directory.  To index more files than 
	// simply .txt extensions modify this.

	strcpy(tempfile, dir);	// the path including the file name

	strcpy(passfile, dir);	// the path to the directory

	// Create a path with wildcard - modify this for being more 
	// specific in your indexing.

	strcpy( tempfile + strlen( tempfile ), "*");
	
	filehandle = FindFirstFile ( tempfile, &filedata);
	
	if (filehandle == INVALID_HANDLE_VALUE ) {
		printf("\nFile error\n");
		FindClose ( filehandle );
		return -1;
	}

	strcpy( passfile + strlen(dir), filedata.cFileName );


	// See if it is a directory or a file 

	if ( isdir( filedata ) ) {
		if ( Recurse == true ) {
			scandir( passfile );	// recurse on this subdirectory
		}
	}
	else {
		// Note that you can change the extensions we check 

		if ( CompareExt( passfile, ".txt", NULL, NULL )) {
			indexfile( passfile );
		}
	}

	// Iterate through all the other files.

	filesleft = FindNextFile( filehandle, &filedata );
	while ( filesleft ) {

		// Create a path by appending the file's name to the base path
		strcpy( passfile + strlen(dir), filedata.cFileName );
 
		if ( isdir( filedata ) ) {
			if ( Recurse == true ) {
				scandir( passfile );	// recurse on this subdirectory
			}
		}
		else {
			if ( CompareExt( passfile, ".txt", NULL, NULL )) {
				indexfile( passfile );
			}
		}

		filesleft = FindNextFile( filehandle, &filedata );

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -