📄 libs-standalone.c
字号:
/*** Copyright (C) 2006 Thai Computational Linguistics Laboratory (TCL)** National Institute of Information and Communications Technology (NICT)** Canasai Kruengkrai <canasai xx gmail yy com, where xx=at and yy=dot>**** This file is part of the `libs' library.**** This library is free software; you can redistribute it and/or modify** it under the terms of the GNU General Public License as published by** the Free Software Foundation; either version 2 of the License, or** (at your option) any later version.**** This program is distributed in the hope that it will be useful,** but WITHOUT ANY WARRANTY; without even the implied warranty of** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the** GNU General Public License for more details.**** You should have received a copy of the GNU General Public License** along with this program; if not, write to the Free Software** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.*/#include <stdio.h>#include <ctype.h>#include <stdlib.h>#include <string.h>#include <unistd.h>#ifdef __cplusplusextern "C" {#include "CKHash.h"#endif#ifdef __cplusplus}#endif#include "libsvm-string-2.71/svm.h"inline void Swap( int &a, int &b ) { int c = a; a = b; b = c; }#define file_error( msg ) fprintf( stderr, "\nFILE ERROR: Could not open the file: %s\n", msg ), exit( 0 )#define fatal_error( msg ) fprintf( stderr, "\n%s\n", msg ), exit( 0 )#define malloc_error( str1, str2 ) fprintf( stderr, "ERROR: In function `%s': Could not allocate memory for `%s'\n", str1, str2 ), exit( 1 )#define Malloc( type, n ) ( type * )malloc( ( n )*sizeof( type ) )#define MODEL "samples/default/model"#define LABEL "samples/default/label"#define _LOWER_BOUND_LENGTH_ 10#define _UPPER_BOUND_LENGTH_ 100#define _SAMPLING_SIZE_ 10typedef struct { int LOWER_BOUND_LENGTH; int UPPER_BOUND_LENGTH; char *MODEL_FILE; char *LABEL_FILE; int SAMPLING_SIZE;} SAParam;SAParam sa_param;void exit_with_help( char *prog_name ){ fprintf( stderr, "\nUsage:" "\n------" "\n%s [options] target_file" "\noptions:" "\n -l lower-bound: set minimum string length per line (default 15)" "\n -u upper-bound: set maximum string length per line (default 100)" "\n -m model_file: set model file (default %s)" "\n -b label_file: set label file (default %s)" "\n -s sampling_size: set number of randomly selected text samples (default 10)" "\n", prog_name, MODEL, LABEL ); exit( 0 );}void predict( svm_model *model, char **label_name, char *target_file ){ int num_tokens = 0; int num_lines = 0; int longest_line = 0; text_scan( target_file, &num_tokens, &num_lines, &longest_line ); char *line = Malloc( char, longest_line ); FILE *file_ptr = fopen( target_file, "r" ); char **sample = Malloc( char *, num_lines ); int count = 0; while( fgets( line, longest_line, file_ptr ) != NULL ) { if( text_not_blank( line ) ) { line[ strlen( line ) - 1 ] = '\0'; char *string = text_copy2( line ); int str_length = strlen( string ); if( str_length > sa_param.LOWER_BOUND_LENGTH ) { if( str_length > sa_param.UPPER_BOUND_LENGTH ) { char *new_string; if( ( new_string = Malloc( char, sa_param.UPPER_BOUND_LENGTH + 1 ) ) == NULL ) malloc_error( "predict", "new_string" ); strncpy( new_string, string, sa_param.UPPER_BOUND_LENGTH ); new_string[sa_param.UPPER_BOUND_LENGTH] = '\0'; free( string ); string = new_string; } sample[count++] = string; } } } fclose( file_ptr ); if( count == 0 ) { fprintf( stderr, "Unidentified\n" ); exit( 0 ); } if( count < sa_param.SAMPLING_SIZE ) sa_param.SAMPLING_SIZE = count; //fprintf( stderr, "sa_param.SAMPLING_SIZE=%d | count=%d\n", sa_param.SAMPLING_SIZE, count ); // random shuffle int i, j; int *perm = Malloc( int, count ); for( i = 0 ; i < count; i++ ) perm[i] = i; for( i = 0; i < count; i++ ) { j = i + rand() % ( count - i ); Swap( perm[i], perm[j] ); } Dict *lang_enc = ckh_construct_dict( 16 ); for( j = 0; j < sa_param.SAMPLING_SIZE; j++ ) { int str_length = strlen( sample[perm[j]] ); struct svm_node *x = Malloc( struct svm_node, str_length+1 ); char *tmp_str = sample[perm[j]]; //fprintf( stderr, "%d %d %s\n", j, perm[j], tmp_str ); for( i = 0; i < str_length; i++ ) { x[i].index = i; x[i].value = ( unsigned char )tmp_str[i]; } x[i++].index = -1; // predict this sample int v = ( int )svm_predict( model, x ); fprintf( stderr, "." ); //fprintf( stderr, "%d %s\n", v, label_name[v] ); if( !ckh_lookup( lang_enc, label_name[v] ) ) ckh_insert( lang_enc, label_name[v], 1 ); else ckh_increase_value( lang_enc, label_name[v] ); free( x ); } fprintf( stderr, "*\n" ); int max_score = -1; char *best_result = NULL; for( j = 0; j < lang_enc->table_size; j++ ) { if( lang_enc->T1[j].key != NULL && lang_enc->T1[j].value > max_score ) { max_score = lang_enc->T1[j].value; best_result = lang_enc->T1[j].key; } if( lang_enc->T2[j].key != NULL && lang_enc->T2[j].value > max_score ) { max_score = lang_enc->T2[j].value; best_result = lang_enc->T2[j].key; } } //fprintf( stderr, "%s (%d) \n", best_result, max_score ); printf( "%s\n", best_result ); ckh_destruct_dict( lang_enc ); free( sample ); free( perm ); }void run( int argc, char **argv ){ int i; sa_param.LOWER_BOUND_LENGTH = _LOWER_BOUND_LENGTH_; sa_param.UPPER_BOUND_LENGTH = _UPPER_BOUND_LENGTH_; sa_param.MODEL_FILE = text_copy( MODEL ); sa_param.LABEL_FILE = text_copy( LABEL ); sa_param.SAMPLING_SIZE = _SAMPLING_SIZE_; // parse options for( i = 1; i < argc; i++ ) { if( argv[i][0] != '-') break; ++i; switch( argv[i-1][1] ) { case 'l': sa_param.LOWER_BOUND_LENGTH = atoi( argv[i] ); break; case 'u': sa_param.UPPER_BOUND_LENGTH = atoi( argv[i] ); break; case 'm': sa_param.MODEL_FILE = text_copy( argv[i] ); break; case 'b': sa_param.LABEL_FILE = text_copy( argv[i] ); break; case 's': sa_param.SAMPLING_SIZE = atoi( argv[i] ); break; default: fprintf( stderr, "unknown option\n" ); exit_with_help( argv[0] ); } } if( i >= argc ) exit_with_help( argv[0] ); if( access( sa_param.MODEL_FILE, R_OK ) != 0 ) file_error( sa_param.MODEL_FILE ); if( access( sa_param.LABEL_FILE, R_OK ) != 0 ) file_error( sa_param.LABEL_FILE ); if( access( argv[i], R_OK ) != 0 ) file_error( argv[i] ); svm_model *model = svm_load_model( sa_param.MODEL_FILE ); int max_classname_length = 0; char **label_name = load_label( sa_param.LABEL_FILE, &max_classname_length ); predict( model, label_name, argv[i] ); svm_destroy_model( model );}int main( int argc, char **argv ){ if( argc < 2 ) exit_with_help( argv[0] ); run( argc, argv ); return( 0 );}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -