📄 text.c
字号:
/*** Copyright (C) 2006 Thai Computational Linguistics Laboratory (TCL)** National Institute of Information and Communications Technology (NICT)** Canasai Kruengkrai <canasai xx gmail yy com, where xx=at and yy=dot>**** This file is part of the `libs' library.**** This library is free software; you can redistribute it and/or modify** it under the terms of the GNU General Public License as published by** the Free Software Foundation; either version 2 of the License, or** (at your option) any later version.**** This program is distributed in the hope that it will be useful,** but WITHOUT ANY WARRANTY; without even the implied warranty of** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the** GNU General Public License for more details.**** You should have received a copy of the GNU General Public License** along with this program; if not, write to the Free Software** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include "text.h"#define Malloc( type, n ) ( type * )malloc( ( n ) * sizeof( type ) )#define malloc_error( str ) fprintf( stderr, "ERROR: In function `%s': Could not allocate memory\n", str ), exit( 1 )#define file_error( str1, str2 ) fprintf( stderr, "ERROR: In function `%s': Could not open file `%s'\n", str1, str2 ), exit( 1 )char *text_append_with_space( char *w1, char *w2 ){ char *result; int str_len = strlen( w1 ) + strlen( w2 ) + 2; if( ( result = Malloc( char, str_len ) ) == NULL ) malloc_error( "text_append_with_space" ); snprintf( result, str_len, "%s %s", w1, w2 ); return( result );}char *text_append_with_char( char *w1, char *w2, char w3 ){ char *result; int str_len = strlen( w1 ) + strlen( w2 ) + 2; if( ( result = Malloc( char, str_len ) ) == NULL ) malloc_error( "text_append_with_char" ); snprintf( result, str_len, "%s%c%s", w1, w3, w2 ); return( result );}char *text_append( char *w1, char *w2 ){ char *result; int str_len = strlen( w1 ) + strlen( w2 ) + 1; if( ( result = Malloc( char, str_len ) ) == NULL ) malloc_error( "text_append" ); snprintf( result, str_len, "%s%s", w1, w2 ); return( result );}char *text_sub_string_itself( char *old_string, int start, int end ){ char *new_string = text_sub_string( old_string, start, end ); free( old_string ); return( new_string );}char *text_append_itself_with_prefix( char *prefix, char *str_itself ){ char *new_str_itself = text_append( prefix, str_itself ); free( str_itself ); return( new_str_itself );}char *text_append_itself_with_suffix( char *str_itself, char *suffix ){ char *new_str_itself = text_append( str_itself, suffix ); free( str_itself ); return( new_str_itself );}char *text_copy( char *str ){ char *result; int str_len = strlen( str ) + 1; if( ( result = Malloc( char, str_len ) ) == NULL ) malloc_error( "text_copy" ); strncpy( result, str, str_len ); return( result );}char *text_copy2( char *str ) { char *result; while( *str == ' ' || *str == '\t' ) ++str; int str_len = strlen( str ) + 1; if( ( result = Malloc( char, str_len ) ) == NULL ) malloc_error( "text_copy2" ); strncpy( result, str, str_len ); return( result );}char *text_copy3( char *str ){ char *result; int str_len = strlen( str ) + 1; if( ( result = Malloc( char, str_len ) ) == NULL ) malloc_error( "text_copy3" ); strncpy( result, str, str_len ); register char *i; for( i = result; *i; i++ ) { if( iscntrl( *i ) || isspace( *i ) ) { *i = '\0'; break; } } return( result ); }char *text_copy4( char *str ){ char *result; int str_len = strlen( str ) + 1; if( ( result = Malloc( char, str_len ) ) == NULL ) malloc_error( "text_copy4" ); strncpy( result, str, str_len ); register char *i; for( i = result; *i ; i++ ) { if( iscntrl( *i ) && *i != '\t' ) { *i = '\0'; break; } } return( result ); }int text_not_blank( char *str ){ char *tmp; char c; tmp = str; while( *tmp != '\0' ) { c = *tmp; if( !isspace( c ) ) return( 1 ); ++tmp; } return( 0 );}int text_num_words( char *str ){ int count, count2; count = 0; count2 = 0; while( str[ count ] != '\0' && isspace( str[ count ] ) ) ++count; while( str[ count++ ] != '\0') { if( isspace( str[ count - 1 ] ) ) { ++count2; while( isspace( str[ count ] ) ) ++count; if( str[ count ] == '\0' ) --count2; } } return( count2 );}int text_num_spaces( char *str ){ int count, count2; count2 = 0; for( count = 0; count < strlen( str ); ++count ) if( isspace( str[ count ] ) ) ++count2; return( count2 );}char **text_split_independent( char *str ){ char **new_str; char *tmp; int cntr = 0; char *str1 = text_copy( str ); char *str2 = str1; while( *str1 == ' ' || *str1 == '\t' ) ++str1; if( ( new_str = Malloc( char *, text_num_spaces( str1 ) + 3 ) ) == NULL ) malloc_error( "text_split_independent" ); new_str[cntr++] = text_copy( ( char * )strtok( str1, "\t " ) ); while( ( tmp = strtok( NULL, "\t " ) ) != NULL ) { new_str[cntr] = text_copy( tmp ); ++cntr; } new_str[cntr] = NULL; free( str2 ); return( new_str );}char **text_split_with_delimiter( char *str, char *delimiter ){ char **new_str; int cntr = 0; int count = 0; char *tmp = text_copy( str ); char *tmp2 = text_copy( ( char * )strtok( tmp, delimiter ) ); ++count; while( ( tmp2 = strtok( NULL, delimiter ) ) != NULL ) ++count; free( tmp ); if( ( new_str = Malloc( char *, count + 3 ) ) == NULL ) malloc_error( "text_split_with_delimiter" ); char *tmp3 = text_copy( str ); new_str[cntr++] = ( char * )text_copy( ( char * )strtok( tmp3, delimiter ) ); while( ( tmp = ( char * )strtok( NULL, delimiter ) ) != NULL ) { new_str[cntr] = ( char * )text_copy( tmp ); ++cntr; } new_str[cntr] = NULL; free( tmp3); return( new_str );}char **text_explode( char *str, char *delimiter ){ int count = 0; char *tmp_ptr; char **new_str; int cntr = 0; char *str1 = text_copy( str ); char *str2 = str1; while( ( tmp_ptr = strstr( str1, delimiter ) ) != NULL ) { *tmp_ptr = '\0'; tmp_ptr += strlen( delimiter ); count++; str1 = tmp_ptr; } free( str2 ); if( ( new_str = Malloc( char *, count + 2 ) ) == NULL ) malloc_error( "text_explode" ); str1 = text_copy( str ); str2 = str1; while( ( tmp_ptr = strstr( str1, delimiter ) ) != NULL ) { *tmp_ptr = '\0'; tmp_ptr += strlen( delimiter ); new_str[cntr++] = text_copy( str1 ); str1 = tmp_ptr; } new_str[cntr++] = text_copy( str1 ); new_str[cntr++] = NULL; free( str2 ); return( new_str );}char *text_make_lower( char *str ){ register char *i; char *result; int str_len = strlen( str ) + 1; if( ( result = Malloc( char, str_len ) ) == NULL ) malloc_error( "text_make_lower" ); strncpy( result, str, str_len ); for( i = result; *i ; i++ ) if( isupper( *i ) ) *i = tolower( *i ); return( result ); } char *text_make_upper( char *str ){ register char *i; char *result; int str_len = strlen( str ) + 1; if( ( result = Malloc( char, str_len ) ) == NULL ) malloc_error( "text_make_upper" ); strncpy( result, str, str_len ); for( i = result; *i ; i++ ) if( islower( *i ) ) *i = toupper( *i ); return( result ); }char *text_sub_string( char *str, int start, int end ){ char *sub_str; int str_len = end - start + 1; if( str_len <= 0 ) { fprintf( stderr, "ERROR: In function `text_sub_string': str_len = %d\n", str_len ); exit( 0 ); } if( ( sub_str = Malloc( char, str_len ) ) == NULL ) malloc_error( "text_sub_string" ); int i = 0, j; for( j = start; j < end; j++ ) { sub_str[i++] = str[j]; } sub_str[i] = '\0'; return( sub_str );}void text_scan( char *file_name, int *num_tokens, int *num_lines, int *longest_line ){ FILE *file_ptr; int tmp, token_count, line_count, ic; char c; if( ( file_ptr = fopen( file_name, "r" ) ) == NULL ) file_error( "text_scan", file_name ); (*longest_line) = 0; (*num_tokens) = 0; (*num_lines) = 0; tmp = 0; token_count = 0; line_count = 0; while( ( ic = getc( file_ptr ) ) != EOF ) { c = ( char )ic; if( c != '\n' ) tmp++; if( c == '\n' ) { line_count++; token_count += 2; // add two tokens for newline if( tmp > (*longest_line) ) (*longest_line) = tmp; tmp = 0; } if( isspace( c ) ) token_count += 2; // for a text segment and its space } // Ckeck the last token if the file does not end with any signal if( tmp != 0 ) { fprintf( stderr, "\nERROR: In function `text_scan': No newline at the end of file `%s'\n\n", file_name ); exit( 0 ); } (*longest_line) += 10; (*num_tokens) = token_count; (*num_lines) = line_count; fclose( file_ptr );}int text_found_char_in( char *any_string ){ unsigned char *i; for( i = any_string; *i; i++ ) { if( isalpha( *i ) ) return( 1 ); } return( 0 );}int text_is_digit( char *any_string ){ unsigned char *i; for( i = any_string; *i; i++ ) { if( !isdigit( *i ) ) return( 0 ); } return( 1 );}void text_string_scan( char *file_name, int *num_tokens, int *num_lines, int *longest_line, int *num_chars ){ FILE *file_ptr; int tmp, token_count, line_count, ic, nc; char c; (*longest_line) = 0; (*num_tokens) = 0; (*num_lines) = 0; (*num_chars) = 0; if( ( file_ptr = fopen( file_name, "r" ) ) == NULL ) file_error( "text_string_scan", file_name ); tmp = token_count = line_count = nc = 0; while( ( ic = getc( file_ptr ) ) != EOF ) { c = (char)ic; if( c != '\n' ) tmp++; if( c == '\n' ) { nc += ( tmp + 50 ); line_count++; token_count += 2; // add two tokens for newline if( tmp > (*longest_line) ) (*longest_line) = tmp; tmp = 0; } if( isspace( c ) ) token_count += 2; // for a text segment and its space } // Ckeck the last token if the file does not end with any signal if( tmp != 0 ) { fprintf( stderr, "\nERROR: In function `text_string_scan': No newline at the end of file `%s'\n\n", file_name ); exit( 0 ); } (*longest_line) += 10; (*num_tokens) = token_count; (*num_lines) = line_count; (*num_chars) = nc; fclose( file_ptr );}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -