📄 am_string_array.c

📁 ADaM is a data mining and image processing toolkit
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
  Logistic Regression using Truncated Iteratively Re-weighted Least Squares
  (includes several programs)
  Copyright (C) 2005  Paul Komarek

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

  Author: Paul Komarek, komarek@cmu.edu
  Alternate contact: Andrew Moore, awm@cs.cmu.edu
*/

/*
 * 	File: am_string_array.c
 * 	Author(s):
 * 	Date:
 * 	Purpose: Growable array of heap-allocated strings (string_array),
 * 	         helpers that split a string into a string_array of tokens,
 * 	         and helpers that find the next "interesting" line of a file.
 */

#include "standard.h"
#include "ambs.h"
#include "amiv.h"
#include "am_string_array.h"
#include "amma.h"
#include "am_string.h"
#include "amdyv_array.h"

/* Counters bumped by mk_string_array() and free_string_array(). */
int String_Arrays_mallocked = 0;
int String_Arrays_freed = 0;

/* Value stored in string_array_code of every live string_array. */
#define STRING_ARRAY_CODE 20542

/* Makes a string_array with `size` elements. Every element is a freshly
   allocated copy of the placeholder "<UnDeFiNeD>". The caller owns the
   result and releases it with free_string_array(). */
string_array *mk_string_array(int size)
{
  string_array *result = AM_MALLOC(string_array);
  int i;

  result -> string_array_code = STRING_ARRAY_CODE;
  result -> size = size;
  result -> sarr_size = size;
  result -> sarr = AM_MALLOC_ARRAY(char_ptr,size);

  for ( i = 0 ; i < size ; i++ )
    result->sarr[i] = mk_copy_string("<UnDeFiNeD>");

  String_Arrays_mallocked += 1;
  return(result);
}

/* Frees every non-NULL element, the pointer table, and the string_array
   itself. The code field is overwritten with 7777 before the memory is
   released, so it no longer equals STRING_ARRAY_CODE. */
void free_string_array(string_array *sar)
{
  int i;

  sar -> string_array_code = 7777;

  for ( i = 0 ; i < sar->size ; i++ )
    if ( sar->sarr[i] != NULL )
      am_free_string(sar->sarr[i]);

  AM_FREE_ARRAY(sar->sarr,char_ptr,sar->sarr_size);
  AM_FREE(sar,string_array);
  String_Arrays_freed += 1;
}

/* Makes a copy of sa; each element is copied with string_array_set(). */
string_array *mk_copy_string_array(const string_array *sa)
{
  string_array *nsa = mk_string_array(string_array_size(sa));
  int i;

  for ( i = 0 ; i < string_array_size(sa) ; i++ )
    string_array_set(nsa,i,string_array_ref(sa,i));

  return(nsa);
}

/* Makes a string_array of `size` elements from `size` variadic arguments.
   Every variadic argument is read as a char* and copied in.
   XXX: no type checking; passing fewer than `size` strings, or anything
   that is not a char*, is undefined behaviour. */
string_array *mk_string_array_x( int size, ...)
{
  int i;
  char *s;
  va_list argptr;
  string_array *sa;

  sa = mk_string_array( size);

  va_start( argptr, size);
  for (i=0; i<size; ++i) {
    s = va_arg( argptr, char *);
    string_array_set( sa, i, s); /* Copies string into string array. */
  }
  va_end(argptr);

  return sa;
}

/* Returns a newly allocated char* table holding a copy (made with
   mk_copy_string) of every element of sa. The table has
   string_array_size(sa) entries and is allocated with AM_MALLOC_ARRAY. */
char **mk_array_from_string_array( string_array *sa)
{
  int size, i;
  char **array, *s;

  size = string_array_size( sa);
  array = AM_MALLOC_ARRAY( char *, size);

  for (i=0; i<size; ++i) {
    s = string_array_ref( sa, i);
    array[i] = mk_copy_string( s);
  }

  return array;
}

/* Prints sar to stream s, one "<m1>[<index>] = <value><m2>" item per
   element. NULL elements are printed as "NULL". A NULL or empty sar is
   reported with a single descriptive item instead. */
void fprintf_string_array(FILE *s,const char *m1,const string_array *sar,
                          const char *m2)
{
  int i;

  if (sar == NULL)
    fprintf(s,"%s = <NULL string array>%s",m1,m2);
  else
  {
    if ( string_array_size(sar) == 0 )
      fprintf(s,"%s = <empty string array>%s",m1,m2);
    else
    {
      for ( i = 0 ; i < string_array_size(sar) ; i++ )
        fprintf(s,"%s[%3d] = %s%s",
                m1,i,
                (string_array_ref(sar,i)==NULL) ? "NULL" : string_array_ref(sar,i),
                m2
                );
    }
  }
}

/* Returns the i'th element. The pointer is still owned by sar; no copy is
   made and no bounds check is performed. */
char *string_array_ref(const string_array *sar, int i)
{
  return(sar->sarr[i]);
}

/* Replaces the i'th element with a copy of value (value is COPIED in).
   The copy is made BEFORE the old element is freed, so it is safe for
   value to point at (or into) the element being replaced, e.g.
   string_array_set(sa,i,string_array_ref(sa,i)). No bounds check. */
void string_array_set(string_array *sar,int i,char *value)
{
  char *old_value = sar->sarr[i];
  char *new_value = mk_copy_string(value);

  if ( old_value != NULL )
    am_free_string(old_value);

  sar->sarr[i] = new_value;
}

/* Returns the number of elements currently in sar. */
int string_array_size(const string_array *sar)
{
  return(sar->size);
}

/* Makes room for one more element: when the pointer table is full it is
   replaced by a table of 2 + 2 * (old capacity) slots and the existing
   element pointers are moved across. Slots at index >= size are left
   uninitialized. */
static void string_array_make_room(string_array *sa)
{
  if ( sa -> size == sa -> sarr_size )
  {
    int new_sarr_size = 2 + 2 * sa->sarr_size;
    char **new_sarr = AM_MALLOC_ARRAY(char_ptr,new_sarr_size);
    int i;

    for ( i = 0 ; i < sa->size ; i++ )
      new_sarr[i] = sa->sarr[i];

    AM_FREE_ARRAY(sa->sarr,char_ptr,sa->sarr_size);
    sa -> sarr_size = new_sarr_size;
    sa -> sarr = new_sarr;
  }
}

/* only for people who really want to save time allocating memory.
   after calling this, you should forget about the memory in string without
   freeing it: the pointer itself is stored as the new last element and
   will be released by the string_array. */
void add_to_string_array_no_copy(string_array *sa, char *string)
{
  string_array_make_room(sa);
  sa -> size += 1;
  sa -> sarr[sa->size-1] = string;
}

/* Appends a copy of string as the new last element. A NULL string is
   stored as a NULL element. */
void add_to_string_array(string_array *sa, const char *string)
{
  string_array_make_room(sa);
  sa -> size += 1;
  sa -> sarr[sa->size-1] = (string==NULL) ? NULL : mk_copy_string(string);
}

/* Synonym for add_to_string_array(). */
void string_array_add(string_array *sa,char *string)
{
  add_to_string_array(sa,string);
}

/* Appends the len characters starting at start as a new element.
   The characters are staged in a temporary NUL-terminated buffer, which is
   copied by add_to_string_array() and then released. */
static void add_substring_to_string_array(string_array *sa,
                                          const char *start, int len)
{
  int buf_size = len + 1;
  char *buf = AM_MALLOC_ARRAY(char,buf_size);
  int k;

  for ( k = 0 ; k < len ; k++ )
    buf[k] = start[k];
  buf[len] = '\0';

  add_to_string_array(sa,buf);
  AM_FREE_ARRAY(buf,char,buf_size);
}

/* Removes (and frees) the element at idx and shifts the later elements
   down by one. Calls my_error() if sa is empty or idx is out of range. */
void string_array_remove(string_array* sa, int idx)
{
  int i;

  if ( string_array_size(sa) <= 0 )
    my_error("string_array_remove: empty string_array");
  if ( idx < 0 || idx >= string_array_size(sa) )
    my_error("string_array_remove: bad index");

  /* WKW - New version shuffles the pointers over by one instead
     of using string_array_set, which copies the (i+1)th element into the
     ith spot and then frees the (i+1)th element.  This is faster and
     it avoids some memory bugs in the old version */
  if( sa->sarr[idx] != NULL )
  {
    am_free_string(sa->sarr[idx]);
    sa->sarr[idx]=NULL;
  }

  for( i = idx ; i < (sa->size - 1); i++ )
    sa->sarr[i] = sa->sarr[i+1];

  sa->sarr[sa->size-1] = NULL; /* Might help catch some weird errors */
  sa -> size -= 1;
}

/* Splits string into tokens and returns them as a new string_array.
   A NULL or empty string gives an empty array.

   Characters for which is_sepper(c,seppers) is true separate tokens, and
   runs of them are skipped. A double quote that is not preceded by an odd
   number of consecutive backslashes opens a quoted region; inside it
   separators do NOT end the token, and the next such double quote closes
   it. The quote and backslash characters themselves are kept in the token.

   NOTE(review): the meaning of a NULL seppers is decided by is_sepper(),
   which is not visible here - presumably a default whitespace set. */
string_array *mk_broken_string_using_seppers(const char *string, const char *seppers)
{
  string_array *sa = mk_string_array(0);
  int i = 0;

  while ( string != NULL && string[i] != '\0' )
  {
    int j = 0; /* length of the token that starts at string[i] */

    /* Skip leading separators, never stepping past the terminator. */
    while ( string[i] != '\0' && is_sepper(string[i],seppers) )
      i += 1;

    if ( string[i] != '\0' )
    {
      int backslashcount = 0; /* consecutive backslashes just before i+j */
      char stopchar = ' ';    /* ' ' = outside quotes, '\"' = inside quotes */

      while ( string[i+j] != '\0' )
      {
        if ( stopchar == ' ' )
        {
          if ( is_sepper(string[i+j],seppers) )
            break;
          else if ( (string[i+j]=='\"') && !(backslashcount%2) )
            stopchar = '\"';
        }
        else if ( stopchar == '\"' )
        {
          /* bug fix? this used to say stopchar = '\n' which made it put the
             whole rest of the line into one string once it had seen a double
             quote.  Now it only includes up to the next double quote.
             8/24/99 JGS */
          if ( (string[i+j] == '\"') && !(backslashcount%2) )
            stopchar = ' ';
        }

        if ( string[i+j] == '\\' ) backslashcount++;
        else                       backslashcount = 0;

        j++;
      }

      add_substring_to_string_array(sa,string+i,j);
    }

    i = i+j;
  }

  return(sa);
}

/* Splits string at EVERY occurrence of any character in seppers (found
   with strpbrk), with no quote handling and no collapsing of adjacent
   separators, so "a,,b" split on "," gives "a", "" and "b". The text after
   the last separator (possibly empty) is always added as the final element.

   The input string is not modified. A NULL string gives an empty array.
   seppers must be non-NULL. */
string_array *mk_broken_string_using_seppers_only(char *string,char *seppers)
{
  string_array *sa = mk_string_array(0);
  const char *field = string;
  const char *p;

  if ( string == NULL )
    return sa;

  while ( (p = strpbrk(field,seppers)) != NULL )
  {
    add_substring_to_string_array(sa,field,(int)(p - field));
    field = p+1;
  }
  add_to_string_array(sa,field);

  return sa;
}

/* Same as mk_broken_string_using_seppers() with seppers == NULL. */
string_array *mk_broken_string(const char *string)
{
  string_array *result = mk_broken_string_using_seppers(string,NULL);
  return(result);
}

/* Passes string through mk_quoteless_string() and then breaks the result
   as mk_broken_string() would. The intermediate string is freed. */
string_array *mk_broken_quoteless_string(char *string)
{
  char *quoteless = mk_quoteless_string(string);
  string_array *result = mk_broken_string_using_seppers(quoteless,NULL);
  free_string(quoteless);
  return(result);
}

/* Makes a string_array whose i'th element is a copy of element
   ivec_ref(indices,i) of sa. */
string_array *mk_string_array_subset(string_array *sa,ivec *indices)
{
  int size = ivec_size(indices);
  string_array *result = mk_string_array(size);
  int i;

  for ( i = 0 ; i < size ; i++ )
    string_array_set(result,i,string_array_ref(sa,ivec_ref(indices,i)));

  return result;
}

/* Returns the lowest index whose element satisfies eq_string(string,elt),
   or -1 if there is none. */
int find_index_in_string_array(const string_array *sa, const char *string)
{
  int result = -1;
  int i;

  for ( i = 0 ; i < string_array_size(sa) && result < 0 ; i++ )
    if ( eq_string(string,string_array_ref(sa,i)) )
      result = i;

  return(result);
}

/* TRUE iff find_index_in_string_array() finds string in sa. */
bool string_array_member(string_array *sa,char *string)
{
  return(find_index_in_string_array(sa,string)>=0);
}

/************* NEW LINE PARSING CODE *************/

/* If line_format is WHITESPACE then the line is read SPACE_STYLE
   if line_format is COMMA      then the line is read COMMA_STYLE
   if line_format is BAR        then the line is read BAR_STYLE
   if line_format is ANY        then
        count the number of unquoted commas on the line (n_comma)
        and count the number of unquoted bars on the line (n_bar)
        if ( n_comma == 0 && n_bar == 0 ) use SPACE_FORMAT
        if ( n_comma >= n_bar )           use COMMA_FORMAT
        if ( n_bar > n_comma )            use BAR_FORMAT

   The line parser runs through a finite state machine. On
   each character it looks at the character type:

     S Space       - The character is the ' ' char
     C Comma       - The character is the ',' char
     A SingleQuote - The character is the '\'' char
     Q DoubleQuote - The character is the '\"' char
     T Token       - The character is something other than the above

   The line parser is building up an array of tokens. It begins with
   an empty array of tokens. It has a current token being built. It begins
   with the current token empty. After each character is read, it performs
   one of the following actions:

     ADD   Add the current token to the array. Set the current token to empty
     PUSH  Put the current character at the end of the current token
     NIL   Do nothing
     DASH  Put a dash character at the end of the current token
     DP    Put a dash, then the current character at end of token
     UNKN  Add the UNKNOWN_STRING to the array. Clear current token

  COMMA_STYLE parsing:

       All whitespace to immediate left and right of commas is removed.
       All other contiguous blocks of whitespace are replaced with - symbols
         (outside quotes, N contiguous spaces are replaced with one -.
          inside quotes, N contiguous spaces are replaced with N -'s)
       The resulting tokens between commas are used.
       Empty string between commas is turned into UNKNOWN STRING

  BAR_STYLE parsing:

       Just like commas, except use bar (|) symbol instead of , symbol

  SPACE_STYLE parsing:

       All whitespace inside quotes are turned to dashes
       All other CONTIGUOUS blocks of whitespace are collapsed to one space
       Then the resulting tokens between whitespaces are used.
*/

/* Returns TRUE iff all characters in line_string are c (note special
   case: returns TRUE if string has length zero) */
bool line_contains_only_character_c(char *line_string,char c)
{
  bool result = TRUE;
  int i;

  for ( i = 0 ; result && line_string[i] != '\0' ; i++ )
    result = line_string[i] == c;

  return result;
}

/* Returns TRUE iff all characters in line_string are '-' (note special
   case: returns TRUE if string has length zero) */
bool line_contains_only_dashes(char *line_string)
{
  return line_contains_only_character_c(line_string,'-');
}

/* A line is interesting if all of the following hold:
     - it contains a character other than ' ', '\t' and '\r';
     - the leftmost such character is not '#', unless it is immediately
       followed by another '#' (the special sequence '##' is allowed as a
       "machine readable comment");
     - the line does not consist solely of '-' characters. */
bool line_string_is_interesting(char *line_string)
{
  int i;
  char first_non_whitespace = ' '; /* ' ' means "none found yet" */
  char char_after_first = ' ';     /* character right after the one above */
  bool result;

  for ( i = 0 ; first_non_whitespace == ' ' && line_string[i] != '\0' ; i++ )
  {
    if ( line_string[i] != ' ' && line_string[i] != '\t' &&
         line_string[i] != '\r' )
    {
      first_non_whitespace = line_string[i];
      /* At worst this reads the terminating '\0'. */
      char_after_first = line_string[i+1];
    }
  }

  result = ( first_non_whitespace != ' ' );

  /* we allow the special sequence '##' to be a "machine readable comment" */
  if ( (first_non_whitespace == '#') && (char_after_first != '#') )
    result = FALSE;

  if ( result && line_contains_only_dashes(line_string) )
    result = FALSE;

  return(result);
}

/* Reads lines with mk_string_from_line() until one satisfies
   line_string_is_interesting(), and returns that line; the caller must
   free it with free_string(). Uninteresting lines are freed and skipped.
   Returns NULL as soon as mk_string_from_line() returns NULL (presumably
   at end of file).

   *line_number is incremented once for every call to mk_string_from_line(),
   including a final call that returns NULL. */
char *mk_next_interesting_line_string(PFILE *s,int *line_number)
{
  char *line_string = NULL;
  bool finished = FALSE;

  while ( !finished )
  {
    line_string = mk_string_from_line(s);
    *line_number += 1;

    if ( line_string == NULL || line_string_is_interesting(line_string) )
      finished = TRUE;
    else
    {
      free_string(line_string);
      line_string = NULL;
    }
  }

  return(line_string);
}

/* As above, except that the resulting line is broken into a string_array
   of tokens with mk_broken_string(). Returns NULL if no line was found. */
string_array *mk_next_interesting_line(PFILE *s,int *line_number)
{
  char *str = mk_next_interesting_line_string(s,line_number);
  string_array *sa = (str == NULL) ? NULL : mk_broken_string(str);

  if ( str != NULL ) free_string(str);

  return(sa);
}
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -