📄 sofp_05.cc

📁 这是一个从音频信号里提取特征参量的程序
💻 CC
📖 第 1 页 / 共 2 页
字号:
12 下一页
// file: $isip/class/io/SofParser/sofp_05.cc// version: $Id: sofp_05.cc,v 1.8 2002/12/17 16:35:31 parihar Exp $// // isip include files//#include "SofParser.h"// method: preParse//// arguments://  long& out_len: (output) size of clean entry//  unichar* buffer: (input/output) parameter file text//  long in_len: (input) size of buffer//  long offset: (input) where to set absolute positions from//// return: a boolean value indicating status//// this method pre parses the buffer to remove comments, and excess// whitespace, handle literal characters and quotation marks, and// checks to make sure that there are no hanging quotation marks//// this function runs off of a state machine, see documentation// for a diagram if interested.//boolean SofParser::preParse(long& out_len_a, unichar* buffer_a,			    long in_len_a, long offset_a) {    unichar* buff_p;  unichar* fixed_buff_p;    // buff_p: the pointer to the original buffer that is read from  // fixed_buff_p: the pointer that is written to.  //  // note: we can both read at buff_p and write at fixed_buff_p  //       since buff_p >= fixed_buff_p and we always read before  //       we write  //  buff_p = buffer_a;  fixed_buff_p = buffer_a;  SysChar c;  long current_pos = 0;  // go through the state machine  //  while (((long)buff_p - (long)buffer_a)/(long)sizeof(unichar) < in_len_a) {        current_pos = ((long)buff_p - (long)buffer_a) / sizeof(unichar) + offset_a;    c.assign((unichar)*buff_p);    // give debug information for the state machine    //    if (debug_level_d > Integral::DETAILED) {      SysChar fixed((unichar)*fixed_buff_p);      SysString numeric(current_pos);            SysString message(L"*buff=");      numeric.assign(c);      message.concat(numeric);      message.concat(L", *fixed=");      numeric.assign(fixed);      message.concat(numeric);      message.concat(L", pos=");      message.concat(numeric);            numeric.assign(out_len_a);      message.concat(L", out_len=");      message.concat(numeric);      setString(numeric, state_d);      message.concat(L", state=");      message.concat(numeric);            numeric.assign(statement_last_token_d);      message.concat(L", last_tok=");      message.concat(numeric);            numeric.assign(token_count_d);      message.concat(L", tok_cnt=");      message.concat(numeric);            Console::put(message);    }    // branch on state    //    //----------------------------------    // state: no good previous input (initial state)    //----------------------------------    //    if (state_d == NO_GPI) {            // branch on input      //            // input is whitespace, no change of state, no output      //      if (c.isSpace()) {		// nothing is done here	//      }      // input is the terminal character, null valid statement      //      else if (c.eq(terminator_char_d)) {	return false;      }            // input is comment operator, go into ngpi comment state, no output      //      else if (c.eq(comment_char_d)) {	state_d = COMMENT_OP_NO_GPI;      }            // input is quote operator, go into quote state, output      //      else if (c.eq(QUOTE_CHAR)) {	state_d = QUOTE_OP;      }            // input is literal operator, go into literal state, no output      //      else if (c.eq(LITERAL_CHAR)) {	state_d = LITERAL_OP;      }      // input is a block_start character, do nothing      //      else if (c.eq(blockstart_char_d)) {	// if we are nested, the first block-start encountered	// is not output, and we set the implicit_block flag	//	if (nest_d && (!implicit_block_d)) {	  implicit_block_d = true;	}	// else we need to worry about this block-start char	//	else {	  // if we are within the token range, output char	  //	  if ((token_count_d >= token_start_d)	      && (token_count_d < token_stop_d)) {	    *fixed_buff_p = *buff_p;	    fixed_buff_p++;	  }	  	  // everything within this block is left for the sub-object to parse	  //	  block_count_d++;	  statement_last_token_d = current_pos;	  state_d = BLOCK_GPI;	}      }      // input is a block_stop character, decrease block_count but      // stay in nogpi state      //      else if (c.eq(blockstop_char_d)) {	// if we dug through one level of brackets, then hitting a	// blockstop means the data is null	//	if (nest_d && implicit_block_d) {	  out_len_a = 0;	  *fixed_buff_p = NULL_CHAR;	  // valid read	  //	  return true;	}	// any other time a blockstop char means error	//	else {	  return false;	}      }            // else copy character directly, go into gpi state      //      else {	if ((token_count_d >= token_start_d)&&(token_count_d < token_stop_d)) {	  *fixed_buff_p = *buff_p;	  fixed_buff_p++;	}		if (c.eq(delimiter_char_d)) {	  	  // just increment the counter, never output the first or last	  // delimiter character	  //	  token_count_d++;	}	// we wanted to break into a nest, we didn't	//	if (nest_d && (!implicit_block_d)) {	  // let's figure we were tokenizing so we never hit the open	  // brace	  //	  implicit_block_d = true;	}		// if this is an implicit parameter, the first character	// parsed is the first character of data	//	if (implicit_object_d) {	  	  // good input received, go into GPI state and set last_token	  //	  statement_last_token_d = current_pos;	  statement_asgn_d = current_pos;	}		// go into GPI state	//	state_d = GPI;      }    }    //----------------------------------    // state: comment operator in ngpi state    //----------------------------------    //    else if (state_d == COMMENT_OP_NO_GPI) {            // go back to no gpi state on newline, stay in this state if      // not, no output in either case      //      if (c.eq(NEWLINE_CHAR)) {	state_d = NO_GPI;      }    }        //----------------------------------    // state: literal operator    //----------------------------------    //    else if (state_d == LITERAL_OP) {      // if tokenizing, only output if we have surpassed token_start_d      //      if ((token_count_d >= token_start_d) && (token_count_d < token_stop_d)) {		// regardless of input, output the character and go to gpi	// state,	//	*fixed_buff_p = *buff_p;	fixed_buff_p++;      }            // if we are in a block, go back to BLOCK_GPI, else GPI      //      if (block_count_d == 0) {		// in case the first good character is a literal	//	if (implicit_object_d && (statement_last_token_d < 0)) {	  statement_last_token_d = current_pos;	  statement_asgn_d = current_pos;	}	state_d = GPI;      }      else {	state_d = BLOCK_GPI;      }    }    //----------------------------------    // state: gpi    //----------------------------------    //    else if (state_d == GPI) {            // make sure statement_asgn_d is set for implicit      //      if (implicit_object_d && (statement_asgn_d == -1)) {	return Error::handle(name(), L"preParse - error parsing rvalue", ERR_RVALUE, __FILE__, __LINE__);      }            // branch on input      //            // literal character, go into literal state, no output      //      if (c.eq(LITERAL_CHAR)) {	state_d = LITERAL_OP;      }            // quote character, go into quote state, no output      //      else if (c.eq(QUOTE_CHAR)) {	state_d = QUOTE_OP;      }      // comment operator, go into gpi comment state, output space      //      else if (c.eq(comment_char_d)) {	// only output if we are in the token range	//	if ((token_count_d >= token_start_d)&&(token_count_d < token_stop_d)) {	  *fixed_buff_p = SPACE_CHAR;	  fixed_buff_p++;	}	state_d = COMMENT_OP_GPI;      }      // block start character, increment block start, output      //      else if (c.eq(blockstart_char_d)) {	if ((token_count_d >= token_start_d)&&(token_count_d < token_stop_d)) {	  *fixed_buff_p = *buff_p;	  fixed_buff_p++;	}	block_count_d++;	state_d = BLOCK_GPI;      }      // block stop character (but not in block_gpi state), only if      // are nesting was set should this be valid      //      //      else if (c.eq(blockstop_char_d)) {		if (nest_d && implicit_block_d && implicit_object_d) {	  out_len_a = ((long)fixed_buff_p - (long)buffer_a) / sizeof(unichar);	  state_d = NO_GPI;	  *fixed_buff_p = NULL_CHAR;	  // used to be token_count > 0	  //	  if (open_index_d && (statement_last_token_d > 0)) {	    // add the last token	    //	    if (token_count_d > 2) {	      if (!index_d.addQuick(param_d, token_count_d + 1,				    statement_last_token_d,				    current_pos - statement_last_token_d)) {		reportIndexError(param_d, token_count_d + 1,				 statement_last_token_d);		return Error::handle(name(), L"preParse - error adding the last token", SofList::ERR, __FILE__, __LINE__, Error::WARNING);	      }	    }	    else {	      if (!index_d.add(param_d, token_count_d + 1,			       statement_last_token_d,			       current_pos - statement_last_token_d)) {		reportIndexError(param_d, token_count_d + 1,				 statement_last_token_d);		return Error::handle(name(), L"preParse - error adding the last token", SofList::ERR, __FILE__, __LINE__, Error::WARNING);	      }	    }	  }	 	  implicit_object_d = false;	  implicit_block_d = false;	  state_d = NO_GPI;	  statement_term_d = current_pos;	  // valid parse	  //	  return true;	}		// invalid parse	//	*fixed_buff_p++ = blockstop_char_d;	return false;      }      // terminal character, output it, return the length of the buffer      //      else if (c.eq(terminator_char_d)) {	// if we hit a terminal character, we better not be looking	// for a nested block end	//	if (nest_d && implicit_block_d && implicit_object_d) {	  // bad parse	  //	  *fixed_buff_p = NULL_CHAR;	  return false;	}		// always output the terminal character, regardless of token	//	*fixed_buff_p = terminator_char_d;	fixed_buff_p++;	// add the last token if last char was not assignment char	//	if (open_index_d && (statement_last_token_d > 0)	    && (*(fixed_buff_p - 2) != assignment_char_d)) {	  	  // add the last token	  //	  if (token_count_d > 2) {	    if (!index_d.addQuick(param_d, token_count_d + 1,				  statement_last_token_d,				  current_pos - statement_last_token_d)) {	      reportIndexError(param_d, token_count_d + 1,			       statement_last_token_d);	      return Error::handle(name(), L"preParse - error adding the last token", SofList::ERR, __FILE__, __LINE__, Error::WARNING);	    }	  }	  else {	    if (!index_d.add(param_d, token_count_d + 1,			     statement_last_token_d,			     current_pos - statement_last_token_d)) {	      reportIndexError(param_d, token_count_d + 1,			       statement_last_token_d);	      return Error::handle(name(), L"preParse - error adding the last token", SofList::ERR, __FILE__, __LINE__, Error::WARNING);	    }	  }	}		statement_term_d = current_pos;	out_len_a = ((long)fixed_buff_p - (long)buffer_a) / sizeof(unichar);	state_d = NO_GPI;	// strip the terminal character from the string	//	*(fixed_buff_p - 1) = NULL_CHAR;	return true;      }            // first assignment character, set values, output      //      else if (c.eq(assignment_char_d)) {	// possibly bad parse	//	if ((statement_asgn_d != -1) || implicit_object_d) {	  *fixed_buff_p = NULL_CHAR;	  return false;	}		// assign the name of the parameter	//	statement_asgn_d = current_pos + 1;	statement_last_token_d = statement_asgn_d;	token_count_d = 0;	long clen = ((long)fixed_buff_p - (long)buffer_a) / sizeof(unichar);	*fixed_buff_p = *buff_p;	if (!assignName(buffer_a, clen)) {	  return Error::handle(name(), L"preParse - error parsing lvalue", ERR_LVALUE, __FILE__, __LINE__, Error::WARNING);	}	fixed_buff_p++;      }	      // whitespace character, go to gpi/lws state, output space      //      else if (c.isSpace()) {		// only output if in current token range	//	if ((token_count_d >= token_start_d)&&(token_count_d < token_stop_d)) {	  *fixed_buff_p = SPACE_CHAR;	  fixed_buff_p++;	  state_d = GPI_LWS;	}		if (delimiter_char_d.eq(SPACE_CHAR)) {	  // do nothing	  //	}      }            // else copy character directly, no change in state      //      else {	// only output within token limits	//	if ((token_count_d >= token_start_d)&&(token_count_d < token_stop_d)) {	  *fixed_buff_p = *buff_p;	  fixed_buff_p++;	}		// add a token	//	if (c.eq(delimiter_char_d)) {	  token_count_d++;	  if (open_index_d && (param_d >= 0) && (statement_last_token_d>=0)) {	    if (token_count_d > 2) {	      if (!index_d.addQuick(param_d, token_count_d,				    statement_last_token_d,				    current_pos - statement_last_token_d)) {		reportIndexError(param_d, token_count_d,				 statement_last_token_d);		return Error::handle(name(), L"preParse - error adding the token", SofList::ERR, __FILE__, __LINE__, Error::WARNING);	      }	    }	    else {	      if (!index_d.add(param_d, token_count_d, statement_last_token_d,			       current_pos - statement_last_token_d)) {		reportIndexError(param_d, token_count_d,				 statement_last_token_d);		return Error::handle(name(), L"preParse - error adding the token", SofList::ERR, __FILE__, __LINE__, Error::WARNING);	      }	    }	    statement_last_token_d = current_pos + 1;	  }	}      }    }                                          // end state_d == GPI        //----------------------------------    // state: quotation operator    //----------------------------------    //    else if (state_d == QUOTE_OP) {            // branch on input      //            // quote operator again, output it and go to gpi state      //      if (c.eq(QUOTE_CHAR)) {	if (block_count_d == 0) {	  state_d = GPI;	  	}	else {	  state_d = BLOCK_GPI;	  	}      }            // literal operator, go into quote literal state, no output      //      else if (c.eq(LITERAL_CHAR)) {	state_d = LITERAL_OP_IN_QUOTE;      }      // else stay in this state copying characters over
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -