📄 sofp_05.cc
字号:
// file: $isip/class/io/SofParser/sofp_05.cc// version: $Id: sofp_05.cc,v 1.8 2002/12/17 16:35:31 parihar Exp $// // isip include files//#include "SofParser.h"// method: preParse//// arguments:// long& out_len: (output) size of clean entry// unichar* buffer: (input/output) parameter file text// long in_len: (input) size of buffer// long offset: (input) where to set absolute positions from//// return: a boolean value indicating status//// this method pre parses the buffer to remove comments, and excess// whitespace, handle literal characters and quotation marks, and// checks to make sure that there are no hanging quotation marks//// this function runs off of a state machine, see documentation// for a diagram if interested.//boolean SofParser::preParse(long& out_len_a, unichar* buffer_a, long in_len_a, long offset_a) { unichar* buff_p; unichar* fixed_buff_p; // buff_p: the pointer to the original buffer that is read from // fixed_buff_p: the pointer that is written to. // // note: we can both read at buff_p and write at fixed_buff_p // since buff_p >= fixed_buff_p and we always read before // we write // buff_p = buffer_a; fixed_buff_p = buffer_a; SysChar c; long current_pos = 0; // go through the state machine // while (((long)buff_p - (long)buffer_a)/(long)sizeof(unichar) < in_len_a) { current_pos = ((long)buff_p - (long)buffer_a) / sizeof(unichar) + offset_a; c.assign((unichar)*buff_p); // give debug information for the state machine // if (debug_level_d > Integral::DETAILED) { SysChar fixed((unichar)*fixed_buff_p); SysString numeric(current_pos); SysString message(L"*buff="); numeric.assign(c); message.concat(numeric); message.concat(L", *fixed="); numeric.assign(fixed); message.concat(numeric); message.concat(L", pos="); message.concat(numeric); numeric.assign(out_len_a); message.concat(L", out_len="); message.concat(numeric); setString(numeric, state_d); message.concat(L", state="); message.concat(numeric); numeric.assign(statement_last_token_d); message.concat(L", last_tok="); message.concat(numeric); numeric.assign(token_count_d); message.concat(L", tok_cnt="); message.concat(numeric); Console::put(message); } // branch on state // //---------------------------------- // state: no good previous input (initial state) //---------------------------------- // if (state_d == NO_GPI) { // branch on input // // input is whitespace, no change of state, no output // if (c.isSpace()) { // nothing is done here // } // input is the terminal character, null valid statement // else if (c.eq(terminator_char_d)) { return false; } // input is comment operator, go into ngpi comment state, no output // else if (c.eq(comment_char_d)) { state_d = COMMENT_OP_NO_GPI; } // input is quote operator, go into quote state, output // else if (c.eq(QUOTE_CHAR)) { state_d = QUOTE_OP; } // input is literal operator, go into literal state, no output // else if (c.eq(LITERAL_CHAR)) { state_d = LITERAL_OP; } // input is a block_start character, do nothing // else if (c.eq(blockstart_char_d)) { // if we are nested, the first block-start encountered // is not output, and we set the implicit_block flag // if (nest_d && (!implicit_block_d)) { implicit_block_d = true; } // else we need to worry about this block-start char // else { // if we are within the token range, output char // if ((token_count_d >= token_start_d) && (token_count_d < token_stop_d)) { *fixed_buff_p = *buff_p; fixed_buff_p++; } // everything within this block is left for the sub-object to parse // block_count_d++; statement_last_token_d = current_pos; state_d = BLOCK_GPI; } } // input is a block_stop character, decrease block_count but // stay in nogpi state // else if (c.eq(blockstop_char_d)) { // if we dug through one level of brackets, then hitting a // blockstop means the data is null // if (nest_d && implicit_block_d) { out_len_a = 0; *fixed_buff_p = NULL_CHAR; // valid read // return true; } // any other time a blockstop char means error // else { return false; } } // else copy character directly, go into gpi state // else { if ((token_count_d >= token_start_d)&&(token_count_d < token_stop_d)) { *fixed_buff_p = *buff_p; fixed_buff_p++; } if (c.eq(delimiter_char_d)) { // just increment the counter, never output the first or last // delimiter character // token_count_d++; } // we wanted to break into a nest, we didn't // if (nest_d && (!implicit_block_d)) { // let's figure we were tokenizing so we never hit the open // brace // implicit_block_d = true; } // if this is an implicit parameter, the first character // parsed is the first character of data // if (implicit_object_d) { // good input received, go into GPI state and set last_token // statement_last_token_d = current_pos; statement_asgn_d = current_pos; } // go into GPI state // state_d = GPI; } } //---------------------------------- // state: comment operator in ngpi state //---------------------------------- // else if (state_d == COMMENT_OP_NO_GPI) { // go back to no gpi state on newline, stay in this state if // not, no output in either case // if (c.eq(NEWLINE_CHAR)) { state_d = NO_GPI; } } //---------------------------------- // state: literal operator //---------------------------------- // else if (state_d == LITERAL_OP) { // if tokenizing, only output if we have surpassed token_start_d // if ((token_count_d >= token_start_d) && (token_count_d < token_stop_d)) { // regardless of input, output the character and go to gpi // state, // *fixed_buff_p = *buff_p; fixed_buff_p++; } // if we are in a block, go back to BLOCK_GPI, else GPI // if (block_count_d == 0) { // in case the first good character is a literal // if (implicit_object_d && (statement_last_token_d < 0)) { statement_last_token_d = current_pos; statement_asgn_d = current_pos; } state_d = GPI; } else { state_d = BLOCK_GPI; } } //---------------------------------- // state: gpi //---------------------------------- // else if (state_d == GPI) { // make sure statement_asgn_d is set for implicit // if (implicit_object_d && (statement_asgn_d == -1)) { return Error::handle(name(), L"preParse - error parsing rvalue", ERR_RVALUE, __FILE__, __LINE__); } // branch on input // // literal character, go into literal state, no output // if (c.eq(LITERAL_CHAR)) { state_d = LITERAL_OP; } // quote character, go into quote state, no output // else if (c.eq(QUOTE_CHAR)) { state_d = QUOTE_OP; } // comment operator, go into gpi comment state, output space // else if (c.eq(comment_char_d)) { // only output if we are in the token range // if ((token_count_d >= token_start_d)&&(token_count_d < token_stop_d)) { *fixed_buff_p = SPACE_CHAR; fixed_buff_p++; } state_d = COMMENT_OP_GPI; } // block start character, increment block start, output // else if (c.eq(blockstart_char_d)) { if ((token_count_d >= token_start_d)&&(token_count_d < token_stop_d)) { *fixed_buff_p = *buff_p; fixed_buff_p++; } block_count_d++; state_d = BLOCK_GPI; } // block stop character (but not in block_gpi state), only if // are nesting was set should this be valid // // else if (c.eq(blockstop_char_d)) { if (nest_d && implicit_block_d && implicit_object_d) { out_len_a = ((long)fixed_buff_p - (long)buffer_a) / sizeof(unichar); state_d = NO_GPI; *fixed_buff_p = NULL_CHAR; // used to be token_count > 0 // if (open_index_d && (statement_last_token_d > 0)) { // add the last token // if (token_count_d > 2) { if (!index_d.addQuick(param_d, token_count_d + 1, statement_last_token_d, current_pos - statement_last_token_d)) { reportIndexError(param_d, token_count_d + 1, statement_last_token_d); return Error::handle(name(), L"preParse - error adding the last token", SofList::ERR, __FILE__, __LINE__, Error::WARNING); } } else { if (!index_d.add(param_d, token_count_d + 1, statement_last_token_d, current_pos - statement_last_token_d)) { reportIndexError(param_d, token_count_d + 1, statement_last_token_d); return Error::handle(name(), L"preParse - error adding the last token", SofList::ERR, __FILE__, __LINE__, Error::WARNING); } } } implicit_object_d = false; implicit_block_d = false; state_d = NO_GPI; statement_term_d = current_pos; // valid parse // return true; } // invalid parse // *fixed_buff_p++ = blockstop_char_d; return false; } // terminal character, output it, return the length of the buffer // else if (c.eq(terminator_char_d)) { // if we hit a terminal character, we better not be looking // for a nested block end // if (nest_d && implicit_block_d && implicit_object_d) { // bad parse // *fixed_buff_p = NULL_CHAR; return false; } // always output the terminal character, regardless of token // *fixed_buff_p = terminator_char_d; fixed_buff_p++; // add the last token if last char was not assignment char // if (open_index_d && (statement_last_token_d > 0) && (*(fixed_buff_p - 2) != assignment_char_d)) { // add the last token // if (token_count_d > 2) { if (!index_d.addQuick(param_d, token_count_d + 1, statement_last_token_d, current_pos - statement_last_token_d)) { reportIndexError(param_d, token_count_d + 1, statement_last_token_d); return Error::handle(name(), L"preParse - error adding the last token", SofList::ERR, __FILE__, __LINE__, Error::WARNING); } } else { if (!index_d.add(param_d, token_count_d + 1, statement_last_token_d, current_pos - statement_last_token_d)) { reportIndexError(param_d, token_count_d + 1, statement_last_token_d); return Error::handle(name(), L"preParse - error adding the last token", SofList::ERR, __FILE__, __LINE__, Error::WARNING); } } } statement_term_d = current_pos; out_len_a = ((long)fixed_buff_p - (long)buffer_a) / sizeof(unichar); state_d = NO_GPI; // strip the terminal character from the string // *(fixed_buff_p - 1) = NULL_CHAR; return true; } // first assignment character, set values, output // else if (c.eq(assignment_char_d)) { // possibly bad parse // if ((statement_asgn_d != -1) || implicit_object_d) { *fixed_buff_p = NULL_CHAR; return false; } // assign the name of the parameter // statement_asgn_d = current_pos + 1; statement_last_token_d = statement_asgn_d; token_count_d = 0; long clen = ((long)fixed_buff_p - (long)buffer_a) / sizeof(unichar); *fixed_buff_p = *buff_p; if (!assignName(buffer_a, clen)) { return Error::handle(name(), L"preParse - error parsing lvalue", ERR_LVALUE, __FILE__, __LINE__, Error::WARNING); } fixed_buff_p++; } // whitespace character, go to gpi/lws state, output space // else if (c.isSpace()) { // only output if in current token range // if ((token_count_d >= token_start_d)&&(token_count_d < token_stop_d)) { *fixed_buff_p = SPACE_CHAR; fixed_buff_p++; state_d = GPI_LWS; } if (delimiter_char_d.eq(SPACE_CHAR)) { // do nothing // } } // else copy character directly, no change in state // else { // only output within token limits // if ((token_count_d >= token_start_d)&&(token_count_d < token_stop_d)) { *fixed_buff_p = *buff_p; fixed_buff_p++; } // add a token // if (c.eq(delimiter_char_d)) { token_count_d++; if (open_index_d && (param_d >= 0) && (statement_last_token_d>=0)) { if (token_count_d > 2) { if (!index_d.addQuick(param_d, token_count_d, statement_last_token_d, current_pos - statement_last_token_d)) { reportIndexError(param_d, token_count_d, statement_last_token_d); return Error::handle(name(), L"preParse - error adding the token", SofList::ERR, __FILE__, __LINE__, Error::WARNING); } } else { if (!index_d.add(param_d, token_count_d, statement_last_token_d, current_pos - statement_last_token_d)) { reportIndexError(param_d, token_count_d, statement_last_token_d); return Error::handle(name(), L"preParse - error adding the token", SofList::ERR, __FILE__, __LINE__, Error::WARNING); } } statement_last_token_d = current_pos + 1; } } } } // end state_d == GPI //---------------------------------- // state: quotation operator //---------------------------------- // else if (state_d == QUOTE_OP) { // branch on input // // quote operator again, output it and go to gpi state // if (c.eq(QUOTE_CHAR)) { if (block_count_d == 0) { state_d = GPI; } else { state_d = BLOCK_GPI; } } // literal operator, go into quote literal state, no output // else if (c.eq(LITERAL_CHAR)) { state_d = LITERAL_OP_IN_QUOTE; } // else stay in this state copying characters over
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -