📄 forlex.c
字号:
} } }/* end else isaletter(c) */ }/* end if(possible_keyword) */ }/* end while(isidletter || isadigit) */ if(keywd_class == FALSE) { /* it is an identifier */ /* Identifier: find its hashtable entry or create a new entry. */ int h; Lsymtab *symt;#ifdef TYPELESS_CONSTANTS /* Watch out for const like X'nnn' */ if(i == 1 && curr_char == '\'') { get_binary_const(token,s_upper[0],NULL); return; }#endif s_upper[i] = '\0'; s_lower[i] = '\0'; i++;#ifdef CASE_SENSITIVE token->value.integer = h = hash_lookup(s_lower);#else token->value.integer = h = hash_lookup(s_upper);#endif /* If it is an array give it a special token class, so that arrays can be distinguished from functions in the grammar. */ if((symt=hashtab[h].loc_symtab) != NULL && symt->array_var) { token->class = tok_array_identifier; } } /* Check identifiers for being juxtaposed to keywords or having internal space. Keywords are immune to warning since want to allow both GOTO and GO TO, etc. */ if(pretty_flag && (token->class==tok_identifier || token->class==tok_array_identifier) && ( isidletter(preceding_c) || isadigit(preceding_c) || has_embedded_space ) ) { ugly_code(token->line_num,token->col_num,"identifier"); msg_tail(hashtab[token->value.integer].name);#if 0 /* Keywords immune for now */ ugly_code(token->line_num,token->col_num,"keyword"); msg_tail(keywords[keytab_index[keywd_class-keytab_offset]].name);#endif if(has_embedded_space) msg_tail("has embedded space"); else msg_tail("not clearly separated from context"); }#ifdef DEBUG_FORLEX if(debug_lexer){ switch(token->class) { case tok_identifier: fprintf(list_fd,"\nIdentifier:\t\t%s",s); break; case tok_array_identifier: fprintf(list_fd,"\nArray_identifier:\t%s",s); break; default: fprintf(list_fd,"\nKeyword:\t\ttok_%s",s); break; } }#endif} /* get_identifier *//* iskeyword: Determines (to the best of its current ability) whether a given identifier is a keyword or not. Hopefully now no keywords are reserved. Method uses context from start of statement up to and including the character following the putative keyword to eliminate as many cases as possible. Any non-IK keywords (those that need not be in the initial series of keywords of statement) have special code to handle them. Any IK's that are always the second word of a pair are accepted if the predecessor was just seen. The rest are handed off to looking_at_keywd which tries to see if it is an assignment statement. Note that some rules that could be used if F77 Standard were adhered to strictly are not used here. The idea is to allow extensions, and leave catching syntax errors in the parser. For example, specification-statement keywords are not excluded after the first executable statement has been seen. The status of a variable as declared array or character type is not consulted in ruling out an assignment statement if following parentheses are present. Etc.*/ /* Macro to test if all the specified bits are set */#define MATCH(CONTEXT) ((keywords[i].context & (CONTEXT)) == (CONTEXT))LEX_SHARED intis_keyword(i) int i; /* Index in keywords table */{ int ans = FALSE; int putative_keyword_class; /* Class of the supposed keyword */ while(iswhitespace(curr_char)) /* Move to lookahead char */ advance();#ifdef DEBUG_IS_KEYWORD if(debug_lexer){ fprintf(list_fd, "\nkeyword %s: initialflag=%d implicitflag=%d ", keywords[i].name,initial_flag,implicit_flag); fprintf(list_fd, "context=%o, next char=%c %o",keywords[i].context, curr_char,curr_char); }#endif putative_keyword_class = keywords[i].class; if( !initial_flag && MATCH(IK) ) { /* Dispose of keywords which can only occur in initial part of statement, if found elsewhere. */ ans = FALSE; }#if 0 /* This does not work: curr_stmt_class not cleared beforehand */ else if(curr_stmt_class == tok_IF && MATCH(NI)) { /* Dispose of keywords which cannot occur in stmt field of logical IF if that is where we are. */ ans = FALSE; }#endif else if(MATCH(NA) && isalpha(curr_char)) { /* Dispose of keywords which cannot be followed by alphabetic character if that is so. */ ans = FALSE; } else if(putative_keyword_class == tok_TO) {/* A non-IK case */ /* TO always follows the word GO or is followed by a variable name (in ASSIGN statement). */#ifdef SPLIT_KEYWORDS#define in_assign_stmt (curr_stmt_class == tok_ASSIGN) ans = (prev_token_class == (in_assign_stmt? tok_integer_const: tok_GO));#else ans = ( curr_stmt_class == tok_ASSIGN && prev_token_class == tok_integer_const);#endif } else if(putative_keyword_class == tok_FUNCTION /* A non-IK case */ && (stmt_sequence_no != 0 /* not the first statement of module */ || !(initial_flag /* if not initial can only be preceded by type */ || is_a_type_token(curr_stmt_class)) )) { ans = FALSE; /* otherwise it will be handled correctly by looking_at */ } else if(putative_keyword_class == tok_WHILE) { /* A non-IK case */ ans = WHILE_expected; /* Only occurs in DO label [,] WHILE */ WHILE_expected = FALSE; } /* Remaining cases are IK in initial part */ /* Eliminate those which can are never followed by '(' or '=' if that is what we have. */ else if(MATCH(NP) && (curr_char == '(' || curr_char == '=') ) { ans = FALSE; } /* Likewise with those that must be followed by '(' but aren't */ else if(MATCH(MP) && curr_char != '(') { ans = FALSE; } /* PRECISION always follows the word DOUBLE */ else if( putative_keyword_class == tok_PRECISION ){ ans = (prev_token_class == tok_DOUBLE); } /* END DO: handle its DO here */ else if( putative_keyword_class == tok_DO && curr_char == EOS ) { /* Also must have prev_token_class == tok_END, but no need to check since end-of-statement suffices. */ ans = TRUE; } /* Other type names always follow the word IMPLICIT */ else if( implicit_flag ) { ans = MATCH(TY); } else { /* Remaining cases are keywords that must be in initial position. If followed by '=' must be an identifier. If followed by '(' then may be an array or character lvalue, so use looking_at to scan ahead to see if this is an assignment statement. */ ans = looking_at_keywd(putative_keyword_class); } /* Save initial token class for use by parser. Either set it to keyword token or to id for assignment stmt. */ if(initial_flag) { curr_stmt_class = (ans? keywords[i].class: tok_identifier); } /* Turn off the initial-keyword flag if this is a keyword that cannot be followed by another keyword or if it is not a keyword. */ if(ans) { if(keywords[i].context & EK) initial_flag = FALSE; return keywords[i].class; } else { /* If no more letters follow, then keyword here is ruled out. Turn off initial_flag. */ if( ! isalpha(curr_char) ) initial_flag = FALSE; return 0; /* Not found in list */ }}/* End of is_keyword *//* init_keyhashtab:*/ /* Hashing is no longer used. This guy now only initializes the table of indices that allow keywords to be looked up by their token class*/voidinit_keyhashtab(){ int i,k,kmin,kmax; kmin = kmax = keywords[0].class; /* Find min and max token classes */ for(i=1; i<NUM_KEYWORDS; i++) { k = keywords[i].class; if(k < kmin) kmin = k; if(k > kmax) kmax = k; } keytab_offset = kmin; /* Index table from [kmin..kmax] -> [0..size-1] */ keytab_size = (unsigned) (kmax-kmin+1); if( (keytab_index=(short *)ckalloc(keytab_size*sizeof(keytab_index[0]))) == (short *)NULL) { oops_message(OOPS_FATAL,NO_LINE_NUM,NO_COL_NUM, "cannot allocate space for keytab_index"); } memset (keytab_index, 0, keytab_size*sizeof(keytab_index[0])); /* Now fill in the lookup table, indexed by class - offset */ for(i=0; i<NUM_KEYWORDS; i++) { k = keywords[i].class; keytab_index[k - keytab_offset] = i; }}PRIVATE voidget_illegal_token(token) /* Handle an illegal input situation */ Token *token;{ token->class = tok_illegal;#ifdef DEBUG_FORLEX if(debug_lexer) fprintf(list_fd,"\nILLEGAL TOKEN");#endif} /* get_illegal_token */ /* Read a label from label field. */PRIVATE voidget_label(token) Token *token;{ int value=0; int space_seen=FALSE, has_embedded_space=FALSE; while( isadigit(curr_char) && col_num < 6 ) { if(space_seen) has_embedded_space = TRUE; value = value*10 + BCD(curr_char); advance(); while(curr_char==' ' && col_num < 6) { space_seen = TRUE; advance(); } } if(pretty_flag && has_embedded_space) { ugly_code(token->line_num,token->col_num, "label has embedded space"); } token->class = tok_label; token->value.integer = value;#ifdef DEBUG_FORLEX if(debug_lexer) fprintf(list_fd,"\nLabel:\t\t\t%d",value);#endif} /* get_label */PRIVATE voidget_letter(token) /* Gets letter in IMPLICIT list */ Token *token;{ token->class = tok_letter; token->subclass = makeupper(curr_char);#ifdef DEBUG_FORLEX if(debug_lexer) fprintf(list_fd,"\nLetter:\t\t\t%c",token->subclass);#endif advance();} /* get_letter */ /* get_number reads a number and determines data type: integer, * real, or double precision. *//* This belongs in ftnchek.h, perhaps. Defines number of significant figures that are reasonable for a single-precision real constant. Works out to 9 for wordsize=4, 21 for wordsize=8. These allow for a couple of extra digits for rounding. Used in -trunc warning. */#define REAL_SIGFIGS (local_wordsize==0? 8: (local_wordsize-1)*3)PRIVATE voidget_number(token) Token *token;{ double dvalue,leftside,rightside,pwr_of_ten; int exponent,expsign,datatype,c; int sigfigs; initial_flag = FALSE; leftside = 0.0; sigfigs = 0; datatype = tok_integer_const; while(isadigit(curr_char)) { leftside = leftside*10.0 + (double)BCD(curr_char); ++sigfigs; if( !integer_context && makeupper(next_char) == 'H' ) inside_hollerith = TRUE;/* get ready for hollerith*/ bi_advance(); } /* If context specifies integer expected, skip to end. Otherwise scan on ahead for more. */ if( integer_context) { if(sigfigs == 0) { yyerror("integer expected"); advance(); /* gobble something to avoid infinite loop */ } } else {/* not integer_context */ if( makeupper(curr_char) == 'H' ){ /* nnH means hollerith */ if(leftside == 0.0) { yyerror("Zero-length hollerith constant"); inside_hollerith = FALSE; advance(); get_illegal_token(token); } else { get_hollerith(token, (int)leftside); } return; } rightside = 0.0; pwr_of_ten = 1.0; closeup(); /* Pull in the lookahead character */ if( curr_char == '.' && /* don't be fooled by 1.eq.N or I.eq.1.and. etc */ !looking_at_relop() ) { datatype = tok_real_const; bi_advance(); while(isadigit(curr_char)) { rightside = rightside*10.0 + (double)BCD(curr_char); ++sigfigs; pwr_of_ten *= 0.10; bi_advance(); } }#ifdef DEBUG_FORLEXif(debug_lexer) dvalue = leftside + rightside*pwr_of_ten;#endif exponent = 0; expsign = 1; /* Integer followed by E or D gives a real/d.p constant */ if( ( (c = makeupper(curr_char)) == 'E' || c == 'D' ) ) { datatype = ((c == 'E')? tok_real_const: tok_dp_const); bi_advance(); if(curr_char == '+') { expsign = 1; bi_advance(); } else if(curr_char == '-') { expsign = -1; bi_advance(); } if(!isadigit(curr_char)) { yyerror("Badly formed real constant"); } else while(isadigit(curr_char)) { exponent = exponent*10 + (curr_char-'0'); bi_advance(); } /* Compute real value only if debugging. If it exceeds max magnitude, computing it may cause crash. At this time, value of real const is not used for anything. */#ifdef DEBUG_FORLEXif(debug_lexer) dvalue *= pow(10.0, (double)(exponent*expsign));else#endif dvalue = 0.0; } }/* end if(!integer_context) */ token->class = datatype; switch(datatype) { case tok_integer_const: token->value.integer = (long)leftside;#ifdef DEBUG_FORLEXif(debug_lexer)fprintf(list_fd,"\nInteger const:\t\t%ld",token->value.integer);#endif break; case tok_real_const: /* store single as double lest it overflow */ token->value.dbl = dvalue; if(trunc_check && sigfigs >= REAL_SIGFIGS) { warning(token->line_num,token->col_num, "Single-precision real constant has more digits than are stored"); }#ifdef DEBUG_FORLEXif(debug_lexer)fprintf(list_fd,"\nReal const:\t\t%g",token->value.dbl);#endif break; case tok_dp_const: token->value.dbl = dvalue;#ifdef DEBUG_FORLEXif(debug_lexer)fprintf(list_fd,"\nDouble const:\t\t%lg",token->value.dbl);#endif break; }} /* get_number */ /* get_complex_constant reads an entity of the form (num,num) where num is any [signed] numeric constant. It will only be called when looking_at() has guaranteed that there is one there. The token receives the real part as a number. The imaginary part is not stored. Whitespace is allowed between ( and num, around the comma, and between num and ) but not within num. */PRIVATE voidget_complex_const(token) Token *token;{ Token imag_part; /* temporary to hold imag part */ double sign=1.0; int dble_size=FALSE; /* flag to set if parts are D floats */ int imag_dble_size=FALSE;/* if imaginary part D float */ unsigned comma_line_num,comma_col_num; initial_flag = FALSE; bi_advance(); /* skip over the initial paren */ if(curr_char == '+' || curr_char == '-') { if(curr_char == '-') sign = -1.0; bi_advance(); }#ifdef DEBUG_FORLEXif(debug_lexer){fprintf(list_fd,"\nComplex const:(");if(sign < 0.0) fprintf(list_fd," -");}#endif get_number(token); switch(token->class) { case tok_integer_const: token->value.dbl = sign*(double)token->value.integer; break; case tok_dp_const:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -