📄 cweave.w
字号:
else if (xisspace(c)) continue; /* ignore spaces and tabs */
    if (c=='#' && loc==buffer+1) @<Raise preprocessor flag@>;
    mistake: @<Compress two-symbol operator@>@;
    return(c);
  }
}

@ Because preprocessor commands do not fit in with the rest of the syntax
of \CEE/,
we have to deal with them separately. One solution is to enclose such
commands between special markers. Thus, when a \.\# is seen as the
first character of a line, |get_next| returns a special code
|left_preproc| and raises a flag |preprocessing|.
We can use the same internal code number for |left_preproc| as we do
for |ord|, since |get_next| changes |ord| into a string.

@d left_preproc ord /* begins a preprocessor command */
@d right_preproc 0217 /* ends a preprocessor command */

@<Glob...@>=
boolean preprocessing=0; /* are we scanning a preprocessor command? */

@ @<Raise prep...@>= {
  preprocessing=1; /* the flag stays up until |right_preproc| is sent */
  @<Check if next token is |include|@>;
  return (left_preproc);
}

@ An additional complication is the freakish use of \.< and \.> to delimit
a file name in lines that start with \.{\#include}. We must treat this file
name as a string.

@<Glob...@>=
boolean sharp_include_line=0; /* are we scanning a |#include| line? 
*/

@ @<Check if next token is |include|@>=
while (loc<=buffer_end-7 && xisspace(*loc)) loc++;
if (loc<=buffer_end-6 && strncmp(loc,"include",7)==0) sharp_include_line=1;

@ When we get to the end of a preprocessor line,
we lower the flag and send a code |right_preproc|, unless
the last character was a \.\\.

@<Check if we're at...@>=
  while (loc==limit-1 && preprocessing && *loc=='\\')
    if (get_line()==0) return(new_section); /* still in preprocessor mode */
  if (loc>=limit && preprocessing) {
    preprocessing=sharp_include_line=0; /* both flags are lowered together */
    return(right_preproc);
  }

@ The following code assigns values to the combinations \.{++},
\.{--}, \.{->}, \.{>=}, \.{<=}, \.{==}, \.{<<}, \.{>>}, \.{!=}, \.{\v\v}, and
\.{\&\&}, and to the \CPLUSPLUS/
combinations \.{...}, \.{::}, \.{.*} and \.{->*}.
The compound assignment operators (e.g., \.{+=}) are
treated as separate tokens.

@d compress(c) if (loc++<=limit) return(c)

@<Compress tw...@>=
switch(c) {
  case '/': if (*loc=='*') {compress(begin_comment);}
    else if (*loc=='/') compress(begin_short_comment); break;
  case '+': if (*loc=='+') compress(plus_plus); break;
  case '-': if (*loc=='-') {compress(minus_minus);}
    else if (*loc=='>')
      if (*(loc+1)=='*') {loc++; compress(minus_gt_ast);}
      else compress(minus_gt);
    break;
  case '.': if (*loc=='*') {compress(period_ast);}
    else if (*loc=='.' 
&& *(loc+1)=='.') {
      loc++; compress(dot_dot_dot);
    }
    break;
  case ':': if (*loc==':') compress(colon_colon); break;
  case '=': if (*loc=='=') compress(eq_eq); break;
  case '>': if (*loc=='=') {compress(gt_eq);}
    else if (*loc=='>') compress(gt_gt); break;
  case '<': if (*loc=='=') {compress(lt_eq);}
    else if (*loc=='<') compress(lt_lt); break;
  case '&': if (*loc=='&') compress(and_and); break;
  case '|': if (*loc=='|') compress(or_or); break;
  case '!': if (*loc=='=') compress(not_eq); break;
}

@ @<Get an identifier@>= {
  id_first=--loc;
  while (isalpha((eight_bits)*++loc) || isdigit((eight_bits)*loc)
      || isxalpha(*loc) || ishigh(*loc));
    /* the casts keep a high-bit |char| from reaching the library
       character tests as a negative value */
  id_loc=loc; return(identifier);
}

@ Different conventions are followed by \TEX/ and \CEE/ to express octal
and hexadecimal numbers; it is reasonable to stick to each convention
within its realm. Thus the \CEE/ part of a \.{CWEB} file has octals
introduced by \.0 and hexadecimals by \.{0x}, but \.{CWEAVE} will print
with \TeX/ macros that the user can redefine to fit the context.
In order to simplify such macros, we replace some of the characters.
Notice that in this section and the next, |id_first| and |id_loc|
are pointers into the array |section_text|, not into |buffer|.

@<Get a constant@>= {
  id_first=id_loc=section_text+1;
  if (*(loc-1)=='\\') {*id_loc++='~';
    while (xisdigit(*loc)) *id_loc++=*loc++;} /* octal constant */
  else if (*(loc-1)=='0') {
    if (*loc=='x' || *loc=='X') {*id_loc++='^'; loc++;
      while (xisxdigit(*loc)) *id_loc++=*loc++;} /* hex constant */
    else if (xisdigit(*loc)) {*id_loc++='~';
      while (xisdigit(*loc)) *id_loc++=*loc++;} /* octal constant */
    else goto dec; /* decimal constant */
  }
  else { /* decimal constant */
    if (*(loc-1)=='.' 
&& !xisdigit(*loc)) goto mistake; /* not a constant */
    dec: *id_loc++=*(loc-1);
    while (xisdigit(*loc) || *loc=='.') *id_loc++=*loc++;
    if (*loc=='e' || *loc=='E') { /* float constant */
      *id_loc++='_'; loc++;
      if (*loc=='+' || *loc=='-') *id_loc++=*loc++;
      while (xisdigit(*loc)) *id_loc++=*loc++;
    }
  }
  while (*loc=='u' || *loc=='U' || *loc=='l' || *loc=='L'
         || *loc=='f' || *loc=='F') {
    *id_loc++='$'; *id_loc++=toupper(*loc); loc++;
  }
  return(constant);
}

@ \CEE/ strings and character constants, delimited by double and single
quotes, respectively, can contain newlines or instances of their own
delimiters if they are protected by a backslash. We follow this
convention, but do not allow the string to be longer than |longest_name|.

@<Get a string@>= {
  char delim = c; /* what started the string */
  id_first = section_text+1;
  id_loc = section_text;
  if (delim=='\'' && *(loc-2)=='@@') {*++id_loc='@@'; *++id_loc='@@';}
  *++id_loc=delim;
  if (delim=='L') { /* wide character constant */
    delim=*loc++; *++id_loc=delim;
  }
  if (delim=='<') delim='>'; /* for file names in |#include| lines */
  while (1) {
    if (loc>=limit) {
      if(*(limit-1)!='\\') {
        err_print("! String didn't end"); loc=limit; break;
@.String didn't end@>
      }
      if(get_line()==0) {
        err_print("! Input ended in middle of string"); loc=buffer; break;
@.Input ended in middle of string@>
      }
    }
    if ((c=*loc++)==delim) {
      if (++id_loc<=section_text_end) *id_loc=c;
      break;
    }
    if (c=='\\') { /* braces make the binding of the |else| explicit */
      if (loc>=limit) continue;
      else if (++id_loc<=section_text_end) {
        *id_loc = '\\'; c=*loc++;
      }
    }
    if (++id_loc<=section_text_end) *id_loc=c;
  }
  if (id_loc>=section_text_end) {
    printf("\n! String too long: ");
@.String too long@>
    term_write(section_text+1,25);
    printf("..."); mark_error;
  }
  id_loc++;
  return(string);
}

@ After an \.{@@} sign has been scanned, the next character tells us
whether there is more work to do.

@<Get control code and possible section name@>= {
  c=*loc++;
  switch(ccode[(eight_bits)c]) {
    case translit_code: err_print("! 
Use @@l in limbo only"); continue;
@.Use @@l in limbo...@>
    case underline: xref_switch=def_flag; continue;
    case trace: tracing=c-'0'; continue;
    case xref_roman: case xref_wildcard: case xref_typewriter:
    case noop: case TeX_string:
      c=ccode[(eight_bits)c]; /* indexed as in the other |ccode| lookups here */
      skip_restricted(); return(c);
    case section_name:
      @<Scan the section name and make |cur_section| point to it@>;
    case verbatim: @<Scan a verbatim string@>;
    case ord: @<Get a string@>;
    default: return(ccode[(eight_bits)c]);
  }
}

@ The occurrence of a section name sets |xref_switch| to zero,
because the section name might (for example) follow \&{int}.

@<Scan the section name...@>= {
  char *k; /* pointer into |section_text| */
  cur_section_char=*(loc-1);
  @<Put section name into |section_text|@>;
  if (k-section_text>3 && strncmp(k-2,"...",3)==0)
    cur_section=section_lookup(section_text+1,k-3,1); /* 1 indicates a prefix */
  else cur_section=section_lookup(section_text+1,k,0);
  xref_switch=0; return(section_name);
}

@ Section names are placed into the |section_text| array with consecutive spaces,
tabs, and carriage-returns replaced by single spaces. There will be no
spaces at the beginning or the end. (We set |section_text[0]=' '| to facilitate
this, since the |section_lookup| routine uses |section_text[1]| as the first
character of the name.)

@<Set init...@>=
section_text[0]=' ';

@ @<Put section name...@>=
k=section_text;
while (1) {
  if (loc>limit && get_line()==0) {
    err_print("! Input ended in section name");
@.Input ended in section name@>
    loc=buffer+1; break;
  }
  c=*loc;
  @<If end of name or erroneous control code, |break|@>;
  loc++; if (k<section_text_end) k++;
  if (xisspace(c)) {
    c=' '; if (*(k-1)==' ') k--;
  }
  *k=c;
}
if (k>=section_text_end) {
  printf("\n! Section name too long: ");
@.Section name too long@>
  term_write(section_text+1,25);
  printf("..."); mark_harmless;
}
if (*k==' ' && k>section_text) k--;

@ @<If end of name...@>=
if (c=='@@') {
  c=*(loc+1);
  if (c=='>') {
    loc+=2; break;
  }
  if (ccode[(eight_bits)c]==new_section) {
    err_print("! 
Section name didn't end"); break;
@.Section name didn't end@>
  }
  if (c!='@@') {
    err_print("! Control codes are forbidden in section name"); break;
@.Control codes are forbidden...@>
  }
  *(++k)='@@'; loc++; /* now |c==*loc| again */
}

@ This function skips over a restricted context at relatively high speed.

@<Predecl...@>=
void skip_restricted();

@ @c
void
skip_restricted()
{
  id_first=loc; *(limit+1)='@@'; /* the stored \.{@@} stops the scan below */
false_alarm:
  while (*loc!='@@') loc++;
  id_loc=loc;
  if (loc++>limit) {
    err_print("! Control text didn't end"); loc=limit;
@.Control text didn't end@>
  }
  else {
    if (*loc=='@@'&&loc<=limit) {loc++; goto false_alarm;}
    if (*loc++!='>')
      err_print("! Control codes are forbidden in control text");
@.Control codes are forbidden...@>
  }
}

@ At the present point in the program we
have |*(loc-1)==verbatim|; we set |id_first| to the beginning
of the string itself, and |id_loc| to its ending-plus-one location in the
buffer. We also set |loc| to the position just after the ending delimiter.

@<Scan a verbatim string@>= {
  id_first=loc++; *(limit+1)='@@'; *(limit+2)='>';
    /* the stored pair guarantees that the loop below stops */
  while (*loc!='@@' || *(loc+1)!='>') loc++;
  if (loc>=limit) err_print("! Verbatim string didn't end");
@.Verbatim string didn't end@>
  id_loc=loc; loc+=2;
  return (verbatim);
}

@** Phase one processing.
We now have accumulated enough subroutines to make it possible to carry out
\.{CWEAVE}'s first pass over the source file. 
If everything works right,
both phase one and phase two of \.{CWEAVE} will assign the same numbers to
sections, and these numbers will agree with what \.{CTANGLE} does.
The global variable |next_control| often contains the most recent output of
|get_next|; in interesting cases, this will be the control code that
ended a section or part of a section.

@<Global...@>=
eight_bits next_control; /* control code waiting to be acted upon */

@ The overall processing strategy in phase one has the following
straightforward outline.

@<Predecl...@>=
void phase_one();

@ @c
void
phase_one() {
  phase=1; reset_input(); section_count=0;
  skip_limbo(); change_exists=0;
  while (!input_has_ended)
    @<Store cross-reference data for the current section@>;
  changed_section[section_count]=change_exists;
    /* the index changes if anything does */
  phase=2; /* prepare for second phase */
  @<Print error messages about unused or undefined section names@>;
}

@ @<Store cross-reference data...@>=
{
  if (++section_count==max_sections) overflow("section number");
  changed_section[section_count]=changing;
    /* it will become 1 if any line changes */
  if (*(loc-1)=='*' && show_progress) {
    printf("*%d",section_count);
    update_terminal; /* print a progress report */
  }
  @<Store cross-references in the \TEX/ part of a section@>;
  @<Store cross-references in the definition part of a section@>;
  @<Store cross-references in the \CEE/ part of a section@>;
  if (changed_section[section_count]) change_exists=1;
}

@ The |C_xref| subroutine stores references to identifiers in
\CEE/ text material beginning with the current value of |next_control|
and continuing until |next_control| is `\.\{' or `\.{\v}', or until the next
``milestone'' is passed (i.e., |next_control>=format_code|). If
|next_control>=format_code| when |C_xref| is called, nothing will happen;
but if |next_control=='|'| upon entry, the procedure assumes that this is
the `\.{\v}' preceding \CEE/ text that is to be processed.
The parameter |spec_ctrl| is used to change this behavior. 
In most cases
|C_xref| is called with |spec_ctrl==ignore|, which triggers the default
processing described above. If |spec_ctrl==section_name|, section names will
be gobbled. This is used when \CEE/ text in the \TEX/ part or inside comments
is parsed: It allows for section names to appear in \pb, but these
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -