📄 ctangle.w
字号:
@d ord 0302 /* control code for `\.{@@'}' */
@d control_text 0303 /* control code for `\.{@@t}', `\.{@@\^}', etc. */
@d translit_code 0304 /* control code for `\.{@@l}' */
@d output_defs_code 0305 /* control code for `\.{@@h}' */
@d format_code 0306 /* control code for `\.{@@f}' */
@d definition 0307 /* control code for `\.{@@d}' */
@d begin_C 0310 /* control code for `\.{@@c}' */
@d section_name 0311 /* control code for `\.{@@<}' */
@d new_section 0312 /* control code for `\.{@@\ }' and `\.{@@*}' */

@<Global...@>=
eight_bits ccode[256]; /* meaning of a char following \.{@@} */

@ @<Set ini...@>= {
  int c; /* must be |int| so the |for| loop will end */
  for (c=0; c<256; c++) ccode[c]=ignore;
  ccode[' ']=ccode['\t']=ccode['\n']=ccode['\v']=ccode['\r']=ccode['\f']
   =ccode['*']=new_section;
  ccode['@@']='@@'; ccode['=']=string;
  ccode['d']=ccode['D']=definition;
  ccode['f']=ccode['F']=ccode['s']=ccode['S']=format_code;
  ccode['c']=ccode['C']=ccode['p']=ccode['P']=begin_C;
  ccode['^']=ccode[':']=ccode['.']=ccode['t']=ccode['T']=
   ccode['q']=ccode['Q']=control_text;
  ccode['h']=ccode['H']=output_defs_code;
  ccode['l']=ccode['L']=translit_code;
  ccode['&']=join;
  ccode['<']=ccode['(']=section_name;
  ccode['\'']=ord;
}

@ The |skip_ahead| procedure reads through the input at fairly high speed
until finding the next non-ignorable control code, which it returns.

@c
eight_bits
skip_ahead() /* skip to next control code */
{
  eight_bits c; /* control code found */
  while (1) {
    if (loc>limit && (get_line()==0)) return(new_section);
    *(limit+1)='@@'; /* sentinel, so that the unguarded scan below must stop */
    while (*loc!='@@') loc++;
    if (loc<=limit) { /* a real \.{@@} in the line, not the sentinel */
      loc++; c=ccode[(eight_bits)*loc]; loc++;
      if (c!=ignore || *(loc-1)=='>') return(c);
    }
  }
}

@ The |skip_comment| procedure reads through the input at somewhat high
speed in order to pass over comments, which \.{CTANGLE} does not transmit
to the output.
If the comment is introduced by \.{/*}, |skip_comment|
proceeds until finding the end-comment token \.{*/} or a newline; in the
latter case |skip_comment| will be called again by |get_next|, since the
comment is not finished. This is done so that each newline in the
\CEE/ part of a section is copied to the output; otherwise the \&{\#line}
commands inserted into the \CEE/ file by the output routines become useless.
On the other hand, if the comment is introduced by \.{//} (i.e., if it
is a \CPLUSPLUS/ ``short comment''), it always is simply delimited by the next
newline. The boolean argument |is_long_comment| distinguishes between
the two types of comments.

If |skip_comment| comes to the end of the section, it prints an error message.
No comment, long or short, is allowed to contain `\.{@@\ }' or `\.{@@*}'.

@<Global...@>=
boolean comment_continues=0; /* are we scanning a comment? */

@ @c
int skip_comment(is_long_comment) /* skips over comments */
boolean is_long_comment;
{
  char c; /* current character */
  while (1) {
    if (loc>limit) {
      if (is_long_comment) {
        if(get_line()) return(comment_continues=1);
        else{
          err_print("! Input ended in mid-comment");
@.Input ended in mid-comment@>
          return(comment_continues=0);
        }
      }
      else return(comment_continues=0);
    }
    c=*(loc++);
    if (is_long_comment && c=='*' && *loc=='/') {
      loc++; return(comment_continues=0);
    }
    if (c=='@@') {
      if (ccode[(eight_bits)*loc]==new_section) {
        err_print("! Section name ended in mid-comment"); loc--;
@.Section name ended in mid-comment@>
        return(comment_continues=0);
      }
      else loc++;
    }
  }
}

@* Inputting the next token.

@d constant 03

@<Global...@>=
name_pointer cur_section_name; /* name of section just scanned */
int no_where; /* suppress |print_where|? */

@ @<Include...@>=
#include <ctype.h> /* definition of |isalpha|, |isdigit| and so on */
#include <stdlib.h> /* definition of |exit| */

@ As one might expect, |get_next| consists mostly of a big switch
that branches to the various special cases that can arise.

@d isxalpha(c) ((c)=='_' || (c)=='$') /* non-alpha characters allowed in identifier */
@d ishigh(c) ((unsigned char)(c)>0177)
@^high-bit character handling@>

@c
eight_bits
get_next() /* produces the next input token */
{
  static int preprocessing=0;
  eight_bits c; /* the current character */
  while (1) {
    if (loc>limit) {
      if (preprocessing && (limit==buffer || *(limit-1)!='\\'))
        preprocessing=0; /* an empty line cannot end with a backslash */
      if (get_line()==0) return(new_section);
      else if (print_where && !no_where) {
          print_where=0;
          @<Insert the line number into |tok_mem|@>;
        }
        else return ('\n');
    }
    c=*loc;
    if (comment_continues || (c=='/' && (*(loc+1)=='*' || *(loc+1)=='/'))) {
      skip_comment(comment_continues||*(loc+1)=='*');
          /* scan to end of comment or newline */
      if (comment_continues) return('\n');
      else continue;
    }
    loc++;
    if (xisdigit(c) || c=='\\' || c=='.') @<Get a constant@>@;
    else if (c=='\'' || c=='"' || (c=='L'&&(*loc=='\'' || *loc=='"')))
        @<Get a string@>@;
    else if (isalpha(c) || isxalpha(c) || ishigh(c))
      @<Get an identifier@>@;
    else if (c=='@@') @<Get control code and possible section name@>@;
    else if (xisspace(c)) {
        if (!preprocessing || loc>limit) continue;
          /* we don't want a blank after a final backslash */
        else return(' '); /* ignore spaces and tabs, unless preprocessing */
    }
    else if (c=='#' && loc==buffer+1) preprocessing=1;
    mistake: @<Compress two-symbol operator@>@;
    return(c);
  }
}

@ The following code assigns values to the combinations \.{++},
\.{--}, \.{->}, \.{>=}, \.{<=}, \.{==}, \.{<<}, \.{>>}, \.{!=}, \.{||} and
\.{\&\&}, and to the \CPLUSPLUS/
combinations \.{...}, \.{::}, \.{.*} and \.{->*}.
The compound assignment operators (e.g., \.{+=}) are
treated as separate tokens.

@d compress(c) if (loc++<=limit) return(c)

@<Compress tw...@>=
switch(c) {
  case '+': if (*loc=='+') compress(plus_plus); break;
  case '-': if (*loc=='-') {compress(minus_minus);}
    else if (*loc=='>') if (*(loc+1)=='*') {loc++; compress(minus_gt_ast);}
                        else compress(minus_gt); break;
  case '.': if (*loc=='*') {compress(period_ast);}
            else if (*loc=='.' && *(loc+1)=='.') {
              loc++; compress(dot_dot_dot);
            }
            break;
  case ':': if (*loc==':') compress(colon_colon); break;
  case '=': if (*loc=='=') compress(eq_eq); break;
  case '>': if (*loc=='=') {compress(gt_eq);}
    else if (*loc=='>') compress(gt_gt); break;
  case '<': if (*loc=='=') {compress(lt_eq);}
    else if (*loc=='<') compress(lt_lt); break;
  case '&': if (*loc=='&') compress(and_and); break;
  case '|': if (*loc=='|') compress(or_or); break;
  case '!': if (*loc=='=') compress(not_eq); break;
}

@ @<Get an identifier@>= {
  id_first=--loc;
  while (isalpha((eight_bits)*++loc) || isdigit((eight_bits)*loc)
      || isxalpha(*loc) || ishigh(*loc));
    /* the casts keep a negative |char| away from |isalpha| and |isdigit| */
  id_loc=loc; return(identifier);
}

@ @<Get a constant@>= {
  id_first=loc-1;
  if (*id_first=='.' && !xisdigit(*loc)) goto mistake; /* not a constant */
  if (*id_first=='\\') while (xisdigit(*loc)) loc++; /* octal constant */
  else {
    if (*id_first=='0') {
      if (*loc=='x' || *loc=='X') { /* hex constant */
        loc++; while (xisxdigit(*loc)) loc++; goto found;
      }
    }
    while (xisdigit(*loc)) loc++;
    if (*loc=='.') {
      loc++;
      while (xisdigit(*loc)) loc++;
    }
    if (*loc=='e' || *loc=='E') { /* float constant */
      if (*++loc=='+' || *loc=='-') loc++;
      while (xisdigit(*loc)) loc++;
    }
  }
  found: while (*loc=='u' || *loc=='U' || *loc=='l' || *loc=='L'
             || *loc=='f' || *loc=='F') loc++;
  id_loc=loc;
  return(constant);
}

@ \CEE/ strings and character constants, delimited by double and single
quotes, respectively, can contain newlines or instances of their own
delimiters if they are protected by a backslash.
We follow this
convention, but do not allow the string to be longer than |longest_name|.

@<Get a string@>= {
  char delim = c; /* what started the string */
  id_first = section_text+1;
  id_loc = section_text; *++id_loc=delim;
  if (delim=='L') { /* wide character constant */
    delim=*loc++; *++id_loc=delim;
  }
  while (1) {
    if (loc>=limit) {
      if(limit==buffer || *(limit-1)!='\\') {
          /* an empty line cannot end with a backslash */
        err_print("! String didn't end"); loc=limit; break;
@.String didn't end@>
      }
      if(get_line()==0) {
        err_print("! Input ended in middle of string"); loc=buffer; break;
@.Input ended in middle of string@>
      }
      else if (++id_loc<=section_text_end) *id_loc='\n'; /* will print as \.{"\\\\\\n"} */
    }
    if ((c=*loc++)==delim) {
      if (++id_loc<=section_text_end) *id_loc=c;
      break;
    }
    if (c=='\\') {
      if (loc>=limit) continue;
      if (++id_loc<=section_text_end) *id_loc = '\\';
      c=*loc++;
    }
    if (++id_loc<=section_text_end) *id_loc=c;
  }
  if (id_loc>=section_text_end) {
    printf("\n! String too long: ");
@.String too long@>
    term_write(section_text+1,25);
    err_print("...");
  }
  id_loc++;
  return(string);
}

@ After an \.{@@} sign has been scanned, the next character tells us
whether there is more work to do.

@<Get control code and possible section name@>= {
  c=ccode[(eight_bits)*loc++];
  switch(c) {
    case ignore: continue;
    case output_defs_code: output_defs_seen=1; return(c);
    case translit_code: err_print("! Use @@l in limbo only"); continue;
@.Use @@l in limbo...@>
    case control_text: while ((c=skip_ahead())=='@@');
      /* only \.{@@@@} and \.{@@>} are expected */
      if (*(loc-1)!='>')
        err_print("! Double @@ should be used in control text");
@.Double @@ should be used...@>
      continue;
    case section_name:
      cur_section_name_char=*(loc-1);
      @<Scan the section name and make |cur_section_name| point to it@>;
    case string: @<Scan a verbatim string@>;
    case ord: @<Scan an ASCII constant@>;
    default: return(c);
  }
}

@ After scanning a valid ASCII constant that follows
\.{@@'}, this code plows ahead until it finds the next single quote.
(Special care is taken if the quote is part of the constant.)
Anything after a valid ASCII constant is ignored;
thus, \.{@@'\\nopq'} gives the same result as \.{@@'\\n'}.

@<Scan an ASCII constant@>=
  id_first=loc;
  if (*loc=='\\') {
    if (*++loc=='\'') loc++;
  }
  while (*loc!='\'') {
    if (*loc=='@@') {
      if (*(loc+1)!='@@')
        err_print("! Double @@ should be used in ASCII constant");
@.Double @@ should be used...@>
      else loc++;
    }
    loc++;
    if (loc>limit) {
        err_print("! String didn't end"); loc=limit-1; break;
@.String didn't end@>
    }
  }
  loc++;
  return(ord);

@ @<Scan the section name...@>= {
  char *k; /* pointer into |section_text| */
  @<Put section name into |section_text|@>;
  if (k-section_text>3 && strncmp(k-2,"...",3)==0)
    cur_section_name=section_lookup(section_text+1,k-3,1); /* 1 means is a prefix */
  else cur_section_name=section_lookup(section_text+1,k,0);
  if (cur_section_name_char=='(')
    @<If it's not there, add |cur_section_name| to the output file stack, or complain we're out of room@>;
  return(section_name);
}

@ Section names are placed into the |section_text| array with consecutive spaces,
tabs, and carriage-returns replaced by single spaces. There will be no
spaces at the beginning or the end. (We set |section_text[0]=' '| to facilitate
this, since the |section_lookup| routine uses |section_text[1]| as the first
character of the name.)

@<Set init...@>=section_text[0]=' ';

@ @<Put section name...@>=
k=section_text;
while (1) {
  if (loc>limit && get_line()==0) {
    err_print("! Input ended in section name");
@.Input ended in section name@>
    loc=buffer+1; break;
  }
  c=*loc;
  @<If end of name or erroneous nesting, |break|@>;
  loc++; if (k<section_text_end) k++;
  if (xisspace(c)) {
    c=' '; if (*(k-1)==' ') k--;
  }
*k=c;
}
if (k>=section_text_end) {
  printf("\n! Section name too long: ");
@.Section name too long@>
  term_write(section_text+1,25);
  printf("..."); mark_harmless;
}
if (*k==' ' && k>section_text) k--;

@ @<If end of name or erroneous nesting,...@>=
if (c=='@@') {
  c=*(loc+1);
  if (c=='>') {
    loc+=2; break;
  }
  if (ccode[(eight_bits)c]==new_section) {
    err_print("! Section name didn't end"); break;
@.Section name didn't end@>
  }
  if (ccode[(eight_bits)c]==section_name) {
    err_print("! Nesting of section names not allowed"); break;
@.Nesting of section names...@>
  }
  *(++k)='@@'; loc++; /* now |c==*loc| again */
}

@ At the present point in the program we
have |*(loc-1)==string|; we set |id_first| to the beginning
of the string itself, and |id_loc| to its ending-plus-one location in the
buffer. We also set |loc| to the position just after the ending delimiter.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -