📄 cweave.w
字号:
token_pointer tok_start[max_texts]; /* directory into |tok_mem| */
token_pointer tok_ptr; /* first unused position in |tok_mem| */
text_pointer text_ptr; /* first unused position in |tok_start| */
text_pointer tok_start_end = tok_start+max_texts-1; /* end of |tok_start| */
token_pointer max_tok_ptr; /* largest value of |tok_ptr| */
text_pointer max_text_ptr; /* largest value of |text_ptr| */

@ @<Set init...@>=
tok_ptr=tok_mem+1; text_ptr=tok_start+1; tok_start[0]=tok_mem+1;
tok_start[1]=tok_mem+1;
max_tok_ptr=tok_mem+1; max_text_ptr=tok_start+1;

@ Here are the three procedures needed to complete |id_lookup|:
@c
/* nonzero only if |length(p)==l|, the ilk of |p| is |t| (or |t==normal|
   and |abnormal(p)| holds), and the |l| bytes at |first| equal those
   at |p->byte_start| */
int names_match(p,first,l,t)
name_pointer p; /* points to the proposed match */
char *first; /* position of first character of string */
int l; /* length of identifier */
eight_bits t; /* desired ilk */
{
  if (length(p)!=l) return 0;
  if (p->ilk!=t && !(t==normal && abnormal(p))) return 0;
  return !strncmp(first,p->byte_start,l);
}

/* record ilk |t| in |p| and point its |xref| field at |xmem| */
void
init_p(p,t)
name_pointer p;
eight_bits t;
{
  p->ilk=t; p->xref=(char*)xmem;
}

/* point the |xref| field of |p| at |xmem| */
void
init_node(p)
name_pointer p;
{
  p->xref=(char*)xmem;
}

@ We have to get \CEE/'s
reserved words into the hash table, and the simplest way to do this is
to insert them every time \.{CWEAVE} is run. Fortunately there are relatively
few reserved words.
(Some of these are not strictly ``reserved,'' but
are defined in header files of the ISO Standard \CEE/ Library.)
@^reserved words@>

@<Store all the reserved words@>=
id_lookup("and",NULL,alfop);
id_lookup("and_eq",NULL,alfop);
id_lookup("asm",NULL,sizeof_like);
id_lookup("auto",NULL,int_like);
id_lookup("bitand",NULL,alfop);
id_lookup("bitor",NULL,alfop);
id_lookup("bool",NULL,raw_int);
id_lookup("break",NULL,case_like);
id_lookup("case",NULL,case_like);
id_lookup("catch",NULL,catch_like);
id_lookup("char",NULL,raw_int);
id_lookup("class",NULL,struct_like);
id_lookup("clock_t",NULL,raw_int);
id_lookup("compl",NULL,alfop);
id_lookup("const",NULL,const_like);
id_lookup("const_cast",NULL,raw_int);
id_lookup("continue",NULL,case_like);
id_lookup("default",NULL,case_like);
id_lookup("define",NULL,define_like);
id_lookup("defined",NULL,sizeof_like);
id_lookup("delete",NULL,delete_like);
id_lookup("div_t",NULL,raw_int);
id_lookup("do",NULL,do_like);
id_lookup("double",NULL,raw_int);
id_lookup("dynamic_cast",NULL,raw_int);
id_lookup("elif",NULL,if_like);
id_lookup("else",NULL,else_like);
id_lookup("endif",NULL,if_like);
id_lookup("enum",NULL,struct_like);
id_lookup("error",NULL,if_like);
id_lookup("explicit",NULL,int_like);
id_lookup("export",NULL,int_like);
id_lookup("extern",NULL,int_like);
id_lookup("FILE",NULL,raw_int);
id_lookup("float",NULL,raw_int);
id_lookup("for",NULL,for_like);
id_lookup("fpos_t",NULL,raw_int);
id_lookup("friend",NULL,int_like);
id_lookup("goto",NULL,case_like);
id_lookup("if",NULL,if_like);
id_lookup("ifdef",NULL,if_like);
id_lookup("ifndef",NULL,if_like);
id_lookup("include",NULL,if_like);
id_lookup("inline",NULL,int_like);
id_lookup("int",NULL,raw_int);
id_lookup("jmp_buf",NULL,raw_int);
id_lookup("ldiv_t",NULL,raw_int);
id_lookup("line",NULL,if_like);
id_lookup("long",NULL,raw_int);
id_lookup("mutable",NULL,int_like);
id_lookup("namespace",NULL,struct_like);
id_lookup("new",NULL,new_like);
id_lookup("not",NULL,alfop);
id_lookup("not_eq",NULL,alfop);
id_lookup("NULL",NULL,custom);
id_lookup("offsetof",NULL,raw_int);
id_lookup("operator",NULL,operator_like);
id_lookup("or",NULL,alfop);
id_lookup("or_eq",NULL,alfop);
id_lookup("pragma",NULL,if_like);
id_lookup("private",NULL,public_like);
id_lookup("protected",NULL,public_like);
id_lookup("ptrdiff_t",NULL,raw_int);
id_lookup("public",NULL,public_like);
id_lookup("register",NULL,int_like);
id_lookup("reinterpret_cast",NULL,raw_int);
id_lookup("return",NULL,case_like);
id_lookup("short",NULL,raw_int);
id_lookup("sig_atomic_t",NULL,raw_int);
id_lookup("signed",NULL,raw_int);
id_lookup("size_t",NULL,raw_int);
id_lookup("sizeof",NULL,sizeof_like);
id_lookup("static",NULL,int_like);
id_lookup("static_cast",NULL,raw_int);
id_lookup("struct",NULL,struct_like);
id_lookup("switch",NULL,for_like);
id_lookup("template",NULL,template_like);
id_lookup("this",NULL,custom);
id_lookup("throw",NULL,case_like);
id_lookup("time_t",NULL,raw_int);
id_lookup("try",NULL,else_like);
id_lookup("typedef",NULL,typedef_like);
id_lookup("typeid",NULL,raw_int);
id_lookup("typename",NULL,struct_like);
id_lookup("undef",NULL,if_like);
id_lookup("union",NULL,struct_like);
id_lookup("unsigned",NULL,raw_int);
id_lookup("using",NULL,int_like);
id_lookup("va_dcl",NULL,decl); /* Berkeley's variable-arg-list convention */
id_lookup("va_list",NULL,raw_int); /* ditto */
id_lookup("virtual",NULL,int_like);
id_lookup("void",NULL,raw_int);
id_lookup("volatile",NULL,const_like);
id_lookup("wchar_t",NULL,raw_int);
id_lookup("while",NULL,for_like);
id_lookup("xor",NULL,alfop);
id_lookup("xor_eq",NULL,alfop);
res_wd_end=name_ptr;
id_lookup("TeX",NULL,custom);
id_lookup("make_pair",NULL,func_template);

@* Lexical scanning.
Let us now consider the subroutines that read the \.{CWEB} source file
and break it into meaningful units.
There are four such procedures:
One simply skips to the next `\.{@@\ }' or `\.{@@*}' that begins a
section; another passes over the \TEX/ text at the beginning of a
section; the third passes over the \TEX/ text in a \CEE/ comment;
and the last, which is the most interesting, gets the next token of
a \CEE/ text. They all use the pointers |limit| and |loc| into
the line of input currently being studied.

@ Control codes in \.{CWEB}, which begin with `\.{@@}', are converted
into a numeric code designed to simplify \.{CWEAVE}'s logic; for example,
larger numbers are given to the control codes that denote more significant
milestones, and the code of |new_section| should be the largest of
all. Some of these numeric control codes take the place of |char|
control codes that will not otherwise appear in the output of the
scanning routines.
@^ASCII code dependencies@>

@d ignore 00 /* control code of no interest to \.{CWEAVE} */
@d verbatim 02 /* takes the place of extended ASCII \.{\char2} */
@d begin_short_comment 03 /* \CPLUSPLUS/ short comment */
@d begin_comment '\t' /* tab marks will not appear */
@d underline '\n' /* this code will be intercepted without confusion */
@d noop 0177 /* takes the place of ASCII delete */
@d xref_roman 0203 /* control code for `\.{@@\^}' */
@d xref_wildcard 0204 /* control code for `\.{@@:}' */
@d xref_typewriter 0205 /* control code for `\.{@@.}' */
@d TeX_string 0206 /* control code for `\.{@@t}' */
@f TeX_string TeX
@d ord 0207 /* control code for `\.{@@'}' */
@d join 0210 /* control code for `\.{@@\&}' */
@d thin_space 0211 /* control code for `\.{@@,}' */
@d math_break 0212 /* control code for `\.{@@\v}' */
@d line_break 0213 /* control code for `\.{@@/}' */
@d big_line_break 0214 /* control code for `\.{@@\#}' */
@d no_line_break 0215 /* control code for `\.{@@+}' */
@d pseudo_semi 0216 /* control code for `\.{@@;}' */
@d macro_arg_open 0220 /* control code for `\.{@@[}' */
@d macro_arg_close 0221 /* control code for `\.{@@]}' */
@d trace 0222 /* control code for `\.{@@0}', `\.{@@1}' and `\.{@@2}' */
@d translit_code 0223 /* control code for `\.{@@l}' */
@d output_defs_code 0224 /* control code for `\.{@@h}' */
@d format_code 0225 /* control code for `\.{@@f}' and `\.{@@s}' */
@d definition 0226 /* control code for `\.{@@d}' */
@d begin_C 0227 /* control code for `\.{@@c}' */
@d section_name 0230 /* control code for `\.{@@<}' */
@d new_section 0231 /* control code for `\.{@@\ }' and `\.{@@*}' */

@ Control codes are converted to \.{CWEAVE}'s internal
representation by means of the table |ccode|.

@<Global...@>=
eight_bits ccode[256]; /* meaning of a char following \.{@@} */

@ @<Set ini...@>=
{int c; for (c=0; c<256; c++) ccode[c]=0;}
ccode[' ']=ccode['\t']=ccode['\n']=ccode['\v']=ccode['\r']=ccode['\f']
   =ccode['*']=new_section;
ccode['@@']='@@'; /* `quoted' at sign */
ccode['=']=verbatim;
ccode['d']=ccode['D']=definition;
ccode['f']=ccode['F']=ccode['s']=ccode['S']=format_code;
ccode['c']=ccode['C']=ccode['p']=ccode['P']=begin_C;
ccode['t']=ccode['T']=TeX_string;
ccode['l']=ccode['L']=translit_code;
ccode['q']=ccode['Q']=noop;
ccode['h']=ccode['H']=output_defs_code;
ccode['&']=join; ccode['<']=ccode['(']=section_name;
ccode['!']=underline; ccode['^']=xref_roman;
ccode[':']=xref_wildcard; ccode['.']=xref_typewriter; ccode[',']=thin_space;
ccode['|']=math_break; ccode['/']=line_break; ccode['#']=big_line_break;
ccode['+']=no_line_break; ccode[';']=pseudo_semi;
ccode['[']=macro_arg_open; ccode[']']=macro_arg_close;
ccode['\'']=ord;
@<Special control codes for debugging@>@;

@ Users can write
\.{@@2}, \.{@@1}, and \.{@@0} to turn tracing fully on, partly on,
and off, respectively.

@<Special control codes...@>=
ccode['0']=ccode['1']=ccode['2']=trace;

@ The |skip_limbo| routine is used on the first pass to skip through
portions of the input that are not in any sections, i.e., that precede
the first section.
After this procedure has been called, the value of
|input_has_ended| will tell whether or not a section has actually been found.
There's a complication that we will postpone until later: If the \.{@@s}
operation appears in limbo, we want to use it to adjust the default
interpretation of identifiers.

@<Predec...@>=
void skip_limbo();

@ @c
/* advance through the input until a |new_section| code is seen
   or |get_line| returns 0 */
void
skip_limbo() {
  while(1) {
    if (loc>limit && get_line()==0) return;
    *(limit+1)='@@'; /* sentinel that stops the scan below */
    while (*loc!='@@') loc++; /* look for '@@', then skip two chars */
    if (loc++ <=limit) { /* the at sign was inside the line, not the sentinel */
      int c=ccode[(eight_bits)*loc++];
      if (c==new_section) return;
      if (c==noop) skip_restricted();
      else if (c==format_code) @<Process simple format in limbo@>;
    }
  }
}

@ The |skip_TeX| routine is used on the first pass to skip through
the \TEX/ code at the beginning of a section. It returns the next
control code or `\.{\v}' found in the input. A |new_section| is
assumed to exist at the very end of the file.

@f skip_TeX TeX

@c
unsigned
skip_TeX() /* skip past pure \TEX/ code */
{
  while (1) {
    if (loc>limit && get_line()==0) return(new_section);
    *(limit+1)='@@'; /* sentinel that stops the scan below */
    while (*loc!='@@' && *loc!='|') loc++;
    if (*loc++ =='|') return('|');
    if (loc<=limit) return(ccode[(eight_bits)*(loc++)]);
  }
}

@*1 Inputting the next token.
As stated above, \.{CWEAVE}'s most interesting lexical scanning routine is the
|get_next| function that inputs the next token of \CEE/ input.
However,
|get_next| is not especially complicated.
The result of |get_next| is either a |char| code for some special character,
or it is a special code representing a pair of characters (e.g., `\.{!=}'),
or it is the numeric value computed by the |ccode|
table, or it is one of the following special codes:

\yskip\hang |identifier|: In this case the global variables |id_first| and
|id_loc| will have been set to the beginning and ending-plus-one locations
in the buffer, as required by the |id_lookup| routine.

\yskip\hang |string|: The string will have been copied into the array
|section_text|; |id_first| and |id_loc| are set as above (now they are
pointers into |section_text|).

\yskip\hang |constant|: The constant is copied into |section_text|, with
slight modifications; |id_first| and |id_loc| are set.

\yskip\noindent Furthermore, some of the control codes cause
|get_next| to take additional actions:

\yskip\hang |xref_roman|, |xref_wildcard|, |xref_typewriter|, |TeX_string|,
|verbatim|: The values of |id_first| and |id_loc| will have been set to
the beginning and ending-plus-one locations in the buffer.

\yskip\hang |section_name|: In this case the global variable |cur_section| will
point to the |byte_start| entry for the section name that has just been scanned.
The value of |cur_section_char| will be |'('| if the section name was
preceded by \.{@@(} instead of \.{@@<}.

\yskip\noindent If |get_next| sees `\.{@@!}'
it sets |xref_switch| to |def_flag| and goes on to the next token.

@d constant 0200 /* \CEE/ constant */
@d string 0201 /* \CEE/ string */
@d identifier 0202 /* \CEE/ identifier or reserved word */

@<Global...@>=
name_pointer cur_section; /* name of section just scanned */
char cur_section_char; /* the character just before that name */

@ @<Include...@>=
#include <ctype.h> /* definition of |isalpha|, |isdigit| and so on */
#include <stdlib.h> /* definition of |exit| */

@ As one might expect, |get_next| consists mostly of a big switch
that branches to the various special cases that can
arise.\CEE/ allows underscores to appear in identifiers, and some \CEE/compilers even allow the dollar sign.@d isxalpha(c) ((c)=='_' || (c)=='$') /* non-alpha characters allowed in identifier */@d ishigh(c) ((eight_bits)(c)>0177)@^high-bit character handling@>@<Predecl...@>=eight_bits get_next();@ @ceight_bitsget_next() /* produces the next input token */{@+eight_bits c; /* the current character */ while (1) { @<Check if we're at the end of a preprocessor command@>; if (loc>limit && get_line()==0) return(new_section); c=*(loc++); if (xisdigit(c) || c=='\\' || c=='.') @<Get a constant@>@; else if (c=='\'' || c=='"' || (c=='L'&&(*loc=='\'' || *loc=='"'))@| || (c=='<' && sharp_include_line==1)) @<Get a string@>@; else if (xisalpha(c) || isxalpha(c) || ishigh(c)) @<Get an identifier@>@; else if (c=='@@') @<Get control code and possible section name@>@;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -