📄 cscan.pm
字号:
} elsif ($c =~ /[\)\]\}]/) { $d++; } last if $d < 0; } if ($s < 0) { # Should not happen warn("panic: could not match braces in\n\t$td\nwhited as\n\t$wh\n"); next loop; } $s++; } else { # We are at toplevel # We need to skip back all the modifiers attached to the first thingy # Guesstimates: everything after the first '*' (inclusive) pos $wh = 0; $wh = /(?=\w)/g; my $ws = pos $wh; my $pre = substr $wh, 0, $ws; $s = $ws; $s = pos $pre if $pre =~ /(?=\*)/g; $e = length $wh; } # Now: need to split $td based on commas in $wh! # And need to split each chunk of $td based on word in the chunk of $wh! my $td_decls = substr($td, $s, $e - $s); my ($pre, $post) = (substr($td, 0, $s), substr($td, $e)); my $wh_decls = substr($wh, $s, $e - $s); my @wh_decls = split /,/, $wh_decls; my $td_s = 0; my (@td_decl, @td_pre, @td_post, @td_word); for my $wh_d (@wh_decls) { my $td_d = substr $td, $td_s, length $wh_d; push @td_decl, $td_d; $wh_d =~ /(\w+)/g; push @td_word, $1; push @td_post, substr $td_d, pos($wh_d); push @td_pre, substr $td_d, pos($wh_d) - length $1, length $1; $td_s += 1 + length $wh_d; # Skip over ',' } for my $i (0..$#wh_decls) { my $p = "$td_post[$i]$post"; $p = '' unless $p =~ /\S/; $out{$td_word[$i]} = ["$pre$td_pre[$i]", $p]; } } elsif ($td =~ /\(\s* \*? \s* ([^)]+) \s* \) \s* \(.*\)/gxs){ # XXX: function pointer typedef $out{$1} = ['XXX: pre_foo', 'XXX: post_bar']; # XXX: not sure what to stuff here #warn "[$1] [$td]" if $verb; } else { # Only one thing defined... $wh =~ /(\w+)/g; my $e = pos $wh; my $s = $e - length $1; my $type = $1; my $pre = substr $td, 0, $s; my $post = substr $td, $e, length($td) - $e; $post = '' unless $post =~ /\S/; $out{$type} = [$pre, $post]; } #die if $verb; } \%out;}sub typedef_structs { my ($typehash, $structs) = @_; my %structs; for (0 .. $#$structs) { my $in = $structs->[$_]; my $key; next unless $in =~ /^struct\s*(\w+)/; next unless $in =~ s{^(struct\s*)(\w+)}{ $key = "struct $2"; $1 . " " x length($2) }e; my $name = parse_struct($in, \%structs); $structs{$key} = defined($name) ? $structs{$name} : undef; } while (my ($key, $text) = each %$typehash) { my $name = parse_struct($text->[0], \%structs); $structs{$key} = defined($name) ? $structs{$name} : undef; } \%structs;}sub parse_struct { my ($in, $structs) = @_; my ($b, $e, $chunk, $vars, $struct, $structname); return "$1 $2" if $in =~ / ^ \s* (struct | union) \s+ (\w+) \s* $ /x; ($structname, $in) = $in =~ / ^ \s* ( (?: struct | union ) (?: \s+ \w+ )? ) \s* { \s* (.*?) \s* } \s* $ /gisx or return; $structname .= " _ANON" unless $structname =~ /\s/; $structname .= " 0" if exists $structs->{$structname}; $structname =~ s/(\d+$)/$1 + 1/e while exists $structs->{$structname}; $structname =~ s/\s+/ /g; $b = 0; while ($in =~ /(\{|;|$)/g) { matchingbrace($in), next if $1 eq '{'; $e = pos($in); next if $b == $e; $chunk = substr($in, $b, $e - $b); $b = $e; if ($chunk =~ /\G\s*(struct|union|enum).*\}/gs) { my $term = pos $chunk; my $name = parse_struct(substr($chunk, 0, $term), $structs); $vars = parse_vars(join ' ', $name, substr $chunk, $term); } else { $vars = parse_vars($chunk); } push @$struct, @{$vars||[]}; } $structs->{$structname} = $struct; $structname;}sub parse_vars { my $in = shift; my ($vars, $type, $word, $id, $post, $func); while ($in =~ /\G\s*([\[;,(]|\*+|:\s*\d+|\S+?\b|$)\s*/gc) { $word = $1; if ($word eq ';' || $word eq '') { next unless defined $id; $type = 'int' unless defined $type; # or is this an error? push @$vars, [ $type, $post, $id ]; ($type, $post, $id, $func) = (undef, undef, undef); } elsif ($word eq ',') { warn "panic: expecting name before comma in '$in'\n" unless defined $id; $type = 'int' unless defined $type; # or is this an error? push @$vars, [ $type, $post, $id ]; $type =~ s/[ *]*$//; $id = undef; } elsif ($word eq '[') { warn "panic: expecting name before '[' in '$in'\n" unless defined $id; $type = 'int' unless defined $type; # or is this an error? my $b = pos $in; matchingbrace($in); $post .= $word . substr $in, $b, pos($in) - $b; } elsif ($word eq '(') { # simple hack for function pointers $type = join ' ', grep defined, $type, $id if defined $id; $type = 'int' unless defined $type; if ($in =~ /\G\s*(\*[\s\*]*?)\s*(\w+)[\[\]\d\s]*(\)\s*\()/gc) { $type .= "($1"; $id = $2; $post = $3; my $b = pos $in; matchingbrace($in); $post .= substr $in, $b, pos($in) - $b; } else { warn "panic: can't parse function pointer declaration in '$in'\n"; return; } } elsif ($word =~ /^:/) { # bitfield $type = 'int' unless defined $type; $post .= $word; } else { if (defined $post) { if ($func) { $post .= $word; } else { warn "panic: not expecting '$word' after array bounds in '$in'\n"; } } else { $type = join ' ', grep defined, $type, $id if defined $id; $id = $word; } } }unless ($vars) { warn sprintf "failed on <%s> with type=<%s>, id=<%s>, post=<%s> at pos=%d\n", $in, $type, $id, $post, pos($in);} $vars;}sub vdecl_hash { my ($vdecls, $mdecls) = @_; my %vdecl_hash; for (@$vdecls, @$mdecls) { next if /[()]/; # ignore functions, and function pointers my $copy = $_; next unless $copy =~ s/^\s*extern\s*//; my $vars = parse_vars($copy); $vdecl_hash{$_->[2]} = [ @$_[0, 1] ] for @$vars; } \%vdecl_hash;}# The output is the list of list of inline chunks and list of# declaration chunks.sub functions_in { # The arg is text without type declarations. my $in = shift; # remove_type_decl(top_level(sanitize($txt))); # What remains now consists of variable and function declarations, # and inline functions. $in =~ /(?=\S)/g; my ($b, $e, $b1, $e1, @inlines, @decls, @mdecls, @fdecls, @vdecls); $b = pos $in; my $chunk; while (defined($b) && $b != length $in) { $in =~ /;/g or pos $in = $b, $in =~ /.*\S|\Z/g ; # Or last non-space $e = pos $in; $chunk = substr $in, $b, $e - $b; # Now subdivide the chunk. # # What we got is one chunk, probably finished by `;'. Whoever, it # may start with several inline functions. # # Note that inline functions contain ( ) { } in the stripped version. $b1 = 0; while ($chunk =~ /\(\s*\)\s*\{\s*\}/g) { $e1 = pos $chunk; push @inlines, $b + $b1, $b + $e1; $chunk =~ /(?=\S)/g; $b1 = pos $chunk; $b1 = length $chunk, last unless defined $b1; } if ($e - $b - $b1 > 0) { my ($isvar, $isfunc) = (1, 1); substr ($chunk, 0, $b1) = ''; if ($chunk =~ /,/) { # Contains multiple declarations. push @mdecls, $b + $b1, $e; } else { # Non-multiple. # Since leading \s* is not optimized, this is quadratic! $chunk =~ s{ ( ( const | __const | __attribute__ \s* \( \s* \) ) \s* )* ( ; \s* )? \Z # Strip from the end }()x; $chunk =~ s/\s*\Z//; if ($chunk =~ /\)\Z/) { # Function declaration ends on ")"! if ($chunk !~ m{ \( .* \( # Multiple parenths }x and $chunk =~ / \w \s* \( /x) { # Most probably pointer to a function? $isvar = 0; } } elsif ($chunk =~ / ^ \s* (enum|struct|union|class) \s+ \w+ \s* $ /x) { $isvar = $isfunc = 0; } if ($isvar) { # Heuristically variable push @vdecls, $b + $b1, $e; } elsif ($isfunc) { push @fdecls, $b + $b1, $e; } } push @decls, $b + $b1, $e if $isvar || $isfunc; } $in =~ /\G\s*/g ; $b = pos $in; } [\@inlines, \@decls, \@mdecls, \@vdecls, \@fdecls];}# XXXX This is heuristical in many respects...# Recipe: remove all struct-ish chunks. Remove all array specifiers.# Remove GCC attribute specifiers.# What remains may contain function's arguments, old types, and newly# defined types.# Remove function arguments using heuristics methods.# Now out of several words in a row the last one is a newly defined type.sub whited_decl { # Input is sanitized. my $keywords_rex = shift; my $in = shift; # Text of a declaration #typedef ret_type*(*func) -> typedef ret_type* (*func) $in =~ s/\*\(\*/* \(*/; my $rest = $in; my $out = $in; # Whited out $in # Remove all the structs while ($out =~ /(\b(struct|union|class|enum)(\s+\w+)?\s*\{)/g) { my $pos_start = pos($out) - length $1; matchingbrace($out); my $pos_end = pos $out; substr($out, $pos_start, $pos_end - $pos_start) = ' ' x ($pos_end - $pos_start); pos $out = $pos_end; } # Deal with glibc's wierd ass __attribute__ tag. Just dump it. # Maaaybe this should check to see if you're using GCC, but I don't # think so since glibc is nice enough to do that for you. [MGS] while ( $out =~ m/(\b(__attribute__|attribute)\s*\((?=\s*\())/g ) { my $att_pos_start = pos($out) - length($1); # Need to figure out where ((..)) ends. matchingbrace($out); my $att_pos_end = pos $out; # Remove the __attribute__ tag. substr($out, $att_pos_start, $att_pos_end - $att_pos_start) = ' ' x ($att_pos_end - $att_pos_start); pos $out = $att_pos_end; } # Remove arguments of functions (heuristics only). # These things (start) arglist of a declared function: # paren word comma # paren word space non-paren # paren keyword paren # start a list of arguments. (May be "cdecl *myfunc"?) XXXXX ????? while ( $out =~ /(\(\s*(\w+(,|\s*[^\)\s])|$keywords_rex\s*\)))/g ) { my $pos_start = pos($out) - length($1); pos $out = $pos_start + 1; matchingbrace($out); substr ($out, $pos_start + 1, pos($out) - 2 - $pos_start) = ' ' x (pos($out) - 2 - $pos_start); } # Remove array specifiers $out =~ s/(\[[\w\s\+]*\])/ ' ' x length $1 /ge; my $tout = $out; # Several words in a row cannot be new typedefs, but the last one. $out =~ s/((\w+\**\s+)+(?=[^\s,;\[\{\)]))/ ' ' x length $1 /ge; unless ($out =~ /\w/) { # Probably a function-type declaration: typedef int f(int); # Redo scan leaving the last word of the first group of words: if ($tout =~ /(\w+\s+)*(\w+)\s*\(/g) { $out = ' ' x (pos($tout) - length $2) . $2 . ' ' x (length($tout) - pos($tout)); } else { # try a different approach to get the last type my $len = length $tout; # cut all non-words at the end of the definition my $end = $tout =~ s/(\W*)$// ? length $1 : 0; # remove everything but the last word
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -