📄 postfault
字号:
#!/usr/local/bin/perl5 -w
#
# Copyright (C) 1996-1998 by the Board of Trustees
#    of Leland Stanford Junior University.
#
# This file is part of the SimOS distribution.
# See LICENSE file for terms of the license.
#
#
# This script post-processes a fault run (using fault.tcl and
# one of the PMAKE scripts from gch.disk).
#
# It answers the following questions.
#
# 1. did the run deviate from the no-fault pattern?
#    - all four files compiled in run 1
#    - all four files compiled in run 2
#    - 'sum' of the files correct both when viewed
#      on the child cell and on the parent cell
#
# 2. did recovery run?
#    - which cell launched recovery
#    - when did LSET start/finish
#    - when did RECV start/finish
#    - which cells were considered alive
#    - does this match the cell where the fault was injected
#
# Analysis outputs green, yellow, or red.
#
# Green flag conditions
#    - no deviation from std pattern, no recovery
#      OR
#      one file not compiled in run 1, all files compiled in run 2
#    - all sum's correct or '0 0'
#
# Yellow flag conditions
#    - deviation from std pattern without recovery
#    - any file not compiled in run 2
#    - premature termination of the run
#
# Red flag conditions
#    - corrupt file
#    - recovery starts but doesn't complete
#
# This script depends on the following
#
# 1) a log message of the following form when a fault is injected
#       FAULT: <type> cell <cell> cycles <cycles>
#
# 2) log printfs from the pmake script as to when files are started
#    finished, verification of output, etc
#
# 3) log messages done by annotations when (lset, recovery) start and end
#
# 4) cell configuration info (printed by simos)
#
# verbose levels
#    0  report green/yellow/red only
#    1  explain yellow or red
#    2  recovery timings
#    3  show forks, compiles, fault injection time
#
# Usage: postfault [-v<digit>] [-g] [logfile ...]
#    -v<digit>  set the verbose level (a bare -v behaves like -v0)
#    -g         the run was compiled with gcc (selects the gcc reference sums)
#    logfile    logs to analyze; defaults to 'cpu.log' when none is given
#

use strict;
use warnings;

# default verbose level is 1
my $verbose = 1;

# elements in filestatus array
my $filestate  = 0;    # state of file (0, "started", "finished", "verified")
my $launchcell = 1;    # cell started on
my $slavesum0  = 2;    # sum seen by slave
my $slavesum1  = 3;
my $mastersum0 = 4;    # sum seen by master
my $mastersum1 = 5;
my $knownsum0  = 6;    # reference sum value
my $knownsum1  = 7;

# Reference 'sum' output per object file; [-1, -1] means "no reference known".
my %knownSumsCc = (
    'mainN.o'     => [24453, 42],
    'bookN.o'     => [51971, 52],
    'genmovesN.o' => [33655, 20],
    'ataks.o'     => [33183, 6],
);
my %knownSumsGcc = (
    'mainN.o'     => [8751,  42],
    'bookN.o'     => [47809, 56],
    'utilN.o'     => [-1,    -1],
    'evalN.o'     => [-1,    -1],
    'genmovesN.o' => [23599, 28],
    'ataks.o'     => [54734, 7],
);

my $wasgcc = 0;

# State shared between doFile and recoveryCheck; doFile resets all of it for
# every log so that one log can never influence the verdict of the next one.
my $currentFile;
my %recoveryStatus;
my $faultCell        = 0;
my $multipleRecovery = 0;

# Consume leading options. The @ARGV guard stops the loop from matching
# against undef once the last argument has been shifted off.
while (@ARGV && $ARGV[0] =~ /^-([a-z])([0-9]?)$/) {
    my ($optlet, $optval) = ($1, $2);
    shift(@ARGV);
    if ($optlet eq "v") {
        # A bare "-v" used to leave $verbose as "", which compares like 0.
        $verbose = ($optval eq "") ? 0 : $optval;
    } elsif ($optlet eq "g") {
        $wasgcc = 1;
    } else {
        die "postfault: unknown option '$optlet'\n";
    }
}

# Fall back to cpu.log when no log was named, including the case where only
# options were given.
my @logFiles = @ARGV ? @ARGV : ('cpu.log');
foreach my $logFile (@logFiles) {
    doFile($logFile);
}

exit;

# Build the initial per-file status table for both pmake runs.
# Keys are "<run><object>" (e.g. "1mainN.o"); every record starts as
# [state 0, launch cell 0, four unseen sums (-1), known sum0, known sum1].
sub _initialFileStatus {
    my $known = $wasgcc ? \%knownSumsGcc : \%knownSumsCc;
    my %status;
    foreach my $run (1, 2) {
        foreach my $object (keys %$known) {
            $status{$run . $object} = [0, 0, -1, -1, -1, -1, @{ $known->{$object} }];
        }
    }
    return %status;
}

# Analyze one log file and print its verdict line
# ("<file> GREEN\t<reason>", "<file> RED" or "<file> YELLOW"), preceded by
# explanatory lines depending on $verbose.
#
# Parameters: the path of the log file to read.
# Returns:    nothing useful. An unreadable file produces a warning on STDERR
#             and is skipped instead of being reported as a YELLOW run.
sub doFile {
    ($currentFile) = @_;

    my %fileStatus = _initialFileStatus();

    %recoveryStatus = ('lsetStart'      => [0, 0, 0, 0],
                       'lsetFinish'     => [0, 0, 0, 0],
                       'recoveryStart'  => [0, 0, 0, 0],
                       'recoveryFinish' => [0, 0, 0, 0]);
    $multipleRecovery = 0;
    $faultCell        = 0;

    my $pmakeNum = 0;
    my $CPUCount = 0;
    my @cpu2cell;

    # Parsed from the log but not reported by the current analysis.
    my (@pmakeStart, @pmakeEnd, $faultTime, $MHZ);

    my $in;
    unless (open($in, '<', $currentFile)) {
        warn "postfault: cannot open '$currentFile': $!\n";
        return;
    }

    while (defined(my $line = <$in>)) {
        if ($line =~ /BLOG \s+ (\d+) \s+ CPU \s+ (\d+): (.*)/xs) {
            my ($cycle, $cpu, $rest) = ($1, $2, $3);

            if ($rest =~ /starting pmake (\d)/) {
                $pmakeNum = $1;
                $pmakeStart[$pmakeNum] = $cycle;
            } elsif ($rest =~ /finished pmake (\d)/) {
                my $pNum = $1;
                if ($pNum != $pmakeNum) {
                    die "ASSERT: finished different pmake than I started\n";
                }
                $pmakeEnd[$pmakeNum] = $cycle;
            } elsif ($rest =~ /started: (\S*)/) {
                my $tfile = $pmakeNum . $1;
                $fileStatus{$tfile}[$filestate]  = "started";
                $fileStatus{$tfile}[$launchcell] = $cpu2cell[$cpu];
            } elsif ($rest =~ /finished: (\d*) (\d*) (\S*)/) {
                my ($sum1, $sum2) = ($1, $2);
                my $tfile = $pmakeNum . $3;
                $fileStatus{$tfile}[$filestate] = "finished";
                $fileStatus{$tfile}[$slavesum0] = $sum1;
                $fileStatus{$tfile}[$slavesum1] = $sum2;
            } elsif ($rest =~ /verify: (\d*) (\d*) (\S*)/) {
                my ($sum1, $sum2) = ($1, $2);
                my $tfile = $pmakeNum . $3;
                $fileStatus{$tfile}[$filestate]  = "verified";
                $fileStatus{$tfile}[$mastersum0] = $sum1;
                $fileStatus{$tfile}[$mastersum1] = $sum2;
            }
        } elsif ($line =~ /^FAULT: \s+ \S+ \s+ cell \s+ (\d+) \s+ cycles \s+ (\d+)/x) {
            $faultCell = $1;
            $faultTime = $2;
            if ($verbose >= 3) {
                print $line;
            }
        } elsif ($line =~ /^RECOVERY \s+ (\S+) \s+ cycle=(\d+) \s+ cpu=(\d+)/x) {
            my ($op, $cycle) = ($1, $2);
            my $cell = $cpu2cell[$3];
            # A second event of the same kind on the same cell means recovery
            # ran more than once; report that only the first time.
            if (($recoveryStatus{$op}[$cell] != 0) && ($multipleRecovery == 0)) {
                $multipleRecovery = 1;
                if ($verbose > 0) {
                    print "$currentFile multiple recovery\n";
                }
            }
            $recoveryStatus{$op}[$cell] = $cycle;
        } elsif ($line =~ /^param CPU.Count (\d+)/) {
            $CPUCount = $1;
        } elsif ($line =~ /^param HIVE.NumCells (\d+)/) {
            my $ncells = $1;
            # CPUs are spread evenly over the cells; int() keeps the cell
            # number integral (the bare division yields values like 0.5).
            foreach my $i (0 .. ($CPUCount - 1)) {
                $cpu2cell[$i] = int(($ncells * $i) / $CPUCount);
            }
        } elsif ($line =~ /^fault set/) {
            if ($verbose >= 2) {
                print $line;
            }
        } elsif ($line =~ /^REMFORK/) {
            if ($verbose >= 3) {
                print $line;
            }
        } elsif ($line =~ /^ERROR/) {
            if ($verbose >= 2) {
                print $line;
            }
        } elsif (!defined($MHZ) && $line =~ /^MACHINE Clock\s*(\d*)/) {
            # Only the first clock line of a log is recorded. This replaces
            # the bare ?PATTERN? match-once operator, which newer perls reject.
            $MHZ = $1;
        }
    }
    close($in);

    #
    # finished parsing file
    # analyze condition
    #

    my @numcompiled  = (0, 0, 0);    # indexed by run number (1, 2)
    my @numtruncated = (0, 0, 0);
    my $numcorrupt   = 0;
    my $unstarted    = "";
    my $unfinished   = "";
    my $unverified   = "";

    foreach my $run (1, 2) {
        foreach my $tfile (sort(keys(%fileStatus))) {
            next if (substr($tfile, 0, 1) != $run);
            my $f = $fileStatus{$tfile};

            if ($f->[$filestate] eq "0") {
                $unstarted = $unstarted . " $tfile";
            } elsif ($f->[$filestate] eq "started") {
                # special case: don't add to unfinished list
                # if this is the compile that we expect to fail
                # (round 1, on the cell where we injected the fault)
                # -- that special case is currently disabled:
                #    if (! (($run == 1)
                #           && ($f->[$launchcell] == $faultCell)))
                $unfinished = $unfinished . " $tfile";
            } elsif ($f->[$mastersum0] == -1) {
                $unverified = $unverified . " $tfile";
            } else {
                $numcompiled[$run]++;

                # following is currently disabled because we only
                # have verified outputs in current script, no
                # starts or finishes since using standard makefile
                if (0) {
                    if (   ($f->[$slavesum0] == 0)
                        && ($f->[$slavesum1] == 0)) {
                        $numtruncated[$run]++;
                        if ($verbose > 0) {
                            print "$currentFile truncated output (slave): $tfile\n";
                        }
                    } elsif (   ($f->[$slavesum0] != $f->[$knownsum0])
                             || ($f->[$slavesum1] != $f->[$knownsum1])) {
                        $numcorrupt++;
                        if ($verbose > 0) {
                            print "$currentFile corrupt (slave): $tfile $f->[$slavesum0] $f->[$slavesum1]\n";
                        }
                    }
                }

                if (   ($f->[$mastersum0] == 0)
                    && ($f->[$mastersum1] == 0)) {
                    $numtruncated[$run]++;
                    if ($verbose > 0) {
                        print "$currentFile truncated output (master): $tfile\n";
                    }
                } elsif ($f->[$knownsum0] == -1) {
                    # nop: no reference sum to compare against
                } elsif (   ($f->[$mastersum0] != $f->[$knownsum0])
                         || ($f->[$mastersum1] != $f->[$knownsum1])) {
                    $numcorrupt++;
                    if ($verbose > 0) {
                        print "$currentFile corrupt (master): $tfile $f->[$mastersum0] $f->[$mastersum1] (known $f->[$knownsum0] $f->[$knownsum1])\n";
                    }
                }
            }
        }
    }

    # NOTE(review): the thresholds below expect four compiled files per run,
    # while the gcc table lists six objects -- confirm what -g runs compile.
    if (   ($numcompiled[1] == 4)
        && ($numcompiled[2] == 4)
        && ($numtruncated[1] == 0)
        && ($numtruncated[2] == 0)
        && ($numcorrupt == 0)) {
        print "$currentFile GREEN\tnominal\n";
    } else {
        my $rc = recoveryCheck();
        if (   ($numcorrupt != 0)
            || ($rc == 2)
            || ($numtruncated[2] != 0)) {
            print "$currentFile RED\n";
        } elsif (   ($numcompiled[1] == 3)
                 && ($numcompiled[2] == 4)
                 && ($rc == 0)
                 && ($numtruncated[1] <= 1)
                 && ($multipleRecovery == 0)) {
            print "$currentFile GREEN\tcellcrash\n";
        } else {
            if ($verbose > 0) {
                print "$currentFile not finished: $unfinished\n" if $unfinished;
                print "$currentFile not verified: $unverified\n" if $unverified;
                print "$currentFile not started: $unstarted\n"   if $unstarted;
            }
            print "$currentFile YELLOW\n";
        }
    }
    return;
}

#
# Inspect %recoveryStatus (filled in by doFile) for cells 0..3.
#
# return values
#    0  recovery ran, ok
#    1  recovery never ran
#    2  recovery ran, some cell did not complete it
#
# The cell where the fault was injected ($faultCell) is not required to
# complete recovery. Explanations are printed when $verbose > 0.
#
sub recoveryCheck {
    my $recoveryRan = 0;
    foreach my $i (0 .. 3) {
        if ($recoveryStatus{'lsetStart'}[$i] != 0) {
            $recoveryRan = 1;
        }
    }
    if (!$recoveryRan) {
        if ($verbose > 0) {
            print "$currentFile no recovery\n";
        }
        return 1;
    }

    my $badcells = "";
    foreach my $i (0 .. 3) {
        next if ($i == $faultCell);
        if ($recoveryStatus{'recoveryFinish'}[$i] == 0) {
            $badcells = $badcells . " " . $i;
        }
    }

    if ($badcells eq "") {
        return 0;
    }
    if ($verbose > 0) {
        print "$currentFile cells$badcells did not complete recovery\n";
    }
    return 2;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -