📄 soif2gils.pl.in
字号:
#!@PERL@## kjl/26apr2003## soif2gils.pl## Translate SOIF files to GILS files.## Usage:## soif2gils.pl [-H path]## where path is the path to the broker, usually# /usr/local/harvest/brokers/YOUR_BROKER## You can invoke this directly at command line or# embed it into glimpseindex script.## Edit below if necessary##$DEBUG = 1;# Default broker directory. This can be overriden by# "-H /your/path" argument.$BROKERDIR = "/usr/local/harvest/brokers/tab-server.tab";# End of configuration# No changes necessary below# Where is the broker directory$DATADIR = parse_arg ();if ($DATADIR eq "") { $DATADIR = $BROKERDIR;}chdir $DATADIR or die "Can't chdir to $DATADIR: $!";# directory names$OBJDIR = "objects"; # directory containing objects$GILSDIR = "objects-xml"; # where to put GILS files# create directories for the GILS files if necessaryput_log ("Preparing GILS directory tree");make_subdirs ();put_log ("GILS directory tree ok");put_log ("Parsing objects");process_objs ();put_log ("Finished");# End of program## Parse arguments and fetch the broker directory#sub parse_arg { my $i = 0; my $datadir = ""; foreach (@ARGV) { $i++; last if /\-H/o; } $datadir = $ARGV[$i] if ($i <= $#ARGV); return $datadir;}## print current local time#sub print_time { my $now = localtime(); print "$now";}## log string#sub put_log { my $log = shift; print_time (); print "\t$log\n";}## create directories for GILS files if necessary#sub make_subdirs { my $dir; if (! -d $GILSDIR) { mkdir ($GILSDIR) or die "Can't create directory $GILSDIR: $!"; } if (! -d "$GILSDIR/0") { opendir (DIR, "$OBJDIR"); while ($dir = readdir (DIR)) { next if (($dir eq "\.") or ($dir eq "\.\.")); put_log ("Creating $GILSDIR/$dir"); mkdir ("$GILSDIR/$dir") or die "Can't create directory $GILSDIR/$dir: $!"; } closedir (DIR); }}## recurse through broker's object directory and create GILS files#sub process_objs { my ($dir, $obj); opendir (DIR, "$OBJDIR"); while ($dir = readdir (DIR)) { next if (($dir eq "\.") or ($dir eq "\.\.")); opendir (SUBDIR, "$OBJDIR/$dir"); while ($obj = readdir (SUBDIR)) { next if (($obj eq "\.") or ($obj eq "\.\.")); soif2gils ("$dir/$obj"); } closedir (SUBDIR); } closedir (DIR);}## remove empty lines and encode offending character#sub clean { my $lines = shift; # remove empty lines $lines =~ s/\n\s+\n/\n/g; # encode offending character $lines =~ s/</</g; return $lines;}## convert soif to gils#sub soif2gils { my $obj = shift; my ($ttype, $url, %SOIF); open (IN, "<$OBJDIR/$obj") or die "Can't open $OBJDIR/$obj: $!"; ($ttype, $url, %SOIF) = soif_parse(); close (IN); foreach $key (keys %SOIF) { $SOIF{"$key"} = clean ($SOIF{"$key"}); } put_log ("Writing $GILSDIR/$obj.gils"); open (OUT, ">$GILSDIR/$obj.gils") or die "Can't open $GILSDIR/$obj.gils: $!"; print OUT "<gils>\n\n"; print OUT "<availability>\n"; print OUT " <linkage>\n"; print OUT " $url\n"; print OUT " </linkage>\n"; #print OUT " <linkageType>\n"; #print OUT " $ttype\n"; #print OUT " </linkageType>\n"; print OUT "</availability>\n\n"; print OUT "<dateOfLastModification>\n"; print OUT " ", $SOIF{'last-modification-time'}, "\n"; print OUT "</dateOfLastModification>\n\n"; #print OUT "<controlIdentifier>\n"; #print OUT " ", $SOIF{'md5'}, "\n"; #print OUT "</controlIdentifier>\n\n"; print OUT "<abstract>\n"; print OUT " ", $SOIF{'description'}, "\n"; print OUT "</abstract>\n\n"; print OUT "<author>\n"; print OUT " ", $SOIF{'author'}, "\n"; print OUT "</author>\n\n"; print OUT "<localSubjectIndex>\n"; print OUT " <localSubjectTerm>\n"; print OUT " ", $SOIF{'keywords'}, "\n"; print OUT " </localSubjectTerm>\n"; print OUT "</localSubjectIndex>\n\n"; print OUT "<supplementalInformation>\n"; print OUT " <bytes>\n"; print OUT " ", $SOIF{'file-size'}, "\n"; print OUT " </bytes>\n"; #print OUT " <lastChecked>\n"; #print OUT " ", $SOIF{'update-time'}, "\n"; #print OUT " </lastChecked>\n"; print OUT "</supplementalInformation>\n\n"; print OUT "<crossReference>\n"; print OUT " <linkage>\n"; print OUT " ", $SOIF{'url-references'}, "\n"; print OUT " </linkage>\n"; print OUT "</crossReference>\n\n"; print OUT "<title>\n"; print OUT " ", $SOIF{'title'}, "\n"; print OUT "</title>\n\n"; #print OUT "<sampleText>\n"; print OUT "<Body-of-text>\n"; print OUT $SOIF{'body'}, "\n" if exists $SOIF{'body'}; print OUT $SOIF{'full-text'}, "\n" if exists $SOIF{'full-text'}; print OUT $SOIF{'partial-text'}, "\n" if exists $SOIF{'partial-text'}; print OUT "</Body-of-text>\n\n"; #print OUT "</sampleText>\n\n"; print OUT "</gils>\n"; close (OUT);}## This is from soif.pl.# soif_parse - Returns an associative array containing the SOIF,# the template type, and the URL.#sub soif_parse { print "Inside soif_parse.\n" if ($debug); return () if (eof(IN)); # DW my $template_type = "UNKNOWN"; my $url = "UNKNOWN"; my %SOIF; undef %SOIF; my ($attr, $vsize, $value, $end_value); while (<IN>) { print "READING input line: $_\n" if ($debug); last if (/^\@\S+\s*{\s*\S+\s*$/o); } if (/^\@(\S+)\s*{\s*(\S+)\s*$/o) { $template_type = $1, $url = $2; } else { return ($template_type, $url, %SOIF); # done } while (<IN>) { if (/^\s*([^{]+){(\d+)}:\t(.*\n)/o) { $attr = $1; $vsize = $2; $value = $3; if (length($value) < $vsize) { $nleft = $vsize - length($value); $end_value = ""; $x = read(IN, $end_value, $nleft); die "Cannot read $nleft bytes: $!" if ($x != $nleft); $value .= $end_value; undef $end_value; } chop ($value) if ($value =~ /\n$/); $SOIF{$attr} = $value; undef $value; undef $end_value; next; } last if (/^}/o); } return ($template_type, $url, %SOIF);}=unused## return file name of a string#sub basename { my $path = shift; $path =~ /.*\/(.*)/; return $1;}## return directory name of a string#sub dirname { my $path = shift; $path =~ /(.*)\/.*/; return $1;}=cut
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -