📄 convert_data.pl
字号:
#!/usr/local/bin/perl -w
#-----Get arguments from command line
# Read Job Descriptions and only take "Qualification" part then
# apply StopWords filter
#
# Command-line arguments (all three are required):
#   input_path           directory containing the input text files
#   stopwords_file       file containing the stop words
#   number of stopwords  numeric value used while loading the stop-word file
die "Usage: $0 [input_path] [stopwords_file] [number of stopwords]"."\n"
    unless (@ARGV > 2);
$argv1 = $ARGV[0];
$argv2 = $ARGV[1];
$argv3 = $ARGV[2];
# set path for input txt files
$path = $argv1;
# Fail loudly if the input directory cannot be read; otherwise the script
# would silently run on an empty file list.
opendir(my $in_dir, $path) or die "Could not open directory '$path': $!\n";
@in_array_temp = readdir($in_dir);
closedir($in_dir);
# readdir returns bare entry names, so each one has to be joined to $path with
# an explicit "/" before the -d test. Without the separator the test looks at
# "<path><name>" and lets sub-directories (including "." and "..") through
# whenever $path has no trailing slash.
@in_array = grep { !-d "$path/$_" } @in_array_temp;
#-----Set StopWord file
# Load at most $argv3 whitespace-separated words from the stop-word file into
# %word_num (lower-cased word => number of times it was read).
# $num_s counts the words read so far; it has to be incremented for the
# limit to have any effect.
$num_s = 0;
$engFile = defined($argv2) ? $argv2 : "eng.txt";
open(my $in_eng, '<', $engFile) or die "Could not open '$engFile': $!";
STOP_LINE:
while (my $input_eng = <$in_eng>) {
    while ($input_eng =~ /(\S+)/g) {
        # Stop as soon as the requested number of stop words has been read.
        last STOP_LINE if $num_s >= $argv3;
        my $stop_word = lc($1);
        $word_num{$stop_word} += 1;
        $num_s++;
    }
}
close($in_eng);
#-----MAIN Function
# For every input file, collect the words that appear on or after the first
# line containing "qualifications" (case-insensitive) and that are not keys of
# %word_num. %keywords maps each such lower-cased word to its occurrence count.
$flag = 0;
for my $in_name (@in_array)
{
    my $in_path = "$path/$in_name";
    # Three-argument open keeps odd file names from being read as open modes;
    # an unreadable entry is reported and skipped instead of being read from
    # a closed handle.
    my $in_file;
    unless (open($in_file, '<', $in_path)) {
        warn "Could not open '$in_path': $!\n";
        next;
    }
    while (my $input_line = <$in_file>) {
        # Turn punctuation into spaces so it never sticks to a word; this
        # also makes a separate trailing-punctuation strip unnecessary.
        $input_line =~ s/[!\';.?,:()\/]/ /g;
        # Once the marker has been seen, the rest of the file is collected,
        # including the marker line itself.
        if (lc($input_line) =~ /qualifications/) {$flag = 1;}
        next unless $flag == 1;
        while ($input_line =~ /\b(\S+)/g) {
            my $word = lc($1);
            if (!exists($word_num{$word})) {
                $keywords{$word} += 1;
            }
        }
    }
    close($in_file);
    # Each file has to find its own "qualifications" marker.
    $flag = 0;
}
# Print every collected keyword on its own line, in default string sort order.
my @sorted_keywords = sort keys %keywords;
for my $entry (@sorted_keywords) {
    print "$entry\n";
}
#$i = 0;
#foreach $keyword (reverse sort {$tt_num{$a} <=> $tt_num{$b}} keys %keywords)
#{
# printf ("%s\t\t%d\n", $keyword,$keywords{$keyword});
# $i++;
# if ($i > 19){last;}
#}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -