📄 maxseg.pl
字号:
#!/usr/bin/perl -w
# Purpose: maximum matching segmentation program
# Usage:   program dictionary inputfile
#
#   dictionary  one word per line
#   inputfile   text to segment, one sentence/paragraph per line
#               ("-" reads standard input, as with the historical 2-arg open)
#
# For every input line all whitespace is removed, then the line is split
# greedily from the left: at each position the longest dictionary word (up to
# the longest word seen in the dictionary) is emitted; if no word matches, a
# single unknown character is emitted instead.  Every token is followed by
# one space, and every input line produces one output line.
#
# The script works on raw bytes: a byte in \x21-\x7e is a one-byte character,
# anything else is taken as the first byte of a two-byte character.
# NOTE(review): presumably the data is in a double-byte encoding such as
# GB2312/GBK -- confirm before feeding it UTF-8.
#
# Author: Shen, Jingxue (shenjx1128@gmail.com)
# last change: 2006-10-24
##########################################################################
use strict;
use warnings;

# Open $path for reading and return the handle; dies with "path: reason".
# "-" yields STDIN so that existing invocations relying on 2-arg open's
# treatment of "-" keep working, without exposing 2-arg open's mode/pipe
# injection through the file name.
sub open_input {
    my ($path) = @_;
    return \*STDIN if $path eq '-';
    open(my $fh, '<', $path) or die "$path: $!\n";
    return $fh;
}

# Read the dictionary file at $path.
# Returns (\%dict, $maxwlen): a hash whose keys are the words (value 1) and
# the length of the longest word (0 if the dictionary is empty).
sub load_dictionary {
    my ($path) = @_;
    my %dict;
    my $maxwlen = 0;    # maximum length of word in dictionary

    my $fh = open_input($path);
    while (my $word = <$fh>) {
        # Strip only the line terminator (LF or CRLF).  chop() would also eat
        # the last character of a final line that has no trailing newline.
        $word =~ s/[\r\n]+\z//;
        next if $word eq '';

        $dict{$word} = 1;
        my $len = length $word;
        $maxwlen = $len if $len > $maxwlen;
    }
    close($fh);

    return (\%dict, $maxwlen);
}

# Segment one line of text.
#   $line     raw input line (the line terminator may still be attached)
#   $dict     hash reference of known words
#   $maxwlen  length of the longest dictionary word
#   $lineno   optional line number, used only in the diagnostic message
# Returns the tokens, each followed by a single space (no trailing newline).
sub segment_line {
    my ($line, $dict, $maxwlen, $lineno) = @_;
    $lineno = 0 unless defined $lineno;

    # Remove all whitespace; this also drops the line terminator, so nothing
    # needs to be chopped off beforehand.
    (my $text = $line) =~ s/\s+//g;

    my $out = '';
    while ($text ne '') {
        # Longest prefix of $text (at most $maxwlen long) found in the
        # dictionary; $len ends at 0 when no prefix matches.
        my $len = length($text) < $maxwlen ? length($text) : $maxwlen;
        --$len while $len > 0 && !$dict->{ substr($text, 0, $len) };

        if ($len == 0) {    # not found in dictionary
            if ($text =~ /^[\x21-\x7e]/) {          # an ASCII character
                $len = 1;
            }
            elsif ($text =~ /^[^\x21-\x7e]./) {     # a two-byte character
                $len = 2;
            }
            else {
                # A lone non-ASCII byte at the end of the line.  Emit the
                # byte itself: no capture group is set on this path.
                print STDERR "Oops: shouldn't be here: $lineno\n";
                $len = 1;
            }
        }

        # 4-arg substr cuts the token off the front of $text and returns it.
        $out .= substr($text, 0, $len, '') . ' ';
    }

    return $out;
}

# Entry point.  Takes the command-line arguments, writes the segmented text
# to STDOUT and returns the process exit status.
sub main {
    my @args = @_;

    my $usage = "Usage:\t$0 dictionary inputfile\n\t";
    if (@args != 2) {
        # Usage errors belong on STDERR with a non-zero status.
        print STDERR "$usage\n";
        return 1;
    }
    my ($dict_path, $input_path) = @args;

    my ($dict, $maxwlen) = load_dictionary($dict_path);

    my $in = open_input($input_path);
    my $n  = 0;    # line number (zero-based), reported in diagnostics
    while (my $line = <$in>) {
        print segment_line($line, $dict, $maxwlen, $n), "\n";
        ++$n;
    }
    close($in);

    return 0;
}

# Run only when executed as a script, so the subs can be loaded and tested.
exit main(@ARGV) unless caller;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -