📄 stemmer.pm
字号:
package KinoSearch::Analysis::Stemmer;
use strict;
use warnings;
use KinoSearch::Util::ToolSet;
use base qw( KinoSearch::Analysis::Analyzer );

# Lookup table of language codes accepted by this class; keys are the codes
# reported by Lingua::Stem::Snowball's stemmers(), values are always 1.
our %supported_languages;

BEGIN {
    __PACKAGE__->init_instance_vars(
        # constructor params / members
        # NOTE(review): the 'language' member read in init_instance() is not
        # declared here -- presumably it is inherited from the Analyzer base
        # class; confirm.
        stemmifier => undef,
    );
}

use Lingua::Stem::Snowball qw( stemmers );

# build a list of supported languages.
$supported_languages{$_} = 1 for stemmers();

# Validate the 'language' member and build the underlying Snowball stemmer.
#
# Lower-cases $self->{language} in place, croaks with
# "Unsupported language: '...'" if the result is not a key of
# %supported_languages, and otherwise stores a Lingua::Stem::Snowball
# instance in $self->{stemmifier}.
#
# NOTE(review): presumably invoked by the inherited constructor -- confirm.
sub init_instance {
    my $self = shift;

    # verify language param.  An undefined value is treated as the empty
    # string so that lc() and the error message below do not emit
    # "uninitialized value" warnings; the empty string is not a key of
    # %supported_languages, so it is still rejected.
    my $language
        = defined $self->{language} ? lc( $self->{language} ) : '';
    $self->{language} = $language;
    croak("Unsupported language: '$language'")
        unless $supported_languages{$language};

    # create instance of Snowball stemmer
    $self->{stemmifier} = Lingua::Stem::Snowball->new( lang => $language );
}

# Replace every text in $batch with its stemmed form and return $batch.
#
# The texts are fetched with get_all_texts(), handed to the stemmer's
# stem_in_place(), written back with set_all_texts(), and the batch is then
# reset().
#
# NOTE(review): $batch is presumably a token batch object whose
# get_all_texts() returns an array reference and whose reset() rewinds its
# iterator -- neither is visible from this file; confirm.
sub analyze {
    my ( $self, $batch ) = @_;

    # replace terms with stemmed versions.
    my $all_texts = $batch->get_all_texts;
    $self->{stemmifier}->stem_in_place($all_texts);
    $batch->set_all_texts($all_texts);

    $batch->reset;
    return $batch;
}

1;

__END__

=head1 NAME

KinoSearch::Analysis::Stemmer - reduce related words to a shared root

=head1 SYNOPSIS

    my $stemmer = KinoSearch::Analysis::Stemmer->new( language => 'es' );

    my $polyanalyzer = KinoSearch::Analysis::PolyAnalyzer->new(
        analyzers => [ $lc_normalizer, $tokenizer, $stemmer ],
    );

=head1 DESCRIPTION

Stemming reduces words to a root form.  For instance, "horse", "horses",
and "horsing" all become "hors" -- so that a search for 'horse' will also
match documents containing 'horses' and 'horsing'.

This class is a wrapper around
L<Lingua::Stem::Snowball|Lingua::Stem::Snowball>, so it supports the same
languages.

=head1 METHODS

=head2 new

Create a new stemmer.  Takes a single named parameter, C<language>, which must
be an ISO two-letter code that Lingua::Stem::Snowball understands.

=head1 COPYRIGHT

Copyright 2005-2007 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch|KinoSearch> version 0.163.

=cut
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -