📄 svm.pm
字号:
package AI::Categorizer::Learner::SVM;

use strict;
use warnings;

our $VERSION = '0.01';

use AI::Categorizer::Learner::Boolean;
use base qw(AI::Categorizer::Learner::Boolean);
use Algorithm::SVM;
use Algorithm::SVM::DataSet;
use Params::Validate qw(:types);
use File::Spec;

__PACKAGE__->valid_params(
    svm_kernel => { type => SCALAR, default => 'linear' },
);

# Builds the feature-name <-> integer-index maps from the knowledge set's
# features, stores them in $self->{model}, then delegates to the parent
# class's create_model().  Returns whatever the parent returns.
sub create_model {
    my $self = shift;

    my $features = $self->knowledge_set->features->as_hash;
    my @names    = keys %$features;

    # feature name => position of that name in @names
    my %index_of;
    @index_of{@names} = 0 .. $#names;

    $self->{model}{feature_map}         = \%index_of;
    $self->{model}{feature_map_reverse} = \@names;

    return $self->SUPER::create_model(@_);
}

# Converts a document into an Algorithm::SVM::DataSet carrying $label.
# $fm maps feature names to attribute indices; features of the document
# that have no entry in $fm are skipped.
sub _doc_2_dataset {
    my ($self, $doc, $label, $fm) = @_;

    my $ds       = Algorithm::SVM::DataSet->new(Label => $label);
    my $features = $doc->features->as_hash;

    for my $name (keys %$features) {
        next unless exists $fm->{$name};
        $ds->attribute($fm->{$name}, $features->{$name});
    }

    return $ds;
}

# Trains and returns one Algorithm::SVM object.  Documents in $positives
# are labelled 1 and documents in $negatives are labelled 0.  A fourth
# (category) argument may be passed by the caller; it is not used here.
sub create_boolean_model {
    my ($self, $positives, $negatives) = @_;

    my $svm = Algorithm::SVM->new(Kernel => $self->{svm_kernel});
    my $fm  = $self->{model}{feature_map};

    my @pos = map { $self->_doc_2_dataset($_, 1, $fm) } @$positives;
    my @neg = map { $self->_doc_2_dataset($_, 0, $fm) } @$negatives;

    $svm->train(@pos, @neg);
    return $svm;
}

# Converts $doc to a dataset once (label -1) and keeps it in
# $self->{current_doc} for the duration of the parent's get_scores() call,
# so get_boolean_score() can reuse it instead of converting again.
sub get_scores {
    my ($self, $doc) = @_;

    local $self->{current_doc}
        = $self->_doc_2_dataset($doc, -1, $self->{model}{feature_map});

    return $self->SUPER::get_scores($doc);
}

# Returns $svm's prediction for the dataset cached by get_scores().  The
# $doc argument is ignored; $self->{current_doc} is used instead.
sub get_boolean_score {
    my ($self, $doc, $svm) = @_;
    return $svm->predict($self->{current_doc});
}

# Saves the learner under $path.  The parent's save_state() is called with
# the SVM objects and the knowledge set temporarily removed from $self;
# each SVM is then written to its own file in the 'svms' subdirectory of
# $path using the SVM object's save() method.  Dies if that subdirectory
# cannot be created.
sub save_state {
    my ($self, $path) = @_;

    {
        # Hidden only while the parent serializes $self.
        local $self->{model}{learners};
        local $self->{knowledge_set};
        $self->SUPER::save_state($path);
    }
    return unless $self->{model};

    my $svm_dir = File::Spec->catdir($path, 'svms');
    mkdir($svm_dir, 0777) or die "Couldn't create $svm_dir: $!";

    my $learners = $self->{model}{learners} || {};
    for my $name (keys %$learners) {
        my $svm_file = File::Spec->catfile($svm_dir, $name);
        $learners->{$name}->save($svm_file);
    }

    return;
}

# Restores a learner saved under $path.  After the parent's restore_state()
# has rebuilt the object, every non-directory entry in the 'svms'
# subdirectory is loaded as an Algorithm::SVM model and stored in
# $self->{model}{learners} under its file name.  Returns the restored
# object.  Dies if the subdirectory exists but cannot be opened.
sub restore_state {
    my ($self, $path) = @_;

    $self = $self->SUPER::restore_state($path);

    my $svm_dir = File::Spec->catdir($path, 'svms');
    return $self unless -e $svm_dir;

    opendir my ($dh), $svm_dir
        or die "Can't open directory $svm_dir: $!";

    while (defined(my $file = readdir $dh)) {
        my $full_file = File::Spec->catfile($svm_dir, $file);
        next if -d $full_file;    # also skips '.' and '..'
        $self->{model}{learners}{$file}
            = Algorithm::SVM->new(Model => $full_file);
    }
    closedir $dh;

    return $self;
}

1;

__END__

=head1 NAME

AI::Categorizer::Learner::SVM - Support Vector Machine Learner

=head1 SYNOPSIS

  use AI::Categorizer::Learner::SVM;

  # Here $k is an AI::Categorizer::KnowledgeSet object

  my $learner = AI::Categorizer::Learner::SVM->new(...parameters...);
  $learner->train(knowledge_set => $k);
  $learner->save_state('filename');

  ... time passes ...

  $learner = AI::Categorizer::Learner->restore_state('filename');
  while (my $document = ... ) {  # An AI::Categorizer::Document object
    my $hypothesis = $learner->categorize($document);
    print "Best assigned category: ", $hypothesis->best_category, "\n";
  }

=head1 DESCRIPTION

This class implements a Support Vector Machine learner, using
Cory Spencer's C<Algorithm::SVM> module.  In lots of the recent
academic literature, SVMs perform very well for text categorization.

=head1 METHODS

This class inherits from the C<AI::Categorizer::Learner> class (via
C<AI::Categorizer::Learner::Boolean>), so all of its methods are
available unless explicitly mentioned here.

=head2 new()

Creates a new SVM Learner and returns it.  In addition to the
parameters accepted by the C<AI::Categorizer::Learner> class, the
SVM subclass accepts the following parameters:

=over 4

=item svm_kernel

Specifies what type of kernel should be used when building the SVM.
Default is 'linear'.  Possible values are 'linear', 'polynomial',
'radial' and 'sigmoid'.

=back

=head2 train(knowledge_set => $k)

Trains the categorizer.  This prepares it for later use in
categorizing documents.  The C<knowledge_set> parameter must provide
an object of the class C<AI::Categorizer::KnowledgeSet> (or a subclass
thereof), populated with lots of documents and categories.  See
L<AI::Categorizer::KnowledgeSet> for the details of how to create such
an object.

=head2 categorize($document)

Returns an C<AI::Categorizer::Hypothesis> object representing the
categorizer's "best guess" about which categories the given document
should be assigned to.  See L<AI::Categorizer::Hypothesis> for more
details on how to use this object.

=head2 save_state($path)

Saves the categorizer for later use.  This method extends the
C<save_state()> method inherited from C<AI::Categorizer::Storable>:
in addition to the state written by the parent class, each trained SVM
is saved to its own file in an F<svms> subdirectory of C<$path>.

=head2 restore_state($path)

Restores a categorizer previously saved with C<save_state()>.  If an
F<svms> subdirectory exists under C<$path>, every file in it is loaded
as an C<Algorithm::SVM> model.  Returns the restored object.

=head1 AUTHOR

Ken Williams, ken@mathforum.org

=head1 COPYRIGHT

Copyright 2000-2003 Ken Williams.  All rights reserved.

This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 SEE ALSO

AI::Categorizer(3), Algorithm::SVM(3)

=cut
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -