📄 files.pm
字号:
package AI::Categorizer::Collection::Files;

use strict;
use warnings;

use AI::Categorizer::Collection;
use base qw(AI::Categorizer::Collection);

use Params::Validate qw(:types);
use File::Spec;

__PACKAGE__->valid_params
  (
   path    => { type => SCALAR|ARRAYREF },
   recurse => { type => BOOLEAN, default => 0 },
  );

# Constructor.  Accepts the superclass parameters plus 'path' (a directory
# name or an array reference of directory names) and 'recurse' (boolean).
# Opens the first directory immediately, so it dies if that directory
# cannot be opened or if no directory was supplied.
sub new {
  my $class = shift;
  my $self = $class->SUPER::new(@_);

  # Documents are contained in a directory, or list of directories.
  # Always work on a private copy: _next_path() shifts entries off this
  # list, which must not empty an array owned by the caller.
  $self->{path} = [ ref $self->{path} ? @{ $self->{path} } : $self->{path} ];
  $self->{used} = [];

  $self->_next_path;
  return $self;
}

# Closes the directory currently being read (if any) and opens the next
# pending one.  The directory is moved from the 'path' queue to the 'used'
# list before it is opened.  Dies if the queue is empty or the directory
# cannot be opened.  Returns a true value on success.
sub _next_path {
  my $self = shift;

  # Test the handle itself rather than the directory name, so that a
  # directory whose name is false (e.g. "0") still gets closed.
  if (defined $self->{dir_fh}) {
    closedir $self->{dir_fh};
    delete $self->{dir_fh};
  }

  my $dir = shift @{ $self->{path} };
  $self->{cur_dir} = $dir;
  die "No directory available to read documents from" unless defined $dir;

  push @{ $self->{used} }, $dir;
  opendir(my $dh, $dir) or die "$dir: $!";
  $self->{dir_fh} = $dh;
  return 1;
}

# Returns the next document in the collection, or nothing once every
# directory has been exhausted.  The document is built through
# call_method('document', 'read', ...) with the full path of the file, the
# bare file name as its name, and the categories listed for that file name
# in $self->{category_hash}.  Warns when no category entry exists.
sub next {
  my $self = shift;

  my $file = $self->_read_file;
  return unless defined $file;

  warn "No category information about '$file'" unless defined $self->{category_hash}{$file};
  my @cats = map { AI::Categorizer::Category->by_name(name => $_) }
             @{ $self->{category_hash}{$file} || [] };

  return $self->call_method('document', 'read',
                            path       => File::Spec->catfile($self->{cur_dir}, $file),
                            name       => $file,
                            categories => \@cats,
                           );
}

# Returns the name (without directory part) of the next non-directory entry,
# moving on to the next pending directory whenever the current one is
# exhausted.  Returns undef when nothing is left.  Subdirectories are skipped;
# when 'recurse' is set they are queued for later processing unless they are
# already pending or already visited.
sub _read_file {
  my ($self) = @_;

  # Iterate instead of recursing once per skipped entry, so that trees
  # with many directories cannot run into deep recursion.
  while (1) {
    my $file = defined $self->{dir_fh} ? readdir($self->{dir_fh}) : undef;

    if (!defined $file) {
      # Directory has been exhausted.  An explicit undef is kept here (not
      # a bare return) so list-context callers still receive one element.
      return undef unless @{ $self->{path} };
      $self->_next_path;
      next;
    }

    next if $file eq '.' or $file eq '..';

    my $path = File::Spec->catdir($self->{cur_dir}, $file);
    return $file unless -d $path;

    # Add for later processing
    push @{ $self->{path} }, $path
      if $self->{recurse}
         and !grep { $_ eq $path } @{ $self->{path} }, @{ $self->{used} };
  }
}

# Restarts iteration from the first directory.  Directories already visited
# are put back in front of the pending ones, so the traversal order is the
# same as on the first pass even when rewinding in mid-iteration.
sub rewind {
  my $self = shift;

  unshift @{ $self->{path} }, @{ $self->{used} };
  @{ $self->{used} } = ();
  return $self->_next_path;
}

# Returns the number of documents in the collection.  The result is cached in
# $self->{document_count}.  This should share an iterator with next(): as
# written, the first call walks the whole collection and rewinds it, so any
# iteration in progress is restarted.
sub count_documents {
  my $self = shift;
  return $self->{document_count} if defined $self->{document_count};

  $self->rewind;

  my $count = 0;
  $count++ while defined $self->_read_file;

  $self->rewind;
  return $self->{document_count} = $count;
}

1;
__END__

=head1 NAME

AI::Categorizer::Collection::Files - One document per file

=head1 SYNOPSIS

  my $c = AI::Categorizer::Collection::Files->new
    (path => '/tmp/docs/training',
     category_file => '/tmp/docs/cats.txt');
  print "Total number of docs: ", $c->count_documents, "\n";
  while (my $document = $c->next) {
    ...
  }
  $c->rewind; # For further operations

=head1 DESCRIPTION

This implements a Collection class in which each document exists as a
single file on a filesystem.  The documents can exist in a single
directory, or in several directories.

=head1 METHODS

This is a subclass of the abstract AI::Categorizer::Collection class,
so any methods mentioned in its documentation are available here.

=over 4

=item new()

Creates a new Collection object and returns it.  In addition to the
parameters accepted by the superclass, the following parameters are
accepted:

=over 4

=item path

Indicates a location on disk where the documents can be found.  The
path may be specified as a string giving the name of a directory, or
as a reference to an array of such strings if the documents are
located in more than one directory.  An array passed by reference is
copied and is not modified by the collection.

=item recurse

Indicates whether subdirectories of the directory (or directories) in
the C<path> parameter should be descended into.  If set to a true
value, they will be descended into.  If false, they will be ignored.
The default is false.

=back

=item next()

Returns the next document in the collection, or nothing when all
directories have been exhausted.  The document's name is the name of
its file without the directory part.  A warning is issued for files
that have no category information.

=item rewind()

Restarts iteration at the first directory of the collection.

=item count_documents()

Returns the number of documents in the collection.  The count is
computed once and then cached.  Computing it rewinds the collection,
so any iteration in progress starts over.

=back

=head1 AUTHOR

Ken Williams, ken@mathforum.org

=head1 COPYRIGHT

Copyright 2002-2003 Ken Williams.  All rights reserved.

This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 SEE ALSO

AI::Categorizer::Collection(3)

=cut
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -