📄 feed.pm
字号:
package XML::RSS::Feed;use strict;use warnings;use XML::RSS;use XML::RSS::Headline;use Time::HiRes;use Storable qw(store retrieve);=head1 NAMEXML::RSS::Feed - Persistant XML RSS Encapsulation=head1 VERSION2.212=cutour $VERSION = 2.212;=head1 SYNOPSISA quick and dirty non-POE example that uses a blocking B<sleep>. Themagic is in the B<late_breaking_news> method that returns only headlines it hasn't seen. use XML::RSS::Feed; use LWP::Simple qw(get); my $feed = XML::RSS::Feed->new( url => "http://www.jbisbee.com/rdf/", name => "jbisbee", delay => 10, debug => 1, tmpdir => "/tmp", # optional caching ); while (1) { $feed->parse(get($feed->url)); print $_->headline . "\n" for $feed->late_breaking_news; sleep($feed->delay); }ATTENTION! - If you want a non-blocking way to watch multiple RSS sources with one process use L<POE::Component::RSSAggregator>.=head1 CONSTRUCTOR=head2 XML::RSS::Feed->new( url => $url, name => $name )=over 4=item B<Required Params>=over 4=item * B<name> Identifier and hash lookup key for the RSS feed. =item * B<url> The URL of the RSS feed=back=item B<Optional Params>=over 4=item * B<delay> Number of seconds between updates (defaults to 600)=item * B<tmpdir> Directory to keep a cached feed (using Storable) to keep persistance between instances.=item * B<debug>Turn debuging on.=item * B<headline_as_id>Boolean value to use the headline as the id when URL isn't unique within a feed.=item * B<hlobj>A class name sublcassed from L<XML::RSS::Headline>=item * B<max_headlines>The max number of headlines to keep. (default is unlimited)=back=back=cut sub new { my $class = shift; my $self = bless { process_count => 0, rss_headlines => [], rss_headline_ids => {}, max_headlines => 0, }, $class; my %args = @_; foreach my $method ( keys %args ) { if ( $self->can($method) ) { $self->$method( $args{$method} ); } else { warn "Invalid argument '$method'"; } } $self->_load_cached_headlines if $self->{tmpdir}; $self->delay(3600) unless $self->delay; return $self;}sub _load_cached_headlines { my ($self) = @_; my $filename_sto = $self->{tmpdir} . '/' . $self->name . '.sto'; my $filename_xml = $self->{tmpdir} . '/' . $self->name; if ( -s $filename_sto ) { my $cached = retrieve($filename_sto); my $title = $self->title || $cached->{title} || ""; $self->set_last_updated( $cached->{last_updated} ); $self->{process_count}++; $self->process( $cached->{items}, $title, $cached->{link} ); warn "[$self->{name}] Loaded Cached RSS Storable\n" if $self->{debug}; } elsif ( -T $filename_xml ) { # legacy XML caching open( my $fh, $filename_xml ); my $xml = do { local $/, <$fh> }; close $fh; warn "[$self->{name}] Loaded Cached RSS XML\n" if $self->{debug}; $self->{process_count}++; $self->parse($xml); } else { warn "[$self->{name}] No Cache File Found\n" if $self->{debug}; }}sub _strip_whitespace { my ($string) = @_; $string =~ s/^\s+//; $string =~ s/\s+$//; return $string;}sub _mark_all_headlines_seen { my ($self) = @_; return unless $self->{process_count}; $self->{rss_headline_ids}{ $_->id } = 1 for $self->late_breaking_news;}=head1 METHODS=head2 $feed->parse( $xml_string )Pass in a xml string to parse with XML::RSS and then call process to process the results.=cutsub parse { my ( $self, $xml ) = @_; my $rss = XML::RSS->new(); eval { $rss->parse($xml) }; if ($@) { warn "[$self->{name}] [!!] Failed to parse RSS XML: $@\n"; return 0; } else { warn "[$self->{name}] Parsed RSS XML\n" if $self->{debug}; my $items = [ map { { item => $_ } } @{ $rss->{items} } ]; $self->process( $items, ( $self->title || $rss->{channel}{title} ), $rss->{channel}{link} ); return 1; }}=head2 $feed->process( $items, $title, $link )=head2 $feed->process( $items, $title )=head2 $feed->process( $items )Calls B<pre_process>, B<process_items>, B<post_process>, B<title>, and B<link>methods to process the parsed results of an RSS XML feed.=over 4=item * B<$items>An array of hash refs which will eventually become L<XML::RSS::Headline> objects. Lookat XML::RSS::Headline->new() for acceptable arguments.=item * B<$title>The title of the RSS feed.=item * B<$link>The RSS channel link (normally a URL back to the homepage) of the RSS feed.=back=cutsub process { my ( $self, $items, $title, $link ) = @_; if ($items) { $self->pre_process; $self->process_items($items); $self->title($title) if $title; $self->link($link) if $link; $self->post_process; return 1; } return 0;}=head2 $feed->pre_processMark all headlines from previous run as seen.=cutsub pre_process { my ($self) = @_; $self->_mark_all_headlines_seen;}=head2 $feed->process_items( $items )Turn an array refs of hash refs into L<XML::RSS::Headline> objects and added to the internal list of headlines.=cutsub process_items { my ( $self, $items ) = @_; if ($items) { # used 'reverse' so order seen is preserved for my $item ( reverse @$items ) { $self->create_headline(%$item); } return 1; } return 0;}=head2 $feed->post_processPost process cleanup, cache headlines (if tmpdir), and debug messages.=cutsub post_process { my ($self) = @_; if ( $self->init ) { warn "[$self->{name}] " . $self->late_breaking_news . " New Headlines Found\n" if $self->{debug}; } else { $self->_mark_all_headlines_seen; $self->init(1); warn "[$self->{name}] " . $self->num_headlines . " Headlines Initialized\n" if $self->{debug}; } $self->{process_count}++; $self->cache; $self->set_last_updated;}=head2 $feed->create_headline( %args)Create a new L<XML::RSS::Headline> object and add it to the interal list. Check B<< XML::RSS::Headline->new() >> for acceptable values for B<< %args >>.=cutsub create_headline { my ( $self, %args ) = @_; my $hlobj = $self->{hlobj} || "XML::RSS::Headline"; $args{headline_as_id} = $self->{headline_as_id}; my $headline = $hlobj->new(%args); return unless $headline; unshift( @{ $self->{rss_headlines} }, $headline ) unless $self->seen_headline( $headline->id ); # remove the oldest if the new headline put us over the max_headlines limit if ( $self->max_headlines ) { while ( $self->num_headlines > $self->max_headlines ) { my $garbage = pop @{ $self->{rss_headlines} }; # just in case max_headlines < number of headlines in the feed $self->{rss_headline_ids}{ $garbage->id } = 1; warn "[$self->{name}] Exceeded maximum headlines, removing " . "oldest headline\n" if $self->{debug}; } }}=head2 $feed->num_headlinesReturns the number of headlines for the feed.=cutsub num_headlines { my ($self) = @_; return scalar @{ $self->{rss_headlines} };}=head2 $feed->seen_headline( $id )Just a boolean test to see if we've seen a headline or not.=cutsub seen_headline { my ( $self, $id ) = @_; return 1 if exists $self->{rss_headline_ids}{$id}; return 0;}=head2 $feed->headlinesReturns an array or array reference (based on context) of L<XML::RSS::Headline> objects=cutsub headlines { my ($self) = @_; return wantarray ? @{ $self->{rss_headlines} } : $self->{rss_headlines};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -