#!perl -wW
package CyberArmy::Remote::Provider::Feed;

use strict;
use XML::RSS;
use Digest::MD5 qw(md5_hex);
use HTTP::Date qw(str2time);
use LWP::UserAgent;
use CyberArmy::Database;
use CyberArmy::WWW::Request;

## Harvester version; fetch() appends it to the User-Agent string.
## NOTE(review): this lives in the CyberArmy::Feed::Provider namespace, not
## in this file's package (CyberArmy::Remote::Provider::Feed). fetch() reads
## the same fully qualified name, so rename both together or not at all --
## confirm whether the mismatch is intentional.
$CyberArmy::Feed::Provider::VERSION = '0.1';

## fetch()
##
## Harvest every feed in feed_list whose `remote` column is 'Y'.
##
## For each feed:
##   * GET its url with an If-Modified-Since header built from last_update;
##   * parse the body with XML::RSS (a parse failure is warned about and
##     the feed is skipped);
##   * if the feed looks changed (newer pubDate, or the md5 of the body
##     differs from the stored hash), replace its rows in feed_item with
##     the freshly parsed items and record the new hash / last_update.
##
## Takes no arguments and returns nothing useful; progress is printed to
## STDOUT and problems are reported with warn().
sub fetch {
	my $db = CyberArmy::Database->instance;
	my $feeds = $db->prepare('
		SELECT id,url,hash,UNIX_TIMESTAMP(last_update) AS last_update
			FROM feed_list WHERE remote = ?
	');
	$feeds->execute('Y');

	my $ua = LWP::UserAgent->new();
	$ua->agent('CyberArmyFeedHarvester/'
		.$CyberArmy::Feed::Provider::VERSION);

	FEED: while ( my $feed = $feeds->fetchrow_hashref() ) {
		my $req = HTTP::Request->new('GET', $feed->{'url'});
		$req->if_modified_since($feed->{'last_update'});

		my $res = $ua->request($req);
		unless ($res->is_success) {
			## 304 is the normal "nothing new" answer to our
			## conditional GET, so it is not worth a warning
			warn "Can't fetch $feed->{url}: ",
				$res->status_line,"\n" unless ($res->code == 304);
			next FEED;
		}

		my $content = $res->content;
		my $rss = XML::RSS->new;
		## test eval's own result rather than $@, which can be clobbered
		unless (eval { $rss->parse($content); 1 }) {
			warn "Can't parse $feed->{url}\n";
			next FEED;
		}

		## md5 checksum, for sites that won't
		## honor If-Modified-Since req header.
		## Computed up front so the value stored below is always defined,
		## whichever of the two tests decides the feed has changed.
		my $hash = md5_hex($content);

		## NOTE(review): XML::RSS normally keeps channel-level fields under
		## $rss->{'channel'}; confirm $rss->{'pubDate'} is ever populated.
		## str2time() yields undef for an unparsable date, hence the "|| 0".
		my $published = str2time($rss->{'pubDate'}||'') || 0;
		my $changed = ($published > ($feed->{'last_update'}||0))
			|| (($feed->{'hash'}||'') ne $hash);
		next FEED unless $changed;

		## drop the old items and stamp the feed before re-importing
		$db->do($_,undef,$feed->{'id'}) foreach (
			'DELETE FROM feed_item WHERE feed_id = ?',
			'UPDATE feed_list SET last_update=NOW() WHERE id = ?'
		);

		ITEM: foreach my $item (@{$rss->{'items'}}) {
			my $pubDate = str2time(
				$item->{'pubDate'}||$item->{'pubdate'}||$item->{'dc'}->{'date'}
			);
			unless ($pubDate) {
				## an undated item aborts the rest of this feed's import
				warn "no valid <pubDate/> in $feed->{url}";
				last ITEM;
			}

			## escapeHtml's return value is unused; presumably it escapes
			## its arguments in place -- verify against CyberArmy::WWW::Utils.
			## nb: the xml parser already doesn't allow bare &, etc
			CyberArmy::WWW::Utils::escapeHtml($item->{'title'},
				$item->{'link'}, $item->{'description'});
			## titles keep a literal ampersand
			$item->{'title'} =~ s/&amp;/&/g;

			$db->do(
				'INSERT INTO feed_item
					(feed_id,title,url,pubdate,content)
				VALUES (?,?,?,FROM_UNIXTIME(?),?)',
				undef,
				$feed->{'id'},$item->{'title'},$item->{'link'},
				$pubDate,$item->{'description'}
			);
		}

		$db->do(
			'UPDATE feed_list SET hash = ? WHERE id = ?',
			undef, $hash, $feed->{'id'});

		print 'update from ',$feed->{'url'},"\n";
	}
}

1;
