#!/usr/bin/perl -w ####################################################################### ### $Id: rss2mail-0.04.txt,v 1.2 2003/12/28 04:02:08 mate Exp $ ### use strict; use lib '/path/to/mt/extlib'; use XML::RSS::LP; use URI; use LWP::UserAgent; use HTTP::Date; use DB_File; use Jcode; my $VERSION = '0.04'; my (@RSS_URL, $DB_FILE, $DB_EXPIRES, $UA, $UA_PROXY, $UA_PROXY_BYPASS, $UA_TIMEOUT, $UA_NAME, $SENDMAIL, $SENDMAIL_PATH, $MAIL_TO, $MAIL_SUBJECT, $MAIL_FROM, $FOLDLEN, $LOCK_ERR, $DEBUG); ####################################################################### # 設定ここから # RSS 取得先 URL のリスト @RSS_URL = ( 'http://www.domain.com/index.rdf', 'http://www.domain.com/index.rdf', 'http://www.domain.com/index.rdf', ); # キャッシュファイル名 # 一度読んだ記事は、キャッシュがクリアされるまで取得しません。 $DB_FILE = 'rss_cache.db'; # キャッシュされた記事の有効期限(秒数) $DB_EXPIRES = 60 * 60 * 24 * 90; # メール送信(0=しない,1=する) # 0 の場合は標準出力に結果を表示します。 # 追加記事がない場合は送信しません。 $SENDMAIL = 1; # 送信コマンド $SENDMAIL_PATH = '/usr/sbin/sendmail'; # メール件名 $MAIL_SUBJECT = '記事が更新されました! ($total)'; # メール送信先 $MAIL_TO = ''; # メール FROM $MAIL_FROM = "RSS2MAIL <$MAIL_TO>"; # メール本文の幅(文字数) $FOLDLEN = 72; # $UA_PROXY = 'http://my.proxy.server.domain.name:port/'; # $UA_PROXY_BYPASS = ['localhost','127.0.0.1']; # $UA_TIMEOUT = 10; # $UA_NAME = "rss2mail/$VERSION"; $DEBUG = 0; # 設定ここまで ####################################################################### $|=1; $UA = LWP::UserAgent->new; $UA->agent(defined($UA_NAME) ? $UA_NAME : "rss2mail/$VERSION"); $UA->timeout(defined($UA_TIMEOUT) ? $UA_TIMEOUT : 10); $UA->proxy(['http'], $UA_PROXY) if (defined $UA_PROXY); $UA->no_proxy($UA_PROXY_BYPASS) if (defined $UA_PROXY_BYPASS); $UA->env_proxy unless (defined($UA_PROXY) || defined($UA_PROXY_BYPASS)); my (%template, %cache, $cref, $iref); my $total = 0; my $mail_loop = ""; read_template(\%template); lock_db("$DB_FILE.lock"); tie(%cache, 'DB_File', $DB_FILE, O_RDWR|O_CREAT, 0666, $DB_HASH) or die "Can not open $DB_FILE: $!"; foreach (@RSS_URL) { my $uri = URI->new($_); my $rss = get_rss_from_uri($uri, $cache{$uri}); next unless $rss; next unless defined($rss->expand_ns_prefix('#default')); $cache{$uri} = time; undef %$cref; undef @$iref; parse_rss($rss, $cref, $iref); $cref->{total} = cache_items($iref, \%cache); next if $cref->{total} < 1; $total += $cref->{total}; $mail_loop .= parse_template($template{CHANNEL}, $cref); $mail_loop .= parse_template($template{ITEMS}, $iref); } clean_items(\%cache); untie(%cache); unlock_db("$DB_FILE.lock"); exit if ($total < 1); if ($SENDMAIL) { $MAIL_SUBJECT = parse_template($MAIL_SUBJECT, { total => $total }); my %header = (version => $VERSION, to => Jcode->new($MAIL_TO)->mime_encode, subject => Jcode->new($MAIL_SUBJECT)->mime_encode, from => Jcode->new($MAIL_FROM)->mime_encode); my %footer = (); my $mail_head = parse_template($template{HEADER}, \%header); my $mail_foot = parse_template($template{FOOTER}, \%footer); my $mail_body = $mail_loop.$mail_foot; sendmail($MAIL_TO, \$mail_head, \$mail_body); } else { print $mail_loop; } exit; # End of Main ####################################################################### sub get_rss_from_uri { my ($uri, $time) = @_; my $rss = new XML::RSS::LP; my $req = HTTP::Request->new(GET => $uri); $req->header('If-Modified-Since' => HTTP::Date::time2str($time)) if $time; my $res = $UA->request($req); unless ($res->is_success()) { warn("ua->request: $uri: ".$res->message()."\n") if $DEBUG; return; } eval { $rss->parse($res->content); }; if ($@) { warn("rss->parse: $uri: $@\n") if $DEBUG; return; } return $rss; } sub parse_rss { my ($rss, $cref, $iref) = @_; my @tags = ('#default:title', '#default:link', '#default:description', 'dc:date', 'dc:creator', 'dc:subject'); my (%item, $ns, $p, $t); my $i = 0; foreach (@tags) { ($p, $t) = split(/:/); $ns = $rss->expand_ns_prefix($p); $cref->{$t} = defined($rss->{channel}->{ $rss->generate_ns_name($t, $ns) }) ? Jcode->new(decode_xml($rss->{channel}->{ $rss->generate_ns_name($t, $ns) }))->h2z->euc : ""; } foreach my $ref (@{ $rss->{items} }) { undef %item; $item{i} = sprintf("%2d", ++$i); foreach (@tags) { ($p, $t) = split(/:/); $ns = $rss->expand_ns_prefix($p); $item{$t} = defined($ref->{ $rss->generate_ns_name($t, $ns) }) ? Jcode->new($ref->{ $rss->generate_ns_name($t, $ns) })->h2z->euc : ""; if ($t =~ /^description/ && $item{$t} =~ /http/) { $item{$t} =~ s#[^^](https*://)#\n$1#g; } elsif ($t =~ /^date/ && $item{$t} =~ /T/) { # ex. 2003-09-05T16:51:52+09:00 $item{$t} = sprintf("%s/%s/%s %s %s", unpack("a4 x a2 x a2 x a8 a6", $item{$t})); } $item{$t} = join("\n", map { jfold(decode_xml($_)) } split(/\n/, $item{$t})); } push(@$iref, { %item }); } return $i; } sub cache_items { my ($iref, $dbref) = @_; my $time = time; my $i = 0; @$iref = grep(!exists $dbref->{$_->{link}}, @$iref); $dbref->{$_->{link}} = $time, $i++ for @$iref; warn("$DB_FILE: cached $i messages.\n") if $DEBUG; return $i; } sub clean_items { my ($dbref) = @_; my ($key, $val, @items); my $time = time; my $i = 0; while (($key, $val) = each(%$dbref)) { push(@items, $key) if ($time - $dbref->{$key} >= $DB_EXPIRES); } foreach (@items) { delete $dbref->{$_}; if (!exists $dbref->{$_}) { warn("$DB_FILE: removed $_\n") if $DEBUG; $i++; } } warn("$DB_FILE: removed $i messages.\n") if $DEBUG; return $i; } sub read_template { my ($tmplref) = @_; my ($group); while () { last if (/__END__/); if (/^#+\s+(\w+)/) { $group = $1; next; } $tmplref->{$group} .= $_; } } sub parse_template { my ($tmpl, $datref) = @_; if (ref($datref) eq 'HASH') { if ($tmpl =~ /\$\w+/) { $tmpl =~ s/\$(\w+)/defined($$datref{$1}) ? $$datref{$1} : ""/eg; } } elsif (ref($datref) eq 'ARRAY') { my $t = ""; foreach (@$datref) { $t .= parse_template($tmpl, $_); } $tmpl = $t; } $tmpl =~ s#\n[\s\t]+\n#\n\n#g if ($tmpl =~ /\n[\s\t]+\n/); $tmpl =~ s#\n\n\n+#\n\n#g if ($tmpl =~ /\n\n\n/); return $tmpl; } sub decode_xml { my ($str) = @_; my %map = ('&' => '&', '"' => '"', '<' => '<', '>' => '>', ''' => '\''); my $re = join('|', keys %map); $str =~ s#($re)#$map{$1}#g if ($str =~ /\&/); return $str; } sub jfold { my ($str, $foldlen) = @_; my $ret = ""; my $len = 0; # merged from perl cookbook $foldlen ||= $FOLDLEN; while($str =~ m{([\x00-\x7f]) | # ASCII (\x8e[\xa0-\xdf]) | # JIS X 0201 Katakana (\x8f[\xa1-\xfe]{2}) | # JIS X 0212:1990 ([\xa1-\xfe]{2}) | # JIS X 0208:1997 .}gx) { if (defined $1) { $ret .= $1; $len++ }; if (defined $2) { $ret .= $2; $len++ }; if (defined $3) { $ret .= $3; $len += 2 }; if (defined $4) { $ret .= $4; $len += 2 }; if ($foldlen <= $len) { $ret .= "\n"; $len = 0 } } chomp $ret; return $ret; } sub lock_db { my ($lock_file) = @_; $LOCK_ERR = 0; if (-e $lock_file) { $LOCK_ERR = 1; die "$0 is running!\n"; } symlink(".", $lock_file) or die "Can not lock $lock_file: $!"; # END { unlock_db($lock_file); } $SIG{TERM} = $SIG{PIPE} = $SIG{HUP} = $SIG{INT} = $SIG{QUIT} = sub { unlock_db($lock_file); exit(1); }; } sub unlock_db { my ($lock_file) = @_; unlink($lock_file) if $LOCK_ERR != 1 && -e $lock_file; } sub sendmail { my ($to, $headref, $bodyref) = @_; unless ($to) { warn("WARNING: mail to-address not found\n"); return; } unless ($$headref) { warn("WARNING: mail header not found\n"); return; } unless ($$bodyref) { warn("WARNING: mail body not found\n"); return; } unless (-e $SENDMAIL_PATH) { warn("WARNING: $SENDMAIL_PATH not found\n"); return; } open(M, "|-") || exec $SENDMAIL_PATH, $to; print M Jcode->new($$headref)->h2z->jis; print M Jcode->new($$bodyref)->h2z->jis; close(M); return; } __DATA__ ## HEADER To: $to From: $from Subject: $subject X-Mailer: rss2mail/$version ## CHANNEL <$title> ($total) $link ========================================================================= ## ITEMS [$i] $title $link $date $creator $description ## FOOTER ------------------------------------------------------------------------- __END__ ### Local Variables: ### mode: perl ### End: =head1 NAME rss2mail.pl - RSSメール送信スクリプト =head1 SYNOPSIS ./rss2mail.pl =head1 DESCRIPTION 例) cron で実行する場合 0 * * * * /path/to/rss2mail.pl =head1 AUTHOR Yutaka Kojima Eyutaka@asmate.netE This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. The updated version is available from http://www.asmate.net/software/perl/rss2mail/ =cut