newsbot kobitosann
いろいろメンテする。
元の管理者が手放したものなのだが、著作権表示どーすんだろうな……。
2ch.pl
read.cgiのところを、read.htmlに変えるだけ。
print "$line\thttp://$server/test/read.html/$board/$key/\n";
#!/usr/bin/perl ## ## 2ch.pl ## $Id: 2ch.pl,v 2.9 2004/11/09 23:23:50 yu-ji Exp $ ## use strict; use warnings; use utf8; use Encode qw(from_to decode); use LWP::Simple; binmode(STDOUT, ':encoding(euc-jp)'); MAIN: { my($dir, $server, $board, @subback, $response); # check arguments if (@ARGV != 2) { die 'Usage: 2ch.pl' . " [SERVER_FQDN} [BOARD_NAME]\n"; } ($server, $board) = @ARGV; # get $response = get("http://$server/$board/subback.html"); if (not $response) { die 'Could not get response.'; } from_to($response, 'shiftjis', 'iso-2022-jp'); $response = decode('iso-2022-jp', $response); @subback = split(/\n/, $response); # data file directory $dir = $0; $dir =~ s/\/2ch.pl$//; # load last $maxThreadNum my($maxThreadNum); if (-e "$dir/2ch.$board.dat") { open(FILE, "<$dir/2ch.$board.dat") or die "Cannot open to read: $!"; chomp($maxThreadNum = <FILE>); close(FILE); if ((not $maxThreadNum) or (length($maxThreadNum) != 10)){ warn '1 Abnormal $maxThreadNum: ' . "$maxThreadNum\n"; } } else { $maxThreadNum = 0; } # pickup new thread my(%subback, $threadCount); $threadCount = 0; foreach (@subback) { chomp; my($num, $subj); if (($num, $subj) = m/^<a href=\"(\d+)\/.*?:(.*)\(\d+\)<\/a>/) { $threadCount++; # store if $num > $maxThreadNum $subback{$num} = $subj if ($num > $maxThreadNum); } } if ($threadCount < 1) { die 'BBS moved?'; } # print out New Thread Subject and URL my($key); foreach $key (sort keys %subback) { if ((length($key) == 10) and ($key =~ m/^[1-3]/)){ $maxThreadNum = $key; } else { #warn "1 Illegal thread num: $key\n"; } my($line) = $subback{$key}; # print Thread Subject and URL. print "$line\thttp://$server/test/read.html/$board/$key/\n"; } # save $maxThreadNum if (length($maxThreadNum) == 10) { open(FILE, ">$dir/2ch.$board.dat") or die "Cannot open to write: $!"; print FILE "$maxThreadNum\n"; close(FILE); } else { warn '2 Abnormal $maxThreadNum: ' . "$maxThreadNum\n"; } }
hinet.pl
#!/usr/bin/perl ## ## http://www.hinet.bosai.go.jp/ ## $Id: hinet.pl,v 1.1 2005/08/02 07:43:16 yu-ji Exp $ ## use strict; use warnings; use LWP::Simple; use Jcode; MAIN: { my ($baseName, $dir, $url, $response, $news); $baseName = 'hinet'; $url = 'http://www.hinet.bosai.go.jp/'; $dir = $0; $dir =~ s/\/$baseName.pl$//; # get $response = get($url); if (not $response) { die "Could not get response."; } $response = Jcode->new($response)->h2z->euc; if ($response !~ m/CLASS=\"bgstyle3\">(.*?)<\/TABLE>/s) { die "mark not found."; } ($response = $1) =~ s/\n//g; # print "$response"; # my ($origin, $time, $dep, $mag) # = ($response =~ m/震源地.+?NOWRAP>(.+?)<.+?発震時刻.+?NOWRAP>(.+?)<.+?深さ.+?NOWRAP>(.+?)<.+?マグニチュード.+?NOWRAP>(.+?)</); my ($origin, $time, $dep, $mag) # = ($response =~ m/震源地.+?NOWRAP">(.+?)<.+?発震時刻.+?NOWRAP">(.+?)<.+?深さ.+?NOWRAP">(.+?)<.+?マグニチュード.+?NOWRAP">(.+?)</); # = ($response =~ m/震源地.+?NOWRAP>(.+?)<.+?発震時刻.+?NOWRAP>(.+?)<.+?深さ.+?NOWRAP>(.+?)<.+?マグニチュード.+?NOWRAP>(.+?)</); = ($response =~ m/震源地.+?bgstyle4\">(.+?)<.+?震源時.+?bgstyle4\">(.+?)<.+?深さ.+?bgstyle4\">(.+?)<.+?マグニチュード.+?bgstyle4\">(.+?)</); if (not ($origin && $time && $dep && $mag)) { die "pattern not match."; } $news = "[Hi-net] $origin $time 深さ$dep マグニチュード$mag"; $news =~ s/\s+/ /g; $news .= "\t$url"; my ($line, $old, %lookup, @diff); # read oldLines $old = ''; if (open(FILE, "<$dir/$baseName.dat")) { chomp($old = <FILE>); } # write oldLines open(FILE, ">$dir/$baseName.dat") or die 'Cannot create .dat file.'; print FILE $news . "\n"; close(FILE); # diff if ($news ne $old) { print $news . "\n"; } }
people.pl
URLが http://j.peopledaily.com.cn/home.htmlに移っていたので追従する
#!/usr/bin/perl ## ## http://www.people.ne.jp/ ## $Id: people.pl,v 1.2 2005/05/13 11:56:02 yu-ji Exp $ ## use strict; use warnings; use LWP::Simple; use Jcode; MAIN: { my ($baseName, $dir, $url, $response, @news); $baseName = 'people'; $url = 'http://j.peopledaily.com.cn'; $dir = $0; $dir =~ s/\/$baseName.pl$//; # get $response = get($url . '/home.html'); if (not $response) { die "Could not get response."; } $response = Jcode->new($response)->h2z->euc; while ($response =~ m/<a .*?href=[\"]?(\/20.*?\.html)(?:\">|>| .*?>)(.*?)<\/a>/ig) { my($href, $a) = ($1, $2); $a =~ s/<.*?>//g; next if (not $href or not $a or ($a =~ m/\.\.\.$/)); push(@news, "$a\t$url$href"); } if (scalar(@news) < 10) { die "HTML changed?"; } my ($line, @old, %lookup, @diff); # read oldLines if (open(FILE, "<$dir/$baseName.dat")) { foreach $line (<FILE>) { chomp($line); push(@old, $line); } close(FILE); } # uniq %lookup = (); @news = grep { ! $lookup{$_} ++ } @news; # write oldLines open(FILE, ">$dir/$baseName.dat") or die 'Cannot create .dat file.'; foreach $line (@news) { print FILE $line . "\n"; } close(FILE); # diff %lookup = (); @lookup{@old} = (); foreach $line (@news) { push(@diff, $line) unless exists $lookup{$line}; } foreach $line (@diff) { print $line . "\n"; } }
chosun.pl
URLが http://www.chosunonline.com/ に変わっていたので追従させる。
記事の URLが articleを含むものになっていたので書き換える
#!/usr/bin/perl ## ## http://japanese.chosun.com/ ## $Id: chosun.pl,v 1.2 2007/03/31 04:18:06 yu-ji Exp $ ## use strict; use warnings; use LWP::Simple; use Jcode; MAIN: { my ($baseName, $dir, $url, $response, @news); $baseName = 'chosun'; $url = 'http://www.chosunonline.com'; $dir = $0; $dir =~ s/\/$baseName.pl$//; # get $response = get($url . '/'); if (not $response) { die "Could not get response."; } $response = Jcode->new($response)->h2z->euc; while ($response =~ m/<a href=\"(.*?article\/.*?)\"(?:>| .*?>)(.*?)<\/a>/ig) { my($href, $a) = ($1, $2); $a =~ s/<.*?>//g; next if (not $a or ($a =~ m/^\s*$/) or ($a =~ m/\.\.\.$/)); push(@news, "$a\t$href"); } if (scalar(@news) < 10) { die "HTML changed?"; } my ($line, @old, %lookup, @diff); # read oldLines if (open(FILE, "<$dir/$baseName.dat")) { foreach $line (<FILE>) { chomp($line); push(@old, $line); } close(FILE); } # uniq %lookup = (); @news = grep { ! $lookup{$_} ++ } @news; # write oldLines open(FILE, ">./$baseName.dat") or die 'Cannot create .dat file.'; foreach $line (@news) { print FILE $line . "\n"; } close(FILE); # diff %lookup = (); @lookup{@old} = (); foreach $line (@news) { push(@diff, $line) unless exists $lookup{$line}; } foreach $line (@diff) { print $line . "\n"; } }
biztech.pl
biztechが無くなっているらしいので、URLを http://www.nikkeibp.co.jp/に変更する。
文字コードが UTF-8になっていたので、nkfのオプションを -Wedに変える。
「[詳細]」てリンクを弾くようにする。
#!/usr/bin/perl -w ## ## http://biztech.nikkeibp.co.jp/ ## $Id: biztech.pl,v 2.3 2004/01/05 12:00:47 yu-ji Exp $ ## use strict; MAIN: { my($dir, $url, @lines, %news); # data file directory $dir = $0; $dir =~ s/\/biztech.pl$//; $url = 'http://www.nikkeibp.co.jp/'; @lines = `w3m -dump_source $url | nkf -Wed`; foreach $_ (@lines) { if ($_ =~ m/<A HREF=\"(\/news\/.*?)\".*?>(.*?)<\/A>/i) { my($a, $href) = ($2, $1); $a =~ s/<.+?>//g; $a =~ s/\(\d\d:\d\d\)$//; $a =~ s/\(\d{4}\/\d\d\/\d\d\)$//; $a =~ s/^・//; $a =~ s/^●//; $a =~ s/\[全文へ\]//; $a =~ s/\[詳細\]//; $a =~ s/記事全文→//; $a =~ s/記事全文へ//; if ($a) { $news{$a} = "$url$href"; } } } if (scalar(keys %news) < 10) { die scalar(keys %news) . " entries"; } # write new file open(FILE, "> ./biztech.new"); foreach $_ (sort keys %news) { print FILE "$_\t$news{$_}\n"; } close(FILE); # diff @lines = `diff ./biztech.dat ./biztech.new | grep \'^>\'`; if (@lines) { foreach (@lines) { chomp($_); if ($_ =~ m/^> (.*)$/) { print "$1\n"; } } } system('mv', "./biztech.new", "./biztech.dat"); }