newsbot kobitosann

いろいろメンテする。

元の管理者が手放したものなのだが、著作権表示どーすんだろうな……。

2ch.pl


read.cgiのところを、read.htmlに変えるだけ。

print "$line\thttp://$server/test/read.html/$board/$key/\n";

#!/usr/bin/perl

##
## 2ch.pl
## $Id: 2ch.pl,v 2.9 2004/11/09 23:23:50 yu-ji Exp $
##

use strict;
use warnings;
use utf8;

use Encode qw(from_to decode);
use LWP::Simple;

binmode(STDOUT, ':encoding(euc-jp)');

MAIN:
{
    my($dir, $server, $board, @subback, $response);

    # check arguments
    if (@ARGV != 2) {
	die 'Usage: 2ch.pl'
	  . " [SERVER_FQDN} [BOARD_NAME]\n";
    }
    ($server, $board) = @ARGV;

    # get
    $response = get("http://$server/$board/subback.html");
    if (not $response) {
	die 'Could not get response.';
    }
    from_to($response, 'shiftjis', 'iso-2022-jp');
    $response = decode('iso-2022-jp', $response);
    @subback = split(/\n/, $response);

    # data file directory
    $dir = $0;
    $dir =~ s/\/2ch.pl$//;

    # load last $maxThreadNum
    my($maxThreadNum);
    if (-e "$dir/2ch.$board.dat") {
	open(FILE, "<$dir/2ch.$board.dat")
	  or die "Cannot open to read: $!";
	chomp($maxThreadNum = <FILE>);
	close(FILE);
	if ((not $maxThreadNum)
	    or (length($maxThreadNum) != 10)){
	    warn '1 Abnormal $maxThreadNum: ' . "$maxThreadNum\n";
	}
    } else {
	$maxThreadNum = 0;
    }

    # pickup new thread
    my(%subback, $threadCount);
    $threadCount = 0;
    foreach (@subback) {
	chomp;
	my($num, $subj);
	if (($num, $subj) = m/^<a href=\"(\d+)\/.*?:(.*)\(\d+\)<\/a>/) {
	    $threadCount++;
	    # store if $num > $maxThreadNum
	    $subback{$num} = $subj if ($num > $maxThreadNum);
	}
    }
    if ($threadCount < 1) {
	die 'BBS moved?';
    }


    # print out New Thread Subject and URL
    my($key);
    foreach $key (sort keys %subback) {
	if ((length($key) == 10)
	    and ($key =~ m/^[1-3]/)){
	    $maxThreadNum = $key;
	} else {
	    #warn "1 Illegal thread num: $key\n";
	}
	my($line) = $subback{$key};
	# print Thread Subject and URL.
	print "$line\thttp://$server/test/read.html/$board/$key/\n";
    }

    # save $maxThreadNum
    if (length($maxThreadNum) == 10) {
	open(FILE, ">$dir/2ch.$board.dat")
	  or die "Cannot open to write: $!";
	print FILE "$maxThreadNum\n";
	close(FILE);
    } else {
	warn '2 Abnormal $maxThreadNum: ' . "$maxThreadNum\n";
    }
}

hinet.pl

震源時刻」が「震源時」に変わっていたので追従させる

#!/usr/bin/perl

##
## http://www.hinet.bosai.go.jp/
## $Id: hinet.pl,v 1.1 2005/08/02 07:43:16 yu-ji Exp $
##

use strict;
use warnings;
use LWP::Simple;
use Jcode;

MAIN:
{
    my ($baseName, $dir, $url, $response, $news);
    $baseName = 'hinet';
    $url = 'http://www.hinet.bosai.go.jp/';

    $dir = $0;
    $dir =~ s/\/$baseName.pl$//;

    # get
    $response = get($url);
    if (not $response) {
        die "Could not get response.";
    }
    $response = Jcode->new($response)->h2z->euc;
    if ($response !~ m/CLASS=\"bgstyle3\">(.*?)<\/TABLE>/s) {
        die "mark not found.";
    }
    ($response = $1) =~ s/\n//g;

#   print "$response";

#    my ($origin, $time, $dep, $mag)
#      = ($response =~ m/震源地.+?NOWRAP>(.+?)<.+?発震時刻.+?NOWRAP>(.+?)<.+?深さ.+?NOWRAP>(.+?)<.+?マグニチュード.+?NOWRAP>(.+?)</);
    my ($origin, $time, $dep, $mag)
#        = ($response =~ m/震源地.+?NOWRAP">(.+?)<.+?発震時刻.+?NOWRAP">(.+?)<.+?深さ.+?NOWRAP">(.+?)<.+?マグニチュード.+?NOWRAP">(.+?)</);
#  = ($response =~ m/震源地.+?NOWRAP>(.+?)<.+?発震時刻.+?NOWRAP>(.+?)<.+?深さ.+?NOWRAP>(.+?)<.+?マグニチュード.+?NOWRAP>(.+?)</);
  = ($response =~ m/震源地.+?bgstyle4\">(.+?)<.+?震源時.+?bgstyle4\">(.+?)<.+?深さ.+?bgstyle4\">(.+?)<.+?マグニチュード.+?bgstyle4\">(.+?)</);
    if (not ($origin && $time && $dep && $mag)) {
        die "pattern not match.";
    }
    $news = "[Hi-net] $origin $time 深さ$dep マグニチュード$mag";
    $news =~ s/\s+/ /g;
    $news .= "\t$url";

    my ($line, $old, %lookup, @diff);
    # read oldLines
    $old = '';
    if (open(FILE, "<$dir/$baseName.dat")) {
        chomp($old = <FILE>);
    }

    # write oldLines
    open(FILE, ">$dir/$baseName.dat")
      or die 'Cannot create .dat file.';
    print FILE $news . "\n";
    close(FILE);

    # diff
    if ($news ne $old) {
        print $news . "\n";
    }
}

people.pl

URLが http://j.peopledaily.com.cn/home.htmlに移っていたので追従する

#!/usr/bin/perl

##
## http://www.people.ne.jp/
## $Id: people.pl,v 1.2 2005/05/13 11:56:02 yu-ji Exp $
##

use strict;
use warnings;
use LWP::Simple;
use Jcode;

MAIN:
{
    my ($baseName, $dir, $url, $response, @news);
    $baseName = 'people';
    $url = 'http://j.peopledaily.com.cn';

    $dir = $0;
    $dir =~ s/\/$baseName.pl$//;

    # get
    $response = get($url . '/home.html');
    if (not $response) {
        die "Could not get response.";
    }
    $response = Jcode->new($response)->h2z->euc;
    while ($response =~
           m/<a .*?href=[\"]?(\/20.*?\.html)(?:\">|>| .*?>)(.*?)<\/a>/ig) {
        my($href, $a) = ($1, $2);
        $a =~ s/<.*?>//g;
        next if (not $href
                 or not $a
                 or ($a =~ m/\.\.\.$/));
        push(@news, "$a\t$url$href");
    }
    if (scalar(@news) < 10) {
        die "HTML changed?";
    }

    my ($line, @old, %lookup, @diff);
    # read oldLines
    if (open(FILE, "<$dir/$baseName.dat")) {
        foreach $line (<FILE>) {
            chomp($line);
            push(@old, $line);
        }
        close(FILE);
    }

    # uniq
    %lookup = ();
    @news = grep { ! $lookup{$_} ++ } @news;

    # write oldLines
    open(FILE, ">$dir/$baseName.dat")
      or die 'Cannot create .dat file.';
    foreach $line (@news) {
        print FILE $line . "\n";
    }
    close(FILE);
    # diff
    %lookup = ();
    @lookup{@old} = ();
    foreach $line (@news) {
        push(@diff, $line) unless exists $lookup{$line};
    }

    foreach $line (@diff) {
        print $line . "\n";
    }
}

chosun.pl

URLが http://www.chosunonline.com/ に変わっていたので追従させる。

記事の URLが articleを含むものになっていたので書き換える

#!/usr/bin/perl

##
## http://japanese.chosun.com/
## $Id: chosun.pl,v 1.2 2007/03/31 04:18:06 yu-ji Exp $
##

use strict;
use warnings;
use LWP::Simple;
use Jcode;

MAIN:
{
    my ($baseName, $dir, $url, $response, @news);
    $baseName = 'chosun';
    $url = 'http://www.chosunonline.com';

    $dir = $0;
    $dir =~ s/\/$baseName.pl$//;

    # get
    $response = get($url . '/');
    if (not $response) {
	die "Could not get response.";
    }
    $response = Jcode->new($response)->h2z->euc;
    while ($response =~
	   m/<a href=\"(.*?article\/.*?)\"(?:>| .*?>)(.*?)<\/a>/ig) {
	my($href, $a) = ($1, $2);
	$a =~ s/<.*?>//g;
	next if (not $a
		 or ($a =~ m/^\s*$/)
		 or ($a =~ m/\.\.\.$/));
	push(@news, "$a\t$href");
    }
    if (scalar(@news) < 10) {
        die "HTML changed?";
    }

    my ($line, @old, %lookup, @diff);
    # read oldLines
    if (open(FILE, "<$dir/$baseName.dat")) {
        foreach $line (<FILE>) {
            chomp($line);
            push(@old, $line);
        }
        close(FILE);
    }

    # uniq
    %lookup = ();
    @news = grep { ! $lookup{$_} ++ } @news;

    # write oldLines
    open(FILE, ">./$baseName.dat")
      or die 'Cannot create .dat file.';
    foreach $line (@news) {
        print FILE $line . "\n";
    }
    close(FILE);
    # diff
    %lookup = ();
    @lookup{@old} = ();
    foreach $line (@news) {
        push(@diff, $line) unless exists $lookup{$line};
    }

    foreach $line (@diff) {
        print $line . "\n";
    }
}

biztech.pl

biztechが無くなっているらしいので、URLを http://www.nikkeibp.co.jp/に変更する。

文字コードUTF-8になっていたので、nkfのオプションを -Wedに変える。

「[詳細]」てリンクを弾くようにする。

#!/usr/bin/perl -w

##
## http://biztech.nikkeibp.co.jp/
## $Id: biztech.pl,v 2.3 2004/01/05 12:00:47 yu-ji Exp $
##

use strict;

MAIN:
{
    my($dir, $url, @lines, %news);
    # data file directory
    $dir = $0;
    $dir =~ s/\/biztech.pl$//;

    $url = 'http://www.nikkeibp.co.jp/';
    @lines = `w3m -dump_source $url | nkf -Wed`;

    foreach $_ (@lines) {
 if ($_ =~ m/<A HREF=\"(\/news\/.*?)\".*?>(.*?)<\/A>/i) {
     my($a, $href) = ($2, $1);
     $a =~ s/<.+?>//g;
     $a =~ s/\(\d\d:\d\d\)$//;
     $a =~ s/\(\d{4}\/\d\d\/\d\d\)$//;
     $a =~ s/^・//;
     $a =~ s/^●//;
     $a =~ s/\[全文へ\]//;
     $a =~ s/\[詳細\]//;
     $a =~ s/記事全文→//;
     $a =~ s/記事全文へ//;
     if ($a) {
      $news{$a} = "$url$href";
     }
 }
    }
    if (scalar(keys %news) < 10) {
 die scalar(keys %news) . " entries";
    }

    # write new file
    open(FILE, "> ./biztech.new");
    foreach $_ (sort keys %news) {
 print FILE "$_\t$news{$_}\n";
    }
    close(FILE);

    # diff
    @lines = `diff ./biztech.dat ./biztech.new | grep \'^>\'`;
    if (@lines) {
        foreach (@lines) {
            chomp($_);
            if ($_ =~ m/^> (.*)$/) {
                print "$1\n";
            }
        }
    }
    system('mv', "./biztech.new", "./biztech.dat");
}