whtdnsblcheck: webhostingtalk.nl DNSBL database list checker

#!/usr/bin/perl # drbcheck.cgi - dr. jørgen mash's DNS Datadase List Checker # Copyright (C) 2002-2006 by dr. jørgen mash # All rights reserved. # # Build on: # Joe Jarrod's rbcheck-0.10-13 - # # # This is free software with ABSOLUTELY NO WARRANTY. # You can redistribute it and/or modify, if you do please drop # me a line at drbcheck @ moensted.dk # my $ver = "1.5.10"; use CGI::Carp qw(fatalsToBrowser); # 27/09/06 1.5.10 New Declude link # 19/08/06 1.5.9 Cosmetic changes to the web page, and more ad-includes # 09/08/06 1.5.8 Make country codes humane # 03/05/06 1.5.7 Stupid mail server owners try to be removed from whitelists - try to help them... # 14/03/06 1.5.6 bit.nl has URL= in txt, fix webify # 23/06/05 1.5.5 openrbl offline - removed the links to them # 01/04/05 1.5.4 disallow robots to cache lookup pages using meta tag # 15/11/04 1.5.3 removed broken counter # 05/10/04 1.5.2 update link to openrbl lookups # 18/09/03 1.5 Use of cookies to limit number of lookup's, and other minor changes # 14/09/03 1.4.6 remove / from end, be more specific about what an ip address is... # 29/08/03 1.4.5 Trustic is dead - R.I.P # 27/08/03 1.4.4 osirusoft is no more - R.I.P # 24/04/03 1.4.3 new links to SenderBase # 04/03/03 1.4.2 new order in link to other resources, and new whois # 08/02/03 1.4.1 Compleate redesign of lookup we now send our all queries at the beginning, and read then late on... # 01/02/03 1.3.16 Add link to Trustic # 23/01/03 1.3.15 Include link to form for new rbl's, do not make human readable timestamps if first digit is 0 # 15/12/02 1.3.14 Spam Links changed the URL # 12/11/02 1.3.13 add link to -10 prev and next +10 ip address lookup. # 12/11/02 1.3.12 browers that understand now gets the long name when howering timeout sites # 12/11/02 1.3.11 /000\.000\.000\.000/ is now /0+\.0+\.0+\.0+/ # 10/11/02 1.3.10 ^http$ don't need links # 08/11/02 1.3.9 moved external links around # 07/11/02 1.3.8 ip numbers written as 123-123-123-123 are now striped to 123.123.123.123 # 21/10/02 1.3.7 don't cache if there is nothing to lookup # 20/10/02 1.3.6 remove ()[] from addr input # 20/10/02 1.3.5 changes to make the cache work more usefull # 20/10/02 1.3.4 use CGI::Cache (http://cgicache.sourceforge.net/) to speedup lookup times # 20/10/02 1.3.3 webify from GIRL TXT records # 19/10/02 1.3.2 Add link to Spam Links # 17/10/02 1.3.1 Add PayPal link.... # 03/10/02 1.3.0 Removed CGI lib - and only show listings with acutal IP addresses in the lookup # 24/09/02 1.2.4 now shows timeout >= and not > # 23/09/02 1.2.3 change , to . before we try to resolv the input # 15/09/02 1.2.2 drbsites now has a type field - stay tuned to figure how it will be used... # 06/09/02 1.2.1 minor changes to the email detect in webify to handel DRBL server@ns/ is not a e-mail # 02/09/02 1.2.0 new webify sub, creates clickable content from txt records (ides from Joe's 0.12-46) njabl time stamp now readable # ... # 09/06/02 1.0.0 First major update. # - moved the list to it's own file to make updates more easy. # - new field in drbsites: optional name server. # - removed links to local script in drbsites to save space. # - changed all lookups to use Net::DNS (faster?) # - removed JIPPG code # - only shows common RBL's in nomatch part of result page # - don't look up, if no ip/host # $beg = time; %param = &readinput; %cookies = &readcookies; $debug = 1 if $param{'debug'}; $nocache = 1 if $param{'nocache'}; $lookuplimit = 30; # min time in sec between lookups that are not cached $nextlookup = $beg-$cookies{'lastcall'}; if ($param{'addr'}) { $test = $param{'addr'}; $test =~ s/\s+/\./g if $test =~ /^\d+\s+\d+\s+\d+\s+\d+$/; # I'm stupid and type space instead of . $test =~ s/\s+//g; $test =~ s/$|$|\[|\]//g; $test =~ s/\,/\./g; # I'm stupid and type , instead of . $test =~ s/\-/\./g if $test =~ /^\d+\-\d+\-\d+\-\d+$/; # I'm stupid and type - instead of . $test =~ s/\/$//; $test =~ s/\.$//; $test = "" if $test =~ /0+\.0+\.0+\.0+/; } if ($test){ use CGI::Cache; CGI::Cache::setup( { cache_options => { max_size => 20 * 1024 * 1024, default_expires_in => 3600, } } ); CGI::Cache::set_key( $test ); CGI::Cache::invalidate_cache_entry() if ($nocache && ($nextlookup >= $lookuplimit)); &log ("cache $test < $nocache"); CGI::Cache::start() or exit; } $script = $ENV{SCRIPT_NAME}; $script =~ s/(.*)\/index.*/\1\//; # we don't want /index.cgi $drbsitesrev = 0; require 'drbsites.txt'; # http://www.moensted.dk/spam/drbsites.txt if (($test && ($nextlookup < $lookuplimit)) || ($ENV{HTTP_REFERER} =~ /d\:\/MASSEND 2002\.htm/)) { CGI::Cache::stop(0) if $test; &log("wait $test < ".($lookuplimit-$nextlookup)); &head("nobot"); require 'htmlright.txt' if -e "htmlright.txt"; print "

U bent gelimiteerd tot 1 lookup elke $lookuplimit seconden, u kunt het weer proberen over ",($lookuplimit-$nextlookup)," seconden
\n"; print "
De lookup's die zijn gecached kunt u nog wel oproepen.
"; &footer; } elsif ($test) { my %sock = (); use Net::DNS; use Locale::Country; $::RESOLVER = Net::DNS::Resolver->new; $tcptimeout = 5; # the default timeout is 120 seconds (2 minutes) $delay = $tcptimeout; $::WAIT_INCREMENT = 0.1; &sendquery("test", "$test"); print "Set-Cookie: lastcall=$beg\n"; &head("nobot"); print "

"; print "Attentie: Niet al de onderstaande lijsten zijn bedoeld als black/block lijsten!
"; print "U staat ALTIJD op tenminste drie van deze lijsten. Dit wil niet zeggen dat u een spammer bent!

\n"; require 'htmlright.txt' if -e "htmlright.txt"; @resolved = &readquery("test"); if (@resolved) { print "Resolved $test"; for my $resolved (@resolved) { print " to $resolved"; } print "
\n"; $resolved = pop @resolved; if ($test =~ /^\d+\.\d+\.\d+\.\d+$/) { ($a,$b,$c,$d) = split (/\./,$test); &rblbgsend; $host = $resolved; &mx_records($resolved); } else { ($a,$b,$c,$d) = split (/\./,$resolved); &rblbgsend; $host = $test; &mx_records($test); } } elsif ($test =~ /^\d+\.\d+\.\d+\.\d+$/) { ($a,$b,$c,$d) = split (/\./,$test); &rblbgsend; print "[$test]"; } else { print "no IP found for $test - try again
"; &mx_records($test); &footer; exit; } print "

\n"; $count=0; $numberoftest=0; &log ("lookup $a.$b.$c.$d ($param{'addr'})"); $lasttime = $beg; foreach $rblsite (@drbsites) { $duration = (time - $lasttime); $lasttime = time; print "<\;-- took $duration !! $rblcode
" if ($debug); ($rblcode,$rbls,$rblw,$rbln,$rblp,$rbabout,$rbstatus,$rblremoval,$longname,$ok,$txt,$type,$rbldns) = split ('\x3b',$rblsite); &log ("$rblcode ". (time - $beg) ) if $debug; if ($ok) { $test1 = join (".",$d,$c,$b,$a,$rbls); $ipfound = 0; $numberoftest++; my @r1 = &readquery($rblcode); if (@r1[-1] =~ /\d+\.\d+\.\d+\.\d+/) { &sendquery($rblcode.about,$rbabout,"TXT",$rbldns) if ($rbabout); $count++; if (&istype($type,"8")) { print "* - Niet geblokkeerd door deze lijst! Dit toont is je ISP. Er is geen reden voor verwijdering! - *
"; } if (&istype($type,"10")) { print "* - Niet geblokkeerd door deze lijst! Dit toont je land. Er is geen reden voor verwijdering! - *
"; } if (&istype($type,"80")) { print "* - Niet geblokkeerd door deze lijst! Dit is een whitelist of een joke list. Er is geen reden voor verwijdering! - *
"; } print "+ ".($rblp ? "$rblcode": $rblcode)." $longname: "; print ($rblw ? "$rbls " : "$rbls "); for my $r1 (@r1) { print " -> $r1"; } print ""; if ($txt) { my @txtinfo2 = &readquery($rblcode.txt); foreach my $n1 (@txtinfo2) { my @narr = split ('\x22',$n1); foreach my $narrl (@narr) { print "
" . &webify($narrl) if ($narrl); } } } if ($rbabout) { my @txtinfo = &readquery($rblcode.about); foreach my $n1 (@txtinfo) { print "
about: "; my @narr = split ('\x22',$n1); foreach my $narrl (@narr) { print (&webify($narrl)); } } } print ""; print "
[removal]" if ($rblremoval); print "

\n"; } else { # Not listet $timeoutsites .= "" . ($rblw ? "$rblcode " : "$rblcode ") . "" if ((time - $lasttime) >= $tcptimeout); if ($ok == 2) { $nomatch .= "- ".($rblp ? "$rblcode": $rblcode)." $longname: "; $nomatch .= ($rblw ? "$rbls " : "$rbls "); $nomatch .= "[Nominate] " if ($rbln); $nomatch .= " [Check DNS] " if ($rbstatus); $nomatch .= "
\n"; } } } else { # We can't test if ($txt) { $notesting .= ($rblp ? "? $rblcode" : "? $rblcode"); $notesting .= " $longname: "; $notesting .= ($rblw ? "$rbls (click for manual search)" : $rbls); $notesting .= "
\n"; } } } print "\n[<<|<]"; print " $a.$b.$c.$d "; print "[>|>>] "; print "was found in $count lists (of $numberoftest tested)
\n"; print "Attentie: U staat ALTIJD op tenminste drie van deze lijsten. Dit wil niet zeggen dat u een spammer bent.
Ook betekent dit niet altijd dat iemand gebruik maakt van een van de lijsten om mail van uw te blokkeren!
\n"; print "

Geselecteerde lijst waar niet is getest en het kan dus zijn dat u hier wel op staat:
$notesting" if $notesting; print "

[news:*abuse*: $a.$b.$c.$d"; print " | $host" if $host; print "] [SpamCop: Checkblock | "; print "why ORBS] "; print "[SenderBase: $a.$b.$c.$d/24"; print " | $host" if $host; print "]
\n"; print "[whois $a.$b.$c.$d"; print " | $host" if $host; print "] [SS Macro: $a.$b.$c.$d"; print " | $host" if $host; # print "] [Whois/NS-Delegation: "; # $a.$b.$c.$d | # print "$host" if $host; print "]
\n"; print "[DNSbl's "; #openrbl | print "SamSpade | "; print "Multi-RBL | "; print "fpsn.net | "; print "DnsStuff | "; print "Reynolds spam db] "; print "

Selected lists where $a.$b.$c.$d was not found:
$nomatch" if $nomatch; print "
Click here to view the full list of DNSbl's\n"; &footer; } else { &head; print "

Welkom op de webhostingtalk.nl DNSBL database list checker.
"; print "Het kan even duren voordat de resultaten getoond worden (gemiddelde lookup tijd is 10 seconden)! De meeste van de onderstaande " . scalar(@drbsites) . " lijsten worden geraadpleegd. "; print "
Voor meer informatie over de lijsten kunt u op de eerste link van de regel klikken of kijken op Declude.
"; print "

\n"; &listall; &footer; } CGI::Cache::stop(); exit; sub istype { my ($type, $query) = @_; my @bintype = reverse(split("",unpack("B32", pack("N", hex($type))))); my @binquery = reverse(split("",unpack("B32", pack("N", hex($query))))); do { my $a = pop @bintype; my $b = pop @binquery; return 1 if ($b && $a); } while (@bintype); return 0; } sub listall { foreach $rblsite (@drbsites) { ($rblcode,$rbls,$rblw,$rbln,$rblp,$rbabout,$rbstatus,$rblremoval,$longname,$ok,$txt,$type,$rbldns) = split ('\x3b',$rblsite); print ($ok ? "+ ":($txt ? "? ": "- ")); print ($rblp ? "$rblcode" : $rblcode); print " $longname: "; print ($rblw ? "$rbls" : $rbls); print "
\n"; } print "
+ check, - won't check, ? can't check
\n"; } sub head { my $nobot = shift; print "Content-type: text/html; charset=iso-8859-1\n\n"; print qq!whtdnsblcheck: webhostingtalk.nl DNSBL database list checker!; print qq!! if $nobot eq "nobot"; print qq!!; print qq!\n!; require 'htmltop.txt' if -e "htmltop.txt"; if (@latestchange) { for my $change (@latestchange) { print qq!

$change !; } print "

"; } print qq!\n\n!; } sub footer { print "\n

[Declude DNSB List | "; print "Spam Links] "; print "[Compare] "; # &counter; require 'htmlbuttom.txt' if -e "htmlbuttom.txt"; CGI::Cache::pause(); $ende = time - $beg; print "Lookup time: $ende sec. "; CGI::Cache::continue(); print "\n

Timeouts, not looked up: $timeoutsites" if $timeoutsites; print qq!\n \n\n!; } sub mx_records { my $mxhost = shift; $mxhost =~ s/^\s+|\s+$//g; &sendquery("MX$mxhost",$mxhost,"MX"); my @mx_list = &readquery("MX$mxhost"); shift @mx_list if ($mx_list[0] !~ /^\d/); if (@mx_list) { shift @mx_list while ($mx_list[0] !~ /.* \d*/); if ($mx_list[0] =~ /.* \d*/) { if ($#mx_list == 0) { print "[$mxhost has " . ($#mx_list + 1) ." MX record"; } elsif ($#mx_list > 0) { print "[$mxhost has " . ($#mx_list + 1) . " MX records"; } else { print "[error resolving MX for $mxhost]"; return 1; } for $mx (@mx_list) { ($smtp_pref,$smtp_result) = split (" ",$mx); print " $smtp_result($smtp_pref)"; } print "]\n"; } } else { print "$mxhost has no MX records"; my @splitter = split (/\./,$mxhost); shift @splitter; if (scalar(@splitter) > 1) { my $newhost = join ('.',@splitter); print " -> "; &mx_records($newhost); } } } sub rblbgsend { foreach my $rblsite (@drbsites) { my ($rblcode,$rbls,$rblw,$rbln,$rblp,$rbabout,$rbstatus,$rblremoval,$longname,$ok,$txt,$type,$rbldns) = split ('\x3b',$rblsite); &log (" $rblcode ". (time - $beg) ) if $debug; if ($ok) { my $test1 = join (".",$d,$c,$b,$a,$rbls); &sendquery($rblcode,$test1,"A",$rbldns); &sendquery($rblcode.txt,$test1,"TXT",$rbldns) if ($txt); } } } sub sendquery { my $id = shift; my $lookup = shift; my $type = shift; my @dns = @_; my @ns = $::RESOLVER->nameservers; # store default nameservers &log (" bgsend $id $lookup $type $dns") if $debug; $::RESOLVER->nameservers(@dns,"127.0.0.1") if @dns; # set other if apropiate $sock{$id} = $::RESOLVER->bgsend($lookup,$type); $::RESOLVER->nameservers(@ns) # restore default nameservers } sub readquery { my $id = shift; my $timeout = shift; my @returns = (); #my $delay = ($timeout?$timelout: $tcptimeout); #warn ("read $id"); while (($delay > 0) and (not $::RESOLVER->bgisready($sock{$id}))) { select(undef, undef, undef, $::WAIT_INCREMENT); $delay -= $::WAIT_INCREMENT; } my $result = $::RESOLVER->errorstring; if ($::RESOLVER->bgisready($sock{$id})) { # my $result = $::RESOLVER->errorstring; my $look = $::RESOLVER->bgread($sock{$id}); &log (" $test Query:$socket -> $result/OK\n") if $debug; if ($look) { for $answer ($look->answer) { my @ansr = split ('\x22',$answer->rdatastr); foreach my $ans (@ansr) { push @returns, $ans if ($ans); } } } } else { &log ( "noQuery $test :$id:$socket -> $result"); } $sock{$id} = undef; return @returns; } # sub counter { # my $counterVar; # print "

"; # $file = $ENV{SCRIPT_FILENAME}; # $file =~ s/(.*)\/.*/\1/; # $file .= "/drbcheck.cnt"; # must be readable/writable by your webserver's user # open (FILE, "+>>" , $file) or &log (" cannot open $file for reading and appending: $!"); # flock(FILE, 2) or &log ( "cannot lock $file exclusively: $!"); # seek FILE, 0, 0; # my @file_contents = ; # we use an array even though there won't be more than a single line of data here. # if ($file_contents[0] =~ /^(\d+)$/) { # $counterVar = $1; # $1 is captured by the ()'s in the regular expression # $counterVar++; # auto-increment the same variable with 1 # truncate FILE, 0; # print (FILE $counterVar); # } else { # $counterVar = "COUNTER ERROR"; # the regular expression didn't match # } # # close (FILE); # print "This page has been accessed $counterVar times since June 8. 2002
"; # } sub webify { my $line = shift; my $return = ""; my @words = split(" ",$line); foreach my $word (@words) { if ($word =~ /^$|\">|\'>|href/i) { # do nothing } elsif ($word =~ /^<(.*)>$/) { $word = "<\;".&webify("$1")."\>\;"; } elsif ($word =~ /^https?\:\/\/.+/i) { $word = "$word"; } elsif ($word =~ /^URL([:\=])(.*)$/i) { $word = "URL$1".&webify("$2"); } elsif ((($word =~ /(^[^\:]+:)([^\@]*\@[\w\.-]*)(.*)$/) || ($word =~ /^()([^\@]*\@[\w\.-]*)(.*)$/)) && ($word !~ /\@.*\/.*/)) { $word = "$1$2$3"; } elsif (($word =~ /^\d{10,}$/) && ($word !~ /^0/)) { # NJABL.org and other time stamps in human form my $time = localtime ($word); $time =~ s/00:00:00 //; $word = "$word ($time)"; } elsif ($word =~ /^CC\=(\w{2})$/i) { $word = "CC=$1 (" . code2country(lc($1)) . ")"; } elsif ($word =~ /^(.*)\.(\d{10,})$/) { if ($2 !~ /^0/) { # JIPPG style my $time = localtime ($2); $time =~ s/00:00:00 //; $word = "$1.$2 ($time)"; } } else { $word =~ s/\/\>\;/g; } $return .= $word . " "; } chop($return); # we have one space in the end - remove that if ($return =~ /^(\w{2})$/i) { $return = "$1 (" . code2country(lc($1)) . ")"; } return $return; } sub readinput { my $input = shift; my (@fields,%param); unless ($input) { if($ENV{'REQUEST_METHOD'} eq 'POST') { read(stdin,$input,$ENV{'CONTENT_LENGTH'}); } elsif ($ENV{'REQUEST_METHOD'} eq 'GET') { $input = $ENV{'QUERY_STRING'}; } else { return; } } $input =~ tr/+/ /; @fields=split(/\&/,$input); foreach my $i (@fields) { my ($field,$data) = split(/=/,$i); $field =~ s/%(..)/pack("c",hex($1))/ge; $data =~ s/%(..)/pack("c",hex($1))/ge; $param{$field} = $data; } return %param; } sub readcookies { my (@fields,%param); @fields=split(/\;/,$ENV{'HTTP_COOKIE'}); foreach my $i (@fields) { my ($field,$data) = split(/=/,$i); $field =~ s/%(..)/pack("c",hex($1))/ge; $field =~ s/^ //ge; $data =~ s/%(..)/pack("c",hex($1))/ge; $param{$field} = $data; } return %param; } sub nextip { my ($a,$b,$c,$d) = split /\./,shift; my $change = shift; my $startaddr = (($a <<24) +($b <<16) +($c<<8) +$d); my $newip = $startaddr + $change; $a = ($newip >>24); $b = (($newip >>16) &255); $c = (($newip >>8) &255); $d = ($newip &255); return ("$a.$b.$c.$d"); } sub log { my $string = shift; # warn ("[".localtime(time)."] [drbcheck] [client $ENV{REMOTE_ADDR}] ".(" " x (15-length($ENV{REMOTE_ADDR}))) . "$string\n"); warn ("[drbcheck] [client $ENV{REMOTE_ADDR}] ".(" " x (15-length($ENV{REMOTE_ADDR}))) . "$string\n"); }