#! /usr/local/bin/perl
# (c) Louis Granboulan 2001-2003
use Socket;
$proto = getprotobyname('tcp');
$url = shift;
# On récupère le document
($document, $status) = get_document($url);
unless ($status == 200) { print "Status $status\n"; exit 1; }
# On construit la "base"
$base = $url; $base =~ s%[^/]*$%%;
while ($document =~ s/]*)>//is) {
my $link = $1; $link =~ s/^ *"([^"]*)".*/$1/s;
$base = make_url($base,$link); $base =~ s%[^/]*$%%;
}
# On extrait les liens
while ($document =~ s/<(?:A HREF|IMG SRC)=([^>]*)>//is) {
my $link = $1; $link =~ s/^ *"([^"]*)".*/$1/s;
$link = make_url($base,$link);
my $status = get_document($link);
print "Mauvais lien ($status) : $link\n" unless $status =~ /^[23]\d\d$/;
}
sub make_url {
my $base = shift;
my $link = shift;
return $link if $link =~ /^(http|ftp|mailto|https):/;
$base =~ s%^(\w+://[^/]*).*%$1%i if $link =~ /^\//;
return "$base$link";
}
sub get_document {
my $location = shift;
if ($location !~ m%http://([^/]*)(/.*)%)
{ return ("", 503); } # Service Unavailable
my ($disthost,$distport,$distfile) = ($1,80,$2);
if ($disthost =~ /(.*):(\d+)$/) { ($disthost,$distport) = ($1,$2); }
my $iaddr;
unless ($iaddr = inet_aton($disthost))
{ return ("", 404); } # Host Not Found
my $this = pack_sockaddr_in($distport, $iaddr);
socket(CL, &AF_INET, &SOCK_STREAM, $proto) or
die "$0: client socket failed: $!";
connect(CL, $this) or
die "$0: client connect failed: $!";
select(CL); $| = 1; select STDOUT;
print CL "GET $distfile HTTP/1.1\r\nHost: $disthost\r\n\r\n";
my $status = ; $status =~ s/^\S+ (\d\d\d) .*/$1/s;
my $document;
while () { $document .= $_; }
close(CL);
# On supprime les headers
$document =~ s/^.*?(\r\n\r\n|\n\n|\r\r)//s;
return ($document, $status);
}