#!/usr/bin/perl -w use strict; use HTML::TokeParser; use LWP::UserAgent; my $ua = LWP::UserAgent->new; my $proxy = 'http://proxy.hbt.tassie.net.au:8080'; my $netloc = ''; my $realm = ''; my $uname = ''; my $pass = ''; $ua->credentials($netloc, $realm, $uname, $pass); $ua->proxy( 'http', $proxy ); my $url = 'http://www.perlmonks.com/'; $url .= '/' if $url =~ m|http://[^/]+$|i; # add trailing / if forgotten my ($root) = $url =~ m|(http://.*/)|i; my $content = get( $url ); print "Checking links....\n"; my $parse = HTML::TokeParser->new( \$content ); while (my $token = $parse->get_tag('a')) { my $link = $token->[1]{href} || next; my $text=$parse->get_trimmed_text('/a'); if ($link =~ m|^\s*mailto|i) { next; } elsif ($link=~ m|^\s*http://|i){ print &test( $link, $text ); } else { $link =~ s|^\s*/||; print &test( $root.$link, $text ); } } sub get { my $url = shift; print "Getting $url...."; my $request = HTTP::Request->new( 'GET', $url ); my $content = $ua->request( $request ); print "$$content{_msg} $$content{_rc}\n"; return $$content{_content}; } sub test { my ( $url, $text ) = @_; my $request = HTTP::Request->new( 'HEAD', $url ); my $content = $ua->request( $request ); return "$$content{_msg} $$content{_rc} ($text) $url\n"; } __END__ Getting http://www.perlmonks.com/....OK 200 Checking links.... OK 200 (Frank) http://www.perlmonks.com/index.pl?node_id=966&lastnode_id=131 OK 200 (perlmonks) http://www.perlmonks.com/index.pl OK 200 (login) http://www.perlmonks.com/index.pl?node=login&lastnode_id=131 [snip]