#!/usr/bin/perl -w
#
# Fetch one PerlMonks thread page and list, for every reply header row found,
# the raw <a ...> tag of the reply link followed by the author's user name.
# The list is printed twice: first in page order, then sorted by user name
# (one entry per user -- a later reply by the same user replaces the earlier).
#
# A reply header row is recognised purely by its token sequence; see
# @ROW_SHAPE below.

use strict;
use LWP::Simple;
use HTML::TokeParser;

my $url = "http://perlmonks.org/index.pl?node_id=110166";

# Download the page into memory.  LWP::Simple::get() signals failure only by
# returning undef (it does not set $!), so the message names the URL instead.
my $rawHTML = get($url);
die "LWP::Simple messed up: could not fetch $url" unless $rawHTML;

# Parsing from a scalar reference involves no file I/O, so $! carries no
# information here either.
my $tp = HTML::TokeParser->new(\$rawHTML)
    or die "HTML::TokeParser->new failed for $url";

# Expected token sequence of one reply header row.  Each entry is
#   [ token type, optional constraint on token slot 1 ]
# where slot 1 is the tag name for "S"/"E" tokens and the text for "T"
# tokens.  A plain string must match exactly, a qr// must match as a regex,
# and a missing constraint accepts anything.
my @ROW_SHAPE = (
    [ 'S', 'tr'   ],    #  0  (bgcolor is checked by the caller)
    [ 'S', 'td'   ],    #  1
    [ 'S', 'font' ],    #  2
    [ 'S', 'a'    ],    #  3  reply link
    [ 'T'         ],    #  4  reply title, e.g. "Re: Name Space"
    [ 'E', 'a'    ],    #  5
    [ 'S', 'br'   ],    #  6
    [ 'T', qr/by/ ],    #  7  "by"
    [ 'S', 'a'    ],    #  8  user link
    [ 'T'         ],    #  9  user name, e.g. "japhy"
    [ 'E', 'a'    ],    # 10
    [ 'T', qr/on \w{3} \d{2}, \d{4} at/ ],    # 11  "on Sep 04, 2001 at 13:42"
    [ 'E', 'font' ],    # 12
    [ 'E', 'td'   ],    # 13
    [ 'E', 'tr'   ],    # 14
);

# Return true if the given list of tokens has exactly the shape described by
# @ROW_SHAPE.  A short list (document ended mid-row) never matches.
sub is_reply_row {
    my @tokens = @_;
    return 0 unless @tokens == @ROW_SHAPE;

    for my $i (0 .. $#ROW_SHAPE) {
        my ($type, $want) = @{ $ROW_SHAPE[$i] };
        my $token = $tokens[$i];

        return 0 unless $token->[0] eq $type;
        next     unless defined $want;

        if (ref $want) {
            return 0 unless $token->[1] =~ $want;
        }
        else {
            return 0 unless $token->[1] eq $want;
        }
    }
    return 1;
}

my %monks;    # user name => reply link tag . user name

while (my $t = $tp->get_token) {

    # Only a <tr bgcolor="eeeeee"> can open a reply header row.
    next unless $t->[0] eq "S"
            and $t->[1] eq "tr"
            and exists $t->[2]{bgcolor}
            and $t->[2]{bgcolor} eq "eeeeee";

    # Look ahead far enough to cover a whole row, stopping early if the
    # document runs out (get_token returns undef at end of input).
    my @row = ($t);
    while (@row < @ROW_SHAPE) {
        my $next = $tp->get_token;
        last unless defined $next;
        push @row, $next;
    }

    if (is_reply_row(@row)) {
        my $link = $row[3][4];    # slot 4 of a start token: the tag's original text
        my $monk = $row[9][1];    # text of the user-name token

        print $link, $monk, "|\n";
        $monks{$monk} = $link . $monk;
    }
    else {
        # Not a reply row after all.  Hand the look-ahead back to the parser
        # so that a real row beginning inside this window is not skipped.
        $tp->unget_token(@row[1 .. $#row]);
    }
}    # end of while (my $t = $tp->get_token)

undef $rawHTML;    # no more raw html
undef $tp;         # destroy the HTML::TokeParser object (no longer needed)

print "\n\nor sorted\n\n\n";

for my $key (sort keys %monks) {
    print $monks{$key}, "|\n";
}

__END__
## one token per line
Re: Name Space
by japhy on Sep 04, 2001 at 13:42