All,
You may or may not be aware, but the
tutorials page is not
complete correct. There are a bunch of non-tutorials (primarily
meditations) that are linked as tutorials and there are a bunch of tutorials that are not linked at all.
I asked the SiteDocClan and the devils if there was a utility to get this list because I would volunteer to write one. I received one response - no utility exists. I put it off for some time because, as you may know, I really hate coding anything web related and am not very good at it. I do love the Monastery though, so with some free time today I came up with:
#!/usr/bin/perl
use strict;
use warnings;
use constant USER => 15;
use Getopt::Std;
use HTML::TableContentParser;
use HTML::TokeParser::Simple;
use URI;
use WWW::Mechanize;
use XML::Simple;
my ($monk, $opt, $tut, $listed) = ( {}, {}, [], {} );
Get_Args( $opt );
my $mech = WWW::Mechanize->new( autocheck => 1 );
Get_Tutorials();
Get_Listed();
Print_Report();
sub Get_Args {
my $opt = shift;
my $Usage = qq{Usage: $0 [options]
-h : This help message.
-b : Base URL - default: http://www.perlmonks.org/
} . "\n";
getopts( 'hb:' , $opt ) or die $Usage;
die $Usage if $opt->{h};
$opt->{b} ||= 'http://www.perlmonks.org/';
}
sub Get_Listed {
$mech->get( URL( 'Tutorials' ) );
my %link = map { lc $_->url() => undef } $mech->links();
delete $link{'/index.pl?replies=1&node_id=954&displaytype=print'};
for my $url ( keys %link ) {
next if exists $monk->{$url} || $url =~ /^#/;
if ( $url =~ /^http/ ) {
push @{ $listed->{offsite} }, $url;
next;
}
if ( ! Lookup( $url ) ) {
push @{ $listed->{non_tutorials} }, $url if ! Monk( $url )
+;
}
}
}
sub Get_Tutorials {
$mech->get( URL( 'Super Search' ) );
$mech->field( 'xa', '1' ); # Exclude Authors
$mech->field( 'a', 'NodeReaper' ); # Author NodeReaper
$mech->tick( 'Tu', '1' ); # Select Tutorials
$mech->field( 're', 'N' ); # Exclude Replies
$mech->click_button( name => 'go' ); # Submit Form
while ( $mech->content !~ /Finished searching database/ ) {
Process_Table();
$mech->click_button( name => 'nx' );
}
}
sub Lookup {
my $url = shift;
for ( 0 .. $#$tut ) {
next if ! defined $tut->[$_];
if ( $url eq $tut->[$_]{id} || $url eq $tut->[$_]{name} ) {
delete $tut->[$_];
return 1;
}
}
return 0;
}
sub Monk {
$mech->get( $opt->{b} . $_[0] . '&displaytype=xml' );
my $node = XML::Simple->new()->XMLin( $mech->content() );
return 1 if exists $node->{type}{id} && $node->{type}{id} == USER;
return 0;
}
sub Print_Report {
print "\t\t ---- Non-Tutorials linked as Tutorials ----\n";
print "$_\n" for @{ $listed->{non_tutorials} };
print "\t\t --------- Off-Site Tutorials ---------\n";
print "$_\n" for @{ $listed->{offsite} };
print "\t\t ---- Missing Tutorials (not linked) ----\n";
print "$_->{id}\n" for grep defined , @$tut;
}
sub Process_Link {
my ($link, $type) = @_;
my $p = HTML::TokeParser::Simple->new( \$link->{data} );
my ($node, $label);
while ( my $token = $p->get_token ) {
last if $token->is_end_tag;
if ( $token->is_start_tag( 'a' ) ) {
$node = lc $token->return_attr( 'href' );
next;
}
$label = lc URI->new( '/index.pl?node=' . $token->as_is )->as_
+string if $token->is_text;
}
die "Something went terribly wrong" if ! $node || ! $label;
if ( $type eq 'author' ) {
$monk->{ $label } = undef;
$monk->{ $node } = undef;
}
else {
push @{ $tut } , { id => $node, name => $label };
}
}
sub Process_Table {
my $table = HTML::TableContentParser->new()->parse( $mech->content
+() );
for my $row ( @{ $table->[0]{rows} } ) {
Process_Link( $row->{cells}[1], 'author' );
Process_Link( $row->{cells}[2], 'tutorial' );
}
}
sub URL { return URI->new( $opt->{b} . 'index.pl?node=' . $_[0] . '&di
+splaytype=print' )->as_string }
It isn't perfect - or even elegant, but it mostly works. I am figuring it is far enough along that someone who knows what they are doing can run with it. For those of you who are interested, here is a sample report:
---- Non-Tutorials linked as Tutorials ----
/index.pl?node_id=285498
/index.pl?node_id=285799
/index.pl?node_id=150255
/index.pl?node_id=270014
/index.pl?node_id=20443
/index.pl?node=yibc
/index.pl?node_id=269035
/index.pl?node_id=153155
/index.pl?node=control%20statements%20and%20looping
/index.pl?node_id=234012
/index.pl?node_id=180778
/index.pl?node=using%20the%20perl%20debugger
/index.pl?node_id=288022
/index.pl?node_id=284175
/index.pl?node_id=268891
/index.pl?node_id=51012
/index.pl?node_id=227909
/index.pl?node_id=144315
/index.pl?node_id=284436
/index.pl?node_id=50427
/index.pl?node_id=994
/index.pl?node_id=217597
/index.pl?node=the%20basics
/index.pl?node_id=218778
/index.pl?node_id=149675
/index.pl?node_id=128876
/index.pl?node_id=286241
/index.pl?node=amel
/index.pl?node_id=264485
/index.pl?node=blessables%20--%20what%20can%20you%20make%20into%20obje
+cts%3f
/index.pl?node_id=211441
/index.pl?node_id=317520
/index.pl?node_id=238272
/index.pl?node_id=220132
/index.pl?node=the%20tie%28%29s%20that%20bind
/index.pl?node_id=280658
/index.pl?node_id=104432
/index.pl?node=tie%3a%20creating%20special%20objects
/index.pl?node_id=273952
/index.pl?node_id=245725
/index.pl?node_id=216842
/index.pl?node_id=254078
/index.pl?node_id=187449
/index.pl?node_id=284324
/index.pl?node=mp3%20server%20with%20io%3a%3asocket
/index.pl?node_id=287647
/index.pl?node=control%20statement%20exercises
/index.pl?node=some%20things%20that%20will%20make%20your%20life%20easi
+er%20as%20a%20perl%20coder
/index.pl?node_id=133554
/index.pl?node_id=270016
/index.pl?node_id=301355
/index.pl?node_id=279077
/index.pl?node=pattern%20matching%20exercises
/index.pl?node=the%20fine%20art%20of%20database%20programming
/index.pl?node_id=218480
/index.pl?node=using%20%28s%29printf%28%29
/index.pl?node=operators%3a%20arithmetic%20and%20otherwise
/index.pl?node_id=192255
/index.pl?node_id=174082
/index.pl?node_id=277596
/index.pl?node_id=244635
/index.pl?node_id=280025
/index.pl?node_id=128283
/index.pl?node_id=291543
/index.pl?node_id=105446
/index.pl?node_id=213855
/index.pl?node_id=213388
/index.pl?node_id=280461
/index.pl?node_id=193649
/index.pl?node_id=861
/index.pl?node=file%20input%20and%20output%20exercises
--------- Off-Site Tutorials ---------
http://www.stonehenge.com/merlyn/linuxmag/
http://personal.riverusers.com/~swilhelm/gtkperl-tutorial/
http://www.stonehenge.com/merlyn/unixreview/
http://www.perl.com/
http://www.oreilly.com/
http://linux.oreillynet.com/
http://www.onlamp.com/bsd/
http://www.oreilly.com/catalog/cgi2/chapter/ch08.html
http://www.perl.org/cgi_metafaq.html
http://perl.plover.com/questions.html
http://www.catb.org/~esr/faqs/smart-questions.html
http://www.onlamp.com/
http://users.easystreet.com/ovid/cgi_course/
http://www.perl.com/pub/a/2001/08/21/templating.html
http://perl.plover.com/faqs/namespaces.html
http://tachyon.perlmonk.org/tutorials/behind_the_gui_lives_the_shell.h
+tm
http://www.stonehenge.com/merlyn/webtechniques/
---- Missing Tutorials (not linked) ----
/index.pl?node_id=378226
/index.pl?node_id=377450
/index.pl?node_id=374287
/index.pl?node_id=371938
/index.pl?node_id=371720
/index.pl?node_id=355625
/index.pl?node_id=353259
/index.pl?node_id=339131
/index.pl?node_id=324749
/index.pl?node_id=324638
/index.pl?node_id=321831
/index.pl?node_id=291446
/index.pl?node_id=289076
/index.pl?node_id=288217
/index.pl?node_id=284214
/index.pl?node_id=272369
/index.pl?node_id=272366
/index.pl?node_id=269642
/index.pl?node_id=264471
/index.pl?node_id=264333
/index.pl?node_id=253934
/index.pl?node_id=253797
/index.pl?node_id=237388
/index.pl?node_id=221512
/index.pl?node_id=216644
/index.pl?node_id=214293
/index.pl?node_id=213052
/index.pl?node_id=159373
/index.pl?node_id=158625
/index.pl?node_id=136482
/index.pl?node_id=135462
/index.pl?node_id=135323
/index.pl?node_id=123961
/index.pl?node_id=109641
/index.pl?node_id=108182
/index.pl?node_id=105906
/index.pl?node_id=105620
/index.pl?node_id=105041
/index.pl?node_id=101793
/index.pl?node_id=71192
/index.pl?node_id=54485
/index.pl?node_id=20519
/index.pl?node_id=15838
/index.pl?node_id=15301
/index.pl?node_id=8650
/index.pl?node_id=8344
/index.pl?node_id=8259
/index.pl?node_id=990
/index.pl?node_id=965
Update: It has found some interesting things:
- Typos: yibc vs ybic
- Users who have changed names amel vs dsb
- Differences in URL representations (should have used URI more freely)
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.