Beefy Boxes and Bandwidth Generously Provided by pair Networks
XP is just a number
 
PerlMonks  

Re^2: Dates of Monk promotion (covered)

by choroba (Cardinal)
on Jun 17, 2015 at 17:18 UTC ( [id://1130842]=note: print w/replies, xml ) Need Help??


in reply to Re: Dates of Monk promotion (covered)
in thread Dates of Monk promotion

Thanks. That makes my script that extracted the information from the Wayback Machine useless.
#!/usr/bin/perl use warnings; use strict; use Time::Piece; use WWW::Mechanize; use HTML::TableExtract; 3 == @ARGV or die "Please specify start_date (yyyymmdd), user_id and user_nam +e\n"; my ($start_date, $user_id, $user) = @ARGV; my $url_prefix = 'http://web.archive.org/web/~DATE~000000' . '/http://www.perlmonks.org/?node'; my @url_suffixes = ("_id=$user_id", "=$user"); my $t = 'Time::Piece'->strptime($start_date, '%Y%m%d'); my $today = localtime->ymd; my $w = 'WWW::Mechanize'->new; my %output; my %last; @last{ @url_suffixes } = (0) x @url_suffixes; while ($t->ymd le $today) { my $date = $t->ymd(q()); print STDERR "$date\r"; for my $suffix (@url_suffixes) { my $url = $url_prefix . $suffix; $url =~ s/~DATE~/$date/; $w->get($url); my ($real_date) = $w->uri =~ m%/web/([0-9]{8})%; # WBM redirec +ts to future. my $te = 'HTML::TableExtract'->new; $te->parse($w->content); TABLE: for my $table ($te->tables) { for my $row ($table->rows) { if (grep defined && /Experience:/, @$row) { my $xp = 0 + $row->[1]; last TABLE if $xp == $last{$suffix}; $output{$real_date} = $xp; $last{$suffix} = $xp; last TABLE } } } } $t = $t->add_months(1); } for my $date (sort keys %output) { print "$date\t$output{$date}\n"; }

Interestingly, the outputs of the XML and my script are almost the same. Here's how I extracted the information from the XML (specify its filename as a parameter to the following script):

#!/usr/bin/perl use warnings; use strict; use WWW::Mechanize; use HTML::TableExtract; use XML::XSH2; my $w = 'WWW::Mechanize'->new; $w->get('http://www.perlmonks.org/?node=Voting%2FExperience%20System') +; my $te = 'HTML::TableExtract'->new( headers => [qw[ Level XP ]] ); $te->parse($w->content); my $table = ($te->tables)[0]->rows; package XML::XSH2::Map; our $string; package main; xsh << 'end.'; open {$ARGV[0]} ; $string = xsh:subst(normalize-space(//var[@name="levelchange"]), ';', +"\n", 'g') ; end. $string =~ s/^[0-9]+-//gm; $string =~ s/^([0-9]+)(.*)/$1$2 $table->[$1-1][1]/gm; print $string;

You have to insert dashes into the dates to the output of the Wayback Machine to make it work:

perl -pe 's/(....)(..)/$1-$2-/'

I then used gnuplot to compare them:

set term pngcairo size 1024, 800 set xdata time set timefmt "%Y-%m-%d" set format x '%Y/%m' plot 'pm-xp.txt' using 2:4 with lines title 'XP',\ '' using 2:($1*1000) with lines title 'Level',\ 'wayback.txt' using 1:2 with lines title 'Wayback Machine'

Update: The image.

لսႽ† ᥲᥒ⚪⟊Ⴙᘓᖇ Ꮅᘓᖇ⎱ Ⴙᥲ𝇋ƙᘓᖇ

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: note [id://1130842]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others romping around the Monastery: (4)
As of 2024-03-28 20:23 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found